159 files changed, 57545 insertions, 12182 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 0d4265152b9..f4776f8f7df 100755..100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -62,9 +62,12 @@ SET (SQL_SOURCE
                sp_rcontext.cc spatial.cc sql_acl.cc sql_analyse.cc sql_base.cc 
                sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc sql_crypt.h 
                sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc sql_do.cc 
-               sql_error.cc sql_handler.cc sql_help.cc sql_insert.cc sql_lex.cc 
-               sql_list.cc sql_load.cc sql_manager.cc sql_map.cc sql_parse.cc 
-               sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc 
+               sql_error.cc sql_handler.cc sql_help.cc sql_insert.cc
+               sql_lifo_buffer.h
+               sql_join_cache.h sql_join_cache.cc
+               sql_lex.cc sql_list.cc sql_load.cc sql_manager.cc
+               sql_map.cc sql_parse.cc  sql_partition.cc sql_plugin.cc
+               sql_prepare.cc sql_rename.cc 
                debug_sync.cc debug_sync.h
                signal_handler.cc
                sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc 
@@ -78,7 +81,12 @@ SET (SQL_SOURCE
                rpl_rli.cc rpl_mi.cc sql_servers.cc
                sql_connect.cc scheduler.cc 
                sql_profile.cc event_parse_data.cc opt_table_elimination.cc
+               gcalc_slicescan.cc gcalc_tools.cc
+               multi_range_read.cc
+               opt_subselect.cc
+               opt_index_cond_pushdown.cc
                create_options.cc
+               sql_expression_cache.cc
                ${CMAKE_BINARY_DIR}/sql/sql_yacc.cc
                ${CMAKE_BINARY_DIR}/sql/sql_yacc.h
                ${CMAKE_BINARY_DIR}/include/mysqld_error.h
@@ -87,6 +95,7 @@ SET (SQL_SOURCE
                ${CMAKE_BINARY_DIR}/include/mysql_version.h 
                ${CMAKE_BINARY_DIR}/sql/sql_builtin.cc
                ${CMAKE_BINARY_DIR}/sql/lex_hash.h)
+
 ADD_LIBRARY(sql ${SQL_SOURCE})
 
 CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/win/cmake/dummy.in cmake_dummy.cc COPYONLY)
diff --git a/sql/Makefile.am b/sql/Makefile.am
index 1c51bd4d722..875bea0e2a8 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -56,15 +56,18 @@ noinst_HEADERS =	item.h item_func.h item_sum.h item_cmpfunc.h \
 			sql_map.h sql_string.h unireg.h \
 			sql_error.h field.h handler.h mysqld_suffix.h \
 			sql_profile.h \
+			gcalc_slicescan.h gcalc_tools.h \
 			ha_ndbcluster.h ha_ndbcluster_cond.h \
 			ha_ndbcluster_binlog.h ha_ndbcluster_tables.h \
 			ha_partition.h rpl_constants.h \
 			debug_sync.h \
 			opt_range.h protocol.h rpl_tblmap.h rpl_utility.h \
+                        opt_subselect.h \
 			rpl_reporting.h \
 			log.h log_slow.h sql_show.h rpl_rli.h rpl_mi.h \
 			sql_select.h structs.h table.h sql_udf.h hash_filo.h \
 			lex.h lex_symbol.h sql_acl.h sql_crypt.h  \
+                        sql_lifo_buffer.h \
 			sql_repl.h slave.h rpl_filter.h rpl_injector.h \
 			log_event.h rpl_record.h \
 			log_event_old.h rpl_record_old.h \
@@ -79,7 +82,11 @@ noinst_HEADERS =	item.h item_func.h item_sum.h item_cmpfunc.h \
 			event_data_objects.h event_scheduler.h \
 			sql_partition.h partition_info.h partition_element.h \
 			contributors.h sql_servers.h \
-			create_options.h
+                        multi_range_read.h sql_handler.h \
+                        sql_join_cache.h \
+			create_options.h \
+			sql_expression_cache.h \
+			gcalc_slicescan.h gcalc_tools.h plistsort.c
 
 mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			item.cc item_sum.cc item_buff.cc item_func.cc \
@@ -93,8 +100,10 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			mysqld.cc password.c hash_filo.cc hostname.cc \
 			sql_connect.cc scheduler.cc sql_parse.cc \
 			set_var.cc sql_yacc.yy \
+                        sql_join_cache.cc \
 			sql_base.cc table.cc sql_select.cc sql_insert.cc \
 			sql_profile.cc \
+			gcalc_slicescan.cc gcalc_tools.cc \
 			sql_prepare.cc sql_error.cc sql_locale.cc \
 			sql_update.cc sql_delete.cc uniques.cc sql_do.cc \
 			procedure.cc sql_test.cc \
@@ -102,7 +111,8 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			unireg.cc des_key_file.cc \
 			log_event.cc rpl_record.cc \
 			log_event_old.cc rpl_record_old.cc \
-			discover.cc time.cc opt_range.cc opt_sum.cc \
+			discover.cc time.cc opt_range.cc opt_subselect.cc \
+                        opt_sum.cc \
 		   	records.cc filesort.cc handler.cc \
 		        ha_partition.cc \
 			debug_sync.cc \
@@ -126,7 +136,9 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			sql_plugin.cc sql_binlog.cc \
 			sql_builtin.cc sql_tablespace.cc partition_info.cc \
 			sql_servers.cc event_parse_data.cc \
-                        opt_table_elimination.cc create_options.cc
+                        opt_table_elimination.cc create_options.cc \
+			multi_range_read.cc \
+			opt_index_cond_pushdown.cc sql_expression_cache.cc
 
 nodist_mysqld_SOURCES =	mini_client_errors.c pack.c client.c my_time.c my_user.c client_plugin.c
 
@@ -155,7 +167,7 @@ EXTRA_DIST =		udf_example.c udf_example.def $(BUILT_MAINT_SRC) \
 			nt_servc.cc nt_servc.h mysql_install_db.cc mysql_upgrade_service.cc \
 			message.mc  message.h message.rc MSG00001.bin \
 			winservice.c winservice.h \
-			CMakeLists.txt
+			CMakeLists.txt opt_range_mrr.cc
 
 CLEANFILES =        	lex_hash.h sql_yacc.output link_sources
 DISTCLEANFILES =        $(EXTRA_PROGRAMS)
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 74f6256df25..585be68a7f6 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -1078,7 +1078,7 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
       point decremented it to 0. In this case the following happened:
 
       - an error message was reported with my_error() and
-      - the statement was killed with thd->killed= THD::KILL_QUERY.
+      - the statement was killed with thd->killed= KILL_QUERY.
 
       If a statement reports an error, it must not call send_ok().
       The calling functions will not call send_ok(), if we return TRUE
@@ -1852,7 +1852,7 @@ static void debug_sync_execute(THD *thd, st_debug_sync_action *action)
   {
     if (!--action->hit_limit)
     {
-      thd->killed= THD::KILL_QUERY;
+      thd->killed= KILL_QUERY;
       my_error(ER_DEBUG_SYNC_HIT_LIMIT, MYF(0));
     }
     DBUG_PRINT("debug_sync_exec", ("hit_limit: %lu  at: '%s'",
@@ -1942,4 +1942,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
 }
 
 
+#else /* defined(ENABLED_DEBUG_SYNC) */
+/* prevent linker/lib warning about file without public symbols */
+int debug_sync_dummy; 
 #endif /* defined(ENABLED_DEBUG_SYNC) */
diff --git a/sql/derror.cc b/sql/derror.cc
index 9f05c95157b..2aaef0e46d9 100644
--- a/sql/derror.cc
+++ b/sql/derror.cc
@@ -111,7 +111,7 @@ static bool check_error_mesg(const char *file_name, const char **errmsg)
     it means that the error file doesn't contain all MySQL messages
     and is probably from an older version of MySQL / MariaDB.
   */
-  if (errmsg[ER_ERROR_LAST - ER_ERROR_FIRST][0] == 0)
+  if (errmsg[ER_LAST_MYSQL_ERROR_MESSAGE -1 - ER_ERROR_FIRST][0] == 0)
   {
     sql_print_error("Error message file '%s' is probably from and older "
                     "version of MariaDB / MYSQL as it doesn't contain all "
diff --git a/sql/discover.cc b/sql/discover.cc
index 56dc00cc5c4..92af5d56016 100644
--- a/sql/discover.cc
+++ b/sql/discover.cc
@@ -67,7 +67,7 @@ int readfrm(const char *name, uchar **frmdata, size_t *len)
   error= 2;
   if (my_fstat(file, &state, MYF(0)))
     goto err;
-  read_len= state.st_size;  
+  read_len= (size_t)state.st_size;  
 
   // Read whole frm file
   error= 3;
diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc
index e4436afd057..e90e6ec4bf9 100644
--- a/sql/event_data_objects.cc
+++ b/sql/event_data_objects.cc
@@ -466,7 +466,7 @@ Event_queue_element::load_from_row(THD *thd, TABLE *table)
     DBUG_RETURN(TRUE);
 
   starts_null= table->field[ET_FIELD_STARTS]->is_null();
-  my_bool not_used= FALSE;
+  uint not_used;
   if (!starts_null)
   {
     table->field[ET_FIELD_STARTS]->get_date(&time, TIME_NO_ZERO_DATE);
@@ -648,7 +648,7 @@ add_interval(MYSQL_TIME *ltime, const Time_zone *time_zone,
   if (date_add_interval(ltime, scale, interval))
     return 0;
 
-  my_bool not_used;
+  uint not_used;
   return time_zone->TIME_to_gmt_sec(ltime, &not_used);
 }
 
@@ -929,7 +929,7 @@ Event_queue_element::compute_next_execution_time()
     goto ret;
   }
 
-  time_now= (my_time_t) current_thd->query_start();
+  time_now= current_thd->query_start();
 
   DBUG_PRINT("info",("NOW: [%lu]", (ulong) time_now));
 
@@ -1132,7 +1132,7 @@ err:
 void
 Event_queue_element::mark_last_executed(THD *thd)
 {
-  last_executed= (my_time_t) thd->query_start();
+  last_executed= thd->query_start();
   last_executed_changed= TRUE;
 
   execution_count++;
@@ -1192,7 +1192,7 @@ append_datetime(String *buf, Time_zone *time_zone, my_time_t secs,
   */
   MYSQL_TIME time;
   time_zone->gmt_sec_to_TIME(&time, secs);
-  buf->append(dtime_buff, my_datetime_to_str(&time, dtime_buff));
+  buf->append(dtime_buff, my_datetime_to_str(&time, dtime_buff, 0));
   buf->append(STRING_WITH_LEN("'"));
 }
 
diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc
index 960b5933a99..73b8eaf473c 100644
--- a/sql/event_db_repository.cc
+++ b/sql/event_db_repository.cc
@@ -241,6 +241,9 @@ mysql_event_fill_row(THD *thd,
     rs|= fields[ET_FIELD_STATUS]->store((longlong)et->status, TRUE);
   rs|= fields[ET_FIELD_ORIGINATOR]->store((longlong)et->originator, TRUE);
 
+  if (!is_update)
+    rs|= fields[ET_FIELD_CREATED]->set_time();
+
   /*
     Change the SQL_MODE only if body was present in an ALTER EVENT and of course
     always during CREATE EVENT.
@@ -287,7 +290,7 @@ mysql_event_fill_row(THD *thd,
       my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->starts);
 
       fields[ET_FIELD_STARTS]->set_notnull();
-      fields[ET_FIELD_STARTS]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      fields[ET_FIELD_STARTS]->store_time(&time);
     }
 
     if (!et->ends_null)
@@ -296,7 +299,7 @@ mysql_event_fill_row(THD *thd,
       my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->ends);
 
       fields[ET_FIELD_ENDS]->set_notnull();
-      fields[ET_FIELD_ENDS]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      fields[ET_FIELD_ENDS]->store_time(&time);
     }
   }
   else if (et->execute_at)
@@ -315,8 +318,7 @@ mysql_event_fill_row(THD *thd,
     my_tz_OFFSET0->gmt_sec_to_TIME(&time, et->execute_at);
 
     fields[ET_FIELD_EXECUTE_AT]->set_notnull();
-    fields[ET_FIELD_EXECUTE_AT]->
-                        store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    fields[ET_FIELD_EXECUTE_AT]->store_time(&time);
   }
   else
   {
@@ -327,7 +329,7 @@ mysql_event_fill_row(THD *thd,
     */
   }
 
-  ((Field_timestamp *)fields[ET_FIELD_MODIFIED])->set_time();
+  rs|= fields[ET_FIELD_MODIFIED]->set_time();
 
   if (et->comment.str)
   {
@@ -697,8 +699,6 @@ Event_db_repository::create_event(THD *thd, Event_parse_data *parse_data,
     goto end;
   }
 
-  ((Field_timestamp *)table->field[ET_FIELD_CREATED])->set_time();
-
   /*
     mysql_event_fill_row() calls my_error() in case of error so no need to
     handle it here
@@ -917,7 +917,11 @@ Event_db_repository::find_named_event(LEX_STRING db, LEX_STRING name,
     same fields.
   */
   if (db.length > table->field[ET_FIELD_DB]->field_length ||
-      name.length > table->field[ET_FIELD_NAME]->field_length)
+      name.length > table->field[ET_FIELD_NAME]->field_length ||
+      table->s->keys == 0 ||
+      table->key_info[0].key_parts != 2 ||
+      table->key_info[0].key_part[0].fieldnr != ET_FIELD_DB+1 ||
+      table->key_info[0].key_part[1].fieldnr != ET_FIELD_NAME+1)
     DBUG_RETURN(TRUE);
 
   table->field[ET_FIELD_DB]->store(db.str, db.length, &my_charset_bin);
@@ -1104,8 +1108,7 @@ update_timing_fields_for_event(THD *thd,
     my_tz_OFFSET0->gmt_sec_to_TIME(&time, last_executed);
 
     fields[ET_FIELD_LAST_EXECUTED]->set_notnull();
-    fields[ET_FIELD_LAST_EXECUTED]->store_time(&time,
-                                               MYSQL_TIMESTAMP_DATETIME);
+    fields[ET_FIELD_LAST_EXECUTED]->store_time(&time);
   }
   if (update_status)
   {
diff --git a/sql/event_parse_data.cc b/sql/event_parse_data.cc
index 695c6b8650c..5ae6db4ee13 100644
--- a/sql/event_parse_data.cc
+++ b/sql/event_parse_data.cc
@@ -112,7 +112,7 @@ Event_parse_data::init_name(THD *thd, sp_name *spn)
 void
 Event_parse_data::check_if_in_the_past(THD *thd, my_time_t ltime_utc)
 {
-  if (ltime_utc >= (my_time_t) thd->query_start())
+  if (ltime_utc >= thd->query_start())
     return;
 
   /*
@@ -200,7 +200,7 @@ Event_parse_data::check_dates(THD *thd, int previous_on_completion)
 int
 Event_parse_data::init_execute_at(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -217,7 +217,7 @@ Event_parse_data::init_execute_at(THD *thd)
                       (starts_null && ends_null)));
   DBUG_ASSERT(starts_null && ends_null);
 
-  if ((not_used= item_execute_at->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_execute_at->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto wrong_value;
 
   ltime_utc= TIME_to_timestamp(thd,&ltime,&not_used);
@@ -370,7 +370,7 @@ wrong_value:
 int
 Event_parse_data::init_starts(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -381,7 +381,7 @@ Event_parse_data::init_starts(THD *thd)
   if (item_starts->fix_fields(thd, &item_starts))
     goto wrong_value;
 
-  if ((not_used= item_starts->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_starts->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto wrong_value;
 
   ltime_utc= TIME_to_timestamp(thd, &ltime, &not_used);
@@ -424,7 +424,7 @@ wrong_value:
 int
 Event_parse_data::init_ends(THD *thd)
 {
-  my_bool not_used;
+  uint not_used;
   MYSQL_TIME ltime;
   my_time_t ltime_utc;
 
@@ -436,7 +436,7 @@ Event_parse_data::init_ends(THD *thd)
     goto error_bad_params;
 
   DBUG_PRINT("info", ("convert to TIME"));
-  if ((not_used= item_ends->get_date(&ltime, TIME_NO_ZERO_DATE)))
+  if (item_ends->get_date(&ltime, TIME_NO_ZERO_DATE))
     goto error_bad_params;
 
   ltime_utc= TIME_to_timestamp(thd, &ltime, &not_used);
diff --git a/sql/event_queue.cc b/sql/event_queue.cc
index b9585dc9c51..6016048aa9c 100644
--- a/sql/event_queue.cc
+++ b/sql/event_queue.cc
@@ -136,9 +136,9 @@ Event_queue::init_queue(THD *thd)
 
   LOCK_QUEUE_DATA();
 
-  if (init_queue_ex(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
-                    0 /*max_on_top*/, event_queue_element_compare_q,
-                    NULL, EVENT_QUEUE_EXTENT))
+  if (::init_queue(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
+                   0 /*max_on_top*/, event_queue_element_compare_q,
+                   NullS, 0, EVENT_QUEUE_EXTENT))
   {
     sql_print_error("Event Scheduler: Can't initialize the execution queue");
     goto err;
@@ -325,11 +325,13 @@ void
 Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
                            bool (*comparator)(LEX_STRING, Event_basic *))
 {
-  uint i= 0;
+  uint i;
   DBUG_ENTER("Event_queue::drop_matching_events");
   DBUG_PRINT("enter", ("pattern=%s", pattern.str));
 
-  while (i < queue.elements)
+  for (i= queue_first_element(&queue) ;
+       i <= queue_last_element(&queue) ;
+       )
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]?", et->dbname.str, et->name.str));
@@ -339,7 +341,8 @@ Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
         The queue is ordered. If we remove an element, then all elements
         after it will shift one position to the left, if we imagine it as
         an array from left to the right. In this case we should not
-        increment the counter and the (i < queue.elements) condition is ok.
+        increment the counter and the (i <= queue_last_element() condition
+        is ok.
       */
       queue_remove(&queue, i);
       delete et;
@@ -403,7 +406,9 @@ Event_queue::find_n_remove_event(LEX_STRING db, LEX_STRING name)
   uint i;
   DBUG_ENTER("Event_queue::find_n_remove_event");
 
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]==[%s.%s]?", db.str, name.str,
@@ -441,7 +446,9 @@ Event_queue::recalculate_activation_times(THD *thd)
 
   LOCK_QUEUE_DATA();
   DBUG_PRINT("info", ("%u loaded events to be recalculated", queue.elements));
-  for (i= 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     ((Event_queue_element*)queue_element(&queue, i))->compute_next_execution_time();
     ((Event_queue_element*)queue_element(&queue, i))->update_timing_fields(thd);
@@ -454,16 +461,19 @@ Event_queue::recalculate_activation_times(THD *thd)
     have removed all. The queue has been ordered in a way the disabled
     events are at the end.
   */
-  for (i= queue.elements; i > 0; i--)
+  for (i= queue_last_element(&queue);
+       (int) i >= (int) queue_first_element(&queue);
+       i--)
   {
-    Event_queue_element *element = (Event_queue_element*)queue_element(&queue, i - 1);
+    Event_queue_element *element=
+      (Event_queue_element*)queue_element(&queue, i);
     if (element->status != Event_parse_data::DISABLED)
       break;
     /*
       This won't cause queue re-order, because we remove
       always the last element.
     */
-    queue_remove(&queue, i - 1);
+    queue_remove(&queue, i);
     delete element;
   }
   UNLOCK_QUEUE_DATA();
@@ -499,7 +509,9 @@ Event_queue::empty_queue()
   sql_print_information("Event Scheduler: Purging the queue. %u events",
                         queue.elements);
   /* empty the queue */
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     delete et;
@@ -518,14 +530,17 @@ Event_queue::empty_queue()
 */
 
 void
-Event_queue::dbug_dump_queue(time_t now)
+Event_queue::dbug_dump_queue(my_time_t when)
 {
 #ifndef DBUG_OFF
+  my_time_t now= when;
   Event_queue_element *et;
   uint i;
   DBUG_ENTER("Event_queue::dbug_dump_queue");
   DBUG_PRINT("info", ("Dumping queue . Elements=%u", queue.elements));
-  for (i = 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     et= ((Event_queue_element*)queue_element(&queue, i));
     DBUG_PRINT("info", ("et: 0x%lx  name: %s.%s", (long) et,
@@ -592,7 +607,7 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
       continue;
     }
 
-    top= ((Event_queue_element*) queue_element(&queue, 0));
+    top= (Event_queue_element*) queue_top(&queue);
 
     thd->set_current_time(); /* Get current time */
 
@@ -604,9 +619,9 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
         time or until signaled. Release LOCK_queue while waiting.
       */
       struct timespec top_time;
-      set_timespec(top_time, next_activation_at - thd->query_start());
-      cond_wait(thd, &top_time, queue_wait_msg, SCHED_FUNC, __LINE__);
+      set_timespec_time_nsec(top_time, next_activation_at*1000000000ULL);
 
+      cond_wait(thd, &top_time, queue_wait_msg, SCHED_FUNC, __LINE__);
       continue;
     }
 
@@ -634,10 +649,10 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
                             top->dbname.str, top->name.str,
                             top->dropped? "Dropping.":"");
       delete top;
-      queue_remove(&queue, 0);
+      queue_remove_top(&queue);
     }
     else
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
 
     dbug_dump_queue(thd->query_start());
     break;
diff --git a/sql/event_queue.h b/sql/event_queue.h
index 2870ecb4d0b..e1f814adf20 100644
--- a/sql/event_queue.h
+++ b/sql/event_queue.h
@@ -98,7 +98,7 @@ private:
 
 
   void
-  dbug_dump_queue(time_t now);
+  dbug_dump_queue(my_time_t now);
 
   /* LOCK_event_queue is the mutex which protects the access to the queue. */
   pthread_mutex_t LOCK_event_queue;
diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc
index 7704ed3528d..ad21e26dd2e 100755..100644
--- a/sql/event_scheduler.cc
+++ b/sql/event_scheduler.cc
@@ -645,7 +645,7 @@ Event_scheduler::stop()
     sql_print_information("Event Scheduler: Killing the scheduler thread, "
                           "thread id %lu",
                           scheduler_thd->thread_id);
-    scheduler_thd->awake(THD::KILL_CONNECTION);
+    scheduler_thd->awake(KILL_CONNECTION);
     pthread_mutex_unlock(&scheduler_thd->LOCK_thd_data);
 
     /* thd could be 0x0, when shutting down */
diff --git a/sql/field.cc b/sql/field.cc
index 2a52963afa9..aeaf1a7a21e 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -49,6 +49,17 @@ template class List<Create_field>;
 template class List_iterator<Create_field>;
 #endif
 
+static const char *zero_timestamp="0000-00-00 00:00:00.000000";
+
+/* number of bytes to store second_part part of the TIMESTAMP(N) */
+static uint sec_part_bytes[MAX_DATETIME_PRECISION+1]= { 0, 1, 1, 2, 2, 3, 3 };
+
+/* number of bytes to store DATETIME(N) */
+static uint datetime_hires_bytes[MAX_DATETIME_PRECISION+1]= { 5, 6, 6, 7, 7, 7, 8 };
+
+/* number of bytes to store TIME(N) */
+static uint time_hires_bytes[MAX_DATETIME_PRECISION+1]= { 3, 4, 4, 5, 5, 5, 6 };
+
 uchar Field_null::null[1]={1};
 const char field_separator=',';
 
@@ -71,7 +82,7 @@ const char field_separator=',';
 #define FIELDTYPE_TEAR_FROM (MYSQL_TYPE_BIT + 1)
 #define FIELDTYPE_TEAR_TO   (MYSQL_TYPE_NEWDECIMAL - 1)
 #define FIELDTYPE_NUM (FIELDTYPE_TEAR_FROM + (255 - FIELDTYPE_TEAR_TO))
-inline int field_type2index (enum_field_types field_type)
+static inline int field_type2index (enum_field_types field_type)
 {
   return (field_type < FIELDTYPE_TEAR_FROM ?
           field_type :
@@ -1410,13 +1421,6 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs,
    should be overridden. The other functions are just convenience
    functions and hence should not be overridden.
 
-   The value of <code>low_byte_first</code> is dependent on how the
-   packed data is going to be used: for local use, e.g., temporary
-   store on disk or in memory, use the native format since that is
-   faster. For data that is going to be transfered to other machines
-   (e.g., when writing data to the binary log), data should always be
-   stored in little-endian format.
-
    @note The default method for packing fields just copy the raw bytes
    of the record into the destination, but never more than
    <code>max_length</code> characters.
@@ -1434,15 +1438,9 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs,
    is 1000. This information is sometimes needed to decide how to pack
    the data.
 
-   @param low_byte_first
-   @c TRUE if integers should be stored little-endian, @c FALSE if
-   native format should be used. Note that for little-endian machines,
-   the value of this flag is a moot point since the native format is
-   little-endian.
 */
 uchar *
-Field::pack(uchar *to, const uchar *from, uint max_length,
-            bool low_byte_first __attribute__((unused)))
+Field::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint32 length= pack_length();
   set_if_smaller(length, max_length);
@@ -1473,18 +1471,14 @@ Field::pack(uchar *to, const uchar *from, uint max_length,
    @param   param_data Real type and original pack length of the field
                        data
 
-   @param low_byte_first
-   If this flag is @c true, all composite entities (e.g., lengths)
-   should be unpacked in little-endian format; otherwise, the entities
-   are unpacked in native order.
-
    @return  New pointer into memory based on from + length of the data
+   @return  0 if wrong data
 */
 const uchar *
-Field::unpack(uchar* to, const uchar *from, uint param_data,
-              bool low_byte_first __attribute__((unused)))
+Field::unpack(uchar* to, const uchar *from, const uchar *from_end,
+              uint param_data)
 {
-  uint length=pack_length();
+  uint length=pack_length(), len;
   int from_type= 0;
   /*
     If from length is > 255, it has encoded data in the upper bits. Need
@@ -1500,14 +1494,19 @@ Field::unpack(uchar* to, const uchar *from, uint param_data,
       (length == param_data) ||
       (from_type != real_type()))
   {
+    if (from + length > from_end)
+      return 0;                                 // Error in data
+
     memcpy(to, from, length);
     return from+length;
   }
 
-  uint len= (param_data && (param_data < length)) ?
-            param_data : length;
+  len= (param_data && (param_data < length)) ? param_data : length;
+
+  if (from + len > from_end)
+    return 0;                                   // Error in data
 
-  memcpy(to, from, param_data > length ? length : len);
+  memcpy(to, from, len);
   return from+len;
 }
 
@@ -1580,17 +1579,19 @@ longlong Field::convert_decimal2longlong(const my_decimal *val,
       i= 0;
       *err= 1;
     }
-    else if (warn_if_overflow(my_decimal2int(E_DEC_ERROR &
-                                           ~E_DEC_OVERFLOW & ~E_DEC_TRUNCATED,
-                                           val, TRUE, &i)))
+    else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR &
+                                              ~E_DEC_OVERFLOW &
+                                              ~E_DEC_TRUNCATED),
+                                             val, TRUE, &i)))
     {
       i= ~(longlong) 0;
       *err= 1;
     }
   }
-  else if (warn_if_overflow(my_decimal2int(E_DEC_ERROR &
-                                         ~E_DEC_OVERFLOW & ~E_DEC_TRUNCATED,
-                                         val, FALSE, &i)))
+  else if (warn_if_overflow(my_decimal2int((E_DEC_ERROR &
+                                            ~E_DEC_OVERFLOW &
+                                            ~E_DEC_TRUNCATED),
+                                           val, FALSE, &i)))
   {
     i= (val->sign() ? LONGLONG_MIN : LONGLONG_MAX);
     *err= 1;
@@ -1708,8 +1709,8 @@ my_decimal *Field_str::val_decimal(my_decimal *decimal_value)
 uint Field::fill_cache_field(CACHE_FIELD *copy)
 {
   uint store_length;
-  copy->str=ptr;
-  copy->length=pack_length();
+  copy->str= ptr;
+  copy->length= pack_length();
   copy->field= this;
   if (flags & BLOB_FLAG)
   {
@@ -1721,9 +1722,15 @@ uint Field::fill_cache_field(CACHE_FIELD *copy)
            (type() == MYSQL_TYPE_STRING && copy->length >= 4 &&
             copy->length < 256))
   {
-    copy->type= CACHE_STRIPPED;
+    copy->type= CACHE_STRIPPED;			    /* Remove end space */
     store_length= 2;
   }
+  else if (type() ==  MYSQL_TYPE_VARCHAR)
+  {
+    copy->type= pack_length()-row_pack_length() == 1 ? CACHE_VARSTR1:
+                                                      CACHE_VARSTR2;
+    store_length= 0;
+  }
   else
   {
     copy->type= 0;
@@ -1744,16 +1751,6 @@ bool Field::get_date(MYSQL_TIME *ltime,uint fuzzydate)
   return 0;
 }
 
-bool Field::get_time(MYSQL_TIME *ltime)
-{
-  char buff[40];
-  String tmp(buff,sizeof(buff),&my_charset_bin),*res;
-  if (!(res=val_str(&tmp)) ||
-      str_to_time_with_warn(res->ptr(), res->length(), ltime))
-    return 1;
-  return 0;
-}
-
 /**
   This is called when storing a date in a string.
 
@@ -1761,11 +1758,11 @@ bool Field::get_time(MYSQL_TIME *ltime)
     Needs to be changed if/when we want to support different time formats.
 */
 
-int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
+int Field::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[MAX_DATE_STRING_REP_LENGTH];
-  uint length= (uint) my_TIME_to_str(ltime, buff);
+  uint length= (uint) my_TIME_to_str(ltime, buff, dec);
   return store(buff, length, &my_charset_bin);
 }
 
@@ -2755,10 +2752,10 @@ int Field_new_decimal::store_decimal(const my_decimal *decimal_value)
 }
 
 
-int Field_new_decimal::store_time(MYSQL_TIME *ltime, timestamp_type t_type)
+int Field_new_decimal::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
-    my_decimal decimal_value;
-    return store_value(date2my_decimal(ltime, &decimal_value));
+  my_decimal decimal_value;
+  return store_value(date2my_decimal(ltime, &decimal_value));
 }
 
 
@@ -2926,13 +2923,11 @@ uint Field_new_decimal::is_equal(Create_field *new_field)
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_new_decimal::unpack(uchar* to,
-                          const uchar *from,
-                          uint param_data,
-                          bool low_byte_first)
+Field_new_decimal::unpack(uchar* to, const uchar *from, const uchar *from_end,
+                          uint param_data)
 {
   if (param_data == 0)
-    return Field::unpack(to, from, param_data, low_byte_first);
+    return Field::unpack(to, from, from_end, param_data);
 
   uint from_precision= (param_data & 0xff00) >> 8U;
   uint from_decimal= param_data & 0x00ff;
@@ -2962,10 +2957,23 @@ Field_new_decimal::unpack(uchar* to,
     decimal2bin(&dec_val, to, precision, decimals());
   }
   else
+  {
+    if (from + len > from_end)
+      return 0;                                 // Wrong data
     memcpy(to, from, len); // Sizes are the same, just copy the data.
+  }
   return from+len;
 }
 
+int Field_num::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  longlong v= TIME_to_ulonglong(ltime);
+  if (ltime->neg == 0)
+    return store(v, true);
+  return store(-v, false);
+}
+
+
 /****************************************************************************
 ** tiny int
 ****************************************************************************/
@@ -3154,14 +3162,7 @@ int Field_short::store(const char *from,uint len,CHARSET_INFO *cs)
   
   error= get_int(cs, from, len, &rnd, UINT_MAX16, INT_MIN16, INT_MAX16);
   store_tmp= unsigned_flag ? (int) (ulonglong) rnd : (int) rnd;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr, store_tmp);
-  }
-  else
-#endif
-    shortstore(ptr, (short) store_tmp);
+  int2store(ptr, store_tmp);
   return error;
 }
 
@@ -3206,14 +3207,7 @@ int Field_short::store(double nr)
     else
       res=(int16) (int) nr;
   }
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,res);
-  }
-  else
-#endif
-    shortstore(ptr,res);
+  int2store(ptr,res);
   return error;
 }
 
@@ -3261,14 +3255,7 @@ int Field_short::store(longlong nr, bool unsigned_val)
     else
       res=(int16) nr;
   }
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,res);
-  }
-  else
-#endif
-    shortstore(ptr,res);
+  int2store(ptr,res);
   return error;
 }
 
@@ -3277,12 +3264,7 @@ double Field_short::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
   return unsigned_flag ? (double) (unsigned short) j : (double) j;
 }
 
@@ -3290,12 +3272,7 @@ longlong Field_short::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
   return unsigned_flag ? (longlong) (unsigned short) j : (longlong) j;
 }
 
@@ -3310,12 +3287,7 @@ String *Field_short::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   short j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint2korr(ptr);
-  else
-#endif
-    shortget(j,ptr);
+  j=sint2korr(ptr);
 
   if (unsigned_flag)
     length=(uint) cs->cset->long10_to_str(cs, to, mlength, 10, 
@@ -3338,18 +3310,8 @@ bool Field_short::send_binary(Protocol *protocol)
 int Field_short::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   short a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint2korr(a_ptr);
-    b=sint2korr(b_ptr);
-  }
-  else
-#endif
-  {
-    shortget(a,a_ptr);
-    shortget(b,b_ptr);
-  }
+  a=sint2korr(a_ptr);
+  b=sint2korr(b_ptr);
 
   if (unsigned_flag)
     return ((unsigned short) a < (unsigned short) b) ? -1 :
@@ -3359,24 +3321,11 @@ int Field_short::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_short::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[1];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[1];
-    else
-      to[0] = (char) (ptr[1] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[0];
-  }
+    to[0] = (char) (ptr[1] ^ 128);              /* Revers signbit */
+  to[1]   = ptr[0];
 }
 
 void Field_short::sql_type(String &res) const
@@ -3591,14 +3540,7 @@ int Field_long::store(const char *from,uint len,CHARSET_INFO *cs)
   
   error= get_int(cs, from, len, &rnd, UINT_MAX32, INT_MIN32, INT_MAX32);
   store_tmp= unsigned_flag ? (long) (ulonglong) rnd : (long) rnd;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr, store_tmp);
-  }
-  else
-#endif
-    longstore(ptr, store_tmp);
+  int4store(ptr, store_tmp);
   return error;
 }
 
@@ -3643,14 +3585,7 @@ int Field_long::store(double nr)
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,res);
-  }
-  else
-#endif
-    longstore(ptr,res);
+  int4store(ptr,res);
   return error;
 }
 
@@ -3696,14 +3631,7 @@ int Field_long::store(longlong nr, bool unsigned_val)
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,res);
-  }
-  else
-#endif
-    longstore(ptr,res);
+  int4store(ptr,res);
   return error;
 }
 
@@ -3712,12 +3640,7 @@ double Field_long::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return unsigned_flag ? (double) (uint32) j : (double) j;
 }
 
@@ -3727,12 +3650,7 @@ longlong Field_long::val_int(void)
   int32 j;
   /* See the comment in Field_long::store(long long) */
   DBUG_ASSERT(table->in_use == current_thd);
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return unsigned_flag ? (longlong) (uint32) j : (longlong) j;
 }
 
@@ -3746,12 +3664,7 @@ String *Field_long::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
 
   if (unsigned_flag)
     length=cs->cset->long10_to_str(cs,to,mlength, 10,(long) (uint32)j);
@@ -3773,18 +3686,8 @@ bool Field_long::send_binary(Protocol *protocol)
 int Field_long::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longget(a,a_ptr);
-    longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   if (unsigned_flag)
     return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
   return (a < b) ? -1 : (a > b) ? 1 : 0;
@@ -3792,28 +3695,13 @@ int Field_long::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_long::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-    to[2]   = ptr[2];
-    to[3]   = ptr[3];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[3];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[3];
-    else
-      to[0] = (char) (ptr[3] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[2];
-    to[2]   = ptr[1];
-    to[3]   = ptr[0];
-  }
+    to[0] = (char) (ptr[3] ^ 128);              /* Revers signbit */
+  to[1]   = ptr[2];
+  to[2]   = ptr[1];
+  to[3]   = ptr[0];
 }
 
 
@@ -3847,14 +3735,7 @@ int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
     error= 1;
   else
     error= 0;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
+  int8store(ptr,tmp);
   return error;
 }
 
@@ -3862,51 +3743,15 @@ int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
 int Field_longlong::store(double nr)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  int error= 0;
+  bool error;
   longlong res;
 
-  nr= rint(nr);
-  if (unsigned_flag)
-  {
-    if (nr < 0)
-    {
-      res=0;
-      error= 1;
-    }
-    else if (nr >= (double) ULONGLONG_MAX)
-    {
-      res= ~(longlong) 0;
-      error= 1;
-    }
-    else
-      res=(longlong) double2ulonglong(nr);
-  }
-  else
-  {
-    if (nr <= (double) LONGLONG_MIN)
-    {
-      res= LONGLONG_MIN;
-      error= (nr < (double) LONGLONG_MIN);
-    }
-    else if (nr >= (double) (ulonglong) LONGLONG_MAX)
-    {
-      res= LONGLONG_MAX;
-      error= (nr > (double) LONGLONG_MAX);
-    }
-    else
-      res=(longlong) nr;
-  }
+  res= double_to_longlong(nr, unsigned_flag, &error);
+
   if (error)
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,res);
-  }
-  else
-#endif
-    longlongstore(ptr,res);
+  int8store(ptr,res);
   return error;
 }
 
@@ -3930,14 +3775,7 @@ int Field_longlong::store(longlong nr, bool unsigned_val)
     }
   }
 
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,nr);
-  }
-  else
-#endif
-    longlongstore(ptr,nr);
+  int8store(ptr,nr);
   return error;
 }
 
@@ -3946,14 +3784,7 @@ double Field_longlong::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    j=sint8korr(ptr);
-  }
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   /* The following is open coded to avoid a bug in gcc 3.3 */
   if (unsigned_flag)
   {
@@ -3968,12 +3799,7 @@ longlong Field_longlong::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   return j;
 }
 
@@ -3987,12 +3813,7 @@ String *Field_longlong::val_str(String *val_buffer,
   val_buffer->alloc(mlength);
   char *to=(char*) val_buffer->ptr();
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
 
   length=(uint) (cs->cset->longlong10_to_str)(cs,to,mlength,
 					unsigned_flag ? 10 : -10, j);
@@ -4013,18 +3834,8 @@ bool Field_longlong::send_binary(Protocol *protocol)
 int Field_longlong::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   longlong a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    a=sint8korr(a_ptr);
-    b=sint8korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longlongget(a,a_ptr);
-    longlongget(b,b_ptr);
-  }
+  a=sint8korr(a_ptr);
+  b=sint8korr(b_ptr);
   if (unsigned_flag)
     return ((ulonglong) a < (ulonglong) b) ? -1 :
     ((ulonglong) a > (ulonglong) b) ? 1 : 0;
@@ -4033,36 +3844,17 @@ int Field_longlong::cmp(const uchar *a_ptr, const uchar *b_ptr)
 
 void Field_longlong::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table->s->db_low_byte_first)
-  {
-    if (unsigned_flag)
-      to[0] = ptr[0];
-    else
-      to[0] = (char) (ptr[0] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[1];
-    to[2]   = ptr[2];
-    to[3]   = ptr[3];
-    to[4]   = ptr[4];
-    to[5]   = ptr[5];
-    to[6]   = ptr[6];
-    to[7]   = ptr[7];
-  }
+  if (unsigned_flag)
+    to[0] = ptr[7];
   else
-#endif
-  {
-    if (unsigned_flag)
-      to[0] = ptr[7];
-    else
-      to[0] = (char) (ptr[7] ^ 128);		/* Revers signbit */
-    to[1]   = ptr[6];
-    to[2]   = ptr[5];
-    to[3]   = ptr[4];
-    to[4]   = ptr[3];
-    to[5]   = ptr[2];
-    to[6]   = ptr[1];
-    to[7]   = ptr[0];
-  }
+    to[0] = (char) (ptr[7] ^ 128);		/* Revers signbit */
+  to[1]   = ptr[6];
+  to[2]   = ptr[5];
+  to[3]   = ptr[4];
+  to[4]   = ptr[3];
+  to[5]   = ptr[2];
+  to[6]   = ptr[1];
+  to[7]   = ptr[0];
 }
 
 
@@ -4079,43 +3871,6 @@ void Field_longlong::sql_type(String &res) const
   Floating-point numbers
  */
 
-uchar *
-Field_real::pack(uchar *to, const uchar *from,
-                 uint max_length, bool low_byte_first)
-{
-  DBUG_ENTER("Field_real::pack");
-  DBUG_ASSERT(max_length >= pack_length());
-#ifdef WORDS_BIGENDIAN
-  if (low_byte_first != table->s->db_low_byte_first)
-  {
-    const uchar *dptr= from + pack_length();
-    while (dptr-- > from)
-      *to++ = *dptr;
-    DBUG_RETURN(to);
-  }
-  else
-#endif
-    DBUG_RETURN(Field::pack(to, from, max_length, low_byte_first));
-}
-
-const uchar *
-Field_real::unpack(uchar *to, const uchar *from,
-                   uint param_data, bool low_byte_first)
-{
-  DBUG_ENTER("Field_real::unpack");
-#ifdef WORDS_BIGENDIAN
-  if (low_byte_first != table->s->db_low_byte_first)
-  {
-    const uchar *dptr= from + pack_length();
-    while (dptr-- > from)
-      *to++ = *dptr;
-    DBUG_RETURN(from + pack_length());
-  }
-  else
-#endif
-    DBUG_RETURN(Field::unpack(to, from, param_data, low_byte_first));
-}
-
 /****************************************************************************
   single precision float
 ****************************************************************************/
@@ -4140,17 +3895,21 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
 int Field_float::store(double nr)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  int error= truncate(&nr, FLT_MAX);
-  float j= (float)nr;
-
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
+  int error= truncate_double(&nr, field_length,
+                             not_fixed ? NOT_FIXED_DEC : dec,
+                             unsigned_flag, FLT_MAX);
+  if (error)
   {
-    float4store(ptr,j);
+    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    if (error < 0)                                // Wrong double value
+    {
+      error= 1;
+      set_null();
+    }
   }
-  else
-#endif
-    memcpy_fixed(ptr,(uchar*) &j,sizeof(j));
+  float j= (float)nr;
+
+  float4store(ptr,j);
   return error;
 }
 
@@ -4166,28 +3925,14 @@ double Field_float::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   float j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(j,ptr);
-  }
-  else
-#endif
-    memcpy_fixed((uchar*) &j,ptr,sizeof(j));
+  float4get(j,ptr);
   return ((double) j);
 }
 
 longlong Field_float::val_int(void)
 {
   float j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(j,ptr);
-  }
-  else
-#endif
-    memcpy_fixed((uchar*) &j,ptr,sizeof(j));
+  float4get(j,ptr);
   return (longlong) rint(j);
 }
 
@@ -4197,14 +3942,7 @@ String *Field_float::val_str(String *val_buffer,
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   float nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(nr,ptr);
-  }
-  else
-#endif
-    memcpy_fixed((uchar*) &nr,ptr,sizeof(nr));
+  float4get(nr,ptr);
 
   uint to_length=max(field_length,70);
   val_buffer->alloc(to_length);
@@ -4279,18 +4017,8 @@ String *Field_float::val_str(String *val_buffer,
 int Field_float::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   float a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(a,a_ptr);
-    float4get(b,b_ptr);
-  }
-  else
-#endif
-  {
-    memcpy_fixed(&a,a_ptr,sizeof(float));
-    memcpy_fixed(&b,b_ptr,sizeof(float));
-  }
+  float4get(a,a_ptr);
+  float4get(b,b_ptr);
   return (a < b) ? -1 : (a > b) ? 1 : 0;
 }
 
@@ -4299,14 +4027,7 @@ int Field_float::cmp(const uchar *a_ptr, const uchar *b_ptr)
 void Field_float::sort_string(uchar *to,uint length __attribute__((unused)))
 {
   float nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float4get(nr,ptr);
-  }
-  else
-#endif
-    memcpy_fixed(&nr,ptr,sizeof(float));
+  float4get(nr,ptr);
 
   uchar *tmp= to;
   if (nr == (float) 0.0)
@@ -4402,16 +4123,20 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
 int Field_double::store(double nr)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  int error= truncate(&nr, DBL_MAX);
-
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
+  int error= truncate_double(&nr, field_length,
+                             not_fixed ? NOT_FIXED_DEC : dec,
+                             unsigned_flag, DBL_MAX);
+  if (error)
   {
-    float8store(ptr,nr);
+    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    if (error < 0)                                // Wrong double value
+    {
+      error= 1;
+      set_null();
+    }
   }
-  else
-#endif
-    doublestore(ptr,nr);
+
+  float8store(ptr,nr);
   return error;
 }
 
@@ -4426,28 +4151,31 @@ int Field_double::store(longlong nr, bool unsigned_val)
   If a field has fixed length, truncate the double argument pointed to by 'nr'
   appropriately.
   Also ensure that the argument is within [-max_value; max_value] range.
+
+  return
+    0   ok
+    -1  Illegal double value
+    1   Value was truncated
 */
 
-int Field_real::truncate(double *nr, double max_value)
+int truncate_double(double *nr, uint field_length, uint dec,
+                    bool unsigned_flag, double max_value)
 {
-  int error= 1;
+  int error= 0;
   double res= *nr;
   
   if (isnan(res))
   {
-    res= 0;
-    set_null();
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
-    goto end;
+    *nr= 0;
+    return -1;
   }
   else if (unsigned_flag && res < 0)
   {
-    res= 0;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
-    goto end;
+    *nr= 0;
+    return 1;
   }
 
-  if (!not_fixed)
+  if (dec < NOT_FIXED_DEC)
   {
     uint order= field_length - dec;
     uint step= array_elements(log_10) - 1;
@@ -4463,22 +4191,70 @@ int Field_real::truncate(double *nr, double max_value)
   
   if (res < -max_value)
   {
-   res= -max_value;
-   set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    res= -max_value;
+    error= 1;
   }
   else if (res > max_value)
   {
     res= max_value;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
+    error= 1;
   }
-  else
-    error= 0;
 
-end:
   *nr= res;
   return error;
 }
 
+/*
+  Convert double to longlong / ulonglong.
+  If double is outside of range, adjust return value and set error.
+
+  SYNOPSIS
+  double_to_longlong()
+  nr	  	 Number to convert
+  unsigned_flag  1 if result is unsigned
+  error		 Will be set to 1 in case of overflow.
+*/
+
+longlong double_to_longlong(double nr, bool unsigned_flag, bool *error)
+{
+  longlong res;
+
+  *error= 0;
+
+  nr= rint(nr);
+  if (unsigned_flag)
+  {
+    if (nr < 0)
+    {
+      res= 0;
+      *error= 1;
+    }
+    else if (nr >= (double) ULONGLONG_MAX)
+    {
+      res= ~(longlong) 0;
+      *error= 1;
+    }
+    else
+      res= (longlong) double2ulonglong(nr);
+  }
+  else
+  {
+    if (nr <= (double) LONGLONG_MIN)
+    {
+      res= LONGLONG_MIN;
+      *error= (nr < (double) LONGLONG_MIN);
+    }
+    else if (nr >= (double) (ulonglong) LONGLONG_MAX)
+    {
+      res= LONGLONG_MAX;
+      *error= (nr > (double) LONGLONG_MAX);
+    }
+    else
+      res= (longlong) nr;
+  }
+  return res;
+}
+
 
 int Field_real::store_decimal(const my_decimal *dm)
 {
@@ -4487,18 +4263,17 @@ int Field_real::store_decimal(const my_decimal *dm)
   return store(dbl);
 }
 
+int Field_real::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  return store(TIME_to_double(ltime));
+}
+
+
 double Field_double::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   double j;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(j,ptr);
-  }
-  else
-#endif
-    doubleget(j,ptr);
+  float8get(j,ptr);
   return j;
 }
 
@@ -4507,28 +4282,11 @@ longlong Field_double::val_int(void)
   ASSERT_COLUMN_MARKED_FOR_READ;
   double j;
   longlong res;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(j,ptr);
-  }
-  else
-#endif
-    doubleget(j,ptr);
-  /* Check whether we fit into longlong range */
-  if (j <= (double) LONGLONG_MIN)
-  {
-    res= (longlong) LONGLONG_MIN;
-    goto warn;
-  }
-  if (j >= (double) (ulonglong) LONGLONG_MAX)
-  {
-    res= (longlong) LONGLONG_MAX;
-    goto warn;
-  }
-  return (longlong) rint(j);
+  bool error;
+  float8get(j,ptr);
 
-warn:
+  res= double_to_longlong(j, 0, &error);
+  if (error)
   {
     char buf[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE];
     String tmp(buf, sizeof(buf), &my_charset_latin1), *str;
@@ -4550,19 +4308,20 @@ my_decimal *Field_real::val_decimal(my_decimal *decimal_value)
 }
 
 
+bool Field_real::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  double nr= val_real();
+  return double_to_datetime_with_warn(nr, ltime, fuzzydate, field_name);
+}
+
+
 String *Field_double::val_str(String *val_buffer,
 			      String *val_ptr __attribute__((unused)))
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   double nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(nr,ptr);
-  }
-  else
-#endif
-    doubleget(nr,ptr);
+  float8get(nr,ptr);
 
   uint to_length= DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE;
   val_buffer->alloc(to_length);
@@ -4643,18 +4402,8 @@ bool Field_double::send_binary(Protocol *protocol)
 int Field_double::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   double a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(a,a_ptr);
-    float8get(b,b_ptr);
-  }
-  else
-#endif
-  {
-    doubleget(a, a_ptr);
-    doubleget(b, b_ptr);
-  }
+  float8get(a,a_ptr);
+  float8get(b,b_ptr);
   return (a < b) ? -1 : (a > b) ? 1 : 0;
 }
 
@@ -4666,14 +4415,7 @@ int Field_double::cmp(const uchar *a_ptr, const uchar *b_ptr)
 void Field_double::sort_string(uchar *to,uint length __attribute__((unused)))
 {
   double nr;
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    float8get(nr,ptr);
-  }
-  else
-#endif
-    doubleget(nr,ptr);
+  float8get(nr,ptr);
   change_double_for_sort(nr, to);
 }
 
@@ -4761,12 +4503,12 @@ Field_timestamp::Field_timestamp(uchar *ptr_arg, uint32 len_arg,
 				 const char *field_name_arg,
 				 TABLE_SHARE *share,
 				 CHARSET_INFO *cs)
-  :Field_str(ptr_arg, MAX_DATETIME_WIDTH, null_ptr_arg, null_bit_arg,
+  :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg,
 	     unireg_check_arg, field_name_arg, cs)
 {
   /* For 4.0 MYD and 4.0 InnoDB compatibility */
-  flags|= ZEROFILL_FLAG | UNSIGNED_FLAG;
-  if (!share->timestamp_field && unireg_check != NONE)
+  flags|= UNSIGNED_FLAG;
+  if (unireg_check != NONE && !share->timestamp_field)
   {
     /* This timestamp has auto-update */
     share->timestamp_field= this;
@@ -4777,20 +4519,6 @@ Field_timestamp::Field_timestamp(uchar *ptr_arg, uint32 len_arg,
 }
 
 
-Field_timestamp::Field_timestamp(bool maybe_null_arg,
-                                 const char *field_name_arg,
-                                 CHARSET_INFO *cs)
-  :Field_str((uchar*) 0, MAX_DATETIME_WIDTH,
-             maybe_null_arg ? (uchar*) "": 0, 0,
-	     NONE, field_name_arg, cs)
-{
-  /* For 4.0 MYD and 4.0 InnoDB compatibility */
-  flags|= ZEROFILL_FLAG | UNSIGNED_FLAG;
-    if (unireg_check != TIMESTAMP_DN_FIELD)
-      flags|= ON_UPDATE_NOW_FLAG;
-}
-
-
 /**
   Get auto-set type for TIMESTAMP field.
 
@@ -4825,136 +4553,135 @@ timestamp_auto_set_type Field_timestamp::get_auto_set_type() const
   }
 }
 
+my_time_t Field_timestamp::get_timestamp(ulong *sec_part) const
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  *sec_part= 0;
+  return sint4korr(ptr);
+}
 
-int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs)
+int Field_timestamp::store_TIME_with_warning(THD *thd, MYSQL_TIME *l_time,
+                                             const Lazy_string *str,
+                                             bool was_cut,
+                                             bool have_smth_to_conv)
 {
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME l_time;
-  my_time_t tmp= 0;
-  int error;
-  bool have_smth_to_conv;
-  my_bool in_dst_time_gap;
-  THD *thd= table ? table->in_use : current_thd;
-
-  /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
-  have_smth_to_conv= (str_to_datetime(from, len, &l_time,
-                                      (thd->variables.sql_mode &
-                                       MODE_NO_ZERO_DATE) |
-                                      MODE_NO_ZERO_IN_DATE, &error) >
-                      MYSQL_TIMESTAMP_ERROR);
+  uint error = 0;
+  my_time_t timestamp;
 
-  if (error || !have_smth_to_conv)
+  if (was_cut || !have_smth_to_conv)
   {
     error= 1;
     set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATETIME, 1);
+                         str, MYSQL_TIMESTAMP_DATETIME, 1);
   }
-
   /* Only convert a correct date (not a zero date) */
-  if (have_smth_to_conv && l_time.month)
+  if (have_smth_to_conv && l_time->month)
   {
-    if (!(tmp= TIME_to_timestamp(thd, &l_time, &in_dst_time_gap)))
+    uint conversion_error;
+    timestamp= TIME_to_timestamp(thd, l_time, &conversion_error);
+    if (timestamp == 0 && l_time->second_part == 0)
+      conversion_error= ER_WARN_DATA_OUT_OF_RANGE;
+    if (conversion_error)
     {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           from, len, MYSQL_TIMESTAMP_DATETIME, !error);
-      error= 1;
-    }
-    else if (in_dst_time_gap)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_INVALID_TIMESTAMP,
-                           from, len, MYSQL_TIMESTAMP_DATETIME, !error);
+      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, conversion_error,
+                           str, MYSQL_TIMESTAMP_DATETIME, !error);
       error= 1;
     }
   }
-  store_timestamp(tmp);
+  else
+  {
+    timestamp= 0;
+    l_time->second_part= 0;
+  }
+  store_TIME(timestamp, l_time->second_part);
   return error;
 }
 
 
-int Field_timestamp::store(double nr)
+int Field_timestamp::store_time_dec(MYSQL_TIME *ltime, uint dec)
 {
-  int error= 0;
-  if (nr < 0 || nr > 99991231235959.0)
-  {
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATETIME);
-    nr= 0;					// Avoid overflow on buff
-    error= 1;
-  }
-  error|= Field_timestamp::store((longlong) rint(nr), FALSE);
-  return error;
+  THD *thd= table->in_use;
+  int unused;
+  MYSQL_TIME l_time= *ltime;
+  Lazy_string_time str(ltime);
+  bool valid= !check_date(&l_time, pack_time(&l_time) != 0,
+                          (thd->variables.sql_mode & MODE_NO_ZERO_DATE) |
+                                       MODE_NO_ZERO_IN_DATE, &unused);
+
+  return store_TIME_with_warning(thd, &l_time, &str, false, valid);
 }
 
 
-int Field_timestamp::store(longlong nr, bool unsigned_val)
+int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME l_time;
-  my_time_t timestamp= 0;
   int error;
-  my_bool in_dst_time_gap;
-  THD *thd= table ? table->in_use : current_thd;
+  int have_smth_to_conv;
+  Lazy_string_str str(from, len);
+  THD *thd= table->in_use;
 
   /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
-  longlong tmp= number_to_datetime(nr, &l_time, (thd->variables.sql_mode &
+  have_smth_to_conv= (str_to_datetime(from, len, &l_time,
+                                      (thd->variables.sql_mode &
+                                       MODE_NO_ZERO_DATE) |
+                                       MODE_NO_ZERO_IN_DATE, &error) >
+                      MYSQL_TIMESTAMP_ERROR);
+  return store_TIME_with_warning(thd, &l_time, &str, error, have_smth_to_conv);
+}
+
+
+int Field_timestamp::store(double nr)
+{
+  MYSQL_TIME l_time;
+  int error;
+  Lazy_string_double str(nr);
+  THD *thd= table->in_use;
+
+  longlong tmp= double_to_datetime(nr, &l_time, (thd->variables.sql_mode &
                                                  MODE_NO_ZERO_DATE) |
                                    MODE_NO_ZERO_IN_DATE, &error);
-  if (tmp == LL(-1))
-  {
-    error= 2;
-  }
+  return store_TIME_with_warning(thd, &l_time, &str, error, tmp != -1);
+}
 
-  if (!error && tmp)
-  {
-    if (!(timestamp= TIME_to_timestamp(thd, &l_time, &in_dst_time_gap)))
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           nr, MYSQL_TIMESTAMP_DATETIME, 1);
-      error= 1;
-    }
-    if (in_dst_time_gap)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           ER_WARN_INVALID_TIMESTAMP,
-                           nr, MYSQL_TIMESTAMP_DATETIME, 1);
-      error= 1;
-    }
-  } else if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED,
-                         nr, MYSQL_TIMESTAMP_DATETIME, 1);
 
-  store_timestamp(timestamp);
-  return error;
+int Field_timestamp::store(longlong nr, bool unsigned_val)
+{
+  MYSQL_TIME l_time;
+  int error;
+  Lazy_string_num str(nr);
+  THD *thd= table->in_use;
+
+  /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */
+  longlong tmp= number_to_datetime(nr, 0, &l_time, (thd->variables.sql_mode &
+                                                 MODE_NO_ZERO_DATE) |
+                                   MODE_NO_ZERO_IN_DATE, &error);
+  return store_TIME_with_warning(thd, &l_time, &str, error, tmp != LL(-1));
 }
 
+
 double Field_timestamp::val_real(void)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
   return (double) Field_timestamp::val_int();
 }
 
+
 longlong Field_timestamp::val_int(void)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
-  uint32 temp;
   MYSQL_TIME time_tmp;
-  THD  *thd= table ? table->in_use : current_thd;
+  THD  *thd= table->in_use;
 
   thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
+  ulong sec_part;
+  my_time_t temp= get_timestamp(&sec_part);
 
-  if (temp == 0L)				// No time
-    return(0);					/* purecov: inspected */
+  /*
+    Field_timestamp() and Field_timestamp_hres() shares this code.
+    This is why are also testing sec_part below.
+  */
+
+  if (temp == 0 && sec_part == 0)
+    return(0);
   
   thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp, (my_time_t)temp);
   
@@ -4966,10 +4693,9 @@ longlong Field_timestamp::val_int(void)
 
 String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
-  uint32 temp, temp2;
+  uint32 temp2;
+  THD *thd= table->in_use;
   MYSQL_TIME time_tmp;
-  THD *thd= table ? table->in_use : current_thd;
   char *to;
 
   val_buffer->alloc(field_length+1);
@@ -4977,20 +4703,16 @@ String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
   val_buffer->length(field_length);
 
   thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
+  ulong sec_part;
+  my_time_t temp= get_timestamp(&sec_part);
 
-  if (temp == 0L)
+  if (temp == 0 && sec_part == 0)
   {				      /* Zero time is "000000" */
-    val_ptr->set(STRING_WITH_LEN("0000-00-00 00:00:00"), &my_charset_bin);
+    val_ptr->set(zero_timestamp, field_length, &my_charset_bin);
     return val_ptr;
   }
   val_buffer->set_charset(&my_charset_bin);	// Safety
-  
+    
   thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp,(my_time_t)temp);
 
   temp= time_tmp.year % 100;
@@ -5039,16 +4761,11 @@ String *Field_timestamp::val_str(String *val_buffer, String *val_ptr)
 
 bool Field_timestamp::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {
-  long temp;
-  THD *thd= table ? table->in_use : current_thd;
+  THD *thd= table->in_use;
   thd->time_zone_used= 1;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    temp=uint4korr(ptr);
-  else
-#endif
-    longget(temp,ptr);
-  if (temp == 0L)
+  ulong sec_part;
+  my_time_t temp= get_timestamp(&sec_part);
+  if (temp == 0 && sec_part == 0)
   {				      /* Zero time is "000000" */
     if (fuzzydate & TIME_NO_ZERO_DATE)
       return 1;
@@ -5057,61 +4774,35 @@ bool Field_timestamp::get_date(MYSQL_TIME *ltime, uint fuzzydate)
   else
   {
     thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)temp);
+    ltime->second_part= sec_part;
   }
   return 0;
 }
 
-bool Field_timestamp::get_time(MYSQL_TIME *ltime)
-{
-  return Field_timestamp::get_date(ltime,0);
-}
-
 
 bool Field_timestamp::send_binary(Protocol *protocol)
 {
-  MYSQL_TIME tm;
-  Field_timestamp::get_date(&tm, 0);
-  return protocol->store(&tm);
+  MYSQL_TIME ltime;
+  Field_timestamp::get_date(&ltime, 0);
+  return protocol->store(&ltime, 0);
 }
 
 
 int Field_timestamp::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-  longget(a,a_ptr);
-  longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
 }
 
 
 void Field_timestamp::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
-  {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-  }
-  else
-#endif
-  {
-    to[0] = ptr[3];
-    to[1] = ptr[2];
-    to[2] = ptr[1];
-    to[3] = ptr[0];
-  }
+  to[0] = ptr[3];
+  to[1] = ptr[2];
+  to[2] = ptr[1];
+  to[3] = ptr[0];
 }
 
 
@@ -5121,149 +4812,450 @@ void Field_timestamp::sql_type(String &res) const
 }
 
 
-void Field_timestamp::set_time()
+int Field_timestamp::set_time()
 {
-  THD *thd= table ? table->in_use : current_thd;
-  long tmp= (long) thd->query_start();
+  THD *thd= table->in_use;
   set_notnull();
-  store_timestamp(tmp);
+  store_TIME(thd->query_start(), 0);
+  return 0;
 }
 
-/****************************************************************************
-** time type
-** In string context: HH:MM:SS
-** In number context: HHMMSS
-** Stored as a 3 byte unsigned int
-****************************************************************************/
+void Field_timestamp_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *cs=res.charset();
+  res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(),
+                                "timestamp(%u)", dec));
+}
 
-int Field_time::store(const char *from,uint len,CHARSET_INFO *cs)
+#ifdef NOT_USED
+static void store_native(ulonglong num, uchar *to, uint bytes)
+{
+  switch(bytes) {
+  case 1: *to= (uchar)num;              break;
+  case 2: shortstore(to, (ushort)num);  break;
+  case 3: int3store(to, num); /* Sic!*/ break;
+  case 4: longstore(to, (ulong)num);    break;
+  case 8: longlongstore(to, num);       break;
+  default: DBUG_ASSERT(0);
+  }
+}
+
+static longlong read_native(const uchar *from, uint bytes)
+{
+  switch(bytes) {
+  case 1: return from[0];
+  case 2: { uint16 tmp; shortget(tmp, from); return tmp; }
+  case 3: return uint3korr(from);
+  case 4: { uint32 tmp; longget(tmp, from); return tmp; }
+  case 8: { longlong tmp; longlongget(tmp, from); return tmp; }
+  default: DBUG_ASSERT(0); return 0;
+  }
+}
+#endif
+
+static void store_lowendian(ulonglong num, uchar *to, uint bytes)
+{
+  switch(bytes) {
+  case 1: *to= (uchar)num;    break;
+  case 2: int2store(to, num); break;
+  case 3: int3store(to, num); break;
+  case 4: int4store(to, num); break;
+  case 8: int8store(to, num); break;
+  default: DBUG_ASSERT(0);
+  }
+}
+
+static longlong read_lowendian(const uchar *from, uint bytes)
+{
+  switch(bytes) {
+  case 1: return from[0];
+  case 2: return uint2korr(from);
+  case 3: return uint3korr(from);
+  case 4: return uint4korr(from);
+  case 8: return sint8korr(from);
+  default: DBUG_ASSERT(0); return 0;
+  }
+}
+
+static void store_bigendian(ulonglong num, uchar *to, uint bytes)
+{
+  switch(bytes) {
+  case 1: mi_int1store(to, num); break;
+  case 2: mi_int2store(to, num); break;
+  case 3: mi_int3store(to, num); break;
+  case 4: mi_int4store(to, num); break;
+  case 5: mi_int5store(to, num); break;
+  case 6: mi_int6store(to, num); break;
+  case 7: mi_int7store(to, num); break;
+  case 8: mi_int8store(to, num); break;
+  default: DBUG_ASSERT(0);
+  }
+}
+
+static longlong read_bigendian(const uchar *from, uint bytes)
+{
+  switch(bytes) {
+  case 1: return mi_uint1korr(from);
+  case 2: return mi_uint2korr(from);
+  case 3: return mi_uint3korr(from);
+  case 4: return mi_uint4korr(from);
+  case 5: return mi_uint5korr(from);
+  case 6: return mi_uint6korr(from);
+  case 7: return mi_uint7korr(from);
+  case 8: return mi_sint8korr(from);
+  default: DBUG_ASSERT(0); return 0;
+  }
+}
+
+void Field_timestamp_hires::store_TIME(my_time_t timestamp, ulong sec_part)
+{
+  mi_int4store(ptr, timestamp);
+  store_bigendian(sec_part_shift(sec_part, dec), ptr+4, sec_part_bytes[dec]);
+}
+
+my_time_t Field_timestamp_hires::get_timestamp(ulong *sec_part) const
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  *sec_part= (long)sec_part_unshift(read_bigendian(ptr+4, sec_part_bytes[dec]), dec);
+  return mi_uint4korr(ptr);
+}
+
+double Field_timestamp_hires::val_real(void)
+{
+  MYSQL_TIME time_tmp;
+  THD  *thd= table->in_use;
+
+  thd->time_zone_used= 1;
+  ulong sec_part;
+  my_time_t temp= get_timestamp(&sec_part);
+
+  if (temp == 0 && sec_part == 0)
+    return(0);
+  
+  thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp, (my_time_t)temp);
+  
+  return time_tmp.year * 1e10 + time_tmp.month * 1e8 +
+         time_tmp.day * 1e6 + time_tmp.hour * 1e4 +
+         time_tmp.minute * 1e2 + time_tmp.second + sec_part*1e-6;
+}
+
+String *Field_timestamp_hires::val_str(String *val_buffer, String *val_ptr)
+{
+  String *tmp= Field_timestamp::val_str(val_buffer, val_ptr);
+  ulong sec_part= (ulong)read_bigendian(ptr+4, sec_part_bytes[dec]);
+  
+  if (tmp->ptr() == zero_timestamp)
+    return tmp;
+
+  char *buf= const_cast<char*>(tmp->ptr() + MAX_DATETIME_WIDTH);
+  for (int i=dec; i>0; i--, sec_part/=10)
+    buf[i]= (char)(sec_part % 10) + '0';
+  buf[0]= '.';
+  buf[dec+1]= 0;
+  return tmp;
+}
+
+
+my_decimal *Field_timestamp_hires::val_decimal(my_decimal *d)
 {
   MYSQL_TIME ltime;
-  long tmp;
-  int error= 0;
-  int warning;
+  get_date(&ltime, 0);
+  longlong intg= TIME_to_ulonglong(&ltime);
+  return seconds2my_decimal(ltime.neg, intg, ltime.second_part, d);
+}
+ 
+int Field_timestamp_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;
+  ulong sec_part;
+  int error;
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  Lazy_string_decimal str(d);
 
-  if (str_to_time(from, len, &ltime, &warning))
+  if (my_decimal2seconds(d, &nr, &sec_part))
   {
-    tmp=0L;
+    tmp= -1;
     error= 2;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_TIME, 1);
   }
   else
-  {
-    if (warning & MYSQL_TIME_WARN_TRUNCATED)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                           WARN_DATA_TRUNCATED,
-                           from, len, MYSQL_TIMESTAMP_TIME, 1);
-      error= 1;
-    }
-    if (warning & MYSQL_TIME_WARN_OUT_OF_RANGE)
-    {
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE,
-                           from, len, MYSQL_TIMESTAMP_TIME, !error);
-      error= 1;
-    }
-    if (ltime.month)
-      ltime.day=0;
-    tmp=(ltime.day*24L+ltime.hour)*10000L+(ltime.minute*100+ltime.second);
-  }
-  
-  if (ltime.neg)
-    tmp= -tmp;
-  int3store(ptr,tmp);
-  return error;
+    tmp= number_to_datetime(nr, sec_part, &ltime, TIME_NO_ZERO_IN_DATE |
+                                                  (thd->variables.sql_mode &
+                                                   MODE_NO_ZERO_DATE), &error);
+
+  return store_TIME_with_warning(thd, &ltime, &str, error, tmp != -1);
 }
 
+int Field_timestamp_hires::set_time()
+{
+  THD *thd= table->in_use;
+  set_notnull();
+  store_TIME(thd->query_start(), thd->query_start_sec_part());
+  return 0;
+}
 
-int Field_time::store_time(MYSQL_TIME *ltime, timestamp_type time_type)
+bool Field_timestamp_hires::send_binary(Protocol *protocol)
 {
-  long tmp= ((ltime->month ? 0 : ltime->day * 24L) + ltime->hour) * 10000L +
-            (ltime->minute * 100 + ltime->second);
-  if (ltime->neg)
-    tmp= -tmp;
-  return Field_time::store((longlong) tmp, FALSE);
+  MYSQL_TIME ltime;
+  Field_timestamp::get_date(&ltime, 0);
+  return protocol->store(&ltime, dec);
 }
 
 
-int Field_time::store(double nr)
+int Field_timestamp_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  int32 a,b;
+  ulong a_sec_part, b_sec_part;
+  a= mi_uint4korr(a_ptr);
+  a_sec_part= (ulong)read_bigendian(a_ptr+4, sec_part_bytes[dec]);
+  b= mi_uint4korr(b_ptr);
+  b_sec_part= (ulong)read_bigendian(b_ptr+4, sec_part_bytes[dec]);
+  return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 :
+          a_sec_part < b_sec_part  ? -1 :  a_sec_part > b_sec_part  ? 1 : 0;
+}
+
+
+void Field_timestamp_hires::sort_string(uchar *to,uint length)
+{
+  DBUG_ASSERT(length == Field_timestamp_hires::pack_length());
+  memcpy(to, ptr, length);
+}
+
+uint32 Field_timestamp_hires::pack_length() const
+{
+  return 4 + sec_part_bytes[dec];
+}
+
+void Field_timestamp_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);
+  field->decimals= dec;
+}
+
+/*
+  Store string into a date/time field
+
+  RETURN
+    0  ok
+    1  Value was cut during conversion
+    2  value was out of range
+    3  Datetime value that was cut (warning level NOTE)
+       This is used by opt_range.cc:get_mm_leaf().
+*/
+int Field_temporal::store_TIME_with_warning(MYSQL_TIME *ltime,
+                                            const Lazy_string *str,
+                                            int was_cut, int have_smth_to_conv)
 {
+  MYSQL_ERROR::enum_warning_level trunc_level= MYSQL_ERROR::WARN_LEVEL_WARN;
+  int ret= 2;
+  
   ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  long tmp;
-  int error= 0;
-  if (nr > (double)TIME_MAX_VALUE)
+
+  if (was_cut == 0 &&
+      have_smth_to_conv == 0 &&
+      temporal_type() != MYSQL_TIMESTAMP_TIME) // special case: zero date
+    was_cut= MYSQL_TIME_WARN_OUT_OF_RANGE;
+  else
+  if (!have_smth_to_conv)
   {
-    tmp= TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE, nr, MYSQL_TIMESTAMP_TIME);
-    error= 1;
+    bzero(ltime, sizeof(*ltime));
+    was_cut=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 1;
   }
-  else if (nr < (double)-TIME_MAX_VALUE)
+  else if (!(was_cut & MYSQL_TIME_WARN_TRUNCATED) &&
+           temporal_type() == MYSQL_TIMESTAMP_DATE &&
+           (ltime->hour || ltime->minute || ltime->second || ltime->second_part))
   {
-    tmp= -TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr, MYSQL_TIMESTAMP_TIME);
-    error= 1;
+    trunc_level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    was_cut|=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 3;
   }
-  else
+  else if (!(was_cut & MYSQL_TIME_WARN_TRUNCATED) &&
+           temporal_type() == MYSQL_TIMESTAMP_TIME &&
+           (ltime->year || ltime->month))
   {
-    tmp=(long) floor(fabs(nr));			// Remove fractions
-    if (nr < 0)
-      tmp= -tmp;
-    if (tmp % 100 > 59 || tmp/100 % 100 > 59)
-    {
-      tmp=0;
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE, nr,
-                           MYSQL_TIMESTAMP_TIME);
-      error= 1;
-    }
+    ltime->year= ltime->month= ltime->day= 0;
+    trunc_level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    was_cut|=  MYSQL_TIME_WARN_TRUNCATED;
+    ret= 3;
   }
-  int3store(ptr,tmp);
-  return error;
+
+  /*
+    error code logic:
+    MYSQL_TIME_WARN_TRUNCATED means that the value was not a date/time at all.
+      it will be stored as zero date/time.
+    MYSQL_TIME_WARN_OUT_OF_RANGE means that the value was a date/time,
+      that is, it was parsed as such, but the value was invalid.
+
+    Also, MYSQL_TIME_WARN_TRUNCATED is used when storing a DATETIME in
+    a DATE field and non-zero time part is thrown away.
+  */
+  if (was_cut & MYSQL_TIME_WARN_TRUNCATED)
+    set_datetime_warning(trunc_level, WARN_DATA_TRUNCATED,
+                         str, temporal_type(), 1);
+  if (was_cut & MYSQL_TIME_WARN_OUT_OF_RANGE)
+    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE,
+                         str, temporal_type(), 1);
+
+  store_TIME(ltime);
+  return was_cut ? ret : 0;
 }
 
 
-int Field_time::store(longlong nr, bool unsigned_val)
+int Field_temporal::store(const char *from,uint len,CHARSET_INFO *cs)
+{
+  MYSQL_TIME ltime;
+  int error;
+  enum enum_mysql_timestamp_type func_res;
+  THD *thd= table->in_use;
+  Lazy_string_str str(from, len);
+
+  func_res= str_to_datetime(from, len, &ltime,
+                            (TIME_FUZZY_DATE |
+                             (thd->variables.sql_mode &
+                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
+                               MODE_INVALID_DATES))),
+                            &error);
+  return store_TIME_with_warning(&ltime, &str, error, func_res > MYSQL_TIMESTAMP_ERROR);
+}
+
+
+int Field_temporal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  long tmp;
   int error= 0;
-  if (nr < (longlong) -TIME_MAX_VALUE && !unsigned_val)
-  {
-    tmp= -TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr,
-                         MYSQL_TIMESTAMP_TIME, 1);
-    error= 1;
-  }
-  else if (nr > (longlong) TIME_MAX_VALUE || (nr < 0 && unsigned_val))
-  {
-    tmp= TIME_MAX_VALUE;
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE, nr,
-                         MYSQL_TIMESTAMP_TIME, 1);
-    error= 1;
-  }
-  else
+  MYSQL_TIME ltime;
+  THD *thd= table->in_use;
+  Lazy_string_double str(nr);
+
+  longlong tmp= double_to_datetime(nr, &ltime,
+                                    (TIME_FUZZY_DATE |
+                                       (thd->variables.sql_mode &
+                                        (MODE_NO_ZERO_IN_DATE |
+                                         MODE_NO_ZERO_DATE |
+                                         MODE_INVALID_DATES))), &error);
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1);
+}
+
+
+int Field_temporal::store(longlong nr, bool unsigned_val)
+{
+  int error;
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  Lazy_string_num str(nr);
+
+  tmp= number_to_datetime(nr, 0, &ltime, (TIME_FUZZY_DATE |
+                                      (thd->variables.sql_mode &
+                                       (MODE_NO_ZERO_IN_DATE |
+                                        MODE_NO_ZERO_DATE |
+                                        MODE_INVALID_DATES))), &error);
+
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1);
+}
+
+
+int Field_temporal::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  int error = 0, have_smth_to_conv= 1;
+  MYSQL_TIME l_time= *ltime;
+  Lazy_string_time str(ltime);
+  /*
+    We don't perform range checking here since values stored in TIME
+    structure always fit into DATETIME range.
+  */
+  have_smth_to_conv= !check_date(&l_time, pack_time(&l_time) != 0,
+                                 (TIME_FUZZY_DATE |
+                                  (current_thd->variables.sql_mode &
+                                   (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
+                                    MODE_INVALID_DATES))), &error);
+  return store_TIME_with_warning(&l_time, &str, error, have_smth_to_conv);
+}
+
+my_decimal *Field_temporal::val_decimal(my_decimal *d)
+{
+  MYSQL_TIME ltime;
+  if (get_date(&ltime, TIME_FUZZY_DATE))
   {
-    tmp=(long) nr;
-    if (tmp % 100 > 59 || tmp/100 % 100 > 59)
-    {
-      tmp=0;
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                           ER_WARN_DATA_OUT_OF_RANGE, nr,
-                           MYSQL_TIMESTAMP_TIME, 1);
-      error= 1;
-    }
+    bzero(&ltime, sizeof(ltime));
+    ltime.time_type= mysql_type_to_time_type(type());
   }
+  longlong intg= TIME_to_ulonglong(&ltime);
+  return seconds2my_decimal(ltime.neg, intg, ltime.second_part, d);
+}
+
+/****************************************************************************
+** time type
+** In string context: HH:MM:SS
+** In number context: HHMMSS
+** Stored as a 3 byte unsigned int
+****************************************************************************/
+
+void Field_time::store_TIME(MYSQL_TIME *ltime)
+{
+  long tmp= (ltime->day*24L+ltime->hour)*10000L +
+            (ltime->minute*100+ltime->second);
+  if (ltime->neg)
+    tmp= -tmp;
   int3store(ptr,tmp);
-  return error;
 }
 
+int Field_time::store(const char *from,uint len,CHARSET_INFO *cs)
+{
+  MYSQL_TIME ltime;
+  Lazy_string_str str(from, len);
+  int was_cut;
+  int have_smth_to_conv=
+    str_to_time(from, len, &ltime,
+                table->in_use->variables.sql_mode &
+                (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE |
+                 MODE_INVALID_DATES),
+                &was_cut) > MYSQL_TIMESTAMP_ERROR;
+  
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+
+
+int Field_time::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  MYSQL_TIME l_time= *ltime;
+  Lazy_string_time str(ltime);
+  int was_cut= 0;
+
+  int have_smth_to_conv= !check_time_range(&l_time, decimals(), &was_cut);
+  return store_TIME_with_warning(&l_time, &str, was_cut, have_smth_to_conv);
+}
+
+
+int Field_time::store(double nr)
+{
+  MYSQL_TIME ltime;
+  Lazy_string_double str(nr);
+  int was_cut;
+  bool neg= nr < 0;
+  if (neg)
+    nr= -nr;
+  int have_smth_to_conv= !number_to_time(neg, (longlong)nr,
+                                         (ulong)((nr - floor(nr)) * TIME_SECOND_PART_FACTOR),
+                                         &ltime, &was_cut);
+
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+
+
+int Field_time::store(longlong nr, bool unsigned_val)
+{
+  MYSQL_TIME ltime;
+  Lazy_string_num str(nr);
+  int was_cut;
+  int have_smth_to_conv= !number_to_time(nr < 0, nr < 0 ? -nr : nr,
+                                         0, &ltime, &was_cut);
 
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+  
+  
 double Field_time::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
@@ -5289,7 +5281,6 @@ String *Field_time::val_str(String *val_buffer,
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   MYSQL_TIME ltime;
-  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
   long tmp=(long) sint3korr(ptr);
   ltime.neg= 0;
   if (tmp < 0)
@@ -5297,11 +5288,19 @@ String *Field_time::val_str(String *val_buffer,
     tmp= -tmp;
     ltime.neg= 1;
   }
+  ltime.year= ltime.month= 0;
   ltime.day= (uint) 0;
   ltime.hour= (uint) (tmp/10000);
   ltime.minute= (uint) (tmp/100 % 100);
   ltime.second= (uint) (tmp % 100);
-  make_time((DATE_TIME_FORMAT*) 0, &ltime, val_buffer);
+  ltime.second_part= 0;
+
+  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
+  uint length= (uint) my_time_to_str(&ltime,
+                                     const_cast<char*>(val_buffer->ptr()), 0);
+  val_buffer->length(length);
+  val_buffer->set_charset(&my_charset_bin);
+
   return val_buffer;
 }
 
@@ -5315,8 +5314,8 @@ String *Field_time::val_str(String *val_buffer,
  
 bool Field_time::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (!(fuzzydate & TIME_FUZZY_DATE))
+  THD *thd= table->in_use;
+  if (!(fuzzydate & (TIME_FUZZY_DATE|TIME_TIME_ONLY)))
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_WARN_DATA_OUT_OF_RANGE,
@@ -5324,12 +5323,6 @@ bool Field_time::get_date(MYSQL_TIME *ltime, uint fuzzydate)
                         thd->row_count);
     return 1;
   }
-  return Field_time::get_time(ltime);
-}
-
-
-bool Field_time::get_time(MYSQL_TIME *ltime)
-{
   long tmp=(long) sint3korr(ptr);
   ltime->neg=0;
   if (tmp < 0)
@@ -5350,11 +5343,9 @@ bool Field_time::get_time(MYSQL_TIME *ltime)
 
 bool Field_time::send_binary(Protocol *protocol)
 {
-  MYSQL_TIME tm;
-  Field_time::get_time(&tm);
-  tm.day= tm.hour/24;				// Move hours to days
-  tm.hour-= tm.day*24;
-  return protocol->store_time(&tm);
+  MYSQL_TIME ltime;
+  Field_time::get_date(&ltime, TIME_TIME_ONLY);
+  return protocol->store_time(&ltime, 0);
 }
 
 
@@ -5378,6 +5369,120 @@ void Field_time::sql_type(String &res) const
   res.set_ascii(STRING_WITH_LEN("time"));
 }
 
+int Field_time_hires::reset()
+{
+  store_bigendian(zero_point, ptr, Field_time_hires::pack_length());
+  return 0;
+}
+
+void Field_time_hires::store_TIME(MYSQL_TIME *ltime)
+{
+  ulonglong packed= sec_part_shift(pack_time(ltime), dec) + zero_point;
+  store_bigendian(packed, ptr, Field_time_hires::pack_length());
+}
+
+int Field_time_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;
+  ulong sec_part;
+  Lazy_string_decimal str(d);
+  MYSQL_TIME ltime;
+  int was_cut;
+  bool neg= my_decimal2seconds(d, &nr, &sec_part);
+
+  int have_smth_to_conv= !number_to_time(neg, nr, sec_part, &ltime, &was_cut);
+
+  return store_TIME_with_warning(&ltime, &str, was_cut, have_smth_to_conv);
+}
+
+uint32 Field_time_hires::pack_length() const
+{
+  return time_hires_bytes[dec];
+}
+
+longlong Field_time_hires::val_int(void)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);
+  longlong val= TIME_to_ulonglong_time(&ltime);
+  return ltime.neg ? -val : val;
+}
+
+double Field_time_hires::val_real(void)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);
+  return TIME_to_double(&ltime);
+}
+
+String *Field_time_hires::val_str(String *str,
+                                  String *unused __attribute__((unused)))
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);
+  str->alloc(field_length+1);
+  str->length(my_time_to_str(&ltime, (char*) str->ptr(), dec));
+  str->set_charset(&my_charset_bin);
+  return str;
+}
+
+bool Field_time_hires::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  uint32 len= pack_length();
+  longlong packed= read_bigendian(ptr, len);
+
+  packed= sec_part_unshift(packed - zero_point, dec);
+
+  unpack_time(packed, ltime);
+  /*
+    unpack_time() returns MYSQL_TIMESTAMP_DATETIME.
+    To get MYSQL_TIMESTAMP_TIME we few adjustments
+  */
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
+  ltime->hour+= (ltime->month*32+ltime->day)*24;
+  ltime->month= ltime->day= 0;
+  return fuzzydate & (TIME_FUZZY_DATE | TIME_TIME_ONLY) ? 0 : 1;
+}
+
+
+bool Field_time_hires::send_binary(Protocol *protocol)
+{
+  MYSQL_TIME ltime;
+  Field_time_hires::get_date(&ltime, TIME_TIME_ONLY);
+  return protocol->store_time(&ltime, dec);
+}
+
+
+int Field_time_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  ulonglong a=read_bigendian(a_ptr, Field_time_hires::pack_length());
+  ulonglong b=read_bigendian(b_ptr, Field_time_hires::pack_length());
+  return (a < b) ? -1 : (a > b) ? 1 : 0;
+}
+
+void Field_time_hires::sort_string(uchar *to,uint length __attribute__((unused)))
+{
+  DBUG_ASSERT(length == Field_time_hires::pack_length());
+  memcpy(to, ptr, length);
+  to[0]^= 128;
+}
+
+void Field_time_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *cs=res.charset();
+  res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(),
+                                "time(%u)", dec));
+}
+
+void Field_time_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);
+  field->decimals= dec;
+}
+
 /****************************************************************************
 ** year type
 ** Save in a byte the year 0, 1901->2155
@@ -5453,6 +5558,17 @@ int Field_year::store(longlong nr, bool unsigned_val)
 }
 
 
+int Field_year::store_time_dec(MYSQL_TIME *ltime, uint dec)
+{
+  Lazy_string_time str(ltime);
+  if (Field_year::store(ltime->year, 0))
+    return 1;
+
+  set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
+                       &str, ltime->time_type, 1);
+  return 0;
+}
+
 bool Field_year::send_binary(Protocol *protocol)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
@@ -5492,6 +5608,15 @@ String *Field_year::val_str(String *val_buffer,
 }
 
 
+bool Field_year::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  int tmp= (int) ptr[0];
+  if (tmp || field_length != 4)
+    tmp+= 1900;
+  return int_to_datetime_with_warn(tmp * 10000, ltime, fuzzydate, field_name);
+}
+
+
 void Field_year::sql_type(String &res) const
 {
   CHARSET_INFO *cs=res.charset();
@@ -5507,102 +5632,12 @@ void Field_year::sql_type(String &res) const
 ** Stored as a 4 byte unsigned int
 ****************************************************************************/
 
-int Field_date::store(const char *from, uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME l_time;
-  uint32 tmp;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-
-  if (str_to_datetime(from, len, &l_time, TIME_FUZZY_DATE |
-                      (thd->variables.sql_mode &
-                       (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                        MODE_INVALID_DATES)),
-                      &error) <= MYSQL_TIMESTAMP_ERROR)
-  {
-    tmp= 0;
-    error= 2;
-  }
-  else
-    tmp=(uint32) l_time.year*10000L + (uint32) (l_time.month*100+l_time.day);
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATE, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int4store(ptr,tmp);
-  }
-  else
-#endif
-    longstore(ptr,tmp);
-  return error;
-}
-
-
-int Field_date::store(double nr)
-{
-  longlong tmp;
-  if (nr >= 19000000000000.0 && nr <= 99991231235959.0)
-    nr=floor(nr/1000000.0);			// Timestamp to date
-  if (nr < 0.0 || nr > 99991231.0)
-  {
-    tmp= LL(0);
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATE);
-  }
-  else
-    tmp= (longlong) rint(nr);
-
-  return Field_date::store(tmp, TRUE);
-}
-
-
-int Field_date::store(longlong nr, bool unsigned_val)
+void Field_date::store_TIME(MYSQL_TIME *ltime)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME not_used;
-  int error;
-  longlong initial_nr= nr;
-  THD *thd= table ? table->in_use : current_thd;
-
-  nr= number_to_datetime(nr, &not_used, (TIME_FUZZY_DATE |
-                                         (thd->variables.sql_mode &
-                                          (MODE_NO_ZERO_IN_DATE |
-                                           MODE_NO_ZERO_DATE |
-                                           MODE_INVALID_DATES))), &error);
-
-  if (nr == LL(-1))
-  {
-    nr= 0;
-    error= 2;
-  }
-
-  if (nr >= 19000000000000.0 && nr <= 99991231235959.0)
-    nr= (longlong) floor(nr/1000000.0);         // Timestamp to date
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? ER_WARN_DATA_OUT_OF_RANGE :
-                         WARN_DATA_TRUNCATED, initial_nr,
-                         MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int4store(ptr, nr);
-  }
-  else
-#endif
-    longstore(ptr, nr);
-  return error;
+  uint tmp= ltime->year*10000L + ltime->month*100+ltime->day;
+  int4store(ptr,tmp);
 }
 
-
 bool Field_date::send_binary(Protocol *protocol)
 {
   longlong tmp= Field_date::val_int();
@@ -5618,12 +5653,7 @@ double Field_date::val_real(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return (double) (uint32) j;
 }
 
@@ -5632,12 +5662,7 @@ longlong Field_date::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint4korr(ptr);
-  else
-#endif
-    longget(j,ptr);
+  j=sint4korr(ptr);
   return (longlong) (uint32) j;
 }
 
@@ -5647,67 +5672,38 @@ String *Field_date::val_str(String *val_buffer,
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   MYSQL_TIME ltime;
-  val_buffer->alloc(field_length);
   int32 tmp;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    tmp=sint4korr(ptr);
-  else
-#endif
-    longget(tmp,ptr);
+  tmp=sint4korr(ptr);
   ltime.neg= 0;
   ltime.year= (int) ((uint32) tmp/10000L % 10000);
   ltime.month= (int) ((uint32) tmp/100 % 100);
   ltime.day= (int) ((uint32) tmp % 100);
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, val_buffer);
-  return val_buffer;
-}
 
+  val_buffer->alloc(MAX_DATE_STRING_REP_LENGTH);
+  uint length= (uint) my_date_to_str(&ltime,
+                                     const_cast<char*>(val_buffer->ptr()));
+  val_buffer->length(length);
+  val_buffer->set_charset(&my_charset_bin);
 
-bool Field_date::get_time(MYSQL_TIME *ltime)
-{
-  bzero((char *)ltime, sizeof(MYSQL_TIME));
-  return 0;
+  return val_buffer;
 }
 
 
 int Field_date::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   int32 a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint4korr(a_ptr);
-    b=sint4korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longget(a,a_ptr);
-    longget(b,b_ptr);
-  }
+  a=sint4korr(a_ptr);
+  b=sint4korr(b_ptr);
   return ((uint32) a < (uint32) b) ? -1 : ((uint32) a > (uint32) b) ? 1 : 0;
 }
 
 
 void Field_date::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
-  {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-  }
-  else
-#endif
-  {
-    to[0] = ptr[3];
-    to[1] = ptr[2];
-    to[2] = ptr[1];
-    to[3] = ptr[0];
-  }
+  to[0] = ptr[3];
+  to[1] = ptr[2];
+  to[2] = ptr[1];
+  to[3] = ptr[0];
 }
 
 void Field_date::sql_type(String &res) const
@@ -5722,153 +5718,10 @@ void Field_date::sql_type(String &res) const
 ** In number context: YYYYMMDD
 ****************************************************************************/
 
-/*
-  Store string into a date field
-
-  SYNOPSIS
-    Field_newdate::store()
-    from                Date string
-    len                 Length of date field
-    cs                  Character set (not used)
-
-  RETURN
-    0  ok
-    1  Value was cut during conversion
-    2  Wrong date string
-    3  Datetime value that was cut (warning level NOTE)
-       This is used by opt_range.cc:get_mm_leaf(). Note that there is a
-       nearly-identical class Field_date doesn't ever return 3 from its
-       store function.
-*/
-
-int Field_newdate::store(const char *from,uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  long tmp;
-  MYSQL_TIME l_time;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-  enum enum_mysql_timestamp_type ret;
-  if ((ret= str_to_datetime(from, len, &l_time,
-                            (TIME_FUZZY_DATE |
-                             (thd->variables.sql_mode &
-                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                               MODE_INVALID_DATES))),
-                            &error)) <= MYSQL_TIMESTAMP_ERROR)
-  {
-    tmp= 0;
-    error= 2;
-  }
-  else
-  {
-    tmp= l_time.day + l_time.month*32 + l_time.year*16*32;
-    if (!error && (ret != MYSQL_TIMESTAMP_DATE) &&
-        (l_time.hour || l_time.minute || l_time.second || l_time.second_part))
-      error= 3;                                 // Datetime was cut (note)
-  }
-
-  if (error)
-    set_datetime_warning(error == 3 ? MYSQL_ERROR::WARN_LEVEL_NOTE :
-                         MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED,
-                         from, len, MYSQL_TIMESTAMP_DATE, 1);
-
-  int3store(ptr, tmp);
-  return error;
-}
-
-
-int Field_newdate::store(double nr)
-{
-  if (nr < 0.0 || nr > 99991231235959.0)
-  {
-    int3store(ptr,(int32) 0);
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         WARN_DATA_TRUNCATED, nr, MYSQL_TIMESTAMP_DATE);
-    return 1;
-  }
-  return Field_newdate::store((longlong) rint(nr), FALSE);
-}
-
-
-int Field_newdate::store(longlong nr, bool unsigned_val)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME l_time;
-  longlong tmp;
-  int error;
-  THD *thd= table ? table->in_use : current_thd;
-  if (number_to_datetime(nr, &l_time,
-                         (TIME_FUZZY_DATE |
-                          (thd->variables.sql_mode &
-                           (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                            MODE_INVALID_DATES))),
-                         &error) == LL(-1))
-  {
-    tmp= 0L;
-    error= 2;
-  }
-  else
-    tmp= l_time.day + l_time.month*32 + l_time.year*16*32;
-
-  if (!error && l_time.time_type != MYSQL_TIMESTAMP_DATE &&
-      (l_time.hour || l_time.minute || l_time.second || l_time.second_part))
-    error= 3;
-
-  if (error)
-    set_datetime_warning(error == 3 ? MYSQL_ERROR::WARN_LEVEL_NOTE :
-                         MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? 
-                         ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED,
-                         nr,MYSQL_TIMESTAMP_DATE, 1);
-
-  int3store(ptr,tmp);
-  return error;
-}
-
-
-int Field_newdate::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
+void Field_newdate::store_TIME(MYSQL_TIME *ltime)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  long tmp;
-  int error= 0;
-  if (time_type == MYSQL_TIMESTAMP_DATE ||
-      time_type == MYSQL_TIMESTAMP_DATETIME)
-  {
-    tmp=ltime->year*16*32+ltime->month*32+ltime->day;
-    if (check_date(ltime, tmp != 0,
-                   (TIME_FUZZY_DATE |
-                    (current_thd->variables.sql_mode &
-                     (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                      MODE_INVALID_DATES))), &error))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      tmp= 0;
-      make_date((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATE, 1);
-    }
-    if (!error && ltime->time_type != MYSQL_TIMESTAMP_DATE &&
-        (ltime->hour || ltime->minute || ltime->second || ltime->second_part))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      make_datetime((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_NOTE,
-                           WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATE, 1);
-      error= 3;
-    }
-  }
-  else
-  {
-    tmp=0;
-    error= 1;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
-  }
+  uint tmp= ltime->year*16*32 + ltime->month*32+ltime->day;
   int3store(ptr,tmp);
-  return error;
 }
 
 
@@ -5933,14 +5786,11 @@ bool Field_newdate::get_date(MYSQL_TIME *ltime,uint fuzzydate)
   ltime->year=  (tmp >> 9);
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
   ltime->hour= ltime->minute= ltime->second= ltime->second_part= ltime->neg= 0;
-  return ((!(fuzzydate & TIME_FUZZY_DATE) && (!ltime->month || !ltime->day)) ?
-          1 : 0);
-}
-
-
-bool Field_newdate::get_time(MYSQL_TIME *ltime)
-{
-  return Field_newdate::get_date(ltime,0);
+  if (!tmp)
+    return fuzzydate & TIME_NO_ZERO_DATE;
+  if (!ltime->month || !ltime->day)
+    return !(fuzzydate & TIME_FUZZY_DATE);
+  return 0;
 }
 
 
@@ -5974,150 +5824,20 @@ void Field_newdate::sql_type(String &res) const
 ** Stored as a 8 byte unsigned int. Should sometimes be change to a 6 byte int.
 ****************************************************************************/
 
-int Field_datetime::store(const char *from,uint len,CHARSET_INFO *cs)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME time_tmp;
-  int error;
-  ulonglong tmp= 0;
-  enum enum_mysql_timestamp_type func_res;
-  THD *thd= table ? table->in_use : current_thd;
-
-  func_res= str_to_datetime(from, len, &time_tmp,
-                            (TIME_FUZZY_DATE |
-                             (thd->variables.sql_mode &
-                              (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                               MODE_INVALID_DATES))),
-                            &error);
-  if ((int) func_res > (int) MYSQL_TIMESTAMP_ERROR)
-    tmp= TIME_to_ulonglong_datetime(&time_tmp);
-  else
-    error= 1;                                 // Fix if invalid zero date
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         from, len, MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
-  return error;
-}
-
-
-int Field_datetime::store(double nr)
-{
-  int error= 0;
-  if (nr < 0.0 || nr > 99991231235959.0)
-  {
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, 
-                         ER_WARN_DATA_OUT_OF_RANGE,
-                         nr, MYSQL_TIMESTAMP_DATETIME);
-    nr= 0.0;
-    error= 1;
-  }
-  error|= Field_datetime::store((longlong) rint(nr), FALSE);
-  return error;
-}
-
-
-int Field_datetime::store(longlong nr, bool unsigned_val)
-{
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  MYSQL_TIME not_used;
-  int error;
-  longlong initial_nr= nr;
-  THD *thd= table ? table->in_use : current_thd;
-
-  nr= number_to_datetime(nr, &not_used, (TIME_FUZZY_DATE |
-                                         (thd->variables.sql_mode &
-                                          (MODE_NO_ZERO_IN_DATE |
-                                           MODE_NO_ZERO_DATE |
-                                           MODE_INVALID_DATES))), &error);
-
-  if (nr == LL(-1))
-  {
-    nr= 0;
-    error= 2;
-  }
-
-  if (error)
-    set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
-                         error == 2 ? ER_WARN_DATA_OUT_OF_RANGE :
-                         WARN_DATA_TRUNCATED, initial_nr,
-                         MYSQL_TIMESTAMP_DATETIME, 1);
-
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,nr);
-  }
-  else
-#endif
-    longlongstore(ptr,nr);
-  return error;
-}
-
-
-int Field_datetime::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
+void Field_datetime::store_TIME(MYSQL_TIME *ltime)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
-  longlong tmp;
-  int error= 0;
-  /*
-    We don't perform range checking here since values stored in TIME
-    structure always fit into DATETIME range.
-  */
-  if (time_type == MYSQL_TIMESTAMP_DATE ||
-      time_type == MYSQL_TIMESTAMP_DATETIME)
-  {
-    tmp=((ltime->year*10000L+ltime->month*100+ltime->day)*LL(1000000)+
-	 (ltime->hour*10000L+ltime->minute*100+ltime->second));
-    if (check_date(ltime, tmp != 0,
-                   (TIME_FUZZY_DATE |
-                    (current_thd->variables.sql_mode &
-                     (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE |
-                      MODE_INVALID_DATES))), &error))
-    {
-      char buff[MAX_DATE_STRING_REP_LENGTH];
-      String str(buff, sizeof(buff), &my_charset_latin1);
-      tmp= 0;
-      make_datetime((DATE_TIME_FORMAT *) 0, ltime, &str);
-      set_datetime_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED,
-                           str.ptr(), str.length(), MYSQL_TIMESTAMP_DATETIME,1);
-    }
-  }
-  else
-  {
-    tmp=0;
-    error= 1;
-    set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
-  }
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    int8store(ptr,tmp);
-  }
-  else
-#endif
-    longlongstore(ptr,tmp);
-  return error;
+  ulonglong tmp= TIME_to_ulonglong_datetime(ltime);
+  int8store(ptr,tmp);
 }
 
 bool Field_datetime::send_binary(Protocol *protocol)
 {
   MYSQL_TIME tm;
   Field_datetime::get_date(&tm, TIME_FUZZY_DATE);
-  return protocol->store(&tm);
+  return protocol->store(&tm, 0);
 }
-
-
+  
+  
 double Field_datetime::val_real(void)
 {
   return (double) Field_datetime::val_int();
@@ -6127,12 +5847,7 @@ longlong Field_datetime::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   longlong j;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    j=sint8korr(ptr);
-  else
-#endif
-    longlongget(j,ptr);
+  j=sint8korr(ptr);
   return j;
 }
 
@@ -6140,20 +5855,16 @@ longlong Field_datetime::val_int(void)
 String *Field_datetime::val_str(String *val_buffer,
 				String *val_ptr __attribute__((unused)))
 {
-  ASSERT_COLUMN_MARKED_FOR_READ;
   val_buffer->alloc(field_length);
   val_buffer->length(field_length);
+
+  ASSERT_COLUMN_MARKED_FOR_READ;
   ulonglong tmp;
   long part1,part2;
   char *pos;
   int part3;
 
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-    tmp=sint8korr(ptr);
-  else
-#endif
-    longlongget(tmp,ptr);
+  tmp= Field_datetime::val_int();
 
   /*
     Avoid problem with slow longlong arithmetic and sprintf
@@ -6202,65 +5913,148 @@ bool Field_datetime::get_date(MYSQL_TIME *ltime, uint fuzzydate)
   ltime->day=		(int) (part1%100);
   ltime->month= 	(int) (part1/100%100);
   ltime->year= 		(int) (part1/10000);
-  return (!(fuzzydate & TIME_FUZZY_DATE) && (!ltime->month || !ltime->day)) ? 1 : 0;
-}
-
-bool Field_datetime::get_time(MYSQL_TIME *ltime)
-{
-  return Field_datetime::get_date(ltime,0);
+  if (!tmp)
+    return (fuzzydate & TIME_NO_ZERO_DATE) != 0;
+  if (!ltime->month || !ltime->day)
+    return !(fuzzydate & TIME_FUZZY_DATE);
+  return 0;
 }
 
 int Field_datetime::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
   longlong a,b;
-#ifdef WORDS_BIGENDIAN
-  if (table && table->s->db_low_byte_first)
-  {
-    a=sint8korr(a_ptr);
-    b=sint8korr(b_ptr);
-  }
-  else
-#endif
-  {
-    longlongget(a,a_ptr);
-    longlongget(b,b_ptr);
-  }
+  a=sint8korr(a_ptr);
+  b=sint8korr(b_ptr);
   return ((ulonglong) a < (ulonglong) b) ? -1 :
     ((ulonglong) a > (ulonglong) b) ? 1 : 0;
 }
 
 void Field_datetime::sort_string(uchar *to,uint length __attribute__((unused)))
 {
-#ifdef WORDS_BIGENDIAN
-  if (!table || !table->s->db_low_byte_first)
+  to[0] = ptr[7];
+  to[1] = ptr[6];
+  to[2] = ptr[5];
+  to[3] = ptr[4];
+  to[4] = ptr[3];
+  to[5] = ptr[2];
+  to[6] = ptr[1];
+  to[7] = ptr[0];
+}
+
+
+void Field_datetime::sql_type(String &res) const
+{
+  res.set_ascii(STRING_WITH_LEN("datetime"));
+}
+
+void Field_datetime_hires::store_TIME(MYSQL_TIME *ltime)
+{
+  ulonglong packed= sec_part_shift(pack_time(ltime), dec);
+  store_bigendian(packed, ptr, pack_length());
+}
+
+int Field_datetime_hires::store_decimal(const my_decimal *d)
+{
+  ulonglong nr;
+  ulong sec_part;
+  int error;
+  MYSQL_TIME ltime;
+  longlong tmp;
+  THD *thd= table->in_use;
+  Lazy_string_decimal str(d);
+
+  if (my_decimal2seconds(d, &nr, &sec_part))
   {
-    to[0] = ptr[0];
-    to[1] = ptr[1];
-    to[2] = ptr[2];
-    to[3] = ptr[3];
-    to[4] = ptr[4];
-    to[5] = ptr[5];
-    to[6] = ptr[6];
-    to[7] = ptr[7];
+    tmp= -1;
+    error= 2;
   }
   else
-#endif
-  {
-    to[0] = ptr[7];
-    to[1] = ptr[6];
-    to[2] = ptr[5];
-    to[3] = ptr[4];
-    to[4] = ptr[3];
-    to[5] = ptr[2];
-    to[6] = ptr[1];
-    to[7] = ptr[0];
-  }
+    tmp= number_to_datetime(nr, sec_part, &ltime, (TIME_FUZZY_DATE |
+                                          (thd->variables.sql_mode &
+                                           (MODE_NO_ZERO_IN_DATE |
+                                            MODE_NO_ZERO_DATE |
+                                            MODE_INVALID_DATES))), &error);
+
+  return store_TIME_with_warning(&ltime, &str, error, tmp != -1);
+}
+
+bool Field_datetime_hires::send_binary(Protocol *protocol)
+{
+  MYSQL_TIME ltime;
+  Field_datetime_hires::get_date(&ltime, TIME_FUZZY_DATE);
+  return protocol->store(&ltime, dec);
 }
 
 
-void Field_datetime::sql_type(String &res) const
+double Field_datetime_hires::val_real(void)
 {
-  res.set_ascii(STRING_WITH_LEN("datetime"));
+  MYSQL_TIME ltime;
+  Field_datetime_hires::get_date(&ltime, TIME_FUZZY_DATE);
+  return TIME_to_double(&ltime);
+}
+
+longlong Field_datetime_hires::val_int(void)
+{
+  MYSQL_TIME ltime;
+  Field_datetime_hires::get_date(&ltime, TIME_FUZZY_DATE);
+  return TIME_to_ulonglong_datetime(&ltime);
+}
+
+
+String *Field_datetime_hires::val_str(String *str,
+                                      String *unused __attribute__((unused)))
+{
+  MYSQL_TIME ltime;
+  Field_datetime_hires::get_date(&ltime, TIME_FUZZY_DATE);
+  str->alloc(field_length+1);
+  str->length(field_length);
+  my_datetime_to_str(&ltime, (char*) str->ptr(), dec);
+  str->set_charset(&my_charset_bin);
+  return str;
+}
+
+bool Field_datetime_hires::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  ulonglong packed= read_bigendian(ptr, pack_length());
+  unpack_time(sec_part_unshift(packed, dec), ltime);
+  if (!packed)
+    return fuzzydate & TIME_NO_ZERO_DATE;
+  if (!ltime->month || !ltime->day)
+    return !(fuzzydate & TIME_FUZZY_DATE);
+  return 0;
+}
+
+uint32 Field_datetime_hires::pack_length() const
+{
+  return datetime_hires_bytes[dec];
+}
+
+int Field_datetime_hires::cmp(const uchar *a_ptr, const uchar *b_ptr)
+{
+  ulonglong a=read_bigendian(a_ptr, pack_length());
+  ulonglong b=read_bigendian(b_ptr, pack_length());
+  return a < b ? -1 : a > b ? 1 : 0;
+}
+
+void Field_datetime_hires::sort_string(uchar *to,
+                                       uint length __attribute__((unused)))
+{
+  DBUG_ASSERT(length == pack_length());
+  memcpy(to, ptr, length);
+}
+
+
+void Field_datetime_hires::sql_type(String &res) const
+{
+  CHARSET_INFO *cs=res.charset();
+  res.length(cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(),
+                                "datetime(%u)", dec));
+}
+
+void Field_datetime_hires::make_field(Send_field *field)
+{
+  Field::make_field(field);
+  field->decimals= dec;
 }
 
 /****************************************************************************
@@ -6718,9 +6512,7 @@ void Field_string::sql_type(String &res) const
 }
 
 
-uchar *Field_string::pack(uchar *to, const uchar *from,
-                          uint max_length,
-                          bool low_byte_first __attribute__((unused)))
+uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint length=      min(field_length,max_length);
   uint local_char_length= max_length/field_charset->mbmaxlen;
@@ -6762,10 +6554,8 @@ uchar *Field_string::pack(uchar *to, const uchar *from,
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_string::unpack(uchar *to,
-                     const uchar *from,
-                     uint param_data,
-                     bool low_byte_first __attribute__((unused)))
+Field_string::unpack(uchar *to, const uchar *from, const uchar *from_end,
+                     uint param_data)
 {
   uint from_length, length;
 
@@ -6787,11 +6577,19 @@ Field_string::unpack(uchar *to,
    */
   if (from_length > 255)
   {
+    if (from + 2 > from_end)
+      return 0;
     length= uint2korr(from);
     from+= 2;
   }
   else
+  {
+    if (from + 1 > from_end)
+      return 0;
     length= (uint) *from++;
+  }
+  if (from + length > from_end || length > field_length)
+    return 0;
 
   memcpy(to, from, length);
   // Pad the string with the pad character of the fields charset
@@ -7227,9 +7025,7 @@ uint32 Field_varstring::data_length()
   Here the number of length bytes are depending on the given max_length
 */
 
-uchar *Field_varstring::pack(uchar *to, const uchar *from,
-                             uint max_length,
-                             bool low_byte_first __attribute__((unused)))
+uchar *Field_varstring::pack(uchar *to, const uchar *from, uint max_length)
 {
   uint length= length_bytes == 1 ? (uint) *from : uint2korr(from);
   set_if_smaller(max_length, field_length);
@@ -7249,8 +7045,7 @@ uchar *Field_varstring::pack(uchar *to, const uchar *from,
 
 
 uchar *
-Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length,
-                          bool low_byte_first __attribute__((unused)))
+Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length)
 {
   uint length=  length_bytes == 1 ? (uint) *key : uint2korr(key);
   uint local_char_length= ((field_charset->mbmaxlen > 1) ?
@@ -7286,9 +7081,9 @@ Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length,
     Pointer to end of 'key' (To the next key part if multi-segment key)
 */
 
+#ifdef NOT_USED
 const uchar *
-Field_varstring::unpack_key(uchar *to, const uchar *key, uint max_length,
-                            bool low_byte_first __attribute__((unused)))
+Field_varstring::unpack_key(uchar *to, const uchar *key, uint max_length)
 {
   /* get length of the blob key */
   uint32 length= *key++;
@@ -7303,6 +7098,7 @@ Field_varstring::unpack_key(uchar *to, const uchar *key, uint max_length,
   memcpy(ptr + length_bytes, key, length);
   return key + length;
 }
+#endif
 
 /**
   Create a packed key that will be used for storage in the index tree.
@@ -7315,9 +7111,8 @@ Field_varstring::unpack_key(uchar *to, const uchar *key, uint max_length,
     end of key storage
 */
 
-uchar *
-Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, uint max_length,
-                                         bool low_byte_first __attribute__((unused)))
+uchar * Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from,
+                                                 uint max_length)
 {
   /* Key length is always stored as 2 bytes */
   uint length= uint2korr(from);
@@ -7348,13 +7143,16 @@ Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, uint max_
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_varstring::unpack(uchar *to, const uchar *from,
-                        uint param_data,
-                        bool low_byte_first __attribute__((unused)))
+Field_varstring::unpack(uchar *to, const uchar *from, const uchar *from_end,
+                        uint param_data)
 {
   uint length;
   uint l_bytes= (param_data && (param_data < field_length)) ? 
                 (param_data <= 255) ? 1 : 2 : length_bytes;
+
+  if (from + l_bytes > from_end)
+    return 0;                                 // Error in data
+
   if (l_bytes == 1)
   {
     to[0]= *from++;
@@ -7369,7 +7167,11 @@ Field_varstring::unpack(uchar *to, const uchar *from,
     to[1]= *from++;
   }
   if (length)
+  {
+    if (from + length > from_end || length > field_length)
+      return 0;                                 // Error in data
     memcpy(to+ length_bytes, from, length);
+  }
   return from+length;
 }
 
@@ -7576,103 +7378,15 @@ Field_blob::Field_blob(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
 }
 
 
-void Field_blob::store_length(uchar *i_ptr, 
-                              uint i_packlength, 
-                              uint32 i_number, 
-                              bool low_byte_first)
-{
-  switch (i_packlength) {
-  case 1:
-    i_ptr[0]= (uchar) i_number;
-    break;
-  case 2:
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-    {
-      int2store(i_ptr,(unsigned short) i_number);
-    }
-    else
-#endif
-      shortstore(i_ptr,(unsigned short) i_number);
-    break;
-  case 3:
-    int3store(i_ptr,i_number);
-    break;
-  case 4:
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-    {
-      int4store(i_ptr,i_number);
-    }
-    else
-#endif
-      longstore(i_ptr,i_number);
-  }
-}
-
-
-uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg, bool low_byte_first)
+void Field_blob::store_length(uchar *i_ptr, uint i_packlength, uint32 i_number)
 {
-  switch (packlength_arg) {
-  case 1:
-    return (uint32) pos[0];
-  case 2:
-    {
-      uint16 tmp;
-#ifdef WORDS_BIGENDIAN
-      if (low_byte_first)
-	tmp=sint2korr(pos);
-      else
-#endif
-	shortget(tmp,pos);
-      return (uint32) tmp;
-    }
-  case 3:
-    return (uint32) uint3korr(pos);
-  case 4:
-    {
-      uint32 tmp;
-#ifdef WORDS_BIGENDIAN
-      if (low_byte_first)
-	tmp=uint4korr(pos);
-      else
-#endif
-	longget(tmp,pos);
-      return (uint32) tmp;
-    }
-  }
-  /* When expanding this, see also MAX_FIELD_BLOBLENGTH. */
-  return 0;					// Impossible
+  store_lowendian(i_number, i_ptr, i_packlength);
 }
 
 
-/**
-  Put a blob length field into a record buffer.
-
-  Depending on the maximum length of a blob, its length field is
-  put into 1 to 4 bytes. This is a property of the blob object,
-  described by 'packlength'.
-
-  @param pos                 Pointer into the record buffer.
-  @param length              The length value to put.
-*/
-
-void Field_blob::put_length(uchar *pos, uint32 length)
+uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg)
 {
-  switch (packlength) {
-  case 1:
-    *pos= (char) length;
-    break;
-  case 2:
-    int2store(pos, length);
-    break;
-  case 3:
-    int3store(pos, length);
-    break;
-  case 4:
-    int4store(pos, length);
-    break;
-  }
+  return (uint32)read_lowendian(pos, packlength_arg);
 }
 
 
@@ -8011,20 +7725,7 @@ void Field_blob::sort_string(uchar *to,uint length)
       length-= packlength;
       pos= to+length;
 
-      switch (packlength) {
-      case 1:
-        *pos= (char) blob_length;
-        break;
-      case 2:
-        mi_int2store(pos, blob_length);
-        break;
-      case 3:
-        mi_int3store(pos, blob_length);
-        break;
-      case 4:
-        mi_int4store(pos, blob_length);
-        break;
-      }
+      store_bigendian(blob_length, pos, packlength);
     }
     memcpy_fixed(&blob,ptr+packlength,sizeof(char*));
     
@@ -8054,14 +7755,11 @@ void Field_blob::sql_type(String &res) const
   }
 }
 
-uchar *Field_blob::pack(uchar *to, const uchar *from,
-                        uint max_length, bool low_byte_first)
+uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length)
 {
   DBUG_ENTER("Field_blob::pack");
-  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx;"
-                       " max_length: %u; low_byte_first: %d",
-                       (ulong) to, (ulong) from,
-                       max_length, low_byte_first));
+  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx; max_length: %u",
+                       (ulong) to, (ulong) from, max_length));
   DBUG_DUMP("record", from, table->s->reclength);
   uchar *save= ptr;
   ptr= (uchar*) from;
@@ -8072,7 +7770,7 @@ uchar *Field_blob::pack(uchar *to, const uchar *from,
     length given is smaller than the actual length of the blob, we
     just store the initial bytes of the blob.
   */
-  store_length(to, packlength, min(length, max_length), low_byte_first);
+  store_length(to, packlength, min(length, max_length));
 
   /*
     Store the actual blob data, which will occupy 'length' bytes.
@@ -8105,20 +7803,21 @@ uchar *Field_blob::pack(uchar *to, const uchar *from,
 
    @return  New pointer into memory based on from + length of the data
 */
-const uchar *Field_blob::unpack(uchar *to, 
-                                const uchar *from,
-                                uint param_data,
-                                bool low_byte_first)
+const uchar *Field_blob::unpack(uchar *to, const uchar *from,
+                                const uchar *from_end, uint param_data)
 {
   DBUG_ENTER("Field_blob::unpack");
-  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx;"
-                       " param_data: %u; low_byte_first: %d",
-                       (ulong) to, (ulong) from, param_data, low_byte_first));
+  DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx; param_data: %u",
+                       (ulong) to, (ulong) from, param_data));
   uint const master_packlength=
     param_data > 0 ? param_data & 0xFF : packlength;
-  uint32 const length= get_length(from, master_packlength, low_byte_first);
+  if (from + master_packlength > from_end)
+    DBUG_RETURN(0);                             // Error in data
+  uint32 const length= get_length(from, master_packlength);
   DBUG_DUMP("packed", from, length + master_packlength);
   bitmap_set_bit(table->write_set, field_index);
+  if (from + master_packlength + length > from_end)
+    DBUG_RETURN(0);
   store(reinterpret_cast<const char*>(from) + master_packlength,
         length, field_charset);
   DBUG_DUMP("record", to, table->s->reclength);
@@ -8173,8 +7872,7 @@ int Field_blob::pack_cmp(const uchar *b, uint key_length_arg,
 /** Create a packed key that will be used for storage from a MySQL row. */
 
 uchar *
-Field_blob::pack_key(uchar *to, const uchar *from, uint max_length,
-                     bool low_byte_first __attribute__((unused)))
+Field_blob::pack_key(uchar *to, const uchar *from, uint max_length)
 {
   uchar *save= ptr;
   ptr= (uchar*) from;
@@ -8217,9 +7915,9 @@ Field_blob::pack_key(uchar *to, const uchar *from, uint max_length,
     Pointer into 'from' past the last byte copied from packed key.
 */
 
+#ifdef NOT_USED
 const uchar *
-Field_blob::unpack_key(uchar *to, const uchar *from, uint max_length,
-                       bool low_byte_first __attribute__((unused)))
+Field_blob::unpack_key(uchar *to, const uchar *from, uint max_length)
 {
   /* get length of the blob key */
   uint32 length= *from++;
@@ -8227,7 +7925,7 @@ Field_blob::unpack_key(uchar *to, const uchar *from, uint max_length,
     length+= *from++ << 8;
 
   /* put the length into the record buffer */
-  put_length(to, length);
+  store_length(to, packlength, length);
 
   /* put the address of the blob buffer or NULL */
   if (length)
@@ -8238,13 +7936,12 @@ Field_blob::unpack_key(uchar *to, const uchar *from, uint max_length,
   /* point to first byte of next field in 'from' */
   return from + length;
 }
-
+#endif
 
 /** Create a packed key that will be used for storage from a MySQL key. */
 
-uchar *
-Field_blob::pack_key_from_key_image(uchar *to, const uchar *from, uint max_length,
-                                    bool low_byte_first __attribute__((unused)))
+uchar *Field_blob::pack_key_from_key_image(uchar *to, const uchar *from,
+                                           uint max_length)
 {
   uint length=uint2korr(from);
   if (length > max_length)
@@ -8351,7 +8048,7 @@ int Field_geom::store(const char *from, uint length, CHARSET_INFO *cs)
       goto err;
     // Check given WKB
     uint32 wkb_type;
-    if (length < SRID_SIZE + WKB_HEADER_SIZE + SIZEOF_STORED_DOUBLE*2)
+    if (length < SRID_SIZE + WKB_HEADER_SIZE + 4)
       goto err;
     wkb_type= uint4korr(from + SRID_SIZE + 1);
     if (wkb_type < (uint32) Geometry::wkb_point ||
@@ -8395,39 +8092,7 @@ enum ha_base_keytype Field_enum::key_type() const
 
 void Field_enum::store_type(ulonglong value)
 {
-  switch (packlength) {
-  case 1: ptr[0]= (uchar) value;  break;
-  case 2:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int2store(ptr,(unsigned short) value);
-  }
-  else
-#endif
-    shortstore(ptr,(unsigned short) value);
-  break;
-  case 3: int3store(ptr,(long) value); break;
-  case 4:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int4store(ptr,value);
-  }
-  else
-#endif
-    longstore(ptr,(long) value);
-  break;
-  case 8:
-#ifdef WORDS_BIGENDIAN
-  if (table->s->db_low_byte_first)
-  {
-    int8store(ptr,value);
-  }
-  else
-#endif
-    longlongstore(ptr,value); break;
-  }
+  store_lowendian(value, ptr, packlength);
 }
 
 
@@ -8513,46 +8178,7 @@ double Field_enum::val_real(void)
 longlong Field_enum::val_int(void)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
-  switch (packlength) {
-  case 1:
-    return (longlong) ptr[0];
-  case 2:
-  {
-    uint16 tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=sint2korr(ptr);
-    else
-#endif
-      shortget(tmp,ptr);
-    return (longlong) tmp;
-  }
-  case 3:
-    return (longlong) uint3korr(ptr);
-  case 4:
-  {
-    uint32 tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=uint4korr(ptr);
-    else
-#endif
-      longget(tmp,ptr);
-    return (longlong) tmp;
-  }
-  case 8:
-  {
-    longlong tmp;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      tmp=sint8korr(ptr);
-    else
-#endif
-      longlongget(tmp,ptr);
-    return tmp;
-  }
-  }
-  return 0;					// impossible
+  return read_lowendian(ptr, packlength);
 }
 
 
@@ -9305,8 +8931,7 @@ void Field_bit::sql_type(String &res) const
 
 
 uchar *
-Field_bit::pack(uchar *to, const uchar *from, uint max_length,
-                bool low_byte_first __attribute__((unused)))
+Field_bit::pack(uchar *to, const uchar *from, uint max_length)
 {
   DBUG_ASSERT(max_length > 0);
   uint length;
@@ -9353,8 +8978,8 @@ Field_bit::pack(uchar *to, const uchar *from, uint max_length,
    @return  New pointer into memory based on from + length of the data
 */
 const uchar *
-Field_bit::unpack(uchar *to, const uchar *from, uint param_data,
-                  bool low_byte_first __attribute__((unused)))
+Field_bit::unpack(uchar *to, const uchar *from, const uchar *from_end,
+                  uint param_data)
 {
   uint const from_len= (param_data >> 8U) & 0x00ff;
   uint const from_bit_len= param_data & 0x00ff;
@@ -9365,6 +8990,9 @@ Field_bit::unpack(uchar *to, const uchar *from, uint param_data,
   if (param_data == 0 ||
       ((from_bit_len == bit_len) && (from_len == bytes_in_rec)))
   {
+    if (from + bytes_in_rec + test(bit_len) > from_end)
+      return 0;                                 // Error in data
+
     if (bit_len > 0)
     {
       /*
@@ -9389,10 +9017,16 @@ Field_bit::unpack(uchar *to, const uchar *from, uint param_data,
     Lastly the odd bits need to be masked out if the bytes_in_rec > 0.
     Otherwise stray bits can cause spurious values.
   */
+
+  uint len= from_len + ((from_bit_len > 0) ? 1 : 0);
   uint new_len= (field_length + 7) / 8;
+
+  if (from + len > from_end || new_len < len)
+    return 0;                                 // Error in data
+
   char *value= (char *)my_alloca(new_len);
   bzero(value, new_len);
-  uint len= from_len + ((from_bit_len > 0) ? 1 : 0);
+
   memcpy(value + (new_len - len), from, len);
   /*
     Mask out the unused bits in the partial byte. 
@@ -9811,28 +9445,14 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
     break;
   case MYSQL_TYPE_TIMESTAMP:
-    if (fld_length == NULL)
+    if (length > MAX_DATETIME_PRECISION)
     {
-      length= MAX_DATETIME_WIDTH;
-    }
-    else if (length != MAX_DATETIME_WIDTH)
-    {
-      /*
-        We support only even TIMESTAMP lengths less or equal than 14
-        and 19 as length of 4.1 compatible representation.  Silently 
-        shrink it to MAX_DATETIME_COMPRESSED_WIDTH.
-      */
-      DBUG_ASSERT(MAX_DATETIME_COMPRESSED_WIDTH < UINT_MAX);
-      if (length != UINT_MAX)  /* avoid overflow; is safe because of min() */
-        length= ((length+1)/2)*2;
-      length= min(length, MAX_DATETIME_COMPRESSED_WIDTH);
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
     }
-    flags|= ZEROFILL_FLAG | UNSIGNED_FLAG;
-    /*
-      Since we silently rewrite down to MAX_DATETIME_COMPRESSED_WIDTH bytes,
-      the parser should not raise errors unless bizzarely large. 
-     */
-    max_field_charlength= UINT_MAX;
+    length+= MAX_DATETIME_WIDTH + (length ? 1 : 0);
+    flags|= UNSIGNED_FLAG;
 
     if (fld_default_value)
     {
@@ -9880,10 +9500,22 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     length= MAX_DATE_WIDTH;
     break;
   case MYSQL_TYPE_TIME:
-    length= 10;
+    if (length > MAX_DATETIME_PRECISION)
+    {
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
+    }
+    length+= MIN_TIME_WIDTH + (length ? 1 : 0);
     break;
   case MYSQL_TYPE_DATETIME:
-    length= MAX_DATETIME_WIDTH;
+    if (length > MAX_DATETIME_PRECISION)
+    {
+      my_error(ER_TOO_BIG_PRECISION, MYF(0), length, fld_name,
+               MAX_DATETIME_PRECISION);
+      DBUG_RETURN(TRUE);
+    }
+    length+= MAX_DATETIME_WIDTH + (length ? 1 : 0);
     break;
   case MYSQL_TYPE_SET:
     {
@@ -10003,14 +9635,22 @@ uint32 calc_pack_length(enum_field_types type,uint32 length)
   case MYSQL_TYPE_TINY	: return 1;
   case MYSQL_TYPE_SHORT : return 2;
   case MYSQL_TYPE_INT24:
-  case MYSQL_TYPE_NEWDATE:
-  case MYSQL_TYPE_TIME:   return 3;
+  case MYSQL_TYPE_NEWDATE: return 3;
+  case MYSQL_TYPE_TIME:   return length > MIN_TIME_WIDTH
+                            ? time_hires_bytes[length - 1 - MIN_TIME_WIDTH]
+                            : 3;
   case MYSQL_TYPE_TIMESTAMP:
+                          return length > MAX_DATETIME_WIDTH
+                            ? 4 + sec_part_bytes[length - 1 - MAX_DATETIME_WIDTH]
+                            : 4;
   case MYSQL_TYPE_DATE:
   case MYSQL_TYPE_LONG	: return 4;
   case MYSQL_TYPE_FLOAT : return sizeof(float);
   case MYSQL_TYPE_DOUBLE: return sizeof(double);
   case MYSQL_TYPE_DATETIME:
+                          return length > MAX_DATETIME_WIDTH
+                            ? datetime_hires_bytes[length - 1 - MAX_DATETIME_WIDTH]
+                            : 8;
   case MYSQL_TYPE_LONGLONG: return 8;	/* Don't crash if no longlong */
   case MYSQL_TYPE_NULL	: return 0;
   case MYSQL_TYPE_TINY_BLOB:	return 1+portable_sizeof_char_ptr;
@@ -10182,9 +9822,12 @@ Field *make_field(TABLE_SHARE *share, uchar *ptr, uint32 field_length,
 			      f_is_zerofill(pack_flag) != 0,
 			      f_is_dec(pack_flag) == 0);
   case MYSQL_TYPE_TIMESTAMP:
-    return new Field_timestamp(ptr,field_length, null_pos, null_bit,
-                               unireg_check, field_name, share,
-                               field_charset);
+  {
+    uint dec= field_length > MAX_DATETIME_WIDTH ?
+                       field_length - MAX_DATETIME_WIDTH - 1: 0;
+    return new_Field_timestamp(ptr, null_pos, null_bit, unireg_check,
+                               field_name, share, dec, field_charset);
+  }
   case MYSQL_TYPE_YEAR:
     return new Field_year(ptr,field_length,null_pos,null_bit,
 			  unireg_check, field_name);
@@ -10195,11 +9838,19 @@ Field *make_field(TABLE_SHARE *share, uchar *ptr, uint32 field_length,
     return new Field_newdate(ptr,null_pos,null_bit,
 			     unireg_check, field_name, field_charset);
   case MYSQL_TYPE_TIME:
-    return new Field_time(ptr,null_pos,null_bit,
-			  unireg_check, field_name, field_charset);
+  {
+    uint dec= field_length > MIN_TIME_WIDTH ?
+                       field_length - MIN_TIME_WIDTH - 1: 0;
+    return new_Field_time(ptr, null_pos, null_bit, unireg_check,
+                              field_name, dec, field_charset);
+  }
   case MYSQL_TYPE_DATETIME:
-    return new Field_datetime(ptr,null_pos,null_bit,
-			      unireg_check, field_name, field_charset);
+  {
+    uint dec= field_length > MAX_DATETIME_WIDTH ?
+                       field_length - MAX_DATETIME_WIDTH - 1: 0;
+    return new_Field_datetime(ptr, null_pos, null_bit, unireg_check,
+                              field_name, dec, field_charset);
+  }
   case MYSQL_TYPE_NULL:
     return new Field_null(ptr, field_length, unireg_check, field_name,
                           field_charset);
@@ -10372,7 +10023,7 @@ uint32 Field_blob::max_display_length()
     0 otherwise
 */
 
-bool 
+void 
 Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
                    int cuted_increment)
 {
@@ -10386,9 +10037,7 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
     thd->cuted_fields+= cuted_increment;
     push_warning_printf(thd, level, code, ER(code), field_name,
                         thd->row_count);
-    return 0;
   }
-  return level >= MYSQL_ERROR::WARN_LEVEL_WARN;
 }
 
 
@@ -10398,7 +10047,6 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
   @param level            level of message (Note/Warning/Error)
   @param code             error code of message to be produced
   @param str              string value which we tried to save
-  @param str_length       length of string which we tried to save
   @param ts_type          type of datetime value (datetime/date/time)
   @param cuted_increment  whenever we should increase cut fields count or not
 
@@ -10406,80 +10054,42 @@ Field::set_warning(MYSQL_ERROR::enum_warning_level level, uint code,
     This function will always produce some warning but won't increase cut
     fields counter if count_cuted_fields ==FIELD_CHECK_IGNORE for current
     thread.
-*/
-
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            const char *str, uint str_length, 
-                            timestamp_type ts_type, int cuted_increment)
-{
-  THD *thd= table ? table->in_use : current_thd;
-  if ((thd->really_abort_on_warning() &&
-       level >= MYSQL_ERROR::WARN_LEVEL_WARN) ||
-      set_warning(level, code, cuted_increment))
-    make_truncated_value_warning(thd, level, str, str_length, ts_type,
-                                 field_name);
-}
-
-
-/**
-  Produce warning or note about integer datetime value saved into field.
 
-  @param level            level of message (Note/Warning/Error)
-  @param code             error code of message to be produced
-  @param nr               numeric value which we tried to save
-  @param ts_type          type of datetime value (datetime/date/time)
-  @param cuted_increment  whenever we should increase cut fields count or not
-
-  @note
-    This function will always produce some warning but won't increase cut
-    fields counter if count_cuted_fields == FIELD_CHECK_IGNORE for current
-    thread.
+    See also bug#2336
 */
 
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            longlong nr, timestamp_type ts_type,
-                            int cuted_increment)
+
+void Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level,
+                                 uint code, const Lazy_string *str,
+                                 timestamp_type ts_type, int cuted_increment)
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (thd->really_abort_on_warning() ||
-      set_warning(level, code, cuted_increment))
-  {
-    char str_nr[22];
-    char *str_end= longlong10_to_str(nr, str_nr, -10);
-    make_truncated_value_warning(thd, level, str_nr, (uint) (str_end - str_nr), 
-                                 ts_type, field_name);
-  }
+  THD *thd= table->in_use;
+  if (thd->really_abort_on_warning() && level >= MYSQL_ERROR::WARN_LEVEL_WARN)
+    make_truncated_value_warning(thd, level, str, ts_type, field_name);
+  else
+    set_warning(level, code, cuted_increment);
 }
 
 
-/**
-  Produce warning or note about double datetime data saved into field.
-
-  @param level            level of message (Note/Warning/Error)
-  @param code             error code of message to be produced
-  @param nr               double value which we tried to save
-  @param ts_type          type of datetime value (datetime/date/time)
-
-  @note
-    This function will always produce some warning but won't increase cut
-    fields counter if count_cuted_fields == FIELD_CHECK_IGNORE for current
-    thread.
+/*
+  @brief
+  Return possible keys for a field
+
+  @details
+  Return bit map of keys over this field which can be used by the range
+  optimizer. For a field of a generic table such keys are all keys that starts
+  from this field. For a field of a materialized derived table/view such keys
+  are all keys in which this field takes a part. This is less restrictive as
+  keys for a materialized derived table/view are generated on the fly from
+  present fields, thus the case when a field for the beginning of a key is
+  absent is impossible.
+
+  @return map of possible keys
 */
 
-void 
-Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, 
-                            double nr, timestamp_type ts_type)
+key_map Field::get_possible_keys()
 {
-  THD *thd= table ? table->in_use : current_thd;
-  if (thd->really_abort_on_warning() ||
-      set_warning(level, code, 1))
-  {
-    /* DBL_DIG is enough to print '-[digits].E+###' */
-    char str_nr[DBL_DIG + 8];
-    uint str_len= my_sprintf(str_nr, (str_nr, "%g", nr));
-    make_truncated_value_warning(thd, level, str_nr, str_len, ts_type,
-                                 field_name);
-  }
+  DBUG_ASSERT(table->pos_in_table_list);
+  return (table->pos_in_table_list->is_materialized_derived() ?
+          part_of_key : key_start);
 }
diff --git a/sql/field.h b/sql/field.h
index 3cfcf308282..d83980c2a29 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -27,7 +27,6 @@
 #endif
 
 #define NOT_FIXED_DEC			31
-#define DATETIME_DEC                     6
 const uint32 max_field_size= (uint32) 4294967295U;
 
 class Send_field;
@@ -38,6 +37,9 @@ struct ha_field_option_struct;
 
 struct st_cache_field;
 int field_conv(Field *to,Field *from);
+int truncate_double(double *nr, uint field_length, uint dec,
+                    bool unsigned_flag, double max_value);
+longlong double_to_longlong(double nr, bool unsigned_flag, bool *error);
 
 inline uint get_enum_pack_length(int elements)
 {
@@ -203,7 +205,9 @@ public:
   virtual int  store(double nr)=0;
   virtual int  store(longlong nr, bool unsigned_val)=0;
   virtual int  store_decimal(const my_decimal *d)=0;
-  virtual int store_time(MYSQL_TIME *ltime, timestamp_type t_type);
+  virtual int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  int store_time(MYSQL_TIME *ltime)
+  { return store_time_dec(ltime, TIME_SECOND_PART_DIGITS); }
   int store(const char *to, uint length, CHARSET_INFO *cs,
             enum_check_fields check_level);
   virtual double val_real(void)=0;
@@ -231,7 +235,6 @@ public:
   virtual bool str_needs_quotes() { return FALSE; }
   virtual Item_result result_type () const=0;
   virtual Item_result cmp_type () const { return result_type(); }
-  virtual Item_result cast_to_int_type () const { return result_type(); }
   static bool type_can_have_key_part(enum_field_types);
   static enum_field_types field_type_merge(enum_field_types, enum_field_types);
   static Item_result result_merge_type(enum_field_types);
@@ -386,14 +389,6 @@ public:
   virtual void make_field(Send_field *);
   virtual void sort_string(uchar *buff,uint length)=0;
   virtual bool optimize_range(uint idx, uint part);
-  /*
-    This should be true for fields which, when compared with constant
-    items, can be casted to longlong. In this case we will at 'fix_fields'
-    stage cast the constant items to longlongs and at the execution stage
-    use field->val_int() for comparison.  Used to optimize clauses like
-    'a_column BETWEEN date_const, date_const'.
-  */
-  virtual bool can_be_compared_as_longlong() const { return FALSE; }
   virtual void free() {}
   virtual Field *new_field(MEM_ROOT *root, struct st_table *new_table,
                            bool keep_type);
@@ -477,45 +472,33 @@ public:
   }
   virtual bool send_binary(Protocol *protocol);
 
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
   /**
      @overload Field::pack(uchar*, const uchar*, uint, bool)
   */
   uchar *pack(uchar *to, const uchar *from)
   {
     DBUG_ENTER("Field::pack");
-    uchar *result= this->pack(to, from, UINT_MAX, table->s->db_low_byte_first);
+    uchar *result= this->pack(to, from, UINT_MAX);
     DBUG_RETURN(result);
   }
 
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
-  /**
-     @overload Field::unpack(uchar*, const uchar*, uint, bool)
-  */
-  const uchar *unpack(uchar* to, const uchar *from)
-  {
-    DBUG_ENTER("Field::unpack");
-    const uchar *result= unpack(to, from, 0U, table->s->db_low_byte_first);
-    DBUG_RETURN(result);
-  }
-
-  virtual uchar *pack_key(uchar* to, const uchar *from,
-                          uint max_length, bool low_byte_first)
+                              const uchar *from_end, uint param_data=0);
+  virtual uchar *pack_key(uchar* to, const uchar *from, uint max_length)
   {
-    return pack(to, from, max_length, low_byte_first);
+    return pack(to, from, max_length);
   }
-  virtual uchar *pack_key_from_key_image(uchar* to, const uchar *from,
-					uint max_length, bool low_byte_first)
+  virtual uchar *pack_key_from_key_image(uchar* to, const uchar *from, uint max_length)
   {
-    return pack(to, from, max_length, low_byte_first);
+    return pack(to, from, max_length);
   }
-  virtual const uchar *unpack_key(uchar* to, const uchar *from,
-                                  uint max_length, bool low_byte_first)
+#ifdef NOT_USED
+  virtual const uchar *unpack_key(uchar* to, const uchar *from, uint max_length)
   {
-    return unpack(to, from, max_length, low_byte_first);
+    return unpack(to, from, from + max_length+2, max_length);
   }
+#endif
   virtual uint packed_col_length(const uchar *to, uint length)
   { return length;}
   virtual uint max_packed_col_length(uint max_length)
@@ -534,7 +517,7 @@ public:
   void copy_from_tmp(int offset);
   uint fill_cache_field(struct st_cache_field *copy);
   virtual bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  virtual bool get_time(MYSQL_TIME *ltime);
+  bool get_time(MYSQL_TIME *ltime) { return get_date(ltime, TIME_TIME_ONLY); }
   virtual CHARSET_INFO *charset(void) const { return &my_charset_bin; }
   virtual CHARSET_INFO *sort_charset(void) const { return charset(); }
   virtual bool has_charset(void) const { return FALSE; }
@@ -542,16 +525,12 @@ public:
   virtual enum Derivation derivation(void) const
   { return DERIVATION_IMPLICIT; }
   virtual void set_derivation(enum Derivation derivation_arg) { }
-  bool set_warning(MYSQL_ERROR::enum_warning_level, unsigned int code,
+  virtual int set_time() { return 1; }
+  void set_warning(MYSQL_ERROR::enum_warning_level, unsigned int code,
                    int cuted_increment);
   void set_datetime_warning(MYSQL_ERROR::enum_warning_level, uint code, 
-                            const char *str, uint str_len,
-                            timestamp_type ts_type, int cuted_increment);
-  void set_datetime_warning(MYSQL_ERROR::enum_warning_level, uint code, 
-                            longlong nr, timestamp_type ts_type,
+                            const Lazy_string *str, timestamp_type ts_type,
                             int cuted_increment);
-  void set_datetime_warning(MYSQL_ERROR::enum_warning_level, const uint code, 
-                            double nr, timestamp_type ts_type);
   inline bool check_overflow(int op_result)
   {
     return (op_result == E_DEC_OVERFLOW);
@@ -593,8 +572,16 @@ public:
     DBUG_ASSERT(0);
     return GEOM_GEOMETRY;
   }
+
+  key_map get_possible_keys();
+
   /* Hash value */
   virtual void hash(ulong *nr, ulong *nr2);
+
+  /* Check whether the field can be used as a join attribute in hash join */
+  virtual bool hash_join_is_possible() { return TRUE; }
+  virtual bool eq_cmp_as_binary() { return TRUE; }
+
   friend bool reopen_table(THD *,struct st_table *,bool);
   friend int cre_myisam(char * name, register TABLE *form, uint options,
 			ulonglong auto_increment_value);
@@ -639,75 +626,26 @@ private:
   { return 0; }
 
 protected:
-  /*
-    Helper function to pack()/unpack() int32 values
-  */
-  static void handle_int32(uchar *to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
+  uchar *pack_int(uchar *to, const uchar *from, size_t size)
   {
-    int32 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint4korr(from);
-    else
-#endif
-      longget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int4store(to, val);
-    else
-#endif
-      longstore(to, val);
+    memcpy(to, from, size);
+    return to + size;
   }
 
-  /*
-    Helper function to pack()/unpack() int64 values
-  */
-  static void handle_int64(uchar* to, const uchar *from,
-                           bool low_byte_first_from, bool low_byte_first_to)
+  const uchar *unpack_int(uchar* to, const uchar *from, size_t size)
   {
-    int64 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_from)
-      val = sint8korr(from);
-    else
-#endif
-      longlongget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first_to)
-      int8store(to, val);
-    else
-#endif
-      longlongstore(to, val);
-  }
-
-  uchar *pack_int32(uchar *to, const uchar *from, bool low_byte_first_to)
-  {
-    handle_int32(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to  + sizeof(int32);
-  }
-
-  const uchar *unpack_int32(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
-  {
-    handle_int32(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + sizeof(int32);
+    memcpy(to, from, size);
+    return from + size;
   }
 
-  uchar *pack_int64(uchar* to, const uchar *from, bool low_byte_first_to)
-  {
-    handle_int64(to, from, table->s->db_low_byte_first, low_byte_first_to);
-    return to + sizeof(int64);
-  }
-
-  const uchar *unpack_int64(uchar* to, const uchar *from,
-                            bool low_byte_first_from)
-  {
-    handle_int64(to, from, low_byte_first_from, table->s->db_low_byte_first);
-    return from + sizeof(int64);
-  }
+  uchar *pack_int32(uchar *to, const uchar *from)
+  { return pack_int(to, from, 4); }
+  const uchar *unpack_int32(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 4); }
+  uchar *pack_int64(uchar* to, const uchar *from)
+  { return pack_int(to, from, 8); }
+  const uchar *unpack_int64(uchar* to, const uchar *from)
+  { return unpack_int(to, from, 8); }
 
   bool field_flags_are_binary()
   {
@@ -725,7 +663,7 @@ public:
 	    uchar null_bit_arg, utype unireg_check_arg,
 	    const char *field_name_arg,
             uint8 dec_arg, bool zero_arg, bool unsigned_arg);
-  Item_result result_type () const { return REAL_RESULT; }
+  enum Item_result result_type () const { return INT_RESULT; }
   void prepend_zeros(String *value);
   void add_zerofill_and_unsigned(String &res) const;
   friend class Create_field;
@@ -736,6 +674,7 @@ public:
   int store_decimal(const my_decimal *);
   my_decimal *val_decimal(my_decimal *);
   uint is_equal(Create_field *new_field);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   int check_int(CHARSET_INFO *cs, const char *str, int length,
                 const char *int_end, int error);
   bool get_int(CHARSET_INFO *cs, const char *from, uint len, 
@@ -770,9 +709,9 @@ public:
   my_decimal *val_decimal(my_decimal *);
   virtual bool str_needs_quotes() { return TRUE; }
   uint is_equal(Create_field *new_field);
+  bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
 };
 
-
 /* base class for Field_string, Field_varstring and Field_blob */
 
 class Field_longstr :public Field_str
@@ -805,15 +744,13 @@ public:
                field_name_arg, dec_arg, zero_arg, unsigned_arg),
     not_fixed(dec_arg >= NOT_FIXED_DEC)
     {}
+  Item_result result_type () const { return REAL_RESULT; }
   int store_decimal(const my_decimal *);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   my_decimal *val_decimal(my_decimal *);
-  int truncate(double *nr, double max_length);
   uint32 max_display_length() { return field_length; }
   uint size_of() const { return sizeof(*this); }
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first);
 };
 
 
@@ -842,15 +779,9 @@ public:
   void overflow(bool negative);
   bool zero_pack() const { return 0; }
   void sql_type(String &str) const;
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
-    return Field::unpack(to, from, param_data, low_byte_first);
-  }
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
-  {
-    return Field::pack(to, from, max_length, low_byte_first);
+    return Field::pack(to, from, max_length);
   }
 };
 
@@ -885,7 +816,7 @@ public:
   int  store(const char *to, uint length, CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type t_type);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   int  store_decimal(const my_decimal *);
   double val_real(void);
   longlong val_int(void);
@@ -903,8 +834,7 @@ public:
   int compatible_field_size(uint field_metadata,
                             const Relay_log_info *rli, uint16 mflags);
   uint is_equal(Create_field *new_field);
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+  virtual const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end, uint param_data);
   static Field *create_from_item (Item *);
 };
 
@@ -919,7 +849,6 @@ public:
 	       unireg_check_arg, field_name_arg,
 	       0, zero_arg,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_TINY;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_BINARY : HA_KEYTYPE_INT8; }
@@ -937,16 +866,17 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 4; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
     *to= *from;
     return to + 1;
   }
 
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+                              const uchar *from_end, uint param_data)
   {
+    if (from == from_end)
+      return 0;
     *to= *from;
     return from + 1;
   }
@@ -968,7 +898,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg, 0, 0, unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_SHORT;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_USHORT_INT : HA_KEYTYPE_SHORT_INT;}
@@ -986,44 +915,24 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 6; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
     int16 val;
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      val = sint2korr(from);
-    else
-#endif
-      shortget(val, from);
-
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-      int2store(to, val);
-    else
-#endif
-      shortstore(to, val);
-    return to + sizeof(val);
+    val = sint2korr(from);
+    int2store(to, val);
+    return to + 2;
   }
 
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+                              const uchar *from_end, uint param_data)
   {
     int16 val;
-#ifdef WORDS_BIGENDIAN
-    if (low_byte_first)
-      val = sint2korr(from);
-    else
-#endif
-      shortget(val, from);
+    if (from +2 > from_end)
+      return 0;
 
-#ifdef WORDS_BIGENDIAN
-    if (table->s->db_low_byte_first)
-      int2store(to, val);
-    else
-#endif
-      shortstore(to, val);
-    return from + sizeof(val);
+    val = sint2korr(from);
+    int2store(to, val);
+    return from + 2;
   }
 };
 
@@ -1037,7 +946,6 @@ public:
 	       unireg_check_arg, field_name_arg,
 	       0, zero_arg,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_INT24;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_UINT24 : HA_KEYTYPE_INT24; }
@@ -1055,16 +963,9 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return 8; }
 
-  virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length, bool low_byte_first)
-  {
-    return Field::pack(to, from, max_length, low_byte_first);
-  }
-
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first)
+  virtual uchar *pack(uchar* to, const uchar *from, uint max_length)
   {
-    return Field::unpack(to, from, param_data, low_byte_first);
+    return Field::pack(to, from, max_length);
   }
 };
 
@@ -1084,7 +985,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg,0,0,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_LONG;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_ULONG_INT : HA_KEYTYPE_LONG_INT; }
@@ -1102,21 +1002,21 @@ public:
   void sql_type(String &str) const;
   uint32 max_display_length() { return MY_INT32_NUM_DECIMAL_DIGITS; }
   virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length __attribute__((unused)),
-                      bool low_byte_first)
+                      uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data __attribute__((unused)),
-                              bool low_byte_first)
+                              const uchar *from_end,
+                              uint param_data __attribute__((unused)))
   {
-    return unpack_int32(to, from, low_byte_first);
+    if (from + 4 > from_end)
+      return 0;
+    return unpack_int32(to, from);
   }
 };
 
 
-#ifdef HAVE_LONG_LONG
 class Field_longlong :public Field_num {
 public:
   Field_longlong(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
@@ -1133,7 +1033,6 @@ public:
     :Field_num((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0,
 	       NONE, field_name_arg,0,0,unsigned_arg)
     {}
-  enum Item_result result_type () const { return INT_RESULT; }
   enum_field_types type() const { return MYSQL_TYPE_LONGLONG;}
   enum ha_base_keytype key_type() const
     { return unsigned_flag ? HA_KEYTYPE_ULONGLONG : HA_KEYTYPE_LONGLONG; }
@@ -1153,22 +1052,20 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 8; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   uint32 max_display_length() { return 20; }
   virtual uchar *pack(uchar* to, const uchar *from,
-                      uint max_length  __attribute__((unused)),
-                      bool low_byte_first)
+                      uint max_length  __attribute__((unused)))
   {
-    return pack_int64(to, from, low_byte_first);
+    return pack_int64(to, from);
   }
-  virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data __attribute__((unused)),
-                              bool low_byte_first)
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int64(to, from, low_byte_first);
+    if (from + 8 > from_end)
+      return 0;
+    return unpack_int64(to, from);
   }
 };
-#endif
 
 
 class Field_float :public Field_real {
@@ -1275,10 +1172,14 @@ public:
   void sql_type(String &str) const;
   uint size_of() const { return sizeof(*this); }
   uint32 max_display_length() { return 4; }
+  void move_field_offset(my_ptrdiff_t ptr_diff) {}
 };
 
 
 class Field_timestamp :public Field_str {
+protected:
+  int store_TIME_with_warning(THD *, MYSQL_TIME *, const Lazy_string *,
+                              bool, bool);
 public:
   Field_timestamp(uchar *ptr_arg, uint32 len_arg,
                   uchar *null_ptr_arg, uchar null_bit_arg,
@@ -1288,11 +1189,11 @@ public:
 		  CHARSET_INFO *cs);
   enum_field_types type() const { return MYSQL_TYPE_TIMESTAMP;}
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
+  enum Item_result cmp_type () const { return TIME_RESULT; }
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
-  int  reset(void) { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; }
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1301,9 +1202,9 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 4; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 0; }
-  void set_time();
+  uint decimals() const { return 0; }
+  virtual int set_time();
   virtual void set_default()
   {
     if (table->timestamp_field == this &&
@@ -1313,46 +1214,68 @@ public:
       Field::set_default();
   }
   /* Get TIMESTAMP field value as seconds since begging of Unix Epoch */
-  inline long get_timestamp(bool *null_value)
+  virtual my_time_t get_timestamp(ulong *sec_part) const;
+  virtual void store_TIME(my_time_t timestamp, ulong sec_part)
   {
-    if ((*null_value= is_null()))
-      return 0;
-#ifdef WORDS_BIGENDIAN
-    if (table && table->s->db_low_byte_first)
-      return sint4korr(ptr);
-#endif
-    long tmp;
-    longget(tmp,ptr);
-    return tmp;
-  }
-  inline void store_timestamp(my_time_t timestamp)
-  {
-#ifdef WORDS_BIGENDIAN
-    if (table && table->s->db_low_byte_first)
-    {
-      int4store(ptr,timestamp);
-    }
-    else
-#endif
-      longstore(ptr,(uint32) timestamp);
+    int4store(ptr,timestamp);
   }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   timestamp_auto_set_type get_auto_set_type() const;
   uchar *pack(uchar *to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
-  const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int32(to, from, low_byte_first);
+    if (from + 4 > from_end)
+      return 0;
+    return unpack_int32(to, from);
   }
 };
 
 
+class Field_timestamp_hires :public Field_timestamp {
+  uint dec;
+public:
+  Field_timestamp_hires(uchar *ptr_arg,
+                  uchar *null_ptr_arg, uchar null_bit_arg,
+                  enum utype unireg_check_arg, const char *field_name_arg,
+                  TABLE_SHARE *share, uint dec_arg, CHARSET_INFO *cs) :
+  Field_timestamp(ptr_arg, MAX_DATETIME_WIDTH + dec_arg + 1, null_ptr_arg,
+                  null_bit_arg, unireg_check_arg, field_name_arg, share, cs),
+  dec(dec_arg)
+  {
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
+  }
+  void sql_type(String &str) const;
+  my_time_t get_timestamp(ulong *sec_part) const;
+  void store_TIME(my_time_t timestamp, ulong sec_part);
+  int store_decimal(const my_decimal *d);
+  my_decimal* val_decimal(my_decimal*);
+  double val_real(void);
+  String *val_str(String*,String *);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint decimals() const { return dec; }
+  int set_time();
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  void make_field(Send_field *field);
+  uint32 pack_length() const;
+  uchar *pack(uchar *to, const uchar *from, uint max_length)
+  { return Field::pack(to, from, max_length); }
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data)
+  { return Field::unpack(to, from, from_end, param_data); }
+  uint size_of() const { return sizeof(*this); }
+  bool eq_def(Field *field)
+  { return Field_str::eq_def(field) && dec == field->decimals(); }
+};
+
+
 class Field_year :public Field_tiny {
 public:
   Field_year(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
@@ -1365,80 +1288,95 @@ public:
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
   int  store(longlong nr, bool unsigned_val);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   bool send_binary(Protocol *protocol);
+  uint32 max_display_length() { return field_length; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
 };
 
 
-class Field_date :public Field_str {
+class Field_temporal: public Field_str {
+protected:
+  int store_TIME_with_warning(MYSQL_TIME *ltime, const Lazy_string *str,
+                              int was_cut, int have_smth_to_conv);
+  virtual void store_TIME(MYSQL_TIME *ltime) = 0;
+  virtual timestamp_type temporal_type() = 0;
+public:
+  Field_temporal(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg,
+                 uchar null_bit_arg, utype unireg_check_arg,
+                 const char *field_name_arg, CHARSET_INFO *charset_arg)
+    :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg,
+               field_name_arg, charset_arg) { }
+  enum Item_result cmp_type () const { return TIME_RESULT; }
+  int  store(const char *to,uint length,CHARSET_INFO *charset);
+  int  store(double nr);
+  int  store(longlong nr, bool unsigned_val);
+  int  store_time_dec(MYSQL_TIME *ltime, uint dec);
+  my_decimal *val_decimal(my_decimal*);
+  bool eq_def(Field *field)
+  {
+    return (Field_str::eq_def(field) && decimals() == field->decimals());
+  }
+};
+
+class Field_date :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
+  timestamp_type temporal_type()
+  { return MYSQL_TIMESTAMP_DATE; }
 public:
   Field_date(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
 	     enum utype unireg_check_arg, const char *field_name_arg,
 	     CHARSET_INFO *cs)
-    :Field_str(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
+    :Field_temporal(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
     {}
-  Field_date(bool maybe_null_arg, const char *field_name_arg,
-             CHARSET_INFO *cs)
-    :Field_str((uchar*) 0, MAX_DATE_WIDTH, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) {}
   enum_field_types type() const { return MYSQL_TYPE_DATE;}
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  int store(const char *to,uint length,CHARSET_INFO *charset);
-  int store(double nr);
-  int store(longlong nr, bool unsigned_val);
   int reset(void) { ptr[0]=ptr[1]=ptr[2]=ptr[3]=0; return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
-  bool get_time(MYSQL_TIME *ltime);
+  uint decimals() const { return 0; }
   bool send_binary(Protocol *protocol);
   int cmp(const uchar *,const uchar *);
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 4; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   uchar *pack(uchar* to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int32(to, from, low_byte_first);
+    return pack_int32(to, from);
   }
-  const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int32(to, from, low_byte_first);
+    if (from + 4 > from_end)
+      return 0;
+    return unpack_int32(to, from);
   }
 };
 
 
-class Field_newdate :public Field_str {
+class Field_newdate :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
+  timestamp_type temporal_type() { return MYSQL_TIMESTAMP_DATE; }
 public:
   Field_newdate(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
 		enum utype unireg_check_arg, const char *field_name_arg,
 		CHARSET_INFO *cs)
-    :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
+    :Field_temporal(ptr_arg, MAX_DATE_WIDTH, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
     {}
-  Field_newdate(bool maybe_null_arg, const char *field_name_arg,
-                CHARSET_INFO *cs)
-    :Field_str((uchar*) 0,10, maybe_null_arg ? (uchar*) "": 0,0,
-               NONE, field_name_arg, cs) {}
   enum_field_types type() const { return MYSQL_TYPE_DATE;}
   enum_field_types real_type() const { return MYSQL_TYPE_NEWDATE; }
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_UINT24; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  int  store(const char *to,uint length,CHARSET_INFO *charset);
-  int  store(double nr);
-  int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
   int reset(void) { ptr[0]=ptr[1]=ptr[2]=0; return 0; }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1447,75 +1385,89 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 3; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
 };
 
 
-class Field_time :public Field_str {
+class Field_time :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
+  timestamp_type temporal_type()
+  { return MYSQL_TIMESTAMP_TIME; }
 public:
-  Field_time(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
-	     enum utype unireg_check_arg, const char *field_name_arg,
-	     CHARSET_INFO *cs)
-    :Field_str(ptr_arg, 8, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
+  Field_time(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
+             uchar null_bit_arg, enum utype unireg_check_arg,
+             const char *field_name_arg, CHARSET_INFO *cs)
+    :Field_temporal(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
     {}
-  Field_time(bool maybe_null_arg, const char *field_name_arg,
-             CHARSET_INFO *cs)
-    :Field_str((uchar*) 0,8, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) {}
   enum_field_types type() const { return MYSQL_TYPE_TIME;}
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_INT24; }
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
+  int store_time_dec(MYSQL_TIME *ltime, uint dec);
   int store(const char *to,uint length,CHARSET_INFO *charset);
   int store(double nr);
   int store(longlong nr, bool unsigned_val);
-  int reset(void) { ptr[0]=ptr[1]=ptr[2]=0; return 0; }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
   bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
   bool send_binary(Protocol *protocol);
-  bool get_time(MYSQL_TIME *ltime);
   int cmp(const uchar *,const uchar *);
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 3; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
 };
 
+class Field_time_hires :public Field_time {
+  uint dec;
+  longlong zero_point;
+  void store_TIME(MYSQL_TIME *ltime);
+public:
+  Field_time_hires(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
+             enum utype unireg_check_arg, const char *field_name_arg,
+             uint dec_arg, CHARSET_INFO *cs)
+    :Field_time(ptr_arg, MIN_TIME_WIDTH + dec_arg + 1, null_ptr_arg,
+                null_bit_arg, unireg_check_arg, field_name_arg, cs),
+     dec(dec_arg)
+  {
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
+    zero_point= sec_part_shift(
+                   ((TIME_MAX_VALUE_SECONDS+1LL)*TIME_SECOND_PART_FACTOR), dec);
+  }
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  uint decimals() const { return dec; }
+  int store_decimal(const my_decimal *d);
+  longlong val_int(void);
+  double val_real(void);
+  String *val_str(String*,String *);
+  int reset(void);
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint32 pack_length() const;
+  void sql_type(String &str) const;
+  void make_field(Send_field *);
+  uint size_of() const { return sizeof(*this); }
+};
 
-class Field_datetime :public Field_str {
+class Field_datetime :public Field_temporal {
+  void store_TIME(MYSQL_TIME *ltime);
+  timestamp_type temporal_type()
+  { return MYSQL_TIMESTAMP_DATETIME; }
 public:
-  Field_datetime(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg,
-		 enum utype unireg_check_arg, const char *field_name_arg,
-		 CHARSET_INFO *cs)
-    :Field_str(ptr_arg, MAX_DATETIME_WIDTH, null_ptr_arg, null_bit_arg,
-	       unireg_check_arg, field_name_arg, cs)
+  Field_datetime(uchar *ptr_arg, uint length_arg, uchar *null_ptr_arg,
+                 uchar null_bit_arg, enum utype unireg_check_arg,
+                 const char *field_name_arg, CHARSET_INFO *cs)
+    :Field_temporal(ptr_arg, length_arg, null_ptr_arg, null_bit_arg,
+                    unireg_check_arg, field_name_arg, cs)
     {}
-  Field_datetime(bool maybe_null_arg, const char *field_name_arg,
-		 CHARSET_INFO *cs)
-    :Field_str((uchar*) 0, MAX_DATETIME_WIDTH, maybe_null_arg ? (uchar*) "": 0,0,
-	       NONE, field_name_arg, cs) {}
   enum_field_types type() const { return MYSQL_TYPE_DATETIME;}
-#ifdef HAVE_LONG_LONG
   enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONGLONG; }
-#endif
-  enum Item_result cmp_type () const { return INT_RESULT; }
-  uint decimals() const { return DATETIME_DEC; }
-  int  store(const char *to,uint length,CHARSET_INFO *charset);
-  int  store(double nr);
-  int  store(longlong nr, bool unsigned_val);
-  int store_time(MYSQL_TIME *ltime, timestamp_type type);
-  int reset(void)
-  {
-    ptr[0]=ptr[1]=ptr[2]=ptr[3]=ptr[4]=ptr[5]=ptr[6]=ptr[7]=0;
-    return 0;
-  }
+  uint decimals() const { return 0; }
   double val_real(void);
   longlong val_int(void);
   String *val_str(String*,String *);
@@ -1524,24 +1476,101 @@ public:
   void sort_string(uchar *buff,uint length);
   uint32 pack_length() const { return 8; }
   void sql_type(String &str) const;
-  bool can_be_compared_as_longlong() const { return TRUE; }
   bool zero_pack() const { return 1; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   uchar *pack(uchar* to, const uchar *from,
-              uint max_length __attribute__((unused)), bool low_byte_first)
+              uint max_length __attribute__((unused)))
   {
-    return pack_int64(to, from, low_byte_first);
+    return pack_int64(to, from);
   }
-  const uchar *unpack(uchar* to, const uchar *from,
-                      uint param_data __attribute__((unused)),
-                      bool low_byte_first)
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data __attribute__((unused)))
   {
-    return unpack_int64(to, from, low_byte_first);
+    if (from + 8 > from_end)
+      return 0;
+    return unpack_int64(to, from);
   }
 };
 
 
+class Field_datetime_hires :public Field_datetime {
+  void store_TIME(MYSQL_TIME *ltime);
+  uint dec;
+public:
+  Field_datetime_hires(uchar *ptr_arg, uchar *null_ptr_arg,
+                       uchar null_bit_arg, enum utype unireg_check_arg,
+                       const char *field_name_arg, uint dec_arg,
+                       CHARSET_INFO *cs)
+    :Field_datetime(ptr_arg, MAX_DATETIME_WIDTH + dec_arg + 1,
+                    null_ptr_arg, null_bit_arg, unireg_check_arg,
+                    field_name_arg, cs), dec(dec_arg)
+  {
+    DBUG_ASSERT(dec);
+    DBUG_ASSERT(dec <= TIME_SECOND_PART_DIGITS);
+  }
+  enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; }
+  uint decimals() const { return dec; }
+  void make_field(Send_field *field);
+  int store_decimal(const my_decimal *d);
+  double val_real(void);
+  longlong val_int(void);
+  String *val_str(String*,String *);
+  bool send_binary(Protocol *protocol);
+  int cmp(const uchar *,const uchar *);
+  void sort_string(uchar *buff,uint length);
+  uint32 pack_length() const;
+  void sql_type(String &str) const;
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
+  uchar *pack(uchar *to, const uchar *from, uint max_length)
+  { return Field::pack(to, from, max_length); }
+  const uchar *unpack(uchar* to, const uchar *from, const uchar *from_end,
+                      uint param_data)
+  { return Field::unpack(to, from, from_end, param_data); }
+  uint size_of() const { return sizeof(*this); }
+};
+
+static inline Field_timestamp *
+new_Field_timestamp(uchar *ptr, uchar *null_ptr, uchar null_bit,
+                    enum Field::utype unireg_check, const char *field_name,
+                    TABLE_SHARE *share, uint dec, CHARSET_INFO *cs)
+{
+  if (dec==0)
+    return new Field_timestamp(ptr, MAX_DATETIME_WIDTH, null_ptr, null_bit,
+                                unireg_check, field_name, share, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_timestamp_hires(ptr, null_ptr, null_bit, unireg_check,
+                                   field_name, share, dec, cs);
+}
+
+static inline Field_time *
+new_Field_time(uchar *ptr, uchar *null_ptr, uchar null_bit,
+               enum Field::utype unireg_check, const char *field_name,
+               uint dec, CHARSET_INFO *cs)
+{
+  if (dec == 0)
+    return new Field_time(ptr, MIN_TIME_WIDTH, null_ptr, null_bit,
+                          unireg_check, field_name, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_time_hires(ptr, null_ptr, null_bit,
+                                  unireg_check, field_name, dec, cs);
+}
+
+static inline Field_datetime *
+new_Field_datetime(uchar *ptr, uchar *null_ptr, uchar null_bit,
+                   enum Field::utype unireg_check,
+                   const char *field_name, uint dec, CHARSET_INFO *cs)
+{
+  if (dec == 0)
+    return new Field_datetime(ptr, MAX_DATETIME_WIDTH, null_ptr, null_bit,
+                              unireg_check, field_name, cs);
+  if (dec == NOT_FIXED_DEC)
+    dec= MAX_DATETIME_PRECISION;
+  return new Field_datetime_hires(ptr, null_ptr, null_bit,
+                                  unireg_check, field_name, dec, cs);
+}
+
 class Field_string :public Field_longstr {
 public:
   bool can_alter_field_type;
@@ -1586,9 +1615,9 @@ public:
   void sort_string(uchar *buff,uint length);
   void sql_type(String &str) const;
   virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
+                      uint max_length);
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+                              const uchar *from_end,uint param_data);
   uint pack_length_from_metadata(uint field_metadata)
   {
     DBUG_PRINT("debug", ("field_metadata: 0x%04x", field_metadata));
@@ -1673,15 +1702,14 @@ public:
   uint get_key_image(uchar *buff,uint length, imagetype type);
   void set_key_image(const uchar *buff,uint length);
   void sql_type(String &str) const;
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  uchar *pack_key(uchar *to, const uchar *from, uint max_length, bool low_byte_first);
-  uchar *pack_key_from_key_image(uchar* to, const uchar *from,
-                                 uint max_length, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
+  uchar *pack_key(uchar *to, const uchar *from, uint max_length);
+  uchar *pack_key_from_key_image(uchar* to, const uchar *from, uint max_length);
   virtual const uchar *unpack(uchar* to, const uchar *from,
-                              uint param_data, bool low_byte_first);
-  const uchar *unpack_key(uchar* to, const uchar *from,
-                          uint max_length, bool low_byte_first);
+                              const uchar *from_end, uint param_data);
+#ifdef NOT_USED
+  const uchar *unpack_key(uchar* to, const uchar *from, uint max_length);
+#endif
   int pack_cmp(const uchar *a, const uchar *b, uint key_length,
                bool insert_or_update);
   int pack_cmp(const uchar *b, uint key_length,bool insert_or_update);
@@ -1789,14 +1817,7 @@ public:
   int reset(void) { bzero(ptr, packlength+sizeof(uchar*)); return 0; }
   void reset_fields() { bzero((uchar*) &value,sizeof(value)); }
   uint32 get_field_buffer_size(void) { return value.alloced_length(); }
-#ifndef WORDS_BIGENDIAN
-  static
-#endif
-  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number, bool low_byte_first);
-  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number)
-  {
-    store_length(i_ptr, i_packlength, i_number, table->s->db_low_byte_first);
-  }
+  void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number);
   inline void store_length(uint32 number)
   {
     store_length(ptr, packlength, number);
@@ -1810,38 +1831,37 @@ public:
 
      @returns The length in the row plus the size of the data.
   */
-  uint32 get_packed_size(const uchar *ptr_arg, bool low_byte_first)
-    {return packlength + get_length(ptr_arg, packlength, low_byte_first);}
+  uint32 get_packed_size(const uchar *ptr_arg)
+    {return packlength + get_length(ptr_arg, packlength);}
 
   inline uint32 get_length(uint row_offset= 0)
-  { return get_length(ptr+row_offset, this->packlength, table->s->db_low_byte_first); }
-  uint32 get_length(const uchar *ptr, uint packlength, bool low_byte_first);
+  { return get_length(ptr+row_offset, this->packlength); }
+  uint32 get_length(const uchar *ptr, uint packlength);
   uint32 get_length(const uchar *ptr_arg)
-  { return get_length(ptr_arg, this->packlength, table->s->db_low_byte_first); }
-  void put_length(uchar *pos, uint32 length);
+  { return get_length(ptr_arg, this->packlength); }
   inline void get_ptr(uchar **str)
-    {
-      memcpy_fixed((uchar*) str,ptr+packlength,sizeof(uchar*));
-    }
+  {
+    memcpy_fixed((uchar*) str,ptr+packlength,sizeof(uchar*));
+  }
   inline void get_ptr(uchar **str, uint row_offset)
-    {
-      memcpy_fixed((uchar*) str,ptr+packlength+row_offset,sizeof(char*));
-    }
+  {
+    memcpy_fixed((uchar*) str,ptr+packlength+row_offset,sizeof(char*));
+  }
   inline void set_ptr(uchar *length, uchar *data)
-    {
-      memcpy(ptr,length,packlength);
-      memcpy_fixed(ptr+packlength,&data,sizeof(char*));
-    }
+  {
+    memcpy(ptr,length,packlength);
+    memcpy_fixed(ptr+packlength,&data,sizeof(char*));
+  }
   void set_ptr_offset(my_ptrdiff_t ptr_diff, uint32 length, uchar *data)
-    {
-      uchar *ptr_ofs= ADD_TO_PTR(ptr,ptr_diff,uchar*);
-      store_length(ptr_ofs, packlength, length);
-      memcpy_fixed(ptr_ofs+packlength,&data,sizeof(char*));
-    }
+  {
+    uchar *ptr_ofs= ADD_TO_PTR(ptr,ptr_diff,uchar*);
+    store_length(ptr_ofs, packlength, length);
+    memcpy_fixed(ptr_ofs+packlength,&data,sizeof(char*));
+  }
   inline void set_ptr(uint32 length, uchar *data)
-    {
-      set_ptr_offset(0, length, data);
-    }
+  {
+    set_ptr_offset(0, length, data);
+  }
   uint get_key_image(uchar *buff,uint length, imagetype type);
   void set_key_image(const uchar *buff,uint length);
   void sql_type(String &str) const;
@@ -1858,21 +1878,20 @@ public:
     memcpy_fixed(ptr+packlength,&tmp,sizeof(char*));
     return 0;
   }
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
-  uchar *pack_key(uchar *to, const uchar *from,
-                  uint max_length, bool low_byte_first);
-  uchar *pack_key_from_key_image(uchar* to, const uchar *from,
-                                 uint max_length, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
+  uchar *pack_key(uchar *to, const uchar *from, uint max_length);
+  uchar *pack_key_from_key_image(uchar* to, const uchar *from, uint max_length);
   virtual const uchar *unpack(uchar *to, const uchar *from,
-                              uint param_data, bool low_byte_first);
-  const uchar *unpack_key(uchar* to, const uchar *from,
-                          uint max_length, bool low_byte_first);
+                              const uchar *from_end, uint param_data);
+#ifdef NOT_USED
+  const uchar *unpack_key(uchar* to, const uchar *from, uint max_length);
+#endif
   int pack_cmp(const uchar *a, const uchar *b, uint key_length,
                bool insert_or_update);
   int pack_cmp(const uchar *b, uint key_length,bool insert_or_update);
   uint packed_col_length(const uchar *col_ptr, uint length);
   uint max_packed_col_length(uint max_length);
+  /* Set if we should convert constant item's with convert_const_item */
   void free() { value.free(); }
   inline void clear_temporary() { bzero((uchar*) &value,sizeof(value)); }
   friend int field_conv(Field *to,Field *from);
@@ -1914,6 +1933,7 @@ public:
   uint size_of() const { return sizeof(*this); }
   int  reset(void) { return !maybe_null() || Field_blob::reset(); }
   geometry_type get_geometry_type() { return geom_type; };
+  bool hash_join_is_possible() { return FALSE; }
 };
 #endif /*HAVE_SPATIAL*/
 
@@ -1938,7 +1958,6 @@ public:
   Field *new_field(MEM_ROOT *root, struct st_table *new_table, bool keep_type);
   enum_field_types type() const { return MYSQL_TYPE_STRING; }
   enum Item_result cmp_type () const { return INT_RESULT; }
-  enum Item_result cast_to_int_type () const { return INT_RESULT; }
   enum ha_base_keytype key_type() const;
   int  store(const char *to,uint length,CHARSET_INFO *charset);
   int  store(double nr);
@@ -2068,10 +2087,9 @@ public:
   int compatible_field_size(uint field_metadata,
                             const Relay_log_info *rli, uint16 mflags);
   void sql_type(String &str) const;
-  virtual uchar *pack(uchar *to, const uchar *from,
-                      uint max_length, bool low_byte_first);
+  virtual uchar *pack(uchar *to, const uchar *from, uint max_length);
   virtual const uchar *unpack(uchar *to, const uchar *from,
-                              uint param_data, bool low_byte_first);
+                              const uchar *from_end, uint param_data);
   virtual void set_default();
 
   Field *new_key_field(MEM_ROOT *root, struct st_table *new_table,
diff --git a/sql/field_conv.cc b/sql/field_conv.cc
index 5408736f976..c99eb62eb57 100644
--- a/sql/field_conv.cc
+++ b/sql/field_conv.cc
@@ -117,6 +117,11 @@ static void do_outer_field_to_null_str(Copy_field *copy)
 int
 set_field_to_null(Field *field)
 {
+  if (field->table->null_catch_flags & CHECK_ROW_FOR_NULLS_TO_REJECT)
+  {
+    field->table->null_catch_flags|= REJECT_ROW_DUE_TO_NULL_FIELDS;
+    return -1;
+  }
   if (field->real_maybe_null())
   {
     field->set_null();
@@ -160,6 +165,11 @@ set_field_to_null(Field *field)
 int
 set_field_to_null_with_conversions(Field *field, bool no_conversions)
 {
+  if (field->table->null_catch_flags & CHECK_ROW_FOR_NULLS_TO_REJECT)
+  {
+    field->table->null_catch_flags|= REJECT_ROW_DUE_TO_NULL_FIELDS;
+    return -1;
+  }
   if (field->real_maybe_null())
   {
     field->set_null();
@@ -206,6 +216,14 @@ static void do_skip(Copy_field *copy __attribute__((unused)))
 }
 
 
+/* 
+  Copy: (NULLable field) -> (NULLable field) 
+
+  note: if the record we're copying from is NULL-complemetned (i.e. 
+  from_field->table->null_row==1), it will also have all NULLable columns to be
+  set to NULLs, so we dont need to check table->null_row here.
+*/
+
 static void do_copy_null(Copy_field *copy)
 {
   if (*copy->from_null_ptr & copy->from_bit)
@@ -220,6 +238,10 @@ static void do_copy_null(Copy_field *copy)
   }
 }
 
+/*
+  Copy: (not-NULL field in table that can be NULL-complemented) -> (NULLable 
+     field)
+*/
 
 static void do_outer_field_null(Copy_field *copy)
 {
@@ -236,7 +258,27 @@ static void do_outer_field_null(Copy_field *copy)
   }
 }
 
+/*
+  Copy: (not-NULL field in table that can be NULL-complemented) -> (not-NULL
+  field)
+*/
+static void do_copy_nullable_row_to_notnull(Copy_field *copy)
+{
+  if (*copy->null_row ||
+      (copy->from_null_ptr && (*copy->from_null_ptr & copy->from_bit)))
+  {
+    copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
+                                WARN_DATA_TRUNCATED, 1);
+    copy->to_field->reset();
+  }
+  else
+  {
+    (copy->do_copy2)(copy);
+  }
 
+}
+
+/* Copy: (NULL-able field) -> (not NULL-able field) */
 static void do_copy_not_null(Copy_field *copy)
 {
   if (*copy->from_null_ptr & copy->from_bit)
@@ -250,6 +292,7 @@ static void do_copy_not_null(Copy_field *copy)
 }
 
 
+/* Copy: (non-NULLable field) -> (NULLable field) */
 static void do_copy_maybe_null(Copy_field *copy)
 {
   *copy->to_null_ptr&= ~copy->to_bit;
@@ -367,6 +410,14 @@ static void do_field_decimal(Copy_field *copy)
 }
 
 
+static void do_field_temporal(Copy_field *copy)
+{
+  MYSQL_TIME ltime;
+  copy->from_field->get_date(&ltime, TIME_FUZZY_DATE);
+  copy->to_field->store_time_dec(&ltime, copy->from_field->decimals());
+}
+
+
 /**
   string copy for single byte characters set when to string is shorter than
   from string.
@@ -451,7 +502,8 @@ static void do_varstring1(Copy_field *copy)
   if (length > copy->to_length- 1)
   {
     length=copy->to_length - 1;
-    if (copy->from_field->table->in_use->count_cuted_fields)
+    if (copy->from_field->table->in_use->count_cuted_fields &&
+        copy->to_field)
       copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                                   WARN_DATA_TRUNCATED, 1);
   }
@@ -487,7 +539,8 @@ static void do_varstring2(Copy_field *copy)
   if (length > copy->to_length- HA_KEY_BLOB_LENGTH)
   {
     length=copy->to_length-HA_KEY_BLOB_LENGTH;
-    if (copy->from_field->table->in_use->count_cuted_fields)
+    if (copy->from_field->table->in_use->count_cuted_fields &&
+        copy->to_field)
       copy->to_field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                                   WARN_DATA_TRUNCATED, 1);
   }
@@ -551,9 +604,9 @@ void Copy_field::set(uchar *to,Field *from)
       do_copy=	  do_field_to_null_str;
   }
   else
-  {
+  { 
     to_null_ptr=  0;				// For easy debugging
-    do_copy=	  do_field_eq;
+    do_copy= do_field_eq;
   }
 }
 
@@ -561,7 +614,7 @@ void Copy_field::set(uchar *to,Field *from)
 /*
   To do: 
 
-  If 'save\ is set to true and the 'from' is a blob field, do_copy is set to
+  If 'save' is set to true and the 'from' is a blob field, do_copy is set to
   do_save_blob rather than do_conv_blob.  The only differences between them
   appears to be:
 
@@ -614,7 +667,15 @@ void Copy_field::set(Field *to,Field *from,bool save)
       else if (to_field == to_field->table->next_number_field)
         do_copy= do_copy_next_number;
       else
-        do_copy= do_copy_not_null;
+      {
+        if (!from_null_ptr)
+        {
+          null_row= &from->table->null_row;
+          do_copy= do_copy_nullable_row_to_notnull;
+        }
+        else
+          do_copy= do_copy_not_null;
+      }
     }
   }
   else if (to_field->real_maybe_null())
@@ -638,13 +699,11 @@ void Copy_field::set(Field *to,Field *from,bool save)
 Copy_field::Copy_func *
 Copy_field::get_copy_func(Field *to,Field *from)
 {
-  bool compatible_db_low_byte_first= (to->table->s->db_low_byte_first ==
-                                     from->table->s->db_low_byte_first);
   if (to->flags & BLOB_FLAG)
   {
     if (!(from->flags & BLOB_FLAG) || from->charset() != to->charset())
       return do_conv_blob;
-    if (from_length != to_length || !compatible_db_low_byte_first)
+    if (from_length != to_length)
     {
       // Correct pointer to point at char pointer
       to_ptr+=   to_length - to->table->s->blob_ptr_size;
@@ -659,6 +718,16 @@ Copy_field::get_copy_func(Field *to,Field *from)
       return do_field_int;
     if (to->result_type() == DECIMAL_RESULT)
       return do_field_decimal;
+    if (from->cmp_type() == TIME_RESULT)
+    {
+      /* If types are not 100 % identical then convert trough get_date() */
+      if (!to->eq_def(from) ||
+          ((to->table->in_use->variables.sql_mode &
+            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE)) &&
+             mysql_type_to_time_type(to->type()) != MYSQL_TIMESTAMP_TIME))
+        return do_field_temporal;
+      /* Do binary copy */
+    }
     // Check if identical fields
     if (from->result_type() == STRING_RESULT)
     {
@@ -671,16 +740,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
           to->type() == MYSQL_TYPE_VARCHAR && !to->has_charset())
         return do_field_varbinary_pre50;
 
-      /*
-        If we are copying date or datetime's we have to check the dates
-        if we don't allow 'all' dates.
-      */
-      if (to->real_type() != from->real_type() ||
-          !compatible_db_low_byte_first ||
-          ((to->table->in_use->variables.sql_mode &
-            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) &&
-           (to->type() == MYSQL_TYPE_DATE ||
-            to->type() == MYSQL_TYPE_DATETIME)))
+      if (to->real_type() != from->real_type())
       {
 	if (from->real_type() == MYSQL_TYPE_ENUM ||
 	    from->real_type() == MYSQL_TYPE_SET)
@@ -706,12 +766,11 @@ Copy_field::get_copy_func(Field *to,Field *from)
         if (((Field_varstring*) to)->length_bytes !=
             ((Field_varstring*) from)->length_bytes)
           return do_field_string;
-        else
-          return (((Field_varstring*) to)->length_bytes == 1 ?
-                  (from->charset()->mbmaxlen == 1 ? do_varstring1 :
-                                                    do_varstring1_mb) :
-                  (from->charset()->mbmaxlen == 1 ? do_varstring2 :
-                                                    do_varstring2_mb));
+        return (((Field_varstring*) to)->length_bytes == 1 ?
+                (from->charset()->mbmaxlen == 1 ? do_varstring1 :
+                 do_varstring1_mb) :
+                (from->charset()->mbmaxlen == 1 ? do_varstring2 :
+                 do_varstring2_mb));
       }
       else if (to_length < from_length)
 	return (from->charset()->mbmaxlen == 1 ?
@@ -724,8 +783,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
       }
     }
     else if (to->real_type() != from->real_type() ||
-	     to_length != from_length ||
-             !compatible_db_low_byte_first)
+	     to_length != from_length)
     {
       if (to->real_type() == MYSQL_TYPE_DECIMAL ||
 	  to->result_type() == STRING_RESULT)
@@ -736,7 +794,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
     }
     else
     {
-      if (!to->eq_def(from) || !compatible_db_low_byte_first)
+      if (!to->eq_def(from))
       {
 	if (to->real_type() == MYSQL_TYPE_DECIMAL)
 	  return do_field_string;
@@ -769,14 +827,13 @@ int field_conv(Field *to,Field *from)
   {
     if (to->pack_length() == from->pack_length() &&
         !(to->flags & UNSIGNED_FLAG && !(from->flags & UNSIGNED_FLAG)) &&
+        to->decimals() == from->decimals() &&
 	to->real_type() != MYSQL_TYPE_ENUM &&
 	to->real_type() != MYSQL_TYPE_SET &&
         to->real_type() != MYSQL_TYPE_BIT &&
         (to->real_type() != MYSQL_TYPE_NEWDECIMAL ||
-         ((to->field_length == from->field_length &&
-           (((Field_num*)to)->dec == ((Field_num*)from)->dec)))) &&
+         to->field_length == from->field_length) &&
         from->charset() == to->charset() &&
-	to->table->s->db_low_byte_first == from->table->s->db_low_byte_first &&
         (!(to->table->in_use->variables.sql_mode &
            (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) ||
          (to->type() != MYSQL_TYPE_DATE &&
@@ -817,7 +874,22 @@ int field_conv(Field *to,Field *from)
     ((Field_enum *)(to))->store_type(0);
     return 0;
   }
-  else if ((from->result_type() == STRING_RESULT &&
+  if (from->result_type() == REAL_RESULT)
+    return to->store(from->val_real());
+  if (from->result_type() == DECIMAL_RESULT)
+  {
+    my_decimal buff;
+    return to->store_decimal(from->val_decimal(&buff));
+  }
+  if (from->cmp_type() == TIME_RESULT)
+  {
+    MYSQL_TIME ltime;
+    if (from->get_date(&ltime, TIME_FUZZY_DATE))
+      return to->reset();
+    else
+      return to->store_time_dec(&ltime, from->decimals());
+  }
+  if ((from->result_type() == STRING_RESULT &&
             (to->result_type() == STRING_RESULT ||
              (from->real_type() != MYSQL_TYPE_ENUM &&
               from->real_type() != MYSQL_TYPE_SET))) ||
@@ -834,13 +906,5 @@ int field_conv(Field *to,Field *from)
     */
     return to->store(result.c_ptr_quick(),result.length(),from->charset());
   }
-  else if (from->result_type() == REAL_RESULT)
-    return to->store(from->val_real());
-  else if (from->result_type() == DECIMAL_RESULT)
-  {
-    my_decimal buff;
-    return to->store_decimal(from->val_decimal(&buff));
-  }
-  else
-    return to->store(from->val_int(), test(from->flags & UNSIGNED_FLAG));
+  return to->store(from->val_int(), test(from->flags & UNSIGNED_FLAG));
 }
diff --git a/sql/filesort.cc b/sql/filesort.cc
index d2166ad701c..c12337ee9be 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -52,10 +52,6 @@ static int write_keys(SORTPARAM *param,uchar * *sort_keys,
 		      uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
 static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos);
 static void register_used_fields(SORTPARAM *param);
-static int merge_index(SORTPARAM *param,uchar *sort_buffer,
-		       BUFFPEK *buffpek,
-		       uint maxbuffer,IO_CACHE *tempfile,
-		       IO_CACHE *outfile);
 static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, 
                        FILESORT_INFO *table_sort);
 static uint suffix_length(ulong string_length);
@@ -64,7 +60,7 @@ static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
 static SORT_ADDON_FIELD *get_addon_fields(THD *thd, Field **ptabfield,
                                           uint sortlength, uint *plength);
 static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
-                                uchar *buff);
+                                uchar *buff, uchar *buff_end);
 /**
   Sort a table.
   Creates a set of pointers that can be used to read the rows
@@ -145,8 +141,6 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   bzero((char*) &param,sizeof(param));
   param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset);
   param.ref_length= table->file->ref_length;
-  param.addon_field= 0;
-  param.addon_length= 0;
   if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
       !table->fulltext_searched && !sort_positions)
   {
@@ -237,7 +231,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     goto err;
   }
   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
-		       DISK_BUFFER_SIZE, MYF(MY_WME)))
+		       DISK_BUFFER_SIZE, MYF(ME_ERROR | MY_WME)))
     goto err;
 
   param.keys--;  			/* TODO: check why we do this */
@@ -274,7 +268,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 	/* Open cached file if it isn't open */
     if (! my_b_inited(outfile) &&
 	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-			  MYF(MY_WME)))
+			  MYF(ME_ERROR | MY_WME)))
       goto err;
     if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
       goto err;
@@ -326,8 +320,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     }
   }
   if (error)
-    my_message(ER_FILSORT_ABORT, ER(ER_FILSORT_ABORT),
-               MYF(ME_ERROR+ME_WAITTANG));
+    my_message(ER_FILSORT_ABORT, ER(ER_FILSORT_ABORT), MYF(0));
   else
     statistic_add(thd->status_var.filesort_rows,
 		  (ulong) records, &LOCK_status);
@@ -515,7 +508,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   my_off_t record;
   TABLE *sort_form;
   THD *thd= current_thd;
-  volatile THD::killed_state *killed= &thd->killed;
+  volatile killed_state *killed= &thd->killed;
   handler *file;
   MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
   DBUG_ENTER("find_all_keys");
@@ -545,11 +538,6 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 		    current_thd->variables.read_buff_size);
   }
 
-  if (quick_select)
-  {
-    if (select->quick->reset())
-      DBUG_RETURN(HA_POS_ERROR);
-  }
 
   /* Remember original bitmaps */
   save_read_set=  sort_form->read_set;
@@ -560,12 +548,21 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   /* Temporary set for register_used_fields and register_field_in_read_map */
   sort_form->read_set= &sort_form->tmp_set;
   register_used_fields(param);
-  if (select && select->cond)
-    select->cond->walk(&Item::register_field_in_read_map, 1,
-                       (uchar*) sort_form);
+  Item *sort_cond= !select ?  
+                     0 : !select->pre_idx_push_select_cond ? 
+                           select->cond : select->pre_idx_push_select_cond;
+  if (sort_cond)
+    sort_cond->walk(&Item::register_field_in_read_map, 1, (uchar*) sort_form);
   sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set, 
                                 &sort_form->tmp_set);
 
+
+  if (quick_select)
+  {
+    if (select->quick->reset())
+      DBUG_RETURN(HA_POS_ERROR);
+  }
+
   for (;;)
   {
     if (quick_select)
@@ -615,10 +612,40 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
       }
       DBUG_RETURN(HA_POS_ERROR);		/* purecov: inspected */
     }
+
+    bool write_record= false;
     if (error == 0)
+    {
       param->examined_rows++;
-   
-    if (error == 0 && (!select || select->skip_record(thd) > 0))
+      if (select && select->cond)
+      {
+        /*
+          If the condition 'select->cond' contains a subquery, restore the
+          original read/write sets of the table 'sort_form' because when
+          SQL_SELECT::skip_record evaluates this condition. it may include a
+          correlated subquery predicate, such that some field in the subquery
+          refers to 'sort_form'.
+
+          PSergey-todo: discuss the above with Timour.
+        */
+        MY_BITMAP *tmp_read_set= sort_form->read_set;
+        MY_BITMAP *tmp_write_set= sort_form->write_set;
+        MY_BITMAP *tmp_vcol_set= sort_form->vcol_set;
+
+        if (select->cond->with_subselect)
+          sort_form->column_bitmaps_set(save_read_set, save_write_set,
+                                        save_vcol_set);
+        write_record= (select->skip_record(thd) > 0);
+        if (select->cond->with_subselect)
+          sort_form->column_bitmaps_set(tmp_read_set,
+                                        tmp_write_set,
+                                        tmp_vcol_set);
+      }
+      else
+        write_record= true;
+    }
+
+    if (write_record)
     {
       if (idx == param->keys)
       {
@@ -631,7 +658,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     }
     else
       file->unlock_row();
-      
+
     /* It does not make sense to read more keys in case of a fatal error */
     if (thd->is_error())
       break;
@@ -853,25 +880,28 @@ static void make_sortkey(register SORTPARAM *param,
         break;
       }
       case INT_RESULT:
+      case TIME_RESULT:
 	{
-          longlong value= item->val_int_result();
+          longlong UNINIT_VAR(value);
+          if (sort_field->result_type == INT_RESULT)
+            value= item->val_int_result();
+          else
+          {
+            MYSQL_TIME buf;
+            if (item->get_date_result(&buf, TIME_FUZZY_DATE | TIME_INVALID_DATES))
+              DBUG_ASSERT(maybe_null && item->null_value);
+            else
+              value= pack_time(&buf);
+          }
           if (maybe_null)
           {
-	    *to++=1;				/* purecov: inspected */
             if (item->null_value)
             {
-              if (maybe_null)
-                bzero((char*) to-1,sort_field->length+1);
-              else
-              {
-                DBUG_PRINT("warning",
-                           ("Got null on something that shouldn't be null"));
-                bzero((char*) to,sort_field->length);
-              }
+              bzero((char*) to++, sort_field->length+1);
               break;
             }
+	    *to++=1;				/* purecov: inspected */
           }
-#if SIZEOF_LONG_LONG > 4
 	  to[7]= (uchar) value;
 	  to[6]= (uchar) (value >> 8);
 	  to[5]= (uchar) (value >> 16);
@@ -883,15 +913,6 @@ static void make_sortkey(register SORTPARAM *param,
             to[0]= (uchar) (value >> 56);
           else
             to[0]= (uchar) (value >> 56) ^ 128;	/* Reverse signbit */
-#else
-	  to[3]= (uchar) value;
-	  to[2]= (uchar) (value >> 8);
-	  to[1]= (uchar) (value >> 16);
-          if (item->unsigned_flag)                    /* Fix sign */
-            to[0]= (uchar) (value >> 24);
-          else
-            to[0]= (uchar) (value >> 24) ^ 128;	/* Reverse signbit */
-#endif
 	  break;
 	}
       case DECIMAL_RESULT:
@@ -901,8 +922,7 @@ static void make_sortkey(register SORTPARAM *param,
           {
             if (item->null_value)
             { 
-              bzero((char*)to, sort_field->length+1);
-              to++;
+              bzero((char*) to++, sort_field->length+1);
               break;
             }
             *to++=1;
@@ -1163,7 +1183,9 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
 {
   uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
-  for (uint i= 0; i < queue->elements; ++i)
+  for (uint i= queue_first_element(queue);
+       i <= queue_last_element(queue);
+       i++)
   {
     BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
     if (bp->base + bp->max_keys * key_length == reuse->base)
@@ -1216,8 +1238,11 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   QUEUE queue;
   qsort2_cmp cmp;
   void *first_cmp_arg;
-  volatile THD::killed_state *killed= &current_thd->killed;
-  THD::killed_state not_killable;
+  element_count dupl_count= 0;
+  uchar *src;
+  killed_state not_killable;
+  uchar *unique_buff= param->unique_buff;
+  volatile killed_state *killed= &current_thd->killed;
   DBUG_ENTER("merge_buffers");
 
   status_var_increment(current_thd->status_var.filesort_merge_passes);
@@ -1225,14 +1250,20 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   if (param->not_killable)
   {
     killed= &not_killable;
-    not_killable= THD::NOT_KILLED;
+    not_killable= NOT_KILLED;
   }
 
   error=0;
   rec_length= param->rec_length;
   res_length= param->res_length;
   sort_length= param->sort_length;
-  offset= rec_length-res_length;
+  uint dupl_count_ofs= rec_length-sizeof(element_count);
+  uint min_dupl_count= param->min_dupl_count;
+  bool check_dupl_count= flag && min_dupl_count;
+  offset= (rec_length-
+           (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
+  uint wr_len= flag ? res_length : rec_length;
+  uint wr_offset= flag ? offset : 0;
   maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
   to_start_filepos= my_b_tell(to_file);
   strpos= sort_buffer;
@@ -1241,7 +1272,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   /* The following will fire if there is not enough space in sort_buffer */
   DBUG_ASSERT(maxcount!=0);
   
-  if (param->unique_buff)
+  if (unique_buff)
   {
     cmp= param->compare;
     first_cmp_arg= (void *) &param->cmp_context;
@@ -1252,7 +1283,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     first_cmp_arg= (void*) &sort_length;
   }
   if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
-                 (queue_compare) cmp, first_cmp_arg))
+                 (queue_compare) cmp, first_cmp_arg, 0, 0))
     DBUG_RETURN(1);                                /* purecov: inspected */
   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
   {
@@ -1266,30 +1297,31 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     queue_insert(&queue, (uchar*) buffpek);
   }
 
-  if (param->unique_buff)
+  if (unique_buff)
   {
     /* 
        Called by Unique::get()
-       Copy the first argument to param->unique_buff for unique removal.
+       Copy the first argument to unique_buff for unique removal.
        Store it also in 'to_file'.
-
-       This is safe as we know that there is always more than one element
-       in each block to merge (This is guaranteed by the Unique:: algorithm
     */
     buffpek= (BUFFPEK*) queue_top(&queue);
-    memcpy(param->unique_buff, buffpek->key, rec_length);
-    if (my_b_write(to_file, (uchar*) buffpek->key, rec_length))
-    {
-      error=1; goto err;                        /* purecov: inspected */
-    }
+    memcpy(unique_buff, buffpek->key, rec_length);
+    if (min_dupl_count)
+      memcpy(&dupl_count, unique_buff+dupl_count_ofs, 
+             sizeof(dupl_count));
     buffpek->key+= rec_length;
-    buffpek->mem_count--;
-    if (!--max_rows)
+    if (! --buffpek->mem_count)
     {
-      error= 0;                                       /* purecov: inspected */
-      goto end;                                       /* purecov: inspected */
+      if (!(error= (int) read_to_buffer(from_file, buffpek,
+                                        rec_length)))
+      {
+        VOID(queue_remove(&queue,0));
+        reuse_freed_buff(&queue, buffpek, rec_length);
+      }
+      else if (error == -1)
+        goto err;                        /* purecov: inspected */ 
     }
-    queue_replaced(&queue);                        // Top element has been used
+    queue_replace_top(&queue);            // Top element has been used
   }
   else
     cmp= 0;                                        // Not unique
@@ -1303,27 +1335,50 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     for (;;)
     {
       buffpek= (BUFFPEK*) queue_top(&queue);
+      src= buffpek->key;
       if (cmp)                                        // Remove duplicates
       {
-        if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
+        if (!(*cmp)(first_cmp_arg, &unique_buff,
                     (uchar**) &buffpek->key))
-              goto skip_duplicate;
-            memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
-      }
-      if (flag == 0)
-      {
-        if (my_b_write(to_file,(uchar*) buffpek->key, rec_length))
-        {
-          error=1; goto err;                        /* purecov: inspected */
+	{
+          if (min_dupl_count)
+	  {
+            element_count cnt;
+            memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
+            dupl_count+= cnt;
+          }
+          goto skip_duplicate;
+        }
+        if (min_dupl_count)
+	{
+          memcpy(unique_buff+dupl_count_ofs, &dupl_count,
+                 sizeof(dupl_count));
         }
+	src= unique_buff;
       }
-      else
+        
+      /* 
+        Do not write into the output file if this is the final merge called
+        for a Unique object used for intersection and dupl_count is less
+        than min_dupl_count.
+        If the Unique object is used to intersect N sets of unique elements
+        then for any element:
+        dupl_count >= N <=> the element is occurred in each of these N sets.
+      */          
+      if (!check_dupl_count || dupl_count >= min_dupl_count)
       {
-        if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length))
+        if (my_b_write(to_file, src+wr_offset, wr_len))
         {
           error=1; goto err;                        /* purecov: inspected */
         }
       }
+      if (cmp)
+      {   
+        memcpy(unique_buff, (uchar*) buffpek->key, rec_length);
+        if (min_dupl_count)
+          memcpy(&dupl_count, unique_buff+dupl_count_ofs, 
+                 sizeof(dupl_count));
+      }
       if (!--max_rows)
       {
         error= 0;                               /* purecov: inspected */
@@ -1334,17 +1389,17 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
       buffpek->key+= rec_length;
       if (! --buffpek->mem_count)
       {
-        if (!(error= (int) read_to_buffer(from_file,buffpek,
+        if (!(error= (int) read_to_buffer(from_file, buffpek,
                                           rec_length)))
         {
-          VOID(queue_remove(&queue,0));
+          VOID(queue_remove_top(&queue));
           reuse_freed_buff(&queue, buffpek, rec_length);
           break;                        /* One buffer have been removed */
         }
         else if (error == -1)
           goto err;                        /* purecov: inspected */
       }
-      queue_replaced(&queue);              /* Top element has been replaced */
+      queue_replace_top(&queue);   	/* Top element has been replaced */
     }
   }
   buffpek= (BUFFPEK*) queue_top(&queue);
@@ -1357,11 +1412,35 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   */
   if (cmp)
   {
-    if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
+    if (!(*cmp)(first_cmp_arg, &unique_buff, (uchar**) &buffpek->key))
     {
-      buffpek->key+= rec_length;         // Remove duplicate
+      if (min_dupl_count)
+      {
+        element_count cnt;
+        memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
+        dupl_count+= cnt;
+      }
+      buffpek->key+= rec_length;         
       --buffpek->mem_count;
     }
+
+    if (min_dupl_count)
+      memcpy(unique_buff+dupl_count_ofs, &dupl_count,
+             sizeof(dupl_count));
+
+    if (!check_dupl_count || dupl_count >= min_dupl_count)
+    {
+      src= unique_buff;
+      if (my_b_write(to_file, src+wr_offset, wr_len))
+      {
+        error=1; goto err;                        /* purecov: inspected */
+      }
+      if (!--max_rows)
+      {
+        error= 0;                               
+        goto end;                             
+      }
+    }   
   }
 
   do
@@ -1374,7 +1453,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     max_rows-= buffpek->mem_count;
     if (flag == 0)
     {
-      if (my_b_write(to_file,(uchar*) buffpek->key,
+      if (my_b_write(to_file, (uchar*) buffpek->key,
                      (rec_length*buffpek->mem_count)))
       {
         error= 1; goto err;                        /* purecov: inspected */
@@ -1383,19 +1462,25 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     else
     {
       register uchar *end;
-      strpos= buffpek->key+offset;
-      for (end= strpos+buffpek->mem_count*rec_length ;
-           strpos != end ;
-           strpos+= rec_length)
-      {     
-        if (my_b_write(to_file, strpos, res_length))
+      src= buffpek->key+offset;
+      for (end= src+buffpek->mem_count*rec_length ;
+           src != end ;
+           src+= rec_length)
+      {
+        if (check_dupl_count)
+        {
+          memcpy((uchar *) &dupl_count, src+dupl_count_ofs, sizeof(dupl_count)); 
+          if (dupl_count < min_dupl_count)
+	    continue;
+        }
+        if (my_b_write(to_file, src, wr_len))
         {
           error=1; goto err;                        
         }
       }
     }
   }
-  while ((error=(int) read_to_buffer(from_file,buffpek, rec_length))
+  while ((error=(int) read_to_buffer(from_file, buffpek, rec_length))
          != -1 && error != 0);
 
 end:
@@ -1409,7 +1494,7 @@ err:
 
 	/* Do a merge to output-file (save only positions) */
 
-static int merge_index(SORTPARAM *param, uchar *sort_buffer,
+int merge_index(SORTPARAM *param, uchar *sort_buffer,
 		       BUFFPEK *buffpek, uint maxbuffer,
 		       IO_CACHE *tempfile, IO_CACHE *outfile)
 {
@@ -1481,9 +1566,7 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
     }
     else
     {
-      sortorder->result_type= sortorder->item->result_type();
-      if (sortorder->item->result_as_longlong())
-        sortorder->result_type= INT_RESULT;
+      sortorder->result_type= sortorder->item->cmp_type();
       switch (sortorder->result_type) {
       case STRING_RESULT:
 	sortorder->length=sortorder->item->max_length;
@@ -1501,12 +1584,9 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
           sortorder->length+= sortorder->suffix_length;
         }
 	break;
+      case TIME_RESULT:
       case INT_RESULT:
-#if SIZEOF_LONG_LONG > 4
 	sortorder->length=8;			// Size of intern longlong
-#else
-	sortorder->length=4;
-#endif
 	break;
       case DECIMAL_RESULT:
         sortorder->length=
@@ -1581,6 +1661,7 @@ get_addon_fields(THD *thd, Field **ptabfield, uint sortlength, uint *plength)
     Actually we need only the fields referred in the
     result set. And for some of them it makes sense to use 
     the values directly from sorted fields.
+    But beware the case when item->cmp_type() != item->result_type()
   */
   *plength= 0;
 
@@ -1651,7 +1732,8 @@ get_addon_fields(THD *thd, Field **ptabfield, uint sortlength, uint *plength)
 */
 
 static void 
-unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff)
+unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff,
+                    uchar *buff_end)
 {
   Field *field;
   SORT_ADDON_FIELD *addonf= addon_field;
@@ -1664,7 +1746,7 @@ unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff)
       continue;
     }
     field->set_notnull();
-    field->unpack(field->ptr, buff + addonf->offset);
+    field->unpack(field->ptr, buff + addonf->offset, buff_end, 0);
   }
 }
 
@@ -1715,3 +1797,4 @@ void change_double_for_sort(double nr,uchar *to)
     }
   }
 }
+
diff --git a/sql/gcalc_slicescan.cc b/sql/gcalc_slicescan.cc
new file mode 100644
index 00000000000..9e88f0a00ad
--- /dev/null
+++ b/sql/gcalc_slicescan.cc
@@ -0,0 +1,1996 @@
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (C) 2011 Monty Program Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+#include "mysql_priv.h"
+
+#ifdef HAVE_SPATIAL
+
+#include "gcalc_slicescan.h"
+
+
+#define PH_DATA_OFFSET 8
+#define coord_to_float(d) ((double) d)
+#define coord_eq(a, b) (a == b)
+
+typedef int (*sc_compare_func)(const void*, const void*);
+
+#define LS_LIST_ITEM Gcalc_dyn_list::Item
+#define LS_COMPARE_FUNC_DECL sc_compare_func compare,
+#define LS_COMPARE_FUNC_CALL(list_el1, list_el2) (*compare)(list_el1, list_el2)
+#define LS_NEXT(A) (A)->next
+#define LS_SET_NEXT(A,val) (A)->next= val
+#define LS_P_NEXT(A) &(A)->next
+#define LS_NAME sort_list
+#define LS_SCOPE static
+#define LS_STRUCT_NAME sort_list_stack_struct
+#include "plistsort.c"
+
+
+#define GCALC_COORD_MINUS     0x80000000
+#define FIRST_DIGIT(d) ((d) & 0x7FFFFFFF)
+#define GCALC_SIGN(d)  ((d) & 0x80000000)
+
+static Gcalc_scan_iterator::point *eq_sp(const Gcalc_heap::Info *pi)
+{
+  GCALC_DBUG_ASSERT(pi->type == Gcalc_heap::nt_eq_node);
+  return (Gcalc_scan_iterator::point *) pi->eq_data;
+}
+
+
+static Gcalc_scan_iterator::intersection_info *i_data(const Gcalc_heap::Info *pi)
+{
+  GCALC_DBUG_ASSERT(pi->type == Gcalc_heap::nt_intersection);
+  return (Gcalc_scan_iterator::intersection_info *) pi->intersection_data;
+}
+
+
+#ifndef GCALC_DBUG_OFF
+
+int gcalc_step_counter= 0;
+
+void GCALC_DBUG_CHECK_COUNTER()
+{
+  if (++gcalc_step_counter == 0)
+    GCALC_DBUG_PRINT(("step_counter_0"));
+  else
+    GCALC_DBUG_PRINT(("%d step_counter", gcalc_step_counter));
+}
+
+
+const char *gcalc_ev_name(int ev)
+{
+  switch (ev)
+  {
+    case scev_none:
+      return "n";
+    case scev_thread:
+      return "t";
+    case scev_two_threads:
+      return "tt";
+    case scev_end:
+      return "e";
+    case scev_two_ends:
+      return "ee";
+    case scev_intersection:
+      return "i";
+    case scev_point:
+      return "p";
+    case scev_single_point:
+      return "sp";
+    default:;
+  };
+  GCALC_DBUG_ASSERT(0);
+  return "unk";
+}
+
+
+static int gcalc_pi_str(char *str, const Gcalc_heap::Info *pi, const char *postfix)
+{
+  return sprintf(str, "%s %d %d | %s %d %d%s",
+                     GCALC_SIGN(pi->ix[0]) ? "-":"", FIRST_DIGIT(pi->ix[0]),pi->ix[1],
+                     GCALC_SIGN(pi->iy[0]) ? "-":"", FIRST_DIGIT(pi->iy[0]),pi->iy[1],
+                     postfix);
+
+}
+
+
+static void GCALC_DBUG_PRINT_PI(const Gcalc_heap::Info *pi)
+{
+  char buf[128];
+  int n_buf;
+  if (pi->type == Gcalc_heap::nt_intersection)
+  {
+    const Gcalc_scan_iterator::intersection_info *ic= i_data(pi);
+
+    GCALC_DBUG_PRINT(("intersection point %d %d",
+                      ic->edge_a->thread, ic->edge_b->thread));
+    return;
+  }
+  if (pi->type == Gcalc_heap::nt_eq_node)
+  {
+    const Gcalc_scan_iterator::point *e= eq_sp(pi);
+    GCALC_DBUG_PRINT(("eq point %d", e->thread));
+    return;
+  }
+  n_buf= gcalc_pi_str(buf, pi, "");
+  buf[n_buf]= 0;
+  GCALC_DBUG_PRINT(("%s", buf));
+}
+
+
+static void GCALC_DBUG_PRINT_SLICE(const char *header,
+                                   const Gcalc_scan_iterator::point *slice)
+{
+  int nbuf;
+  char buf[1024];
+  nbuf= strlen(header);
+  strcpy(buf, header);
+  for (; slice; slice= slice->get_next())
+  {
+    int lnbuf= nbuf;
+    lnbuf+= sprintf(buf + lnbuf, "%d\t", slice->thread);
+    lnbuf+= sprintf(buf + lnbuf, "%s\t", gcalc_ev_name(slice->event));
+
+    lnbuf+= gcalc_pi_str(buf + lnbuf, slice->pi, "\t");
+    if (slice->is_bottom())
+      lnbuf+= sprintf(buf+lnbuf, "bt\t");
+    else
+      lnbuf+= gcalc_pi_str(buf+lnbuf, slice->next_pi, "\t");
+    buf[lnbuf]= 0;
+    GCALC_DBUG_PRINT(("%s", buf));
+  }
+}
+
+
+#else
+#define GCALC_DBUG_CHECK_COUNTER()              do { } while(0)
+#define GCALC_DBUG_PRINT_PI(pi)                 do { } while(0)
+#define GCALC_DBUG_PRINT_SLICE(a, b)            do { } while(0)
+#define GCALC_DBUG_PRINT_INTERSECTIONS(a)       do { } while(0)
+#define GCALC_DBUG_PRINT_STATE(a)               do { } while(0)
+#endif /*GCALC_DBUG_OFF*/
+
+
+Gcalc_dyn_list::Gcalc_dyn_list(size_t blk_size, size_t sizeof_item):
+  m_blk_size(blk_size - ALLOC_ROOT_MIN_BLOCK_SIZE),
+  m_sizeof_item(ALIGN_SIZE(sizeof_item)),
+  m_points_per_blk((m_blk_size - PH_DATA_OFFSET) / m_sizeof_item),
+  m_blk_hook(&m_first_blk),
+  m_free(NULL),
+  m_keep(NULL)
+{}
+
+
+void Gcalc_dyn_list::format_blk(void* block)
+{
+  Item *pi_end, *cur_pi, *first_pi;
+  GCALC_DBUG_ASSERT(m_free == NULL);
+  first_pi= cur_pi= (Item *)(((char *)block) + PH_DATA_OFFSET);
+  pi_end= ptr_add(first_pi, m_points_per_blk - 1);
+  do {
+    cur_pi= cur_pi->next= ptr_add(cur_pi, 1);
+  } while (cur_pi<pi_end);
+  cur_pi->next= m_free;
+  m_free= first_pi;
+}
+
+
+Gcalc_dyn_list::Item *Gcalc_dyn_list::alloc_new_blk()
+{
+  void *new_block= my_malloc(m_blk_size, MYF(MY_WME));
+  if (!new_block)
+    return NULL;
+  *m_blk_hook= new_block;
+  m_blk_hook= (void**)new_block;
+  format_blk(new_block);
+  return new_item();
+}
+
+
+static void free_blk_list(void *list)
+{
+  void *next_blk;
+  while (list)
+  {
+    next_blk= *((void **)list);
+    my_free(list, MYF(0));
+    list= next_blk;
+  }
+}
+
+
+void Gcalc_dyn_list::cleanup()
+{
+  *m_blk_hook= NULL;
+  free_blk_list(m_first_blk);
+  m_first_blk= NULL;
+  m_blk_hook= &m_first_blk;
+  m_free= NULL;
+}
+
+
+Gcalc_dyn_list::~Gcalc_dyn_list()
+{
+  cleanup();
+}
+
+
+void Gcalc_dyn_list::reset()
+{
+  *m_blk_hook= NULL;
+  if (m_first_blk)
+  {
+    free_blk_list(*((void **)m_first_blk));
+    m_blk_hook= (void**)m_first_blk;
+    m_free= NULL;
+    format_blk(m_first_blk);
+  }
+}
+
+
+/* Internal coordinate operations implementations */
+
+void gcalc_set_zero(Gcalc_internal_coord *d, int d_len)
+{
+  do
+  {
+    d[--d_len]= 0;
+  } while (d_len); 
+}
+
+
+int gcalc_is_zero(const Gcalc_internal_coord *d, int d_len)
+{
+  do
+  {
+    if (d[--d_len] != 0)
+      return 0;
+  } while (d_len); 
+  return 1;
+}
+
+
+#ifdef GCALC_CHECK_WITH_FLOAT
+static double *gcalc_coord_extent= NULL;
+
+long double gcalc_get_double(const Gcalc_internal_coord *d, int d_len)
+{
+  int n= 1;
+  long double res= (long double) FIRST_DIGIT(d[0]);
+  do
+  {
+    res*= (long double) GCALC_DIG_BASE;
+    res+= (long double) d[n];
+  } while(++n < d_len);
+
+  n= 0;
+  do
+  {
+    if ((n & 1) && gcalc_coord_extent)
+      res/= *gcalc_coord_extent;
+  } while(++n < d_len);
+
+  if (GCALC_SIGN(d[0]))
+    res*= -1.0;
+  return res;
+}
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+
+static void do_add(Gcalc_internal_coord *result, int result_len,
+                   const Gcalc_internal_coord *a,
+                   const Gcalc_internal_coord *b)
+{
+  int n_digit= result_len-1;
+  gcalc_digit_t carry= 0;
+
+  do
+  {
+    if ((result[n_digit]=
+          a[n_digit] + b[n_digit] + carry) >= GCALC_DIG_BASE)
+    {
+      carry= 1;
+      result[n_digit]-= GCALC_DIG_BASE;
+    }
+    else
+      carry= 0;
+  } while (--n_digit);
+
+  result[0]= (a[0] + FIRST_DIGIT(b[0]) + carry);
+
+  GCALC_DBUG_ASSERT(FIRST_DIGIT(result[0]) < GCALC_DIG_BASE);
+}
+
+
+static void do_sub(Gcalc_internal_coord *result, int result_len,
+                   const Gcalc_internal_coord *a,
+                   const Gcalc_internal_coord *b)
+{
+  int n_digit= result_len-1;
+  gcalc_digit_t carry= 0;
+  gcalc_digit_t cur_b, cur_a;
+
+  do
+  {
+    cur_b= b[n_digit] + carry;
+    cur_a= a[n_digit];
+    if (cur_a < cur_b)
+    {
+      carry= 1;
+      result[n_digit]= (GCALC_DIG_BASE - cur_b) + cur_a;
+    }
+    else
+    {
+      carry= 0;
+      result[n_digit]= cur_a - cur_b;
+    }
+  } while (--n_digit);
+
+
+  result[0]= a[0] - FIRST_DIGIT(b[0]) - carry;
+
+  GCALC_DBUG_ASSERT(FIRST_DIGIT(a[0]) >= FIRST_DIGIT(b[0]) + carry);
+  GCALC_DBUG_ASSERT(!gcalc_is_zero(result, result_len));
+}
+/*
+static void do_sub(Gcalc_internal_coord *result, int result_len,
+                   const Gcalc_internal_coord *a,
+                   const Gcalc_internal_coord *b)
+{
+  int n_digit= result_len-1;
+  gcalc_digit_t carry= 0;
+
+  do
+  {
+    if ((result[n_digit]= a[n_digit] - b[n_digit] - carry) < 0)
+    {
+      carry= 1;
+      result[n_digit]+= GCALC_DIG_BASE;
+    }
+    else
+      carry= 0;
+  } while (--n_digit);
+
+
+  result[0]= a[0] - FIRST_DIGIT(b[0]) - carry;
+
+  GCALC_DBUG_ASSERT(FIRST_DIGIT(a[0]) - FIRST_DIGIT(b[0]) - carry >= 0);
+  GCALC_DBUG_ASSERT(!gcalc_is_zero(result, result_len));
+}
+*/
+
+static int do_cmp(const Gcalc_internal_coord *a,
+                  const Gcalc_internal_coord *b, int len)
+{
+  int n_digit= 1;
+
+  if ((FIRST_DIGIT(a[0]) != FIRST_DIGIT(b[0])))
+    return FIRST_DIGIT(a[0]) > FIRST_DIGIT(b[0]) ? 1 : -1;
+
+  do
+  {
+    if ((a[n_digit] != b[n_digit]))
+      return a[n_digit] > b[n_digit] ? 1 : -1;
+  } while (++n_digit < len);
+
+  return 0;
+}
+
+
+#ifdef GCALC_CHECK_WITH_FLOAT
+static int de_weak_check(long double a, long double b, long double ex)
+{
+  long double d= a - b;
+  if (d < ex && d > -ex)
+    return 1;
+
+  d/= fabsl(a) + fabsl(b);
+  if (d < ex && d > -ex)
+    return 1;
+  return 0;
+}
+
+static int de_check(long double a, long double b)
+{
+  return de_weak_check(a, b, (long double) 1e-9);
+}
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+
+void gcalc_mul_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a, int a_len,
+                     const Gcalc_internal_coord *b, int b_len)
+{
+  GCALC_DBUG_ASSERT(result_len == a_len + b_len);
+  GCALC_DBUG_ASSERT(a_len >= b_len);
+  int n_a, n_b, n_res;
+  gcalc_digit_t carry= 0;
+
+  gcalc_set_zero(result, result_len);
+
+  n_a= a_len - 1;
+  do
+  {
+    gcalc_coord2 cur_a= n_a ? a[n_a] : FIRST_DIGIT(a[0]);
+    n_b= b_len - 1;
+    do
+    {
+      gcalc_coord2 cur_b= n_b ? b[n_b] : FIRST_DIGIT(b[0]);
+      gcalc_coord2 mul= cur_a * cur_b + carry + result[n_a + n_b + 1];
+      result[n_a + n_b + 1]= mul % GCALC_DIG_BASE;
+      carry= (gcalc_digit_t) (mul / (gcalc_coord2) GCALC_DIG_BASE);
+    } while (n_b--);
+    if (carry)
+    {
+      for (n_res= n_a; (result[n_res]+= carry) >= GCALC_DIG_BASE;
+           n_res--)
+      {
+        result[n_res]-= GCALC_DIG_BASE;
+        carry= 1;
+      }
+      carry= 0;
+    }
+  } while (n_a--);
+  if (!gcalc_is_zero(result, result_len))
+    result[0]|= GCALC_SIGN(a[0] ^ b[0]);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, a_len) *
+                               gcalc_get_double(b, b_len),
+                       gcalc_get_double(result, result_len)));
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+}
+
+
+inline void gcalc_mul_coord1(Gcalc_coord1 result,
+                             const Gcalc_coord1 a, const Gcalc_coord1 b)
+{
+  return gcalc_mul_coord(result, GCALC_COORD_BASE2,
+                         a, GCALC_COORD_BASE, b, GCALC_COORD_BASE);
+}
+
+
+void gcalc_add_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a,
+                     const Gcalc_internal_coord *b)
+{
+  if (GCALC_SIGN(a[0]) == GCALC_SIGN(b[0]))
+    do_add(result, result_len, a, b);
+  else
+  {
+    int cmp_res= do_cmp(a, b, result_len);
+    if (cmp_res == 0)
+      gcalc_set_zero(result, result_len);
+    else if (cmp_res > 0)
+      do_sub(result, result_len, a, b);
+    else
+      do_sub(result, result_len, b, a);
+  }
+#ifdef GCALC_CHECK_WITH_FLOAT
+  GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, result_len) +
+                               gcalc_get_double(b, result_len),
+                       gcalc_get_double(result, result_len)));
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+}
+
+
+void gcalc_sub_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a,
+                     const Gcalc_internal_coord *b)
+{
+  if (GCALC_SIGN(a[0] ^ b[0]))
+    do_add(result, result_len, a, b);
+  else
+  {
+    int cmp_res= do_cmp(a, b, result_len);
+    if (cmp_res == 0)
+      gcalc_set_zero(result, result_len);
+    else if (cmp_res > 0)
+      do_sub(result, result_len, a, b);
+    else
+    {
+      do_sub(result, result_len, b, a);
+      result[0]^= GCALC_COORD_MINUS;
+    }
+  }
+#ifdef GCALC_CHECK_WITH_FLOAT
+  GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, result_len) -
+                               gcalc_get_double(b, result_len),
+                       gcalc_get_double(result, result_len)));
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+}
+
+
+inline void gcalc_sub_coord1(Gcalc_coord1 result,
+                             const Gcalc_coord1 a, const Gcalc_coord1 b)
+{
+  return gcalc_sub_coord(result, GCALC_COORD_BASE, a, b);
+}
+
+
+int gcalc_cmp_coord(const Gcalc_internal_coord *a,
+                    const Gcalc_internal_coord *b, int len)
+{
+  int n_digit= 0;
+  int result= 0;
+
+  do
+  {
+    if (a[n_digit] == b[n_digit])
+    {
+      n_digit++;
+      continue;
+    }
+    if (a[n_digit] > b[n_digit])
+      result= GCALC_SIGN(a[0]) ? -1 : 1;
+    else
+      result= GCALC_SIGN(b[0]) ? 1 : -1;
+    break;
+
+  } while (n_digit < len);
+
+#ifdef GCALC_CHECK_WITH_FLOAT
+  if (result == 0)
+    GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len),
+                                 gcalc_get_double(b, len)));
+  else if (result == 1)
+    GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len),
+                                 gcalc_get_double(b, len)) ||
+                gcalc_get_double(a, len) > gcalc_get_double(b, len));
+  else
+    GCALC_DBUG_ASSERT(de_check(gcalc_get_double(a, len),
+                                 gcalc_get_double(b, len)) ||
+                gcalc_get_double(a, len) < gcalc_get_double(b, len));
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  return result;
+}
+
+
+#define gcalc_cmp_coord1(a, b) gcalc_cmp_coord(a, b, GCALC_COORD_BASE)
+
+int gcalc_set_double(Gcalc_internal_coord *c, double d, double ext)
+{
+  int sign;
+  double ds= d * ext;
+  if ((sign= ds < 0))
+    ds= -ds;
+  c[0]= (gcalc_digit_t) (ds / (double) GCALC_DIG_BASE);
+  c[1]= (gcalc_digit_t) (ds - ((double) c[0]) * (double) GCALC_DIG_BASE);
+  if (c[1] >= GCALC_DIG_BASE)
+  {
+    c[1]= 0;
+    c[0]++;
+  }
+  if (sign)
+    c[0]|= GCALC_COORD_MINUS;
+#ifdef GCALC_CHECK_WITH_FLOAT
+  GCALC_DBUG_ASSERT(de_check(d, gcalc_get_double(c, 2)));
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  return 0;
+} 
+
+
+typedef gcalc_digit_t Gcalc_coord4[GCALC_COORD_BASE*4];
+typedef gcalc_digit_t Gcalc_coord5[GCALC_COORD_BASE*5];
+
+
+void Gcalc_scan_iterator::intersection_info::do_calc_t()
+{
+  Gcalc_coord1 a2_a1x, a2_a1y;
+  Gcalc_coord2 x1y2, x2y1;
+
+  gcalc_sub_coord1(a2_a1x, edge_b->pi->ix, edge_a->pi->ix);
+  gcalc_sub_coord1(a2_a1y, edge_b->pi->iy, edge_a->pi->iy);
+
+  GCALC_DBUG_ASSERT(!gcalc_is_zero(edge_a->dy, GCALC_COORD_BASE) ||
+                    !gcalc_is_zero(edge_b->dy, GCALC_COORD_BASE));
+
+  gcalc_mul_coord1(x1y2, edge_a->dx, edge_b->dy);
+  gcalc_mul_coord1(x2y1, edge_a->dy, edge_b->dx);
+  gcalc_sub_coord(t_b, GCALC_COORD_BASE2, x1y2, x2y1);
+
+
+  gcalc_mul_coord1(x1y2, a2_a1x, edge_b->dy);
+  gcalc_mul_coord1(x2y1, a2_a1y, edge_b->dx);
+  gcalc_sub_coord(t_a, GCALC_COORD_BASE2, x1y2, x2y1);
+  t_calculated= 1;
+}
+
+
+void Gcalc_scan_iterator::intersection_info::do_calc_y()
+{
+  GCALC_DBUG_ASSERT(t_calculated);
+
+  Gcalc_coord3 a_tb, b_ta;
+
+  gcalc_mul_coord(a_tb, GCALC_COORD_BASE3,
+                  t_b, GCALC_COORD_BASE2, edge_a->pi->iy, GCALC_COORD_BASE);
+  gcalc_mul_coord(b_ta, GCALC_COORD_BASE3,
+                  t_a, GCALC_COORD_BASE2, edge_a->dy, GCALC_COORD_BASE);
+
+  gcalc_add_coord(y_exp, GCALC_COORD_BASE3, a_tb, b_ta);
+  y_calculated= 1;
+}
+
+
+void Gcalc_scan_iterator::intersection_info::do_calc_x()
+{
+  GCALC_DBUG_ASSERT(t_calculated);
+
+  Gcalc_coord3 a_tb, b_ta;
+
+  gcalc_mul_coord(a_tb, GCALC_COORD_BASE3,
+                  t_b, GCALC_COORD_BASE2, edge_a->pi->ix, GCALC_COORD_BASE);
+  gcalc_mul_coord(b_ta, GCALC_COORD_BASE3,
+                  t_a, GCALC_COORD_BASE2, edge_a->dx, GCALC_COORD_BASE);
+
+  gcalc_add_coord(x_exp, GCALC_COORD_BASE3, a_tb, b_ta);
+  x_calculated= 1;
+}
+
+
+static int cmp_node_isc(const Gcalc_heap::Info *node,
+                        const Gcalc_heap::Info *isc)
+{
+  GCALC_DBUG_ASSERT(node->type == Gcalc_heap::nt_shape_node);
+  Gcalc_scan_iterator::intersection_info *inf= i_data(isc);
+  Gcalc_coord3 exp;
+  int result;
+
+  inf->calc_t();
+  inf->calc_y_exp();
+
+  gcalc_mul_coord(exp, GCALC_COORD_BASE3,
+                  inf->t_b, GCALC_COORD_BASE2, node->iy, GCALC_COORD_BASE);
+
+  result= gcalc_cmp_coord(exp, inf->y_exp, GCALC_COORD_BASE3);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  long double int_x, int_y;
+  isc->calc_xy_ld(&int_x, &int_y);
+  if (result < 0)
+  {
+    if (!de_check(int_y, node->y) && node->y > int_y)
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g < %LG", node->y, int_y));
+  }
+  else if (result > 0)
+  {
+    if (!de_check(int_y, node->y) && node->y < int_y)
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g > %LG", node->y, int_y));
+  }
+  else
+  {
+    if (!de_check(int_y, node->y))
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscy %g == %LG", node->y, int_y));
+  }
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  if (result)
+    goto exit;
+
+
+  inf->calc_x_exp();
+  gcalc_mul_coord(exp, GCALC_COORD_BASE3,
+                  inf->t_b, GCALC_COORD_BASE2, node->ix, GCALC_COORD_BASE);
+
+  result= gcalc_cmp_coord(exp, inf->x_exp, GCALC_COORD_BASE3);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  if (result < 0)
+  {
+    if (!de_check(int_x, node->x) && node->x > int_x)
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g < %LG",
+                         node->x, int_x));
+  }
+  else if (result > 0)
+  {
+    if (!de_check(int_x, node->x) && node->x < int_x)
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g > %LG",
+                        node->x, int_x));
+  }
+  else
+  {
+    if (!de_check(int_x, node->x))
+      GCALC_DBUG_PRINT(("floatcheck cmp_nod_iscx failed %g == %LG",
+                        node->x, int_x));
+  }
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+exit:
+  return result;
+}
+
+
+static int cmp_intersections(const Gcalc_heap::Info *i1,
+                             const Gcalc_heap::Info *i2)
+{
+  Gcalc_scan_iterator::intersection_info *inf1= i_data(i1);
+  Gcalc_scan_iterator::intersection_info *inf2= i_data(i2);
+  Gcalc_coord5 exp_a, exp_b;
+  int result;
+
+  inf1->calc_t();
+  inf2->calc_t();
+
+  inf1->calc_y_exp();
+  inf2->calc_y_exp();
+
+  gcalc_mul_coord(exp_a, GCALC_COORD_BASE5,
+                  inf1->y_exp, GCALC_COORD_BASE3, inf2->t_b, GCALC_COORD_BASE2);
+  gcalc_mul_coord(exp_b, GCALC_COORD_BASE5,
+                  inf2->y_exp, GCALC_COORD_BASE3, inf1->t_b, GCALC_COORD_BASE2);
+
+  result= gcalc_cmp_coord(exp_a, exp_b, GCALC_COORD_BASE5);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  long double x1, y1, x2, y2;
+  i1->calc_xy_ld(&x1, &y1);
+  i2->calc_xy_ld(&x2, &y2);
+
+  if (result < 0)
+  {
+    if (!de_check(y1, y2) && y2 > y1)
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG < %LG",
+                        y2, y1));
+  }
+  else if (result > 0)
+  {
+    if (!de_check(y1, y2) && y2 < y1)
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG > %LG",
+                        y2, y1));
+  }
+  else
+  {
+    if (!de_check(y1, y2))
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersections_y failed %LG == %LG",
+                        y2, y1));
+  }
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+  if (result != 0)
+    return result;
+
+
+  inf1->calc_x_exp();
+  inf2->calc_x_exp();
+  gcalc_mul_coord(exp_a, GCALC_COORD_BASE5,
+                  inf1->x_exp, GCALC_COORD_BASE3, inf2->t_b, GCALC_COORD_BASE2);
+  gcalc_mul_coord(exp_b, GCALC_COORD_BASE5,
+                  inf2->x_exp, GCALC_COORD_BASE3, inf1->t_b, GCALC_COORD_BASE2);
+
+  result= gcalc_cmp_coord(exp_a, exp_b, GCALC_COORD_BASE5);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  if (result < 0)
+  {
+    if (!de_check(x1, x2) && x2 > x1)
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG < %LG",
+                        x2, x1));
+  }
+  else if (result > 0)
+  {
+    if (!de_check(x1, x2) && x2 < x1)
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG > %LG",
+                        x2, x1));
+  }
+  else
+  {
+    if (!de_check(x1, x2))
+      GCALC_DBUG_PRINT(("floatcheck cmp_intersectionsx failed %LG == %LG",
+                        x2, x1));
+  }
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  return result;
+}
+/* Internal coordinates implementation end */
+
+
+#define GCALC_SCALE_1 1e18
+
+static double find_scale(double extent)
+{
+  double scale= 1e-2;
+  while (scale < extent)
+    scale*= (double ) 10;
+  return GCALC_SCALE_1 / scale / 10;
+}
+
+
+void Gcalc_heap::set_extent(double xmin, double xmax, double ymin, double ymax)
+{
+  xmin= fabs(xmin);
+  xmax= fabs(xmax);
+  ymin= fabs(ymin);
+  ymax= fabs(ymax);
+
+  if (xmax < xmin)
+    xmax= xmin;
+  if (ymax < ymin)
+    ymax= ymin;
+
+  coord_extent= xmax > ymax ? xmax : ymax;
+  coord_extent= find_scale(coord_extent);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  gcalc_coord_extent= &coord_extent;
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+}
+
+
+void Gcalc_heap::free_point_info(Gcalc_heap::Info *i,
+                                 Gcalc_dyn_list::Item **i_hook)
+{
+  if (m_hook == &i->next)
+    m_hook= i_hook;
+  *i_hook= i->next;
+  free_item(i);
+  m_n_points--;
+}
+
+
+Gcalc_heap::Info *Gcalc_heap::new_point_info(double x, double y,
+                                             gcalc_shape_info shape)
+{
+  Info *result= (Info *)new_item();
+  if (!result)
+    return NULL;
+  *m_hook= result;
+  m_hook= &result->next;
+  result->x= x;
+  result->y= y;
+  result->shape= shape;
+  result->top_node= 1;
+  result->type= nt_shape_node;
+  gcalc_set_double(result->ix, x, coord_extent);
+  gcalc_set_double(result->iy, y, coord_extent);
+
+  m_n_points++;
+  return result;
+}
+
+
+static Gcalc_heap::Info *new_intersection(
+    Gcalc_heap *heap, Gcalc_scan_iterator::intersection_info *ii)
+{
+  Gcalc_heap::Info *isc= (Gcalc_heap::Info *)heap->new_item();
+  if (!isc)
+    return 0;
+  isc->type= Gcalc_heap::nt_intersection;
+  isc->p1= ii->edge_a->pi;
+  isc->p2= ii->edge_a->next_pi;
+  isc->p3= ii->edge_b->pi;
+  isc->p4= ii->edge_b->next_pi;
+  isc->intersection_data= ii;
+  return isc;
+}
+
+
+static Gcalc_heap::Info *new_eq_point(
+    Gcalc_heap *heap, const Gcalc_heap::Info *p,
+    Gcalc_scan_iterator::point *edge)
+{
+  Gcalc_heap::Info *eqp= (Gcalc_heap::Info *)heap->new_item();
+  if (!eqp)
+    return 0;
+  eqp->type= Gcalc_heap::nt_eq_node;
+  eqp->node= p;
+  eqp->eq_data= edge;
+  return eqp;
+}
+
+
+void Gcalc_heap::Info::calc_xy(double *x, double *y) const
+{
+  double b0_x= p2->x - p1->x;
+  double b0_y= p2->y - p1->y;
+  double b1_x= p4->x - p3->x;
+  double b1_y= p4->y - p3->y;
+  double b0xb1= b0_x * b1_y - b0_y * b1_x;
+  double t= (p3->x - p1->x) * b1_y - (p3->y - p1->y) * b1_x;
+
+  t/= b0xb1;
+
+  *x= p1->x + b0_x * t;
+  *y= p1->y + b0_y * t;
+}
+
+
+#ifdef GCALC_CHECK_WITH_FLOAT
+void Gcalc_heap::Info::calc_xy_ld(long double *x, long double *y) const
+{
+  long double b0_x= ((long double) p2->x) - p1->x;
+  long double b0_y= ((long double) p2->y) - p1->y;
+  long double b1_x= ((long double) p4->x) - p3->x;
+  long double b1_y= ((long double) p4->y) - p3->y;
+  long double b0xb1= b0_x * b1_y - b0_y * b1_x;
+  long double ax=   ((long double) p3->x) - p1->x;
+  long double ay=   ((long double) p3->y) - p1->y;
+  long double t_a= ax * b1_y - ay * b1_x;
+  long double hx= (b0xb1 * (long double) p1->x + b0_x * t_a);
+  long double hy= (b0xb1 * (long double) p1->y + b0_y * t_a);
+
+  if (fabs(b0xb1) < 1e-15)
+  {
+    *x= p1->x;
+    *y= p1->y;
+    return;
+  }
+
+  *x= hx/b0xb1;
+  *y= hy/b0xb1;
+}
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+
+static int cmp_point_info(const Gcalc_heap::Info *i0,
+                          const Gcalc_heap::Info *i1)
+{
+  int cmp_y= gcalc_cmp_coord1(i0->iy, i1->iy);
+  if (cmp_y)
+    return cmp_y;
+  return gcalc_cmp_coord1(i0->ix, i1->ix);
+}
+
+
+static inline void trim_node(Gcalc_heap::Info *node, Gcalc_heap::Info *prev_node)
+{
+  if (!node)
+    return;
+  node->top_node= 0;
+  GCALC_DBUG_ASSERT((node->left == prev_node) || (node->right == prev_node));
+  if (node->left == prev_node)
+    node->left= node->right;
+  node->right= NULL;
+  GCALC_DBUG_ASSERT(cmp_point_info(node, prev_node));
+}
+
+
+static int compare_point_info(const void *e0, const void *e1)
+{
+  const Gcalc_heap::Info *i0= (const Gcalc_heap::Info *)e0;
+  const Gcalc_heap::Info *i1= (const Gcalc_heap::Info *)e1;
+  return cmp_point_info(i0, i1) > 0;
+}
+
+
+void Gcalc_heap::prepare_operation()
+{
+  Info *cur;
+  GCALC_DBUG_ASSERT(m_hook);
+  *m_hook= NULL;
+  m_hook= NULL; /* just to check it's not called twice */
+  m_first= sort_list(compare_point_info, m_first, m_n_points);
+
+  /* TODO - move this to the 'normal_scan' loop */
+  for (cur= get_first(); cur; cur= cur->get_next())
+  {
+    trim_node(cur->left, cur);
+    trim_node(cur->right, cur);
+  }
+}
+
+
+void Gcalc_heap::reset()
+{
+  if (m_n_points)
+  {
+    free_list(m_first);
+    m_n_points= 0;
+  }
+  m_hook= &m_first;
+}
+
+
+int Gcalc_shape_transporter::int_single_point(gcalc_shape_info Info,
+                                              double x, double y)
+{
+  Gcalc_heap::Info *point= m_heap->new_point_info(x, y, Info);
+  if (!point)
+    return 1;
+  point->left= point->right= 0;
+  return 0;
+}
+
+
+int Gcalc_shape_transporter::int_add_point(gcalc_shape_info Info,
+                                           double x, double y)
+{
+  Gcalc_heap::Info *point;
+  Gcalc_dyn_list::Item **hook;
+
+  hook= m_heap->get_cur_hook();
+
+  if (!(point= m_heap->new_point_info(x, y, Info)))
+    return 1;
+  if (m_first)
+  {
+    if (cmp_point_info(m_prev, point) == 0)
+    {
+      /* Coinciding points, do nothing */
+      m_heap->free_point_info(point, hook);
+      return 0;
+    }
+    GCALC_DBUG_ASSERT(!m_prev || m_prev->x != x || m_prev->y != y);
+    m_prev->left= point;
+    point->right= m_prev;
+  }
+  else
+    m_first= point;
+  m_prev= point;
+  m_prev_hook= hook;
+  return 0;
+}
+
+
+void Gcalc_shape_transporter::int_complete()
+{
+  GCALC_DBUG_ASSERT(m_shape_started == 1 || m_shape_started == 3);
+
+  if (!m_first)
+    return;
+
+  /* simple point */
+  if (m_first == m_prev)
+  {
+    m_first->right= m_first->left= NULL;
+    return;
+  }
+
+  /* line */
+  if (m_shape_started == 1)
+  {
+    m_first->right= NULL;
+    m_prev->left= m_prev->right;
+    m_prev->right= NULL;
+    return;
+  }
+
+  /* polygon */
+  if (cmp_point_info(m_first, m_prev) == 0)
+  {
+    /* Coinciding points, remove the last one from the list */
+    m_prev->right->left= m_first;
+    m_first->right= m_prev->right;
+    m_heap->free_point_info(m_prev, m_prev_hook);
+  }
+  else
+  {
+    GCALC_DBUG_ASSERT(m_prev->x != m_first->x || m_prev->y != m_first->y);
+    m_first->right= m_prev;
+    m_prev->left= m_first;
+  }
+}
+
+
+inline void calc_dx_dy(Gcalc_scan_iterator::point *p)
+{
+  gcalc_sub_coord1(p->dx, p->next_pi->ix, p->pi->ix);
+  gcalc_sub_coord1(p->dy, p->next_pi->iy, p->pi->iy);
+  if (GCALC_SIGN(p->dx[0]))
+  {
+    p->l_border= &p->next_pi->ix;
+    p->r_border= &p->pi->ix;
+  }
+  else
+  {
+    p->r_border= &p->next_pi->ix;
+    p->l_border= &p->pi->ix;
+  }
+}
+
+
+Gcalc_scan_iterator::Gcalc_scan_iterator(size_t blk_size) :
+  Gcalc_dyn_list(blk_size, sizeof(point) > sizeof(intersection_info) ?
+                             sizeof(point) :
+                             sizeof(intersection_info))
+{
+  state.slice= NULL;
+  m_bottom_points= NULL;
+  m_bottom_hook= &m_bottom_points;
+}
+		  
+
+void Gcalc_scan_iterator::init(Gcalc_heap *points)
+{
+  GCALC_DBUG_ASSERT(points->ready());
+  GCALC_DBUG_ASSERT(!state.slice);
+
+  if (!(m_cur_pi= points->get_first()))
+    return;
+  m_heap= points;
+  state.event_position_hook= &state.slice;
+  state.event_end= NULL;
+#ifndef GCALC_DBUG_OFF
+  m_cur_thread= 0;
+#endif /*GCALC_DBUG_OFF*/
+  GCALC_SET_TERMINATED(killed, 0);
+}
+
+void Gcalc_scan_iterator::reset()
+{
+  state.slice= NULL;
+  m_bottom_points= NULL;
+  m_bottom_hook= &m_bottom_points;
+  Gcalc_dyn_list::reset();
+}
+
+
+int Gcalc_scan_iterator::point::cmp_dx_dy(const Gcalc_coord1 dx_a,
+                                          const Gcalc_coord1 dy_a,
+                                          const Gcalc_coord1 dx_b,
+                                          const Gcalc_coord1 dy_b)
+{
+  Gcalc_coord2 dx_a_dy_b;
+  Gcalc_coord2 dy_a_dx_b;
+  gcalc_mul_coord1(dx_a_dy_b, dx_a, dy_b);
+  gcalc_mul_coord1(dy_a_dx_b, dy_a, dx_b);
+
+  return gcalc_cmp_coord(dx_a_dy_b, dy_a_dx_b, GCALC_COORD_BASE2);
+}
+
+
+int Gcalc_scan_iterator::point::cmp_dx_dy(const Gcalc_heap::Info *p1,
+                                          const Gcalc_heap::Info *p2,
+                                          const Gcalc_heap::Info *p3,
+                                          const Gcalc_heap::Info *p4)
+{
+  Gcalc_coord1 dx_a, dy_a, dx_b, dy_b;
+  gcalc_sub_coord1(dx_a, p2->ix, p1->ix);
+  gcalc_sub_coord1(dy_a, p2->iy, p1->iy);
+  gcalc_sub_coord1(dx_b, p4->ix, p3->ix);
+  gcalc_sub_coord1(dy_b, p4->iy, p3->iy);
+  return cmp_dx_dy(dx_a, dy_a, dx_b, dy_b);
+}
+
+
+int Gcalc_scan_iterator::point::cmp_dx_dy(const point *p) const
+{
+  GCALC_DBUG_ASSERT(!is_bottom());
+  return cmp_dx_dy(dx, dy, p->dx, p->dy);
+}
+
+
+#ifdef GCALC_CHECK_WITH_FLOAT
+void Gcalc_scan_iterator::point::calc_x(long double *x, long double y,
+                                        long double ix) const
+{
+  long double ddy= gcalc_get_double(dy, GCALC_COORD_BASE);
+  if (fabsl(ddy) < (long double) 1e-20)
+  {
+    *x= ix;
+  }
+  else
+    *x= (ddy * (long double) pi->x + gcalc_get_double(dx, GCALC_COORD_BASE) *
+          (y - pi->y)) / ddy;
+}
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+
+static int compare_events(const void *e0, const void *e1)
+{
+  const Gcalc_scan_iterator::point *p0= (const Gcalc_scan_iterator::point *)e0;
+  const Gcalc_scan_iterator::point *p1= (const Gcalc_scan_iterator::point *)e1;
+  return p0->cmp_dx_dy(p1) > 0;
+}
+
+
+int Gcalc_scan_iterator::arrange_event(int do_sorting, int n_intersections)
+{
+  int ev_counter;
+  point *sp;
+  point **sp_hook;
+
+  ev_counter= 0;
+
+  *m_bottom_hook= NULL;
+  for (sp= m_bottom_points; sp; sp= sp->get_next())
+    sp->ev_next= sp->get_next();
+
+  for (sp= state.slice, sp_hook= &state.slice;
+       sp; sp_hook= sp->next_ptr(), sp= sp->get_next())
+  {
+    if (sp->event)
+    {
+      state.event_position_hook= sp_hook;
+      break;
+    }
+  }
+
+  for (sp= *(sp_hook= state.event_position_hook);
+       sp && sp->event; sp_hook= sp->next_ptr(), sp= sp->get_next())
+  {
+    ev_counter++;
+    if (sp->get_next() && sp->get_next()->event)
+      sp->ev_next= sp->get_next();
+    else
+      sp->ev_next= m_bottom_points;
+  }
+
+#ifndef GCALC_DBUG_OFF
+  {
+    point *cur_p= sp;
+    for (; cur_p; cur_p= cur_p->get_next())
+      GCALC_DBUG_ASSERT(!cur_p->event);
+  }
+#endif /*GCALC_DBUG_OFF*/
+
+  state.event_end= sp;
+
+  if (ev_counter == 2 && n_intersections == 1)
+  {
+    /* If we had only intersection, just swap the two points. */
+    sp= *state.event_position_hook;
+    *state.event_position_hook= sp->get_next();
+    sp->next= (*state.event_position_hook)->next;
+    (*state.event_position_hook)->next= sp;
+
+    /* The list of the events should be restored. */
+    (*state.event_position_hook)->ev_next= sp;
+    sp->ev_next= m_bottom_points;
+  }
+  else if (ev_counter == 2 && get_events()->event == scev_two_threads)
+  {
+    /* Do nothing. */
+  }
+  else if (ev_counter > 1 && do_sorting)
+  {
+    point *cur_p;
+    *sp_hook= NULL;
+    sp= (point *) sort_list(compare_events, *state.event_position_hook,
+                            ev_counter);
+    /* Find last item in the list, it's changed after the sorting. */
+    for (cur_p= sp->get_next(); cur_p->get_next();
+        cur_p= cur_p->get_next())
+    {}
+    cur_p->next= state.event_end;
+    *state.event_position_hook= sp;
+    /* The list of the events should be restored. */
+    for (; sp && sp->event; sp= sp->get_next())
+    {
+      if (sp->get_next() && sp->get_next()->event)
+        sp->ev_next= sp->get_next();
+      else
+        sp->ev_next= m_bottom_points;
+    }
+  }
+
+#ifndef GCALC_DBUG_OFF
+  {
+    const event_point *ev= get_events();
+    for (; ev && ev->get_next(); ev= ev->get_next())
+    {
+      if (ev->is_bottom() || ev->get_next()->is_bottom())
+        break;
+      GCALC_DBUG_ASSERT(ev->cmp_dx_dy(ev->get_next()) <= 0);
+    }
+  }
+#endif /*GCALC_DBUG_OFF*/
+  return 0;
+}
+
+
+int Gcalc_heap::Info::equal_pi(const Info *pi) const
+{
+  if (type == nt_intersection)
+    return equal_intersection;
+  if (pi->type == nt_eq_node)
+    return 1;
+  if (type == nt_eq_node || pi->type == nt_intersection)
+    return 0;
+  return cmp_point_info(this, pi) == 0;
+}
+
+int Gcalc_scan_iterator::step()
+{
+  int result= 0;
+  int do_sorting= 0;
+  int n_intersections= 0;
+  point *sp;
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::step");
+  GCALC_DBUG_ASSERT(more_points());
+
+  if (GCALC_TERMINATED(killed))
+    GCALC_DBUG_RETURN(0xFFFF);
+
+  /* Clear the old event marks. */
+  if (m_bottom_points)
+  {
+    free_list((Gcalc_dyn_list::Item **) &m_bottom_points,
+              (Gcalc_dyn_list::Item **) m_bottom_hook);
+    m_bottom_points= NULL;
+    m_bottom_hook= &m_bottom_points;
+  }
+  for (sp= *state.event_position_hook;
+       sp != state.event_end; sp= sp->get_next())
+    sp->event= scev_none;
+
+//#ifndef GCALC_DBUG_OFF
+  state.event_position_hook= NULL;
+  state.pi= NULL;
+//#endif /*GCALC_DBUG_OFF*/
+
+  do
+  {
+#ifndef GCALC_DBUG_OFF
+    if (m_cur_pi->type == Gcalc_heap::nt_intersection &&
+        m_cur_pi->get_next()->type == Gcalc_heap::nt_intersection &&
+        m_cur_pi->equal_intersection)
+      GCALC_DBUG_ASSERT(cmp_intersections(m_cur_pi, m_cur_pi->get_next()) == 0);
+#endif /*GCALC_DBUG_OFF*/
+    GCALC_DBUG_CHECK_COUNTER();
+    GCALC_DBUG_PRINT_SLICE("step:", state.slice);
+    GCALC_DBUG_PRINT_PI(m_cur_pi);
+    if (m_cur_pi->type == Gcalc_heap::nt_shape_node)
+    {
+      if (m_cur_pi->is_top())
+      {
+        result= insert_top_node();
+        if (!m_cur_pi->is_bottom())
+          do_sorting++;
+      }
+      else if (m_cur_pi->is_bottom())
+        remove_bottom_node();
+      else
+      {
+        do_sorting++;
+        result= node_scan();
+      }
+      if (result)
+        GCALC_DBUG_RETURN(result);
+      state.pi= m_cur_pi;
+    }
+    else if (m_cur_pi->type == Gcalc_heap::nt_eq_node)
+    {
+      do_sorting++;
+      eq_scan();
+    }
+    else
+    {
+      /* nt_intersection */
+      do_sorting++;
+      n_intersections++;
+      intersection_scan();
+      if (!state.pi || state.pi->type == Gcalc_heap::nt_intersection)
+        state.pi= m_cur_pi;
+    }
+
+    m_cur_pi= m_cur_pi->get_next();
+  } while (m_cur_pi && state.pi->equal_pi(m_cur_pi));
+
+  GCALC_DBUG_RETURN(arrange_event(do_sorting, n_intersections));
+}
+
+
+static int node_on_right(const Gcalc_heap::Info *node, 
+    const Gcalc_heap::Info *edge_a, const Gcalc_heap::Info *edge_b)
+{
+  Gcalc_coord1 a_x, a_y;
+  Gcalc_coord1 b_x, b_y;
+  Gcalc_coord2 ax_by, ay_bx;
+  int result;
+
+  gcalc_sub_coord1(a_x, node->ix, edge_a->ix);
+  gcalc_sub_coord1(a_y, node->iy, edge_a->iy);
+  gcalc_sub_coord1(b_x, edge_b->ix, edge_a->ix);
+  gcalc_sub_coord1(b_y, edge_b->iy, edge_a->iy);
+  gcalc_mul_coord1(ax_by, a_x, b_y);
+  gcalc_mul_coord1(ay_bx, a_y, b_x);
+  result= gcalc_cmp_coord(ax_by, ay_bx, GCALC_COORD_BASE2);
+#ifdef GCALC_CHECK_WITH_FLOAT
+  {
+    long double dx= gcalc_get_double(edge_b->ix, GCALC_COORD_BASE) -
+                      gcalc_get_double(edge_a->ix, GCALC_COORD_BASE);
+    long double dy= gcalc_get_double(edge_b->iy, GCALC_COORD_BASE) -
+                      gcalc_get_double(edge_a->iy, GCALC_COORD_BASE);
+    long double ax= gcalc_get_double(node->ix, GCALC_COORD_BASE) -
+                      gcalc_get_double(edge_a->ix, GCALC_COORD_BASE);
+    long double ay= gcalc_get_double(node->iy, GCALC_COORD_BASE) -
+                      gcalc_get_double(edge_a->iy, GCALC_COORD_BASE);
+    long double d= ax * dy - ay * dx;
+    if (result == 0)
+      GCALC_DBUG_ASSERT(de_check(d, 0.0));
+    else if (result < 0)
+      GCALC_DBUG_ASSERT(de_check(d, 0.0) || d < 0);
+    else
+      GCALC_DBUG_ASSERT(de_check(d, 0.0) || d > 0);
+  }
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  return result;
+}
+
+
+static int cmp_tops(const Gcalc_heap::Info *top_node, 
+    const Gcalc_heap::Info *edge_a, const Gcalc_heap::Info *edge_b)
+{
+  int cmp_res_a, cmp_res_b;
+
+  cmp_res_a= gcalc_cmp_coord1(edge_a->ix, top_node->ix);
+  cmp_res_b= gcalc_cmp_coord1(edge_b->ix, top_node->ix);
+
+  if (cmp_res_a <= 0 && cmp_res_b > 0)
+    return -1;
+  if (cmp_res_b <= 0 && cmp_res_a > 0)
+    return 1;
+  if (cmp_res_a == 0 && cmp_res_b == 0)
+    return 0;
+
+  return node_on_right(edge_a, top_node, edge_b);
+}
+
+
+int Gcalc_scan_iterator::insert_top_node()
+{
+  point *sp= state.slice;
+  point **prev_hook= &state.slice;
+  point *sp1= NULL;
+  point *sp0= new_slice_point();
+  int cmp_res;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::insert_top_node");
+  if (!sp0)
+    GCALC_DBUG_RETURN(1);
+  sp0->pi= m_cur_pi;
+  sp0->next_pi= m_cur_pi->left;
+#ifndef GCALC_DBUG_OFF
+  sp0->thread= m_cur_thread++;
+#endif /*GCALC_DBUG_OFF*/
+  if (m_cur_pi->left)
+  {
+    calc_dx_dy(sp0);
+    if (m_cur_pi->right)
+    {
+      if (!(sp1= new_slice_point()))
+        GCALC_DBUG_RETURN(1);
+      sp1->event= sp0->event= scev_two_threads;
+      sp1->pi= m_cur_pi;
+      sp1->next_pi= m_cur_pi->right;
+#ifndef GCALC_DBUG_OFF
+      sp1->thread= m_cur_thread++;
+#endif /*GCALC_DBUG_OFF*/
+      calc_dx_dy(sp1);
+      /* We have two threads so should decide which one will be first */
+      cmp_res= cmp_tops(m_cur_pi, m_cur_pi->left, m_cur_pi->right);
+      if (cmp_res > 0)
+      {
+        point *tmp= sp0;
+        sp0= sp1;
+        sp1= tmp;
+      }
+      else if (cmp_res == 0)
+      {
+        /* Exactly same direction of the edges. */
+        cmp_res= gcalc_cmp_coord1(m_cur_pi->left->iy, m_cur_pi->right->iy);
+        if (cmp_res != 0)
+        {
+          if (cmp_res < 0)
+          {
+            if (add_eq_node(sp0->next_pi, sp1))
+              GCALC_DBUG_RETURN(1);
+          }
+          else
+          {
+            if (add_eq_node(sp1->next_pi, sp0))
+              GCALC_DBUG_RETURN(1);
+          }
+        }
+        else
+        {
+          cmp_res= gcalc_cmp_coord1(m_cur_pi->left->ix, m_cur_pi->right->ix);
+          if (cmp_res != 0)
+          {
+            if (cmp_res < 0)
+            {
+              if (add_eq_node(sp0->next_pi, sp1))
+                GCALC_DBUG_RETURN(1);
+            }
+            else
+            {
+              if (add_eq_node(sp1->next_pi, sp0))
+                GCALC_DBUG_RETURN(1);
+            }
+          }
+        }
+      }
+    }
+    else
+      sp0->event= scev_thread;
+  }
+  else
+    sp0->event= scev_single_point;
+
+
+  /* Check if we already have an event - then we'll place the node there */
+  for (; sp && !sp->event; prev_hook= sp->next_ptr(), sp=sp->get_next())
+  {}
+  if (!sp)
+  {
+    sp= state.slice;
+    prev_hook= &state.slice;
+    /* We need to find the place to insert. */
+    for (; sp; prev_hook= sp->next_ptr(), sp=sp->get_next())
+    {
+      if (sp->event || gcalc_cmp_coord1(*sp->r_border, m_cur_pi->ix) < 0)
+        continue;
+      cmp_res= node_on_right(m_cur_pi, sp->pi, sp->next_pi);
+      if (cmp_res == 0)
+      {
+        /* The top node lies on the edge. */
+        /* Nodes of that edge will be handled in other places. */
+        sp->event= scev_intersection;
+      }
+      else if (cmp_res < 0)
+        break;
+    }
+  }
+
+  if (sp0->event == scev_single_point)
+  {
+    /* Add single point to the bottom list. */
+    *m_bottom_hook= sp0;
+    m_bottom_hook= sp0->next_ptr();
+    state.event_position_hook= prev_hook;
+  }
+  else
+  {
+    *prev_hook= sp0;
+    sp0->next= sp;
+    if (add_events_for_node(sp0))
+      GCALC_DBUG_RETURN(1);
+
+    if (sp0->event == scev_two_threads)
+    {
+      *prev_hook= sp1;
+      sp1->next= sp;
+      if (add_events_for_node(sp1))
+        GCALC_DBUG_RETURN(1);
+
+      sp0->next= sp1;
+      *prev_hook= sp0;
+    }
+  }
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+void Gcalc_scan_iterator::remove_bottom_node()
+{
+  point *sp= state.slice;
+  point **sp_hook= &state.slice;
+  point *first_bottom_point= NULL;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::remove_bottom_node");
+  for (; sp; sp= sp->get_next())
+  {
+    if (sp->next_pi == m_cur_pi)
+    {
+      *sp_hook= sp->get_next();
+      sp->pi= m_cur_pi;
+      sp->next_pi= NULL;
+      if (first_bottom_point)
+      {
+        first_bottom_point->event= sp->event= scev_two_ends;
+        break;
+      }
+      first_bottom_point= sp;
+      sp->event= scev_end;
+      state.event_position_hook= sp_hook;
+    }
+    else
+      sp_hook= sp->next_ptr();
+  }
+  GCALC_DBUG_ASSERT(first_bottom_point);
+  *m_bottom_hook= first_bottom_point;
+  m_bottom_hook= first_bottom_point->next_ptr();
+  if (sp)
+  {
+    *m_bottom_hook= sp;
+    m_bottom_hook= sp->next_ptr();
+  }
+
+  GCALC_DBUG_VOID_RETURN;
+}
+
+
+int Gcalc_scan_iterator::add_events_for_node(point *sp_node)
+{
+  point *sp= state.slice;
+  int cur_pi_r, sp_pi_r;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_events_for_node");
+
+  /* Scan to the event point. */
+  for (; sp != sp_node; sp= sp->get_next())
+  {
+    GCALC_DBUG_ASSERT(!sp->is_bottom());
+    GCALC_DBUG_PRINT(("left cut_edge %d", sp->thread));
+    if (sp->next_pi == sp_node->next_pi ||
+        gcalc_cmp_coord1(*sp->r_border, *sp_node->l_border) < 0)
+      continue;
+    sp_pi_r= node_on_right(sp->next_pi, sp_node->pi, sp_node->next_pi);
+    if (sp_pi_r < 0)
+      continue;
+    cur_pi_r= node_on_right(sp_node->next_pi, sp->pi, sp->next_pi);
+    if (cur_pi_r > 0)
+      continue;
+    if (cur_pi_r == 0 && sp_pi_r == 0)
+    {
+      int cmp_res= cmp_point_info(sp->next_pi, sp_node->next_pi);
+      if (cmp_res > 0)
+      {
+        if (add_eq_node(sp_node->next_pi, sp))
+          GCALC_DBUG_RETURN(1);
+      }
+      else if (cmp_res < 0)
+      {
+        if (add_eq_node(sp->next_pi, sp_node))
+          GCALC_DBUG_RETURN(1);
+      }
+      continue;
+    }
+
+    if (cur_pi_r == 0)
+    {
+      if (add_eq_node(sp_node->next_pi, sp))
+        GCALC_DBUG_RETURN(1);
+      continue;
+    }
+    else if (sp_pi_r == 0)
+    {
+      if (add_eq_node(sp->next_pi, sp_node))
+        GCALC_DBUG_RETURN(1);
+      continue;
+    }
+
+    if (sp->event)
+    {
+#ifndef GCALC_DBUG_OFF
+      cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi);
+      GCALC_DBUG_ASSERT(cur_pi_r == 0);
+#endif /*GCALC_DBUG_OFF*/
+      continue;
+    }
+    cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi);
+    GCALC_DBUG_ASSERT(cur_pi_r >= 0);
+    //GCALC_DBUG_ASSERT(cur_pi_r > 0); /* Is it ever violated? */
+    if (cur_pi_r > 0 && add_intersection(sp, sp_node, m_cur_pi))
+      GCALC_DBUG_RETURN(1);
+  }
+
+  /* Scan to the end of the slice */
+  sp= sp->get_next();
+
+  for (; sp; sp= sp->get_next())
+  {
+    GCALC_DBUG_ASSERT(!sp->is_bottom());
+    GCALC_DBUG_PRINT(("right cut_edge %d", sp->thread));
+    if (sp->next_pi == sp_node->next_pi ||
+        gcalc_cmp_coord1(*sp_node->r_border, *sp->l_border) < 0)
+      continue;
+    sp_pi_r= node_on_right(sp->next_pi, sp_node->pi, sp_node->next_pi);
+    if (sp_pi_r > 0)
+      continue;
+    cur_pi_r= node_on_right(sp_node->next_pi, sp->pi, sp->next_pi);
+    if (cur_pi_r < 0)
+      continue;
+    if (cur_pi_r == 0 && sp_pi_r == 0)
+    {
+      int cmp_res= cmp_point_info(sp->next_pi, sp_node->next_pi);
+      if (cmp_res > 0)
+      {
+        if (add_eq_node(sp_node->next_pi, sp))
+          GCALC_DBUG_RETURN(1);
+      }
+      else if (cmp_res < 0)
+      {
+        if (add_eq_node(sp->next_pi, sp_node))
+          GCALC_DBUG_RETURN(1);
+      }
+      continue;
+    }
+    if (cur_pi_r == 0)
+    {
+      if (add_eq_node(sp_node->next_pi, sp))
+        GCALC_DBUG_RETURN(1);
+      continue;
+    }
+    else if (sp_pi_r == 0)
+    {
+      if (add_eq_node(sp->next_pi, sp_node))
+        GCALC_DBUG_RETURN(1);
+      continue;
+    }
+
+    if (sp->event)
+    {
+#ifndef GCALC_DBUG_OFF
+      cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi);
+      GCALC_DBUG_ASSERT(cur_pi_r == 0);
+#endif /*GCALC_DBUG_OFF*/
+      continue;
+    }
+    cur_pi_r= node_on_right(sp_node->pi, sp->pi, sp->next_pi);
+    GCALC_DBUG_ASSERT(cur_pi_r <= 0);
+    //GCALC_DBUG_ASSERT(cur_pi_r < 0); /* Is it ever violated? */
+    if (cur_pi_r < 0 && add_intersection(sp_node, sp, m_cur_pi))
+      GCALC_DBUG_RETURN(1);
+  }
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_scan_iterator::node_scan()
+{
+  point *sp= state.slice;
+  Gcalc_heap::Info *cur_pi= m_cur_pi;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::node_scan");
+
+  /* Scan to the event point.                             */
+  /* Can be avoided if we add link to the sp to the Info. */
+  for (; sp->next_pi != cur_pi; sp= sp->get_next())
+  {}
+
+  GCALC_DBUG_PRINT(("node for %d", sp->thread));
+  /* Handle the point itself. */
+  sp->pi= cur_pi;
+  sp->next_pi= cur_pi->left;
+  sp->event= scev_point;
+  calc_dx_dy(sp);
+
+  GCALC_DBUG_RETURN(add_events_for_node(sp));
+}
+
+
+void Gcalc_scan_iterator::eq_scan()
+{
+  point *sp= eq_sp(m_cur_pi);
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::eq_scan");
+  
+#ifndef GCALC_DBUG_OFF
+  {
+    point *cur_p= state.slice;
+    for (; cur_p && cur_p != sp; cur_p= cur_p->get_next())
+    {}
+    GCALC_DBUG_ASSERT(cur_p);
+  }
+#endif /*GCALC_DBUG_OFF*/
+  if (!sp->event)
+  {
+    sp->event= scev_intersection;
+    sp->ev_pi= m_cur_pi;
+  }
+
+  GCALC_DBUG_VOID_RETURN;
+}
+
+
+void Gcalc_scan_iterator::intersection_scan()
+{
+  intersection_info *ii= i_data(m_cur_pi);
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::intersection_scan");
+  
+#ifndef GCALC_DBUG_OFF
+  {
+    point *sp= state.slice;
+    for (; sp && sp != ii->edge_a; sp= sp->get_next())
+    {}
+    GCALC_DBUG_ASSERT(sp);
+    for (; sp && sp != ii->edge_b; sp= sp->get_next())
+    {}
+    GCALC_DBUG_ASSERT(sp);
+  }
+#endif /*GCALC_DBUG_OFF*/
+
+  ii->edge_a->event= ii->edge_b->event= scev_intersection;
+  ii->edge_a->ev_pi= ii->edge_b->ev_pi= m_cur_pi;
+  free_item(ii);
+  m_cur_pi->intersection_data= NULL;
+
+  GCALC_DBUG_VOID_RETURN;
+}
+
+
+int Gcalc_scan_iterator::add_intersection(point *sp_a, point *sp_b,
+                                          Gcalc_heap::Info *pi_from)
+{
+  Gcalc_heap::Info *ii;
+  intersection_info *i_calc;
+  int cmp_res;
+  int skip_next= 0;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_intersection");
+  if (!(i_calc= new_intersection_info(sp_a, sp_b)) ||
+      !(ii= new_intersection(m_heap, i_calc)))
+    GCALC_DBUG_RETURN(1);
+
+  ii->equal_intersection= 0;
+
+  for (;
+       pi_from->get_next() != sp_a->next_pi &&
+         pi_from->get_next() != sp_b->next_pi;
+       pi_from= pi_from->get_next())
+  {
+    Gcalc_heap::Info *cur= pi_from->get_next();
+    if (skip_next)
+    {
+      if (cur->type == Gcalc_heap::nt_intersection)
+        skip_next= cur->equal_intersection;
+      else
+        skip_next= 0;
+      continue;
+    }
+    if (cur->type == Gcalc_heap::nt_intersection)
+    {
+      cmp_res= cmp_intersections(cur, ii);
+      skip_next= cur->equal_intersection;
+    }
+    else if (cur->type == Gcalc_heap::nt_eq_node)
+      continue;
+    else
+      cmp_res= cmp_node_isc(cur, ii);
+    if (cmp_res == 0)
+    {
+      ii->equal_intersection= 1;
+      break;
+    }
+    else if (cmp_res > 0)
+      break;
+  }
+
+  /* Intersection inserted before the equal point. */
+  ii->next= pi_from->get_next();
+  pi_from->next= ii;
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_scan_iterator::add_eq_node(Gcalc_heap::Info *node, point *sp)
+{
+  Gcalc_heap::Info *en;
+
+  GCALC_DBUG_ENTER("Gcalc_scan_iterator::add_intersection");
+  en= new_eq_point(m_heap, node, sp);
+  if (!en)
+    GCALC_DBUG_RETURN(1);
+
+  /* eq_node iserted after teh equal point. */
+  en->next= node->get_next();
+  node->next= en;
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+void calc_t(Gcalc_coord2 t_a, Gcalc_coord2 t_b,
+            Gcalc_coord1 dxa, Gcalc_coord1 dxb,
+            const Gcalc_heap::Info *p1, const Gcalc_heap::Info *p2,
+            const Gcalc_heap::Info *p3, const Gcalc_heap::Info *p4)
+{
+  Gcalc_coord1 a2_a1x, a2_a1y;
+  Gcalc_coord2 x1y2, x2y1;
+  Gcalc_coord1 dya, dyb;
+
+  gcalc_sub_coord1(a2_a1x, p3->ix, p1->ix);
+  gcalc_sub_coord1(a2_a1y, p3->iy, p1->iy);
+
+  gcalc_sub_coord1(dxa, p2->ix, p1->ix);
+  gcalc_sub_coord1(dya, p2->iy, p1->iy);
+  gcalc_sub_coord1(dxb, p4->ix, p3->ix);
+  gcalc_sub_coord1(dyb, p4->iy, p3->iy);
+
+  gcalc_mul_coord1(x1y2, dxa, dyb);
+  gcalc_mul_coord1(x2y1, dya, dxb);
+  gcalc_sub_coord(t_b, GCALC_COORD_BASE2, x1y2, x2y1);
+
+
+  gcalc_mul_coord1(x1y2, a2_a1x, dyb);
+  gcalc_mul_coord1(x2y1, a2_a1y, dxb);
+  gcalc_sub_coord(t_a, GCALC_COORD_BASE2, x1y2, x2y1);
+}
+
+
+double Gcalc_scan_iterator::get_y() const
+{
+  if (state.pi->type == Gcalc_heap::nt_intersection)
+  {
+    Gcalc_coord1 dxa, dya;
+    Gcalc_coord2 t_a, t_b;
+    Gcalc_coord3 a_tb, b_ta, y_exp;
+    calc_t(t_a, t_b, dxa, dya,
+           state.pi->p1, state.pi->p2, state.pi->p3, state.pi->p4);
+
+
+    gcalc_mul_coord(a_tb, GCALC_COORD_BASE3,
+        t_b, GCALC_COORD_BASE2, state.pi->p1->iy, GCALC_COORD_BASE);
+    gcalc_mul_coord(b_ta, GCALC_COORD_BASE3,
+        t_a, GCALC_COORD_BASE2, dya, GCALC_COORD_BASE);
+
+    gcalc_add_coord(y_exp, GCALC_COORD_BASE3, a_tb, b_ta);
+
+    return (get_pure_double(y_exp, GCALC_COORD_BASE3) /
+             get_pure_double(t_b, GCALC_COORD_BASE2)) / m_heap->coord_extent;
+  }
+  else
+    return state.pi->y;
+}
+
+
+double Gcalc_scan_iterator::get_event_x() const
+{
+  if (state.pi->type == Gcalc_heap::nt_intersection)
+  {
+    Gcalc_coord1 dxa, dya;
+    Gcalc_coord2 t_a, t_b;
+    Gcalc_coord3 a_tb, b_ta, x_exp;
+    calc_t(t_a, t_b, dxa, dya,
+           state.pi->p1, state.pi->p2, state.pi->p3, state.pi->p4);
+
+
+    gcalc_mul_coord(a_tb, GCALC_COORD_BASE3,
+        t_b, GCALC_COORD_BASE2, state.pi->p1->ix, GCALC_COORD_BASE);
+    gcalc_mul_coord(b_ta, GCALC_COORD_BASE3,
+        t_a, GCALC_COORD_BASE2, dxa, GCALC_COORD_BASE);
+
+    gcalc_add_coord(x_exp, GCALC_COORD_BASE3, a_tb, b_ta);
+
+    return (get_pure_double(x_exp, GCALC_COORD_BASE3) /
+             get_pure_double(t_b, GCALC_COORD_BASE2)) / m_heap->coord_extent;
+  }
+  else
+    return state.pi->x;
+}
+
+double Gcalc_scan_iterator::get_h() const
+{
+  double cur_y= get_y();
+  double next_y;
+  if (state.pi->type == Gcalc_heap::nt_intersection)
+  {
+    double x;
+    state.pi->calc_xy(&x, &next_y);
+  }
+  else
+    next_y= state.pi->y;
+  return next_y - cur_y;
+}
+
+
+double Gcalc_scan_iterator::get_sp_x(const point *sp) const
+{
+  double dy;
+  if (sp->event & (scev_end | scev_two_ends | scev_point))
+    return sp->pi->x;
+  dy= sp->next_pi->y - sp->pi->y;
+  if (fabs(dy) < 1e-12)
+    return sp->pi->x;
+  return (sp->next_pi->x - sp->pi->x) * dy;
+}
+
+
+double Gcalc_scan_iterator::get_pure_double(const Gcalc_internal_coord *d,
+                                            int d_len)
+{
+  int n= 1;
+  long double res= (long double) FIRST_DIGIT(d[0]);
+  do
+  {
+    res*= (long double) GCALC_DIG_BASE;
+    res+= (long double) d[n];
+  } while(++n < d_len);
+
+  if (GCALC_SIGN(d[0]))
+    res*= -1.0;
+  return res;
+}
+
+
+#endif /* HAVE_SPATIAL */
diff --git a/sql/gcalc_slicescan.h b/sql/gcalc_slicescan.h
new file mode 100644
index 00000000000..55de497f1ee
--- /dev/null
+++ b/sql/gcalc_slicescan.h
@@ -0,0 +1,600 @@
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (C) 2011 Monty Program Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+#ifndef GCALC_SLICESCAN_INCLUDED
+#define GCALC_SLICESCAN_INCLUDED
+
+#ifndef DBUG_OFF
+// #define GCALC_CHECK_WITH_FLOAT
+#else
+#define GCALC_DBUG_OFF
+#endif /*DBUG_OFF*/
+
+#ifndef GCALC_DBUG_OFF
+#define GCALC_DBUG_PRINT(b) DBUG_PRINT("Gcalc", b)
+#define GCALC_DBUG_ENTER(a) DBUG_ENTER("Gcalc "a)
+#define GCALC_DBUG_RETURN(r) DBUG_RETURN(r)
+#define GCALC_DBUG_VOID_RETURN DBUG_VOID_RETURN
+#define GCALC_DBUG_ASSERT(r) DBUG_ASSERT(r)
+#else
+#define GCALC_DBUG_PRINT(b)     do {} while(0)
+#define GCALC_DBUG_ENTER(a)     do {} while(0)
+#define GCALC_DBUG_RETURN(r)    return (r)
+#define GCALC_DBUG_VOID_RETURN  do {} while(0)
+#define GCALC_DBUG_ASSERT(r)    do {} while(0)
+#endif /*GCALC_DBUG_OFF*/
+
+#define GCALC_TERMINATED(state_var) (state_var && (*state_var))
+#define GCALC_SET_TERMINATED(state_var, val) state_var= val
+#define GCALC_DECL_TERMINATED_STATE(varname) \
+  volatile int *varname;
+
+/*
+  Gcalc_dyn_list class designed to manage long lists of same-size objects
+  with the possible efficiency.
+  It allocates fixed-size blocks of memory (blk_size specified at the time
+  of creation). When new object is added to the list, it occupies part of
+  this block until it's full. Then the new block is allocated.
+  Freed objects are chained to the m_free list, and if it's not empty, the
+  newly added object is taken from this list instead the block.
+*/
+
+class Gcalc_dyn_list
+{
+public:
+  class Item
+  {
+  public:
+    Item *next;
+  };
+
+  Gcalc_dyn_list(size_t blk_size, size_t sizeof_item);
+  ~Gcalc_dyn_list();
+  Item *new_item()
+  {
+    Item *result;
+    if (m_free)
+    {
+      result= m_free;
+      m_free= m_free->next;
+    }
+    else
+      result= alloc_new_blk();
+
+    return result;
+  }
+  inline void free_item(Item *item)
+  {
+    item->next= m_free;
+    m_free= item;
+  }
+  inline void free_list(Item **list, Item **hook)
+  {
+    *hook= m_free;
+    m_free= *list;
+  }
+
+  void free_list(Item *list)
+  {
+    Item **hook= &list;
+    while (*hook)
+      hook= &(*hook)->next;
+    free_list(&list, hook);
+  }
+
+  void reset();
+  void cleanup();
+
+protected:
+  size_t m_blk_size;
+  size_t m_sizeof_item;
+  unsigned int m_points_per_blk;
+  void *m_first_blk;
+  void **m_blk_hook;
+  Item *m_free;
+  Item *m_keep;
+
+  Item *alloc_new_blk();
+  void format_blk(void* block);
+  inline Item *ptr_add(Item *ptr, int n_items)
+  {
+    return (Item *)(((char*)ptr) + n_items * m_sizeof_item);
+  }
+};
+
+/* Internal Gcalc coordinates to provide the precise calculations */
+
+#define GCALC_DIG_BASE     1000000000
+typedef uint32 gcalc_digit_t;
+typedef unsigned long long gcalc_coord2;
+typedef gcalc_digit_t Gcalc_internal_coord;
+#define GCALC_COORD_BASE 2
+#define GCALC_COORD_BASE2 4
+#define GCALC_COORD_BASE3 6
+#define GCALC_COORD_BASE4 8
+#define GCALC_COORD_BASE5 10
+
+typedef gcalc_digit_t Gcalc_coord1[GCALC_COORD_BASE];
+typedef gcalc_digit_t Gcalc_coord2[GCALC_COORD_BASE*2];
+typedef gcalc_digit_t Gcalc_coord3[GCALC_COORD_BASE*3];
+
+
+void gcalc_mul_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a, int a_len,
+                     const Gcalc_internal_coord *b, int b_len);
+
+void gcalc_add_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a,
+                     const Gcalc_internal_coord *b);
+
+void gcalc_sub_coord(Gcalc_internal_coord *result, int result_len,
+                     const Gcalc_internal_coord *a,
+                     const Gcalc_internal_coord *b);
+
+int gcalc_cmp_coord(const Gcalc_internal_coord *a,
+                    const Gcalc_internal_coord *b, int len);
+
+/* Internal coordinates declarations end. */
+
+
+typedef uint gcalc_shape_info;
+
+/*
+  Gcalc_heap represents the 'dynamic list' of Info objects, that
+  contain information about vertexes of all the shapes that take
+  part in some spatial calculation. Can become quite long.
+  After filled, the list is usually sorted and then walked through
+  in the slicescan algorithm.
+  The Gcalc_heap and the algorithm can only operate with two
+  kinds of shapes - polygon and polyline. So all the spatial
+  objects should be represented as sets of these two.
+*/
+
+class Gcalc_heap : public Gcalc_dyn_list
+{
+public:
+  enum node_type
+  {
+    nt_shape_node,
+    nt_intersection,
+    nt_eq_node
+  };
+  class Info : public Gcalc_dyn_list::Item
+  {
+  public:
+    node_type type;
+    union
+    {
+      struct
+      {
+        /* nt_shape_node */
+        gcalc_shape_info shape;
+        Info *left;
+        Info *right;
+        double x,y;
+        Gcalc_coord1 ix, iy;
+        int top_node;
+      };
+      struct
+      {
+        /* nt_intersection */
+        /* Line p1-p2 supposed to intersect line p3-p4 */
+        const Info *p1;
+        const Info *p2;
+        const Info *p3;
+        const Info *p4;
+        void *intersection_data;
+        int equal_intersection;
+      };
+      struct
+      {
+        /* nt_eq_node */
+        const Info *node;
+        void *eq_data;
+      };
+    };
+
+    bool is_bottom() const
+      { GCALC_DBUG_ASSERT(type == nt_shape_node); return !left; }
+    bool is_top() const
+      { GCALC_DBUG_ASSERT(type == nt_shape_node); return top_node; }
+    bool is_single_node() const
+      { return is_bottom() && is_top(); }
+
+    void calc_xy(double *x, double *y) const;
+    int equal_pi(const Info *pi) const;
+#ifdef GCALC_CHECK_WITH_FLOAT
+    void calc_xy_ld(long double *x, long double *y) const;
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+
+    Info *get_next() { return (Info *)next; }
+    const Info *get_next() const { return (const Info *)next; }
+  };
+
+  Gcalc_heap(size_t blk_size=8192) :
+    Gcalc_dyn_list(blk_size, sizeof(Info)),
+    m_hook(&m_first), m_n_points(0)
+  {}
+  void set_extent(double xmin, double xmax, double ymin, double ymax);
+  Info *new_point_info(double x, double y, gcalc_shape_info shape);
+  void free_point_info(Info *i, Gcalc_dyn_list::Item **i_hook);
+  Info *new_intersection(const Info *p1, const Info *p2,
+                         const Info *p3, const Info *p4);
+  void prepare_operation();
+  inline bool ready() const { return m_hook == NULL; }
+  Info *get_first() { return (Info *)m_first; }
+  const Info *get_first() const { return (const Info *)m_first; }
+  Gcalc_dyn_list::Item **get_last_hook() { return m_hook; }
+  void reset();
+#ifdef GCALC_CHECK_WITH_FLOAT
+  long double get_double(const Gcalc_internal_coord *c) const;
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  double coord_extent;
+  Gcalc_dyn_list::Item **get_cur_hook() { return m_hook; }
+
+private:
+  Gcalc_dyn_list::Item *m_first;
+  Gcalc_dyn_list::Item **m_hook;
+  int m_n_points;
+};
+
+
+/*
+  the spatial object has to be represented as a set of
+  simple polygones and polylines to be sent to the slicescan.
+
+  Gcalc_shape_transporter class and his descendants are used to
+  simplify storing the information about the shape into necessary structures.
+  This base class only fills the Gcalc_heap with the information about
+  shapes and vertices.
+
+  Normally the Gcalc_shape_transporter family object is sent as a parameter
+  to the 'get_shapes' method of an 'spatial' object so it can pass
+  the spatial information about itself. The virtual methods are
+  treating this data in a way the caller needs.
+*/
+
+class Gcalc_shape_transporter
+{
+private:
+  Gcalc_heap::Info *m_first;
+  Gcalc_heap::Info *m_prev;
+  Gcalc_dyn_list::Item **m_prev_hook;
+  int m_shape_started;
+  void int_complete();
+protected:
+  Gcalc_heap *m_heap;
+  int int_single_point(gcalc_shape_info Info, double x, double y);
+  int int_add_point(gcalc_shape_info Info, double x, double y);
+  void int_start_line()
+  {
+    DBUG_ASSERT(!m_shape_started);
+    m_shape_started= 1;
+    m_first= m_prev= NULL;
+  }
+  void int_complete_line()
+  {
+    DBUG_ASSERT(m_shape_started== 1);
+    int_complete();
+    m_shape_started= 0;
+  }
+  void int_start_ring()
+  {
+    DBUG_ASSERT(m_shape_started== 2);
+    m_shape_started= 3;
+    m_first= m_prev= NULL;
+  }
+  void int_complete_ring()
+  {
+    DBUG_ASSERT(m_shape_started== 3);
+    int_complete();
+    m_shape_started= 2;
+  }
+  void int_start_poly()
+  {
+    DBUG_ASSERT(!m_shape_started);
+    m_shape_started= 2;
+  }
+  void int_complete_poly()
+  {
+    DBUG_ASSERT(m_shape_started== 2);
+    m_shape_started= 0;
+  }
+  bool line_started() { return m_shape_started == 1; };
+public:
+  Gcalc_shape_transporter(Gcalc_heap *heap) :
+    m_shape_started(0), m_heap(heap) {}
+
+  virtual int single_point(double x, double y)=0;
+  virtual int start_line()=0;
+  virtual int complete_line()=0;
+  virtual int start_poly()=0;
+  virtual int complete_poly()=0;
+  virtual int start_ring()=0;
+  virtual int complete_ring()=0;
+  virtual int add_point(double x, double y)=0;
+  virtual int start_collection(int n_objects) { return 0; }
+  virtual int empty_shape() { return 0; }
+  int start_simple_poly()
+  {
+    return start_poly() || start_ring();
+  }
+  int complete_simple_poly()
+  {
+    return complete_ring() || complete_poly();
+  }
+  virtual ~Gcalc_shape_transporter() {}
+};
+
+
+enum Gcalc_scan_events
+{
+  scev_none= 0,
+  scev_point= 1,         /* Just a new point in thread */
+  scev_thread= 2,        /* Start of the new thread */
+  scev_two_threads= 4,   /* A couple of new threads started */
+  scev_intersection= 8,  /* Intersection happened */
+  scev_end= 16,          /* Single thread finished */
+  scev_two_ends= 32,     /* A couple of threads finished */
+  scev_single_point= 64  /* Got single point */
+};
+
+
+/* 
+   Gcalc_scan_iterator incapsulates the slisescan algorithm.
+   It takes filled Gcalc_heap as an datasource. Then can be
+   iterated trought the vertexes and intersection points with
+   the step() method. After the 'step()' one usually observes
+   the current 'slice' to do the necessary calculations, like
+   looking for intersections, calculating the area, whatever.
+*/
+
+class Gcalc_scan_iterator : public Gcalc_dyn_list
+{
+public:
+  class point : public Gcalc_dyn_list::Item
+  {
+  public:
+    Gcalc_coord1 dx;
+    Gcalc_coord1 dy;
+    Gcalc_heap::Info *pi;
+    Gcalc_heap::Info *next_pi;
+    Gcalc_heap::Info *ev_pi;
+    const Gcalc_coord1 *l_border;
+    const Gcalc_coord1 *r_border;
+    point *ev_next;
+
+    Gcalc_scan_events event;
+
+    inline const point *c_get_next() const
+      { return (const point *)next; }
+    inline bool is_bottom() const { return !next_pi; }
+    gcalc_shape_info get_shape() const { return pi->shape; }
+    inline point *get_next() { return (point *)next; }
+    inline const point *get_next() const { return (const point *)next; }
+    /* Compare the dx_dy parameters regarding the horiz_dir */
+    /* returns -1 if less, 0 if equal, 1 if bigger          */
+    static int cmp_dx_dy(const Gcalc_coord1 dx_a,
+                         const Gcalc_coord1 dy_a,
+                         const Gcalc_coord1 dx_b,
+                         const Gcalc_coord1 dy_b);
+    static int cmp_dx_dy(const Gcalc_heap::Info *p1,
+                         const Gcalc_heap::Info *p2,
+                         const Gcalc_heap::Info *p3,
+                         const Gcalc_heap::Info *p4);
+    int cmp_dx_dy(const point *p) const;
+    point **next_ptr() { return (point **) &next; }
+#ifndef GCALC_DBUG_OFF
+    unsigned int thread;
+#endif /*GCALC_DBUG_OFF*/
+#ifdef GCALC_CHECK_WITH_FLOAT
+    void calc_x(long double *x, long double y, long double ix) const;
+#endif /*GCALC_CHECK_WITH_FLOAT*/
+  };
+
+  /* That class introduced mostly for the 'typecontrol' reason.      */
+  /* only difference from the point classis the get_next() function. */
+  class event_point : public point
+  {
+  public:
+    inline const event_point *get_next() const
+    { return (const event_point*) ev_next; }
+    int simple_event() const
+    {
+      return !ev_next ? (event & (scev_point | scev_end)) : 
+        (!ev_next->ev_next && event == scev_two_ends);
+    }
+  };
+
+  class intersection_info : public Gcalc_dyn_list::Item
+  {
+  public:
+    point *edge_a;
+    point *edge_b;
+
+    Gcalc_coord2 t_a;
+    Gcalc_coord2 t_b;
+    int t_calculated;
+    Gcalc_coord3 x_exp;
+    int x_calculated;
+    Gcalc_coord3 y_exp;
+    int y_calculated;
+    void calc_t()
+    {if (!t_calculated) do_calc_t(); }
+    void calc_y_exp()
+    { if (!y_calculated) do_calc_y(); }
+    void calc_x_exp()
+    { if (!x_calculated) do_calc_x(); }
+
+    void do_calc_t();
+    void do_calc_x();
+    void do_calc_y();
+  };
+
+
+  class slice_state
+  {
+  public:
+    point *slice;
+    point **event_position_hook;
+    point *event_end;
+    const Gcalc_heap::Info *pi;
+  };
+
+public:
+  Gcalc_scan_iterator(size_t blk_size= 8192);
+
+  GCALC_DECL_TERMINATED_STATE(killed)
+
+  void init(Gcalc_heap *points); /* Iterator can be reused */
+  void reset();
+  int step();
+
+  Gcalc_heap::Info *more_points() { return m_cur_pi; }
+  bool more_trapezoids()
+    { return m_cur_pi && m_cur_pi->next; }
+
+  const point *get_bottom_points() const
+    { return m_bottom_points; }
+  const point *get_event_position() const
+    { return *state.event_position_hook; }
+  const point *get_event_end() const
+    { return state.event_end; }
+  const event_point *get_events() const
+    { return (const event_point *)
+        (*state.event_position_hook == state.event_end ?
+            m_bottom_points : *state.event_position_hook); }
+  const point *get_b_slice() const { return state.slice; }
+  double get_h() const;
+  double get_y() const;
+  double get_event_x() const;
+  double get_sp_x(const point *sp) const;
+  int intersection_step() const
+    { return state.pi->type == Gcalc_heap::nt_intersection; }
+  const Gcalc_heap::Info *get_cur_pi() const
+  {
+    return state.pi;
+  }
+
+private:
+  Gcalc_heap *m_heap;
+  Gcalc_heap::Info *m_cur_pi;
+  slice_state state;
+
+#ifndef GCALC_DBUG_OFF
+  unsigned int m_cur_thread;
+#endif /*GCALC_DBUG_OFF*/
+
+  point *m_bottom_points;
+  point **m_bottom_hook;
+
+  int node_scan();
+  void eq_scan();
+  void intersection_scan();
+  void remove_bottom_node();
+  int insert_top_node();
+  int add_intersection(point *sp_a, point *sp_b,
+                       Gcalc_heap::Info *pi_from);
+  int add_eq_node(Gcalc_heap::Info *node, point *sp);
+  int add_events_for_node(point *sp_node);
+
+  point *new_slice_point()
+  {
+    point *new_point= (point *)new_item();
+    return new_point;
+  }
+  intersection_info *new_intersection_info(point *a, point *b)
+  {
+    intersection_info *ii= (intersection_info *)new_item();
+    ii->edge_a= a;
+    ii->edge_b= b;
+    ii->t_calculated= ii->x_calculated= ii->y_calculated= 0;
+    return ii;
+  }
+  int arrange_event(int do_sorting, int n_intersections);
+  static double get_pure_double(const Gcalc_internal_coord *d, int d_len);
+};
+
+
+/* 
+   Gcalc_trapezoid_iterator simplifies the calculations on
+   the current slice of the Gcalc_scan_iterator.
+   One can walk through the trapezoids formed between
+   previous and current slices.
+*/
+
+#ifdef TMP_BLOCK
+class Gcalc_trapezoid_iterator
+{
+protected:
+  const Gcalc_scan_iterator::point *sp0;
+  const Gcalc_scan_iterator::point *sp1;
+public:
+  Gcalc_trapezoid_iterator(const Gcalc_scan_iterator *scan_i) :
+    sp0(scan_i->get_b_slice()),
+    sp1(scan_i->get_t_slice())
+    {}
+
+  inline bool more() const { return sp1 && sp1->next; }
+
+  const Gcalc_scan_iterator::point *lt() const { return sp1; }
+  const Gcalc_scan_iterator::point *lb() const { return sp0; }
+  const Gcalc_scan_iterator::point *rb() const
+  {
+    const Gcalc_scan_iterator::point *result= sp0;
+    while ((result= result->c_get_next())->is_bottom())
+    {}
+    return result;
+  }
+  const Gcalc_scan_iterator::point *rt() const
+    { return sp1->c_get_next(); }
+
+  void operator++()
+  {
+    sp0= rb();
+    sp1= rt();
+  }
+};
+#endif /*TMP_BLOCK*/
+
+
+/* 
+   Gcalc_point_iterator simplifies the calculations on
+   the current slice of the Gcalc_scan_iterator.
+   One can walk through the points on the current slice.
+*/
+
+class Gcalc_point_iterator
+{
+protected:
+  const Gcalc_scan_iterator::point *sp;
+public:
+  Gcalc_point_iterator(const Gcalc_scan_iterator *scan_i):
+    sp(scan_i->get_b_slice())
+    {}
+
+  inline bool more() const { return sp != NULL; }
+  inline void operator++() { sp= sp->c_get_next(); }
+  inline const Gcalc_scan_iterator::point *point() const { return sp; }
+  inline const Gcalc_heap::Info *get_pi() const { return sp->pi; }
+  inline gcalc_shape_info get_shape() const { return sp->get_shape(); }
+  inline void restart(const Gcalc_scan_iterator *scan_i)
+  { sp= scan_i->get_b_slice(); }
+};
+
+#endif /*GCALC_SLICESCAN_INCLUDED*/
+
diff --git a/sql/gcalc_tools.cc b/sql/gcalc_tools.cc
new file mode 100644
index 00000000000..729b322769c
--- /dev/null
+++ b/sql/gcalc_tools.cc
@@ -0,0 +1,1425 @@
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (C) 2011 Monty Program Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+#include "mysql_priv.h"
+
+#ifdef HAVE_SPATIAL
+
+#include "gcalc_tools.h"
+#include "spatial.h"
+
+#define float_to_coord(d) ((double) d)
+
+
+/*
+  Adds new shape to the relation.
+  After that it can be used as an argument of an operation.
+*/
+
+gcalc_shape_info Gcalc_function::add_new_shape(uint32 shape_id,
+                                               shape_type shape_kind)
+{
+  shapes_buffer.q_append((uint32) shape_kind);
+  return n_shapes++;
+}
+
+
+/*
+  Adds new operation to the constructed relation.
+  To construct the complex relation one has to specify operations
+  in prefix style.
+*/
+
+void Gcalc_function::add_operation(uint operation, uint32 n_operands)
+{
+  uint32 op_code= (uint32 ) operation + n_operands;
+  function_buffer.q_append(op_code);
+}
+
+
+/*
+  Sometimes the number of arguments is unknown at the moment the operation
+  is added. That allows to specify it later.
+*/
+
+void Gcalc_function::add_operands_to_op(uint32 operation_pos, uint32 n_operands)
+{
+  uint32 op_code= uint4korr(function_buffer.ptr() + operation_pos) + n_operands;
+  function_buffer.write_at_position(operation_pos, op_code);
+}
+
+
+/*
+  Just like the add_operation() but the result will be the inverted
+  value of an operation.
+*/
+
+void Gcalc_function::add_not_operation(op_type operation, uint32 n_operands)
+{
+  uint32 op_code= ((uint32) op_not | (uint32 ) operation) + n_operands;
+  function_buffer.q_append(op_code);
+}
+
+
+int Gcalc_function::single_shape_op(shape_type shape_kind, gcalc_shape_info *si)
+{
+  if (reserve_shape_buffer(1) || reserve_op_buffer(1))
+    return 1;
+  *si= add_new_shape(0, shape_kind);
+  add_operation(op_shape, *si);
+  return 0;
+}
+
+
+int Gcalc_function::repeat_expression(uint32 exp_pos)
+{
+  if (reserve_op_buffer(1))
+    return 1;
+  add_operation(op_repeat, exp_pos);
+  return 0;
+}
+
+
+/*
+  Specify how many arguments we're going to have.
+*/
+
+int Gcalc_function::reserve_shape_buffer(uint n_shapes)
+{
+  return shapes_buffer.reserve(n_shapes * 4, 512);
+}
+
+
+/*
+  Specify how many operations we're going to have.
+*/
+
+int Gcalc_function::reserve_op_buffer(uint n_ops)
+{
+  return function_buffer.reserve(n_ops * 4, 512);
+}
+
+
+int Gcalc_function::alloc_states()
+{
+  if (function_buffer.reserve((n_shapes+1) * 2 * sizeof(int)))
+    return 1;
+  i_states= (int *) (function_buffer.ptr() + ALIGN_SIZE(function_buffer.length()));
+  b_states= i_states + (n_shapes + 1);
+  return 0;
+}
+
+
+int Gcalc_function::count_internal(const char *cur_func, uint set_type,
+                                   const char **end)
+{
+  uint c_op= uint4korr(cur_func);
+  op_type next_func= (op_type) (c_op & op_any);
+  int mask= (c_op & op_not) ? 1:0;
+  uint n_ops= c_op & ~(op_any | op_not | v_mask);
+  uint n_shape= c_op & ~(op_any | op_not | v_mask); /* same as n_ops */
+  value v_state= (value) (c_op & v_mask);
+  int result= 0;
+  const char *sav_cur_func= cur_func;
+
+  // GCALC_DBUG_ENTER("Gcalc_function::count_internal");
+
+  cur_func+= 4;
+  if (next_func == op_shape)
+  {
+    if (set_type == 0)
+      result= i_states[n_shape] | b_states[n_shape];
+    else if (set_type == op_border)
+      result= b_states[n_shape];
+    else if (set_type == op_internals)
+      result= i_states[n_shape] && !b_states[n_shape];
+    goto exit;
+  }
+
+  if (next_func == op_false)
+  {
+    result= 0;
+    goto exit;
+  }
+
+  if (next_func == op_border || next_func == op_internals)
+  {
+    result= count_internal(cur_func, next_func, &cur_func);
+    goto exit;
+  }
+
+  if (next_func == op_repeat)
+  {
+    result= count_internal(function_buffer.ptr() + n_ops, set_type, 0);
+    goto exit;
+  }
+
+  if (n_ops == 0)
+    return mask;
+    //GCALC_DBUG_RETURN(mask);
+
+  result= count_internal(cur_func, set_type, &cur_func);
+
+  while (--n_ops)
+  {
+    int next_res= count_internal(cur_func, set_type, &cur_func);
+    switch (next_func)
+    {
+      case op_union:
+        result= result | next_res;
+        break;
+      case op_intersection:
+        result= result & next_res;
+        break;
+      case op_symdifference:
+        result= result ^ next_res;
+        break;
+      case op_difference:
+        result= result & !next_res;
+        break;
+      default:
+        GCALC_DBUG_ASSERT(FALSE);
+    };
+  }
+
+exit:
+  result^= mask;
+  if (v_state != v_empty)
+  {
+    switch (v_state)
+    {
+      case v_find_t:
+        if (result)
+        {
+          c_op= (c_op & ~v_mask) | v_t_found;
+          int4store(sav_cur_func, c_op);
+        };
+        break;
+      case v_find_f:
+        if (!result)
+        {
+          c_op= (c_op & ~v_mask) | v_f_found;
+          int4store(sav_cur_func, c_op);
+        };
+        break;
+      case v_t_found:
+        result= 1;
+        break;
+      case v_f_found:
+        result= 0;
+        break;
+      default:
+        GCALC_DBUG_ASSERT(0);
+    };
+  }
+  
+  if (end)
+    *end= cur_func;
+  return result;
+  //GCALC_DBUG_RETURN(result);
+}
+
+
+void Gcalc_function::clear_i_states()
+{
+  for (uint i= 0; i < n_shapes; i++)
+    i_states[i]= 0;
+}
+
+
+void Gcalc_function::clear_b_states()
+{
+  for (uint i= 0; i < n_shapes; i++)
+    b_states[i]= 0;
+}
+
+
+/*
+  Clear the state of the object.
+*/
+
+void Gcalc_function::reset()
+{
+  n_shapes= 0;
+  shapes_buffer.length(0);
+  function_buffer.length(0);
+}
+
+
+int Gcalc_function::check_function(Gcalc_scan_iterator &scan_it)
+{
+  const Gcalc_scan_iterator::point *eq_start, *cur_eq;
+  const Gcalc_scan_iterator::event_point *events;
+  GCALC_DBUG_ENTER("Gcalc_function::check_function");
+
+  while (scan_it.more_points())
+  {
+    if (scan_it.step())
+      GCALC_DBUG_RETURN(-1);
+    events= scan_it.get_events();
+
+    /* these kinds of events don't change the function */
+    Gcalc_point_iterator pit(&scan_it);
+    clear_b_states();
+    clear_i_states();
+    /* Walk to the event, marking polygons we met */
+    for (; pit.point() != scan_it.get_event_position(); ++pit)
+    {
+      gcalc_shape_info si= pit.point()->get_shape();
+      if ((get_shape_kind(si) == Gcalc_function::shape_polygon))
+        invert_i_state(si);
+    }
+    if (events->simple_event())
+    {
+      if (events->event == scev_end)
+        set_b_state(events->get_shape());
+
+      if (count())
+        GCALC_DBUG_RETURN(1);
+      clear_b_states();
+      continue;
+    }
+
+    /* Check the status of the event point */
+    for (; events; events= events->get_next())
+    {
+      gcalc_shape_info si= events->get_shape();
+      if (events->event == scev_thread ||
+          events->event == scev_end ||
+          events->event == scev_single_point ||
+          (get_shape_kind(si) == Gcalc_function::shape_polygon))
+        set_b_state(si);
+      else if (get_shape_kind(si) == Gcalc_function::shape_line)
+        set_i_state(si);
+    }
+
+    if (count())
+      GCALC_DBUG_RETURN(1);
+
+    /* Set back states changed in the loop above. */
+    for (events= scan_it.get_events(); events; events= events->get_next())
+    {
+      gcalc_shape_info si= events->get_shape();
+      if (events->event == scev_thread ||
+          events->event == scev_end ||
+          events->event == scev_single_point ||
+          (get_shape_kind(si) == Gcalc_function::shape_polygon))
+        clear_b_state(si);
+      else if (get_shape_kind(si) == Gcalc_function::shape_line)
+        clear_i_state(si);
+    }
+
+    if (scan_it.get_event_position() == scan_it.get_event_end())
+      continue;
+
+    /* Check the status after the event */
+    eq_start= pit.point();
+    do
+    {
+      ++pit;
+      if (pit.point() != scan_it.get_event_end() &&
+          eq_start->cmp_dx_dy(pit.point()) == 0)
+        continue;
+      for (cur_eq= eq_start; cur_eq != pit.point();
+          cur_eq= cur_eq->get_next())
+      {
+        gcalc_shape_info si= cur_eq->get_shape();
+        if (get_shape_kind(si) == Gcalc_function::shape_polygon)
+          set_b_state(si);
+        else
+          invert_i_state(si);
+      }
+      if (count())
+        GCALC_DBUG_RETURN(1);
+
+      for (cur_eq= eq_start; cur_eq != pit.point(); cur_eq= cur_eq->get_next())
+      {
+        gcalc_shape_info si= cur_eq->get_shape();
+        if ((get_shape_kind(si) == Gcalc_function::shape_polygon))
+        {
+          clear_b_state(si);
+          invert_i_state(si);
+        }
+        else
+          invert_i_state(cur_eq->get_shape());
+      }
+      if (count())
+        GCALC_DBUG_RETURN(1);
+      eq_start= pit.point();
+    } while (pit.point() != scan_it.get_event_end());
+  }
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_transporter::single_point(double x, double y)
+{
+  gcalc_shape_info si;
+  return m_fn->single_shape_op(Gcalc_function::shape_point, &si) ||
+         int_single_point(si, x, y);
+}
+
+
+int Gcalc_operation_transporter::start_line()
+{
+  int_start_line();
+  return m_fn->single_shape_op(Gcalc_function::shape_line, &m_si);
+}
+
+
+int Gcalc_operation_transporter::complete_line()
+{
+  int_complete_line();
+  return 0;
+}
+
+
+int Gcalc_operation_transporter::start_poly()
+{
+  int_start_poly();
+  return m_fn->single_shape_op(Gcalc_function::shape_polygon, &m_si);
+}
+
+
+int Gcalc_operation_transporter::complete_poly()
+{
+  int_complete_poly();
+  return 0;
+}
+
+
+int Gcalc_operation_transporter::start_ring()
+{
+  int_start_ring();
+  return 0;
+}
+
+
+int Gcalc_operation_transporter::complete_ring()
+{
+  int_complete_ring();
+  return 0;
+}
+
+
+int Gcalc_operation_transporter::add_point(double x, double y)
+{
+  return int_add_point(m_si, x, y);
+}
+
+
+int Gcalc_operation_transporter::start_collection(int n_objects)
+{
+  if (m_fn->reserve_shape_buffer(n_objects) || m_fn->reserve_op_buffer(1))
+        return 1;
+  m_fn->add_operation(Gcalc_function::op_union, n_objects);
+  return 0;
+}
+
+
+int Gcalc_operation_transporter::empty_shape()
+{
+  if (m_fn->reserve_op_buffer(1))
+        return 1;
+  m_fn->add_operation(Gcalc_function::op_false, 0);
+  return 0;
+}
+
+
+int Gcalc_result_receiver::start_shape(Gcalc_function::shape_type shape)
+{
+  GCALC_DBUG_ENTER("Gcalc_result_receiver::start_shape");
+  if (buffer.reserve(4*2, 512))
+    GCALC_DBUG_RETURN(1);
+  cur_shape= shape;
+  shape_pos= buffer.length();
+  buffer.length(shape_pos + ((shape == Gcalc_function::shape_point) ? 4:8));
+  n_points= 0;
+  shape_area= 0.0;
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_result_receiver::add_point(double x, double y)
+{
+  GCALC_DBUG_ENTER("Gcalc_result_receiver::add_point");
+  if (n_points && x == prev_x && y == prev_y)
+    GCALC_DBUG_RETURN(0);
+
+  if (!n_points++)
+  {
+    prev_x= first_x= x;
+    prev_y= first_y= y;
+    GCALC_DBUG_RETURN(0);
+  }
+
+  shape_area+= prev_x*y - prev_y*x;
+
+  if (buffer.reserve(8*2, 512))
+    GCALC_DBUG_RETURN(1);
+  buffer.q_append(prev_x);
+  buffer.q_append(prev_y);
+  prev_x= x;
+  prev_y= y;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_result_receiver::complete_shape()
+{
+  GCALC_DBUG_ENTER("Gcalc_result_receiver::complete_shape");
+  if (n_points == 0)
+  {
+    buffer.length(shape_pos);
+    GCALC_DBUG_RETURN(0);
+  }
+  if (n_points == 1)
+  {
+    if (cur_shape != Gcalc_function::shape_point)
+    {
+      if (cur_shape == Gcalc_function::shape_hole)
+      {
+        buffer.length(shape_pos);
+        GCALC_DBUG_RETURN(0);
+      }
+      cur_shape= Gcalc_function::shape_point;
+      buffer.length(buffer.length()-4);
+    }
+  }
+  else
+  {
+    GCALC_DBUG_ASSERT(cur_shape != Gcalc_function::shape_point);
+    if (cur_shape == Gcalc_function::shape_hole)
+    {
+      shape_area+= prev_x*first_y - prev_y*first_x;
+      if (fabs(shape_area) < 1e-8)
+      {
+        buffer.length(shape_pos);
+        GCALC_DBUG_RETURN(0);
+      }
+    }
+
+    if ((cur_shape == Gcalc_function::shape_polygon ||
+          cur_shape == Gcalc_function::shape_hole) &&
+        prev_x == first_x && prev_y == first_y)
+    {
+      n_points--;
+      buffer.write_at_position(shape_pos+4, n_points);
+      goto do_complete;
+    }
+    buffer.write_at_position(shape_pos+4, n_points);
+  }
+
+  if (buffer.reserve(8*2, 512))
+    GCALC_DBUG_RETURN(1);
+  buffer.q_append(prev_x);
+  buffer.q_append(prev_y);
+  
+do_complete:
+  buffer.write_at_position(shape_pos, (uint32) cur_shape);
+
+  if (!n_shapes++)
+  {
+    GCALC_DBUG_ASSERT(cur_shape != Gcalc_function::shape_hole);
+    common_shapetype= cur_shape;
+  }
+  else if (cur_shape == Gcalc_function::shape_hole)
+  {
+    ++n_holes;
+  }
+  else if (!collection_result && (cur_shape != common_shapetype))
+  {
+      collection_result= true;
+  }
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_result_receiver::single_point(double x, double y)
+{
+  return start_shape(Gcalc_function::shape_point) ||
+         add_point(x, y) ||
+         complete_shape();
+}
+
+
+int Gcalc_result_receiver::done()
+{
+  return 0;
+}
+
+
+void Gcalc_result_receiver::reset()
+{
+  buffer.length(0);
+  collection_result= FALSE;
+  n_shapes= n_holes= 0;
+}
+
+
+int Gcalc_result_receiver::get_result_typeid()
+{
+  if (!n_shapes || collection_result)
+    return Geometry::wkb_geometrycollection;
+
+  switch (common_shapetype)
+  {
+    case Gcalc_function::shape_polygon:
+      return (n_shapes - n_holes == 1) ?
+              Geometry::wkb_polygon : Geometry::wkb_multipolygon;
+    case Gcalc_function::shape_point:
+      return (n_shapes == 1) ? Geometry::wkb_point : Geometry::wkb_multipoint;
+    case Gcalc_function::shape_line:
+      return (n_shapes == 1) ? Geometry::wkb_linestring :
+                               Geometry::wkb_multilinestring;
+    default:
+      GCALC_DBUG_ASSERT(0);
+  }
+  return 0;
+}
+
+
+int Gcalc_result_receiver::move_hole(uint32 dest_position, uint32 source_position,
+                                     uint32 *position_shift)
+{
+  char *ptr;
+  int source_len;
+  GCALC_DBUG_ENTER("Gcalc_result_receiver::move_hole");
+  GCALC_DBUG_PRINT(("ps %d %d", dest_position, source_position));
+
+  *position_shift= source_len= buffer.length() - source_position;
+
+  if (dest_position == source_position)
+    GCALC_DBUG_RETURN(0);
+
+  if (buffer.reserve(source_len, MY_ALIGN(source_len, 512)))
+    GCALC_DBUG_RETURN(1);
+
+  ptr= (char *) buffer.ptr();
+  memmove(ptr + dest_position + source_len, ptr + dest_position,
+          buffer.length() - dest_position);
+  memcpy(ptr + dest_position, ptr + buffer.length(), source_len);
+  GCALC_DBUG_RETURN(0);
+}
+
+
+Gcalc_operation_reducer::Gcalc_operation_reducer(size_t blk_size) :
+  Gcalc_dyn_list(blk_size, sizeof(res_point)),
+#ifndef GCALC_DBUG_OFF
+  n_res_points(0),
+#endif /*GCALC_DBUG_OFF*/
+  m_res_hook((Gcalc_dyn_list::Item **)&m_result),
+  m_first_active_thread(NULL)
+{}
+
+
+void Gcalc_operation_reducer::init(Gcalc_function *fn, modes mode)
+{
+  m_fn= fn;
+  m_mode= mode;
+  m_first_active_thread= NULL;
+  m_lines= NULL;
+  m_lines_hook= (Gcalc_dyn_list::Item **) &m_lines;
+  m_poly_borders= NULL;
+  m_poly_borders_hook= (Gcalc_dyn_list::Item **) &m_poly_borders;
+  GCALC_SET_TERMINATED(killed, 0);
+}
+
+
+Gcalc_operation_reducer::
+Gcalc_operation_reducer(Gcalc_function *fn, modes mode, size_t blk_size) :
+  Gcalc_dyn_list(blk_size, sizeof(res_point)),
+  m_res_hook((Gcalc_dyn_list::Item **)&m_result)
+{
+  init(fn, mode);
+}
+
+
+void Gcalc_operation_reducer::res_point::set(const Gcalc_scan_iterator *si)
+{
+  intersection_point= si->intersection_step();
+  pi= si->get_cur_pi();
+}
+
+
+Gcalc_operation_reducer::res_point *
+  Gcalc_operation_reducer::add_res_point(Gcalc_function::shape_type type)
+{
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_res_point");
+  res_point *result= (res_point *)new_item();
+  *m_res_hook= result;
+  result->prev_hook= m_res_hook;
+  m_res_hook= &result->next;
+  result->type= type;
+#ifndef GCALC_DBUG_OFF
+  result->point_n= n_res_points++;
+#endif /*GCALC_DBUG_OFF*/
+  GCALC_DBUG_RETURN(result);
+}
+
+int Gcalc_operation_reducer::add_line(int incoming, active_thread *t,
+    const Gcalc_scan_iterator::point *p)
+{
+  line *l= new_line();
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_line");
+  if (!l)
+    GCALC_DBUG_RETURN(1);
+  l->incoming= incoming;
+  l->t= t;
+  l->p= p;
+  *m_lines_hook= l;
+  m_lines_hook= &l->next;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::add_poly_border(int incoming,
+    active_thread *t, int prev_state, const Gcalc_scan_iterator::point *p)
+{
+  poly_border *b= new_poly_border();
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_poly_border");
+  if (!b)
+    GCALC_DBUG_RETURN(1);
+  b->incoming= incoming;
+  b->t= t;
+  b->prev_state= prev_state;
+  b->p= p;
+  *m_poly_borders_hook= b;
+  m_poly_borders_hook= &b->next;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::continue_range(active_thread *t,
+                                            const Gcalc_heap::Info *p,
+                                            const Gcalc_heap::Info *p_next)
+{
+  res_point *rp= add_res_point(t->rp->type);
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::continue_range");
+  if (!rp)
+    GCALC_DBUG_RETURN(1);
+  rp->glue= NULL;
+  rp->down= t->rp;
+  t->rp->up= rp;
+  rp->intersection_point= false;
+  rp->pi= p;
+  t->rp= rp;
+  t->p1= p;
+  t->p2= p_next;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+inline int Gcalc_operation_reducer::continue_i_range(active_thread *t,
+			            const Gcalc_heap::Info *ii)
+{
+  res_point *rp= add_res_point(t->rp->type);
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::continue_i_range");
+  if (!rp)
+    GCALC_DBUG_RETURN(1);
+  rp->glue= NULL;
+  rp->down= t->rp;
+  t->rp->up= rp;
+  rp->intersection_point= true;
+  rp->pi= ii;
+  t->rp= rp;
+  GCALC_DBUG_RETURN(0);
+}
+
+int Gcalc_operation_reducer::end_couple(active_thread *t0, active_thread *t1,
+				     const Gcalc_heap::Info *p)
+{
+  res_point *rp0, *rp1;
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::end_couple");
+  GCALC_DBUG_ASSERT(t0->rp->type == t1->rp->type);
+  if (!(rp0= add_res_point(t0->rp->type)) ||
+      !(rp1= add_res_point(t0->rp->type)))
+    GCALC_DBUG_RETURN(1);
+  rp0->down= t0->rp;
+  rp1->down= t1->rp;
+  rp1->glue= rp0;
+  rp0->glue= rp1;
+  rp0->up= rp1->up= NULL;
+  t0->rp->up= rp0;
+  t1->rp->up= rp1;
+  rp0->intersection_point= rp1->intersection_point= false;
+  rp0->pi= rp1->pi= p;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::count_slice(Gcalc_scan_iterator *si)
+{
+  Gcalc_point_iterator pi(si);
+  int prev_state= 0;
+  int sav_prev_state;
+  active_thread *prev_range= NULL;
+  const Gcalc_scan_iterator::event_point *events;
+  const Gcalc_scan_iterator::point *eq_start;
+  active_thread **cur_t_hook= &m_first_active_thread;
+  active_thread **starting_t_hook;
+  active_thread *bottom_threads= NULL;
+  active_thread *eq_thread, *point_thread;;
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::count_slice");
+
+  m_fn->clear_i_states();
+  /* Walk to the event, remembering what is needed. */
+  for (; pi.point() != si->get_event_position();
+       ++pi, cur_t_hook= (active_thread **) &(*cur_t_hook)->next)
+  {
+    active_thread *cur_t= *cur_t_hook;
+    if (cur_t->enabled() &&
+        cur_t->rp->type == Gcalc_function::shape_polygon)
+    {
+      prev_state^= 1;
+      prev_range= prev_state ? cur_t : 0;
+    }
+    if (m_fn->get_shape_kind(pi.get_shape()) == Gcalc_function::shape_polygon)
+      m_fn->invert_i_state(pi.get_shape());
+  }
+
+  events= si->get_events();
+  if (events->simple_event())
+  {
+    active_thread *cur_t= *cur_t_hook;
+    switch (events->event)
+    {
+      case scev_point:
+      {
+        if (cur_t->enabled() &&
+            continue_range(cur_t, events->pi, events->next_pi))
+          GCALC_DBUG_RETURN(1);
+        break;
+      }
+      case scev_end:
+      {
+        if (cur_t->enabled() && end_line(cur_t, si))
+          GCALC_DBUG_RETURN(1);
+        *cur_t_hook= cur_t->get_next();
+        free_item(cur_t);
+        break;
+      }
+      case scev_two_ends:
+      {
+        if (cur_t->enabled() && cur_t->get_next()->enabled())
+        {
+          /* When two threads are ended here */
+          if (end_couple(cur_t, cur_t->get_next(), events->pi))
+            GCALC_DBUG_RETURN(1);
+        }
+        else if (cur_t->enabled() || cur_t->get_next()->enabled())
+        {
+          /* Rare case when edges of a polygon coincide */
+          if (end_line(cur_t->enabled() ? cur_t : cur_t->get_next(), si))
+            GCALC_DBUG_RETURN(1);
+        }
+        *cur_t_hook= cur_t->get_next()->get_next();
+        free_item(cur_t->next);
+        free_item(cur_t);
+        break;
+      }
+      default:
+        GCALC_DBUG_ASSERT(0);
+    }
+    GCALC_DBUG_RETURN(0);
+  }
+
+  starting_t_hook= cur_t_hook;
+  sav_prev_state= prev_state;
+
+  /* Walk through the event, collecting all the 'incoming' threads */
+  for (; events; events= events->get_next())
+  {
+    active_thread *cur_t= *cur_t_hook;
+
+    if (events->event == scev_single_point)
+      continue;
+
+    if (events->event == scev_thread ||
+        events->event == scev_two_threads)
+    {
+      active_thread *new_t= new_active_thread();
+      if (!new_t)
+        GCALC_DBUG_RETURN(1);
+      new_t->rp= NULL;
+      /* Insert into the main thread list before the current */
+      new_t->next= cur_t;
+      *cur_t_hook= new_t;
+      cur_t_hook= (active_thread **) &new_t->next;
+    }
+    else
+    {
+      if (events->is_bottom())
+      {
+        /* Move thread from the main list to the bottom_threads. */
+        *cur_t_hook= cur_t->get_next();
+        cur_t->next= bottom_threads;
+        bottom_threads= cur_t;
+      }
+      if (cur_t->enabled())
+      {
+        if (cur_t->rp->type == Gcalc_function::shape_line)
+        {
+          GCALC_DBUG_ASSERT(!prev_state);
+          add_line(1, cur_t, events);
+        }
+        else
+        {
+          add_poly_border(1, cur_t, prev_state, events);
+          prev_state^= 1;
+        }
+        if (!events->is_bottom())
+        {
+          active_thread *new_t= new_active_thread();
+          if (!new_t)
+            GCALC_DBUG_RETURN(1);
+          new_t->rp= NULL;
+          /* Replace the current thread with the new. */
+          new_t->next= cur_t->next;
+          *cur_t_hook= new_t;
+          cur_t_hook= (active_thread **) &new_t->next;
+          /* And move old to the bottom list */
+          cur_t->next= bottom_threads;
+          bottom_threads= cur_t;
+        }
+      }
+      else if (!events->is_bottom())
+        cur_t_hook= (active_thread **) &cur_t->next;
+    }
+  }
+  prev_state= sav_prev_state;
+  cur_t_hook= starting_t_hook;
+
+  eq_start= pi.point();
+  eq_thread= point_thread= *starting_t_hook;
+  m_fn->clear_b_states();
+  while (eq_start != si->get_event_end())
+  {
+    const Gcalc_scan_iterator::point *cur_eq;
+    int in_state, after_state;
+
+    ++pi;
+    point_thread= point_thread->get_next();
+
+    if (pi.point() != si->get_event_end() &&
+        eq_start->cmp_dx_dy(pi.point()) == 0)
+      continue;
+
+    for (cur_eq= eq_start; cur_eq != pi.point(); cur_eq= cur_eq->get_next())
+      m_fn->set_b_state(cur_eq->get_shape());
+    in_state= m_fn->count();
+
+    m_fn->clear_b_states();
+    for (cur_eq= eq_start; cur_eq != pi.point(); cur_eq= cur_eq->get_next())
+    {
+      gcalc_shape_info si= cur_eq->get_shape();
+      if ((m_fn->get_shape_kind(si) == Gcalc_function::shape_polygon))
+        m_fn->invert_i_state(si);
+    }
+    after_state= m_fn->count();
+    if (prev_state != after_state)
+    {
+      if (add_poly_border(0, eq_thread, prev_state, eq_start))
+        GCALC_DBUG_RETURN(1);
+    }
+    else if (!prev_state /* &&!after_state */ && in_state)
+    {
+      if (add_line(0, eq_thread, eq_start))
+        GCALC_DBUG_RETURN(1);
+    }
+
+    prev_state= after_state;
+    eq_start= pi.point();
+    eq_thread= point_thread;
+  }
+
+  if (!sav_prev_state && !m_poly_borders && !m_lines)
+  {
+    /* Check if we need to add the event point itself */
+    m_fn->clear_i_states();
+    /* b_states supposed to be clean already */
+    for (pi.restart(si); pi.point() != si->get_event_position(); ++pi)
+    {
+      if (m_fn->get_shape_kind(pi.get_shape()) == Gcalc_function::shape_polygon)
+        m_fn->invert_i_state(pi.get_shape());
+    }
+    for (events= si->get_events(); events; events= events->get_next())
+      m_fn->set_b_state(events->get_shape());
+
+    GCALC_DBUG_RETURN(m_fn->count() ? add_single_point(si) : 0);
+  }
+
+  if (m_poly_borders)
+  {
+    *m_poly_borders_hook= NULL;
+    while (m_poly_borders)
+    {
+      poly_border *pb1, *pb2;
+      pb1= m_poly_borders;
+      GCALC_DBUG_ASSERT(m_poly_borders->next);
+
+      pb2= get_pair_border(pb1);
+      /* Remove pb1 from the list. The pb2 already removed in get_pair_border. */
+      m_poly_borders= pb1->get_next();
+      if (connect_threads(pb1->incoming, pb2->incoming,
+                          pb1->t, pb2->t, pb1->p, pb2->p,
+                          prev_range, si, Gcalc_function::shape_polygon))
+        GCALC_DBUG_RETURN(1);
+
+      free_item(pb1);
+      free_item(pb2);
+    }
+    m_poly_borders_hook= (Gcalc_dyn_list::Item **) &m_poly_borders;
+    m_poly_borders= NULL;
+  }
+
+  if (m_lines)
+  {
+    *m_lines_hook= NULL;
+    if (m_lines->get_next() &&
+        !m_lines->get_next()->get_next())
+    {
+      if (connect_threads(m_lines->incoming, m_lines->get_next()->incoming,
+                          m_lines->t, m_lines->get_next()->t,
+                          m_lines->p, m_lines->get_next()->p,
+                          NULL, si, Gcalc_function::shape_line))
+        GCALC_DBUG_RETURN(1);
+    }
+    else
+    {
+      for (line *cur_line= m_lines; cur_line; cur_line= cur_line->get_next())
+      {
+        if (cur_line->incoming)
+        {
+          if (end_line(cur_line->t, si))
+            GCALC_DBUG_RETURN(1);
+        }
+        else
+          start_line(cur_line->t, cur_line->p, si);
+      }
+    }
+    free_list(m_lines);
+    m_lines= NULL;
+    m_lines_hook= (Gcalc_dyn_list::Item **) &m_lines;
+  }
+
+  if (bottom_threads)
+    free_list(bottom_threads);
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::add_single_point(const Gcalc_scan_iterator *si)
+{
+  res_point *rp= add_res_point(Gcalc_function::shape_point);
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::add_single_point");
+  if (!rp)
+    GCALC_DBUG_RETURN(1);
+  rp->glue= rp->up= rp->down= NULL;
+  rp->set(si);
+  GCALC_DBUG_RETURN(0);
+}
+
+
+Gcalc_operation_reducer::poly_border
+  *Gcalc_operation_reducer::get_pair_border(poly_border *b1)
+{
+  poly_border *prev_b= b1;
+  poly_border *result= b1->get_next();
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_pair_border");
+  if (b1->prev_state)
+  {
+    if (b1->incoming)
+    {
+      /* Find the first outgoing, otherwise the last one. */
+      while (result->incoming && result->get_next())
+      {
+        prev_b= result;
+        result= result->get_next();
+      }
+    }
+    else
+    {
+      /* Get the last one */
+      while (result->get_next())
+      {
+        prev_b= result;
+        result= result->get_next();
+      }
+    }
+  }
+  else /* !b1->prev_state */
+  {
+    if (b1->incoming)
+    {
+      /* Get the next incoming, otherwise the last one. */
+      while (!result->incoming && result->get_next())
+      {
+        prev_b= result;
+        result= result->get_next();
+      }
+    }
+    else
+    {
+      /* Just pick the next one */
+    }
+  }
+  /* Delete the result from the list. */
+  prev_b->next= result->next;
+  GCALC_DBUG_RETURN(result);
+}
+
+
+int Gcalc_operation_reducer::connect_threads(
+    int incoming_a, int incoming_b,
+    active_thread *ta, active_thread *tb,
+    const Gcalc_scan_iterator::point *pa, const Gcalc_scan_iterator::point *pb,
+    active_thread *prev_range,
+    const Gcalc_scan_iterator *si, Gcalc_function::shape_type s_t)
+{
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::connect_threads");
+  GCALC_DBUG_PRINT(("incoming %d %d", incoming_a, incoming_b));
+  if (incoming_a && incoming_b)
+  {
+    res_point *rpa, *rpb;
+    GCALC_DBUG_ASSERT(ta->rp->type == tb->rp->type);
+    if (!(rpa= add_res_point(ta->rp->type)) ||
+        !(rpb= add_res_point(ta->rp->type)))
+      GCALC_DBUG_RETURN(1);
+    rpa->down= ta->rp;
+    rpb->down= tb->rp;
+    rpb->glue= rpa;
+    rpa->glue= rpb;
+    rpa->up= rpb->up= NULL;
+    ta->rp->up= rpa;
+    tb->rp->up= rpb;
+    rpa->set(si);
+    rpb->set(si);
+    ta->rp= tb->rp= NULL;
+    GCALC_DBUG_RETURN(0);
+  }
+  if (!incoming_a)
+  {
+    GCALC_DBUG_ASSERT(!incoming_b);
+
+    res_point *rp0, *rp1;
+    if (!(rp0= add_res_point(s_t)) || !(rp1= add_res_point(s_t)))
+      GCALC_DBUG_RETURN(1);
+    rp0->glue= rp1;
+    rp1->glue= rp0;
+    rp0->set(si);
+    rp1->set(si);
+    rp0->down= rp1->down= NULL;
+    ta->rp= rp0;
+    tb->rp= rp1;
+    ta->p1= pa->pi;
+    ta->p2= pa->next_pi;
+
+    tb->p1= pb->pi;
+    tb->p2= pb->next_pi;
+
+    if (prev_range)
+    {
+      rp0->outer_poly= prev_range->thread_start;
+      tb->thread_start= prev_range->thread_start;
+      /* Chack if needed */
+      ta->thread_start= prev_range->thread_start;
+    }
+    else
+    {
+      rp0->outer_poly= 0;
+      ta->thread_start= rp0;
+      /* Chack if needed */
+      tb->thread_start= rp0;
+    }
+    GCALC_DBUG_RETURN(0);
+  }
+  /* else, if only ta is incoming */
+
+  GCALC_DBUG_ASSERT(tb != ta);
+  tb->rp= ta->rp;
+  tb->thread_start= ta->thread_start;
+  if (Gcalc_scan_iterator::point::
+      cmp_dx_dy(ta->p1, ta->p2, pb->pi, pb->next_pi) != 0)
+  {
+    if (si->intersection_step() ?
+          continue_i_range(tb, si->get_cur_pi()) :
+          continue_range(tb, si->get_cur_pi(), pb->next_pi))
+      GCALC_DBUG_RETURN(1);
+  }
+  tb->p1= pb->pi;
+  tb->p2= pb->next_pi;
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::start_line(active_thread *t,
+                                        const Gcalc_scan_iterator::point *p,
+                                        const Gcalc_scan_iterator *si)
+{
+  res_point *rp= add_res_point(Gcalc_function::shape_line);
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::start_line");
+  if (!rp)
+    GCALC_DBUG_RETURN(1);
+  rp->glue= rp->down= NULL;
+  rp->set(si);
+  t->rp= rp;
+  t->p1= p->pi;
+  t->p2= p->next_pi;
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::end_line(active_thread *t,
+                                      const Gcalc_scan_iterator *si)
+{
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::end_line");
+  GCALC_DBUG_ASSERT(t->rp->type == Gcalc_function::shape_line);
+  res_point *rp= add_res_point(Gcalc_function::shape_line);
+  if (!rp)
+    GCALC_DBUG_RETURN(1);
+  rp->glue= rp->up= NULL;
+  rp->down= t->rp;
+  rp->set(si);
+  t->rp->up= rp;
+  t->rp= NULL;
+
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::count_all(Gcalc_heap *hp)
+{
+  Gcalc_scan_iterator si;
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::count_all");
+  si.init(hp);
+  GCALC_SET_TERMINATED(si.killed, killed);
+  while (si.more_points())
+  {
+    if (si.step())
+      GCALC_DBUG_RETURN(1);
+    if (count_slice(&si))
+      GCALC_DBUG_RETURN(1);
+  }
+  GCALC_DBUG_RETURN(0);
+}
+
+inline void Gcalc_operation_reducer::free_result(res_point *res)
+{
+  if ((*res->prev_hook= res->next))
+  {
+    res->get_next()->prev_hook= res->prev_hook;
+  }
+  free_item(res);
+}
+
+
+inline int Gcalc_operation_reducer::get_single_result(res_point *res,
+						   Gcalc_result_receiver *storage)
+{
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_single_result");
+  if (res->intersection_point)
+  {
+    double x, y;
+    res->pi->calc_xy(&x, &y);
+    if (storage->single_point(x,y))
+      GCALC_DBUG_RETURN(1);
+  }
+  else
+    if (storage->single_point(res->pi->x, res->pi->y))
+      GCALC_DBUG_RETURN(1);
+  free_result(res);
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::get_result_thread(res_point *cur,
+                                               Gcalc_result_receiver *storage,
+                                               int move_upward,
+                                               res_point *first_poly_node)
+{
+  res_point *next;
+  bool glue_step= false;
+  double x, y;
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_result_thread");
+  while (cur)
+  {
+    if (!glue_step)
+    {
+      if (cur->intersection_point)
+      {
+        cur->pi->calc_xy(&x, &y);
+      }
+      else
+      {
+	x= cur->pi->x;
+        y= cur->pi->y;
+      }
+      if (storage->add_point(x, y))
+        GCALC_DBUG_RETURN(1);
+    }
+    
+    next= move_upward ? cur->up : cur->down;
+    if (!next && !glue_step)
+    {
+      next= cur->glue;
+      move_upward^= 1;
+      glue_step= true;
+      if (next)
+	next->glue= NULL;
+    }
+    else
+      glue_step= false;
+
+    cur->first_poly_node= first_poly_node;
+    free_result(cur);
+    cur= next;
+  }
+  GCALC_DBUG_RETURN(0);
+}
+
+
+int Gcalc_operation_reducer::get_polygon_result(res_point *cur,
+                                                Gcalc_result_receiver *storage,
+                                                res_point *first_poly_node)
+{
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_polygon_result");
+  res_point *glue= cur->glue;
+  glue->up->down= NULL;
+  free_result(glue);
+  GCALC_DBUG_RETURN(get_result_thread(cur, storage, 1, first_poly_node) ||
+                    storage->complete_shape());
+}
+
+
+int Gcalc_operation_reducer::get_line_result(res_point *cur,
+                                             Gcalc_result_receiver *storage)
+{
+  res_point *next;
+  res_point *cur_orig= cur;
+  int move_upward= 1;
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_line_result");
+  if (cur->glue)
+  {
+    /* Here we have to find the beginning of the line */
+    next= cur->up;
+    move_upward= 1;
+    while (next)
+    {
+      cur= next;
+      next= move_upward ? next->up : next->down;
+      if (!next)
+      {
+	next= cur->glue;
+        if (next == cur_orig)
+        {
+          /* It's the line loop */
+          cur= cur_orig;
+          cur->glue->glue= NULL;
+          move_upward= 1;
+          break;
+        }
+	move_upward^= 1;
+      }
+    }
+  }
+
+  GCALC_DBUG_RETURN(get_result_thread(cur, storage, move_upward, 0) ||
+                    storage->complete_shape());
+}
+
+
+int Gcalc_operation_reducer::get_result(Gcalc_result_receiver *storage)
+{
+  poly_instance *polygons= NULL;
+
+  GCALC_DBUG_ENTER("Gcalc_operation_reducer::get_result");
+  *m_res_hook= NULL;
+
+  /* This is to workaround an old gcc's bug */
+  if (m_res_hook == (Gcalc_dyn_list::Item **) &m_result)
+    goto done;
+
+  while (m_result)
+  {
+    Gcalc_function::shape_type shape= m_result->type;
+    if (shape == Gcalc_function::shape_point)
+    {
+      if (get_single_result(m_result, storage))
+        GCALC_DBUG_RETURN(1);
+      continue;
+    }
+    if (shape == Gcalc_function::shape_polygon)
+    {
+      if (m_result->outer_poly)
+      {
+        uint32 insert_position, hole_position, position_shift;
+        poly_instance *cur_poly;
+        insert_position= m_result->outer_poly->first_poly_node->poly_position;
+        GCALC_DBUG_ASSERT(insert_position);
+        hole_position= storage->position();
+        storage->start_shape(Gcalc_function::shape_hole);
+        if (get_polygon_result(m_result, storage,
+                               m_result->outer_poly->first_poly_node) ||
+            storage->move_hole(insert_position, hole_position,
+                               &position_shift))
+          GCALC_DBUG_RETURN(1);
+        for (cur_poly= polygons;
+             cur_poly && *cur_poly->after_poly_position >= insert_position;
+             cur_poly= cur_poly->get_next())
+          *cur_poly->after_poly_position+= position_shift;
+      }
+      else
+      {
+        uint32 *poly_position= &m_result->poly_position;
+        poly_instance *p= new_poly();
+        p->after_poly_position= poly_position;
+        p->next= polygons;
+        polygons= p;
+        storage->start_shape(Gcalc_function::shape_polygon);
+        if (get_polygon_result(m_result, storage, m_result))
+          GCALC_DBUG_RETURN(1);
+        *poly_position= storage->position();
+      }
+    }
+    else
+    {
+      storage->start_shape(shape);
+      if (get_line_result(m_result, storage))
+        GCALC_DBUG_RETURN(1);
+    }
+  }
+  
+done:
+  m_res_hook= (Gcalc_dyn_list::Item **)&m_result;
+  storage->done();
+  GCALC_DBUG_RETURN(0);
+}
+
+
+void Gcalc_operation_reducer::reset()
+{
+  free_list((Gcalc_heap::Item **) &m_result, m_res_hook);
+  m_res_hook= (Gcalc_dyn_list::Item **)&m_result;
+  free_list(m_first_active_thread);
+}
+
+#endif /*HAVE_SPATIAL*/
+
diff --git a/sql/gcalc_tools.h b/sql/gcalc_tools.h
new file mode 100644
index 00000000000..eed9be0803f
--- /dev/null
+++ b/sql/gcalc_tools.h
@@ -0,0 +1,347 @@
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (C) 2011 Monty Program Ab.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+#ifndef GCALC_TOOLS_INCLUDED
+#define GCALC_TOOLS_INCLUDED
+
+#include "gcalc_slicescan.h"
+
+
+/*
+  The Gcalc_function class objects are used to check for a binary relation.
+  The relation can be constructed with the prefix notation using predicates as
+        op_not (as !A)
+        op_union ( A || B || C... )
+        op_intersection ( A && B && C ... )
+        op_symdifference ( A+B+C+... == 1 )
+        op_difference ( A && !(B||C||..))
+  with the calls of the add_operation(operation, n_operands) method.
+  The relation is calculated over a set of shapes, that in turn have
+  to be added with the add_new_shape() method. All the 'shapes' can
+  be set to 0 with clear_shapes() method and single value
+  can be changed with the invert_state() method.
+  Then the value of the relation can be calculated with the count() method.
+  Frequently used method is find_function(Gcalc_scan_iterator it) that
+  iterates through the 'it' until the relation becomes TRUE.
+*/
+
+class Gcalc_function
+{
+private:
+  String shapes_buffer;
+  String function_buffer;
+  int *i_states;
+  int *b_states;
+  uint32 cur_object_id;
+  uint n_shapes;
+  int count_internal(const char *cur_func, uint set_type,
+                     const char **end);
+public:
+  enum value
+  {
+    v_empty=   0x0000000,
+    v_find_t=  0x1000000,
+    v_find_f=  0x2000000,
+    v_t_found= 0x3000000,
+    v_f_found= 0x4000000,
+    v_mask=    0x7000000
+  };
+  enum op_type
+  {
+    op_not=           0x80000000,
+    op_shape=         0x00000000,
+    op_union=         0x10000000,
+    op_intersection=  0x20000000,
+    op_symdifference= 0x30000000,
+    op_difference=    0x40000000,
+    op_repeat=        0x50000000,
+    op_border=        0x60000000,
+    op_internals=     0x70000000,
+    op_false=         0x08000000,
+    op_any=           0x78000000 /* The mask to get any of the operations */
+  };
+  enum shape_type
+  {
+    shape_point= 0,
+    shape_line= 1,
+    shape_polygon= 2,
+    shape_hole= 3
+  };
+  Gcalc_function() : n_shapes(0) {}
+  gcalc_shape_info add_new_shape(uint32 shape_id, shape_type shape_kind);
+  /*
+    Adds the leaf operation that returns the shape value.
+    Also adds the shape to the list of operands.
+  */
+  int single_shape_op(shape_type shape_kind, gcalc_shape_info *si);
+  void add_operation(uint operation, uint32 n_operands);
+  void add_not_operation(op_type operation, uint32 n_operands);
+  uint32 get_next_expression_pos() { return function_buffer.length(); }
+  void add_operands_to_op(uint32 operation_pos, uint32 n_operands);
+  int repeat_expression(uint32 exp_pos);
+  void set_cur_obj(uint32 cur_obj) { cur_object_id= cur_obj; }
+  int reserve_shape_buffer(uint n_shapes);
+  int reserve_op_buffer(uint n_ops);
+  uint get_nshapes() const { return n_shapes; }
+  shape_type get_shape_kind(gcalc_shape_info si) const
+  {
+    return (shape_type) uint4korr(shapes_buffer.ptr() + (si*4));
+  }
+
+  void set_states(int *shape_states) { i_states= shape_states; }
+  int alloc_states();
+  void invert_i_state(gcalc_shape_info shape) { i_states[shape]^= 1; }
+  void set_i_state(gcalc_shape_info shape) { i_states[shape]= 1; }
+  void clear_i_state(gcalc_shape_info shape) { i_states[shape]= 0; }
+  void set_b_state(gcalc_shape_info shape) { b_states[shape]= 1; }
+  void clear_b_state(gcalc_shape_info shape) { b_states[shape]= 0; }
+  int get_state(gcalc_shape_info shape)
+    { return i_states[shape] | b_states[shape]; }
+  int get_i_state(gcalc_shape_info shape) { return i_states[shape]; }
+  int get_b_state(gcalc_shape_info shape) { return b_states[shape]; }
+  int count()
+    { return count_internal(function_buffer.ptr(), 0, 0); }
+  void clear_i_states();
+  void clear_b_states();
+  void reset();
+
+  int check_function(Gcalc_scan_iterator &scan_it);
+};
+
+
+/*
+  Gcalc_operation_transporter class extends the Gcalc_shape_transporter.
+  In addition to the parent's functionality, it fills the Gcalc_function
+  object so it has the function that determines the proper shape.
+  For example Multipolyline will be represented as an union of polylines.
+*/
+
+class Gcalc_operation_transporter : public Gcalc_shape_transporter
+{
+protected:
+  Gcalc_function *m_fn;
+  gcalc_shape_info m_si;
+public:
+  Gcalc_operation_transporter(Gcalc_function *fn, Gcalc_heap *heap) :
+    Gcalc_shape_transporter(heap), m_fn(fn) {}
+
+  int single_point(double x, double y);
+  int start_line();
+  int complete_line();
+  int start_poly();
+  int complete_poly();
+  int start_ring();
+  int complete_ring();
+  int add_point(double x, double y);
+  int start_collection(int n_objects);
+  int empty_shape();
+};
+
+
+/*
+   When we calculate the result of an spatial operation like
+   Union or Intersection, we receive vertexes of the result
+   one-by-one, and probably need to treat them in variative ways.
+   So, the Gcalc_result_receiver class designed to get these
+   vertexes and construct shapes/objects out of them.
+   and to store the result in an appropriate format
+*/
+
+class Gcalc_result_receiver
+{
+  String buffer;
+  uint32 n_points;
+  Gcalc_function::shape_type common_shapetype;
+  bool collection_result;
+  uint32 n_shapes;
+  uint32 n_holes;
+
+  Gcalc_function::shape_type cur_shape;
+  uint32 shape_pos;
+  double first_x, first_y, prev_x, prev_y;
+  double shape_area;
+public:
+  Gcalc_result_receiver() : collection_result(FALSE), n_shapes(0), n_holes(0)
+    {}
+  int start_shape(Gcalc_function::shape_type shape);
+  int add_point(double x, double y);
+  int complete_shape();
+  int single_point(double x, double y);
+  int done();
+  void reset();
+
+  const char *result() { return buffer.ptr(); }
+  uint length() { return buffer.length(); }
+  int get_nshapes() { return n_shapes; }
+  int get_nholes() { return n_holes; }
+  int get_result_typeid();
+  uint32 position() { return buffer.length(); }
+  int move_hole(uint32 dest_position, uint32 source_position,
+                uint32 *position_shift);
+};
+
+
+/*
+  Gcalc_operation_reducer class incapsulates the spatial
+  operation functionality. It analyses the slices generated by
+  the slicescan and calculates the shape of the result defined
+  by some Gcalc_function.
+*/
+
+class Gcalc_operation_reducer : public Gcalc_dyn_list
+{
+public:
+  enum modes
+  {
+    /* Numeric values important here - careful with changing */
+    default_mode= 0,
+    prefer_big_with_holes= 1,
+    polygon_selfintersections_allowed= 2,  /* allowed in the result */
+    line_selfintersections_allowed= 4      /* allowed in the result */
+  };
+
+  Gcalc_operation_reducer(size_t blk_size=8192);
+  void init(Gcalc_function *fn, modes mode= default_mode);
+  Gcalc_operation_reducer(Gcalc_function *fn, modes mode= default_mode,
+		       size_t blk_size=8192);
+  GCALC_DECL_TERMINATED_STATE(killed)
+  int count_slice(Gcalc_scan_iterator *si);
+  int count_all(Gcalc_heap *hp);
+  int get_result(Gcalc_result_receiver *storage);
+  void reset();
+
+#ifndef GCALC_DBUG_OFF
+  int n_res_points;
+#endif /*GCALC_DBUG_OFF*/
+  class res_point : public Gcalc_dyn_list::Item
+  {
+  public:
+    int intersection_point;
+    union
+    {
+      const Gcalc_heap::Info *pi;
+      res_point *first_poly_node;
+    };
+    union
+    {
+      res_point *outer_poly;
+      uint32 poly_position;
+    };
+    res_point *up;
+    res_point *down;
+    res_point *glue;
+    Gcalc_function::shape_type type;
+    Gcalc_dyn_list::Item **prev_hook;
+#ifndef GCALC_DBUG_OFF
+    int point_n;
+#endif /*GCALC_DBUG_OFF*/
+    void set(const Gcalc_scan_iterator *si);
+    res_point *get_next() { return (res_point *)next; }
+  };
+
+  class active_thread : public Gcalc_dyn_list::Item
+  {
+  public:
+    res_point *rp;
+    res_point *thread_start;
+
+    const Gcalc_heap::Info *p1, *p2;
+    res_point *enabled() { return rp; }
+    active_thread *get_next() { return (active_thread *)next; }
+  };
+
+  class poly_instance : public Gcalc_dyn_list::Item
+  {
+  public:
+    uint32 *after_poly_position;
+    poly_instance *get_next() { return (poly_instance *)next; }
+  };
+
+  class line : public Gcalc_dyn_list::Item
+  {
+  public:
+    active_thread *t;
+    int incoming;
+    const Gcalc_scan_iterator::point *p;
+    line *get_next() { return (line *)next; }
+  };
+
+  class poly_border : public Gcalc_dyn_list::Item
+  {
+  public:
+    active_thread *t;
+    int incoming;
+    int prev_state;
+    const Gcalc_scan_iterator::point *p;
+    poly_border *get_next() { return (poly_border *)next; }
+  };
+
+  line *m_lines;
+  Gcalc_dyn_list::Item **m_lines_hook;
+  poly_border *m_poly_borders;
+  Gcalc_dyn_list::Item **m_poly_borders_hook;
+  line *new_line() { return (line *) new_item(); }
+  poly_border *new_poly_border() { return (poly_border *) new_item(); }
+  int add_line(int incoming, active_thread *t,
+               const Gcalc_scan_iterator::point *p);
+  int add_poly_border(int incoming, active_thread *t, int prev_state,
+                      const Gcalc_scan_iterator::point *p);
+
+protected:
+  Gcalc_function *m_fn;
+  Gcalc_dyn_list::Item **m_res_hook;
+  res_point *m_result;
+  int m_mode;
+
+  res_point *result_heap;
+  active_thread *m_first_active_thread;
+
+  res_point *add_res_point(Gcalc_function::shape_type type);
+  active_thread *new_active_thread() { return (active_thread *)new_item(); }
+
+  poly_instance *new_poly() { return (poly_instance *) new_item(); }
+
+private:
+  int start_line(active_thread *t, const Gcalc_scan_iterator::point *p,
+                 const Gcalc_scan_iterator *si);
+  int end_line(active_thread *t, const Gcalc_scan_iterator *si);
+  int connect_threads(int incoming_a, int incoming_b,
+                      active_thread *ta, active_thread *tb,
+                      const Gcalc_scan_iterator::point *pa,
+                      const Gcalc_scan_iterator::point *pb,
+                      active_thread *prev_range,
+                      const Gcalc_scan_iterator *si,
+                      Gcalc_function::shape_type s_t);
+  int add_single_point(const Gcalc_scan_iterator *si);
+  poly_border *get_pair_border(poly_border *b1);
+  int continue_range(active_thread *t, const Gcalc_heap::Info *p,
+                     const Gcalc_heap::Info *p_next);
+  int continue_i_range(active_thread *t,
+                       const Gcalc_heap::Info *ii);
+  int end_couple(active_thread *t0, active_thread *t1, const Gcalc_heap::Info *p);
+  int get_single_result(res_point *res, Gcalc_result_receiver *storage);
+  int get_result_thread(res_point *cur, Gcalc_result_receiver *storage,
+			int move_upward, res_point *first_poly_node);
+  int get_polygon_result(res_point *cur, Gcalc_result_receiver *storage,
+                         res_point *first_poly_node);
+  int get_line_result(res_point *cur, Gcalc_result_receiver *storage);
+
+  void free_result(res_point *res);
+};
+
+#endif /*GCALC_TOOLS_INCLUDED*/
+
diff --git a/sql/gstream.cc b/sql/gstream.cc
index 25bf599ff4e..f0a8fdad27c 100644
--- a/sql/gstream.cc
+++ b/sql/gstream.cc
@@ -42,6 +42,29 @@ enum Gis_read_stream::enum_tok_types Gis_read_stream::get_next_toc_type()
 }
 
 
+bool Gis_read_stream::lookup_next_word(LEX_STRING *res)
+{
+  const char *cur= m_cur;
+
+  skip_space();
+  res->str= (char*) cur;
+  /* The following will also test for \0 */
+  if ((cur >= m_limit) || !my_isvar_start(&my_charset_bin, *cur))
+    return 1;
+
+  /*
+    We can't combine the following increment with my_isvar() because
+    my_isvar() is a macro that would cause side effects
+  */
+  cur++;
+  while ((cur < m_limit) && my_isvar(&my_charset_bin, *cur))
+    cur++;
+
+  res->length= (uint32) (cur - res->str);
+  return 0;
+}
+
+
 bool Gis_read_stream::get_next_word(LEX_STRING *res)
 {
   skip_space();
diff --git a/sql/gstream.h b/sql/gstream.h
index 1ef90ad5bf0..d3a40f4e7e0 100644
--- a/sql/gstream.h
+++ b/sql/gstream.h
@@ -39,6 +39,7 @@ public:
   }
 
   enum enum_tok_types get_next_toc_type();
+  bool lookup_next_word(LEX_STRING *res);
   bool get_next_word(LEX_STRING *);
   bool get_next_number(double *);
   bool check_next_symbol(char);
@@ -57,6 +58,14 @@ public:
     m_cur++;
     return 0;
   }
+  /* Returns the next notempty character. */
+  char next_symbol() 
+  {
+    skip_space();
+    if (m_cur >= m_limit)
+      return 0;                                 /* EOL meet. */
+    return *m_cur;
+  }
   void set_error_msg(const char *msg);
 
   // caller should free this pointer
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 140931f94f1..fa27a13bcd0 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -8690,6 +8690,8 @@ ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
   DBUG_RETURN(FALSE);
 }
 
+#if 0 
+/* MRR/NDB is disabled, for details see method declarations in ha_ndbcluster.h */
 int
 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                                       KEY_MULTI_RANGE *ranges, 
@@ -9062,6 +9064,7 @@ found_next:
   m_multi_range_result_ptr += reclength;
   DBUG_RETURN(0);
 }
+#endif 
 
 int
 ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
index a17323d3fd6..7c9dba9e30e 100644
--- a/sql/ha_ndbcluster.h
+++ b/sql/ha_ndbcluster.h
@@ -259,10 +259,20 @@ class ha_ndbcluster: public handler
   /**
    * Multi range stuff
    */
+#if 0 
+  /*
+    MRR/NDB is disabled in MariaDB. This is because in MariaDB, we've
+    backported
+     - the latest version of MRR interface (BKA needs this)
+     - the latest version of DS-MRR implementation
+    but didn't backport the latest version MRR/NDB implementation.
+
+  */
   int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                              KEY_MULTI_RANGE*ranges, uint range_count,
                              bool sorted, HANDLER_BUFFER *buffer);
   int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+#endif  
   bool null_value_index_search(KEY_MULTI_RANGE *ranges,
 			       KEY_MULTI_RANGE *end_range,
 			       HANDLER_BUFFER *buffer);
diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc
index 31afaebc8cc..e32736ba5ba 100644
--- a/sql/ha_ndbcluster_binlog.cc
+++ b/sql/ha_ndbcluster_binlog.cc
@@ -1856,7 +1856,7 @@ static void ndb_binlog_query(THD *thd, Cluster_schema *schema)
   else
     thd->server_id= schema->any_value;
   thd->db= schema->db;
-  int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
+  int errcode = query_error_code(thd, thd->killed == NOT_KILLED);
   thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query,
                     schema->query_length, FALSE,
                     schema->name[0] == 0 || thd->db[0] == 0,
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 412734faf23..03a879815e8 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -270,7 +270,6 @@ void ha_partition::init_handler_variables()
   m_extra_prepare_for_update= FALSE;
   m_extra_cache_part_id= NO_CURRENT_PART_ID;
   m_handler_status= handler_not_initialized;
-  m_low_byte_first= 1;
   m_part_field_array= NULL;
   m_ordered_rec_buffer= NULL;
   m_top_entry= NO_CURRENT_PART_ID;
@@ -419,18 +418,11 @@ bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
     Verify that all partitions have the same table_flags.
   */
   check_table_flags= m_file[0]->ha_table_flags();
-  m_low_byte_first= m_file[0]->low_byte_first();
   m_pkey_is_clustered= TRUE;
   file_array= m_file;
   do
   {
     file= *file_array;
-    if (m_low_byte_first != file->low_byte_first())
-    {
-      // Cannot have handlers with different endian
-      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
-      DBUG_RETURN(1);
-    }
     if (!file->primary_key_is_clustered())
       m_pkey_is_clustered= FALSE;
     if (check_table_flags != file->ha_table_flags())
@@ -1315,7 +1307,7 @@ int ha_partition::prepare_new_partition(TABLE *tbl,
 
   DBUG_RETURN(0);
 error_external_lock:
-  VOID(file->close());
+  VOID(file->ha_close());
 error_open:
   VOID(file->ha_delete_table(part_name));
 error_create:
@@ -1361,7 +1353,7 @@ void ha_partition::cleanup_new_partition(uint part_count)
     while ((part_count > 0) && (*file))
     {
       (*file)->ha_external_lock(thd, F_UNLCK);
-      (*file)->close();
+      (*file)->ha_close();
 
       /* Leave the (*file)->ha_delete_table(part_name) to the ddl-log */
 
@@ -2809,7 +2801,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
     Initialize priority queue, initialized to reading forward.
   */
   if ((error= init_queue(&m_queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
-                         0, key_rec_cmp, (void*)this)))
+                         0, key_rec_cmp, (void*)this, 0, 0)))
     goto err_handler;
 
   /*
@@ -2856,7 +2848,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
 err_handler:
   DEBUG_SYNC(ha_thd(), "partition_open_error");
   while (file-- != m_file)
-    (*file)->close();
+    (*file)->ha_close();
 err_alloc:
   bitmap_free(&m_bulk_insert_started);
   if (!m_is_clone_of)
@@ -2942,7 +2934,7 @@ int ha_partition::close(void)
 repeat:
   do
   {
-    (*file)->close();
+    (*file)->ha_close();
   } while (*(++file));
 
   if (first && m_added_file && m_added_file[0])
@@ -3946,6 +3938,8 @@ int ha_partition::rnd_next(uchar *buf)
   int result= HA_ERR_END_OF_FILE;
   uint part_id= m_part_spec.start_part;
   DBUG_ENTER("ha_partition::rnd_next");
+
+  /* upper level will increment this once again at end of call */
   decrement_statistics(&SSV::ha_read_rnd_next_count);
 
   if (NO_CURRENT_PART_ID == part_id)
@@ -4181,6 +4175,7 @@ int ha_partition::index_init(uint inx, bool sorted)
   m_part_spec.start_part= NO_CURRENT_PART_ID;
   m_start_key.length= 0;
   m_ordered= sorted;
+  m_ordered_scan_ongoing= FALSE;
   m_curr_key_info[0]= table->key_info+inx;
   if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
   {
@@ -4983,7 +4978,7 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
 int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
 {
   uint i;
-  uint j= 0;
+  uint j= queue_first_element(&m_queue);
   bool found= FALSE;
   DBUG_ENTER("ha_partition::handle_ordered_index_scan");
 
@@ -4999,6 +4994,12 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
     int error;
     handler *file= m_file[i];
 
+    /*
+      Reset null bits (to avoid valgrind warnings) and to give a default
+      value for not read null fields.
+    */
+    bfill(rec_buf_ptr, table->s->null_bytes, 255);
+
     switch (m_index_scan_type) {
     case partition_index_read:
       error= file->ha_index_read_map(rec_buf_ptr,
@@ -5051,7 +5052,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
     */
     queue_set_max_at_top(&m_queue, reverse_order);
     queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
-    m_queue.elements= j;
+    m_queue.elements= j - queue_first_element(&m_queue);
     queue_fix(&m_queue);
     return_top_record(buf);
     table->status= 0;
@@ -5122,7 +5123,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     if (error == HA_ERR_END_OF_FILE)
     {
       /* Return next buffered row */
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
          DBUG_PRINT("info", ("Record returned from partition %u (2)",
@@ -5134,7 +5135,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
   DBUG_RETURN(0);
@@ -5165,7 +5166,7 @@ int ha_partition::handle_ordered_prev(uchar *buf)
   {
     if (error == HA_ERR_END_OF_FILE)
     {
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
 	return_top_record(buf);
@@ -5177,7 +5178,7 @@ int ha_partition::handle_ordered_prev(uchar *buf)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
   DBUG_RETURN(0);
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 73d1c25c0c4..b69ef7534aa 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -112,7 +112,6 @@ private:
     for this since the MySQL Server sometimes allocating the handler object
     without freeing them.
   */
-  ulong m_low_byte_first;
   enum enum_handler_status
   {
     handler_not_initialized= 0,
@@ -879,6 +878,10 @@ public:
   */
   virtual ulong index_flags(uint inx, uint part, bool all_parts) const
   {
+    /*
+      The following code is not safe if you are using different
+      storage engines or different index types per partition.
+    */
     return m_file[0]->index_flags(inx, part, all_parts);
   }
 
@@ -905,12 +908,6 @@ public:
   virtual uint max_supported_key_part_length() const;
 
   /*
-    All handlers in a partitioned table must have the same low_byte_first
-  */
-  virtual bool low_byte_first() const
-  { return m_low_byte_first; }
-
-  /*
     The extra record buffer length is the maximum needed by all handlers.
     The minimum record length is the maximum of all involved handlers.
   */
diff --git a/sql/handler.cc b/sql/handler.cc
index 1f6a3b9f665..04f10586484 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -79,6 +79,8 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
 static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
 uint known_extensions_id= 0;
 
+static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
+                              bool is_real_trans);
 
 
 static plugin_ref ha_default_plugin(THD *thd)
@@ -343,6 +345,7 @@ int ha_init_errors(void)
   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER(ER_TOO_MANY_CONCURRENT_TRXS));
+  SETMSG(HA_ERR_DISK_FULL,              ER(ER_DISK_FULL));
 
   /* Register the error messages for use with my_error(). */
   return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
@@ -596,6 +599,23 @@ void ha_drop_database(char* path)
 }
 
 
+static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
+                                           void *disable)
+{
+  handlerton *hton= plugin_data(plugin, handlerton *);
+  if (hton->state == SHOW_OPTION_YES && hton->checkpoint_state)
+    hton->checkpoint_state(hton, (int) *(bool*) disable);
+  return FALSE;
+}
+
+
+void ha_checkpoint_state(bool disable)
+{
+  plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
+}
+
+
+
 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
                                    void *unused)
 {
@@ -1077,7 +1097,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
 */
 int ha_commit_trans(THD *thd, bool all)
 {
-  int error= 0, cookie= 0;
+  int error= 0, cookie;
   /*
     'all' means that this is either an explicit commit issued by
     user, or an implicit commit issued by a DDL.
@@ -1092,7 +1112,8 @@ int ha_commit_trans(THD *thd, bool all)
   */
   bool is_real_trans= all || thd->transaction.all.ha_list == 0;
   Ha_trx_info *ha_info= trans->ha_list;
-  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+  bool need_prepare_ordered, need_commit_ordered;
+  my_xid xid;
   DBUG_ENTER("ha_commit_trans");
 
   /* Just a random warning to test warnings pushed during autocommit. */
@@ -1131,89 +1152,114 @@ int ha_commit_trans(THD *thd, bool all)
     DBUG_RETURN(2);
   }
 #ifdef USING_TRANSACTIONS
-  if (ha_info)
+  if (!ha_info)
   {
-    uint rw_ha_count;
-    bool rw_trans;
+    /* Free resources and perform other cleanup even for 'empty' transactions. */
+    if (is_real_trans)
+      thd->transaction.cleanup();
+    DBUG_RETURN(0);
+  }
 
-    DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
+  DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
 
-    /* Close all cursors that can not survive COMMIT */
-    if (is_real_trans)                          /* not a statement commit */
-      thd->stmt_map.close_transient_cursors();
+  /* Close all cursors that can not survive COMMIT */
+  if (is_real_trans)                          /* not a statement commit */
+    thd->stmt_map.close_transient_cursors();
 
-    rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
-    /* rw_trans is TRUE when we in a transaction changing data */
-    rw_trans= is_real_trans && (rw_ha_count > 0);
+  uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
+  /* rw_trans is TRUE when we in a transaction changing data */
+  bool rw_trans= is_real_trans && (rw_ha_count > 0);
 
-    if (rw_trans &&
-        wait_if_global_read_lock(thd, 0, 0))
-    {
-      ha_rollback_trans(thd, all);
-      DBUG_RETURN(1);
-    }
+  if (rw_trans &&
+      wait_if_global_read_lock(thd, 0, 0))
+  {
+    ha_rollback_trans(thd, all);
+    DBUG_RETURN(1);
+  }
 
-    if (rw_trans &&
-        opt_readonly &&
-        !(thd->security_ctx->master_access & SUPER_ACL) &&
-        !thd->slave_thread)
-    {
-      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
-      ha_rollback_trans(thd, all);
-      error= 1;
-      goto end;
-    }
+  if (rw_trans &&
+      opt_readonly &&
+      !(thd->security_ctx->master_access & SUPER_ACL) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+    goto err;
+  }
 
-    if (!trans->no_2pc && (rw_ha_count > 1))
-    {
-      for (; ha_info && !error; ha_info= ha_info->next())
-      {
-        int err;
-        handlerton *ht= ha_info->ht();
-        /*
-          Do not call two-phase commit if this particular
-          transaction is read-only. This allows for simpler
-          implementation in engines that are always read-only.
-        */
-        if (! ha_info->is_trx_read_write())
-          continue;
-        /*
-          Sic: we know that prepare() is not NULL since otherwise
-          trans->no_2pc would have been set.
-        */
-        if ((err= ht->prepare(ht, thd, all)))
-        {
-          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
-          error= 1;
-        }
-        status_var_increment(thd->status_var.ha_prepare_count);
-      }
-      DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
-      if (error || (is_real_trans && xid &&
-                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
-      {
-        ha_rollback_trans(thd, all);
-        error= 1;
-        goto end;
-      }
-      DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
-    }
-    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
-    DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
-    if (cookie)
-      if(tc_log->unlog(cookie, xid))
-      {
-        error= 2;
-        goto end;
-      }
+  if (trans->no_2pc || (rw_ha_count <= 1))
+  {
+    error= ha_commit_one_phase(thd, all);
     DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
-end:
-    if (rw_trans)
-      start_waiting_global_read_lock(thd);
+    goto end;
   }
-  /* Free resources and perform other cleanup even for 'empty' transactions. */
-  else if (is_real_trans)
-    thd->transaction.cleanup();
+
+  need_prepare_ordered= FALSE;
+  need_commit_ordered= FALSE;
+  xid= thd->transaction.xid_state.xid.get_my_xid();
+
+  for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+  {
+    int err;
+    handlerton *ht= hi->ht();
+    /*
+      Do not call two-phase commit if this particular
+      transaction is read-only. This allows for simpler
+      implementation in engines that are always read-only.
+    */
+    if (! hi->is_trx_read_write())
+      continue;
+    /*
+      Sic: we know that prepare() is not NULL since otherwise
+      trans->no_2pc would have been set.
+    */
+    err= ht->prepare(ht, thd, all);
+    status_var_increment(thd->status_var.ha_prepare_count);
+    if (err)
+      my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+
+    if (err)
+      goto err;
+
+    need_prepare_ordered|= (ht->prepare_ordered != NULL);
+    need_commit_ordered|= (ht->commit_ordered != NULL);
+  }
+  DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+
+  if (!is_real_trans)
+  {
+    error= commit_one_phase_2(thd, all, trans, is_real_trans);
+    DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+    goto end;
+  }
+
+  cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
+                                need_commit_ordered);
+  if (!cookie)
+    goto err;
+
+  DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
+
+  error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
+  DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+
+  DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
+  if (tc_log->unlog(cookie, xid))
+  {
+    error= 2;                                /* Error during commit */
+    goto end;
+  }
+
+  DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+  goto end;
+
+  /* Come here if error and we need to rollback. */
+err:
+  error= 1;                                  /* Transaction was rolled back */
+  ha_rollback_trans(thd, all);
+
+end:
+  if (rw_trans)
+    start_waiting_global_read_lock(thd);
 #endif /* USING_TRANSACTIONS */
   DBUG_RETURN(error);
 }
@@ -1224,7 +1270,6 @@ end:
 */
 int ha_commit_one_phase(THD *thd, bool all)
 {
-  int error=0;
   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
   /*
     "real" is a nick name for a transaction for which a commit will
@@ -1234,8 +1279,16 @@ int ha_commit_one_phase(THD *thd, bool all)
     enclosing 'all' transaction is rolled back.
   */
   bool is_real_trans=all || thd->transaction.all.ha_list == 0;
-  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
   DBUG_ENTER("ha_commit_one_phase");
+  DBUG_RETURN(commit_one_phase_2(thd, all, trans, is_real_trans));
+}
+
+static int
+commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
+{
+  int error= 0;
+  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+  DBUG_ENTER("commit_one_phase_2");
 #ifdef USING_TRANSACTIONS
   if (ha_info)
   {
@@ -1259,7 +1312,7 @@ int ha_commit_one_phase(THD *thd, bool all)
     {
 #ifdef HAVE_QUERY_CACHE
       if (thd->transaction.changed_tables)
-        query_cache.invalidate(thd->transaction.changed_tables);
+        query_cache.invalidate(thd, thd->transaction.changed_tables);
 #endif
       thd->variables.tx_isolation=thd->session_tx_isolation;
     }
@@ -1351,13 +1404,12 @@ int ha_rollback_trans(THD *thd, bool all)
     the error log; but we don't want users to wonder why they have this
     message in the error log, so we don't send it.
 
-    We don't have to test for thd->killed == THD::KILL_SYSTEM_THREAD as
+    We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
     it doesn't matter if a warning is pushed to a system thread or not:
     No one will see it...
   */
   if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
-      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION &&
-      thd->killed != THD::KILL_SERVER)
+      !thd->slave_thread && thd->killed < KILL_CONNECTION)
     push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                  ER_WARNING_NOT_COMPLETE_ROLLBACK,
                  ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
@@ -1858,7 +1910,16 @@ int ha_start_consistent_snapshot(THD *thd)
 {
   bool warn= true;
 
+  /*
+    Holding the LOCK_commit_ordered mutex ensures that we get the same
+    snapshot for all engines (including the binary log).  This allows us
+    among other things to do backups with
+    START TRANSACTION WITH CONSISTENT SNAPSHOT and
+    have a consistent binlog position.
+  */
+  pthread_mutex_lock(&LOCK_commit_ordered);
   plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
+  pthread_mutex_unlock(&LOCK_commit_ordered);
 
   /*
     Same idea as when one wants to CREATE TABLE in one engine which does not
@@ -2112,7 +2173,7 @@ THD *handler::ha_thd(void) const
     Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
 */
 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
-                     int test_if_locked)
+                     uint test_if_locked)
 {
   int error;
   DBUG_ENTER("handler::ha_open");
@@ -2156,11 +2217,22 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode,
       dup_ref=ref+ALIGN_SIZE(ref_length);
     cached_table_flags= table_flags();
   }
-  rows_read= rows_changed= 0;
-  memset(index_rows_read, 0, sizeof(index_rows_read));
+  reset_statistics();
+  internal_tmp_table= test(test_if_locked & HA_OPEN_INTERNAL_TABLE);
   DBUG_RETURN(error);
 }
 
+int handler::ha_close()
+{
+  DBUG_ENTER("ha_close");
+  /*
+    Increment global statistics for temporary tables.
+    In_use is 0 for tables that was closed from the table cache.
+  */
+  if (table->in_use)
+    status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
+  DBUG_RETURN(close());
+}
 
 /* Initialize handler for random reading, with error handling */
 
@@ -2492,7 +2564,7 @@ int handler::update_auto_increment()
     /*
       first test if the query was aborted due to strict mode constraints
     */
-    if (thd->killed == THD::KILL_BAD_DATA)
+    if (killed_mask_hard(thd->killed) == KILL_BAD_DATA)
       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
 
     /*
@@ -2553,8 +2625,9 @@ int handler::update_auto_increment()
 void handler::column_bitmaps_signal()
 {
   DBUG_ENTER("column_bitmaps_signal");
-  DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx", (long) table->read_set,
-                      (long) table->write_set));
+  if (table)
+    DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx",
+                        (long) table->read_set, (long) table->write_set));
   DBUG_VOID_RETURN;
 }
 
@@ -2631,6 +2704,7 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
 
 void handler::ha_release_auto_increment()
 {
+  DBUG_ENTER("ha_release_auto_increment");
   release_auto_increment();
   insert_id_for_cur_row= 0;
   auto_inc_interval_for_cur_row.replace(0, 0, 0);
@@ -2644,6 +2718,7 @@ void handler::ha_release_auto_increment()
     */
     table->in_use->auto_inc_intervals_forced.empty();
   }
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2704,6 +2779,11 @@ void handler::print_error(int error, myf errflag)
   case ENOENT:
     textno=ER_FILE_NOT_FOUND;
     break;
+  case ENOSPC:
+  case HA_ERR_DISK_FULL:
+    textno= ER_DISK_FULL;
+    SET_FATAL_ERROR;                            // Ensure error is logged
+    break;
   case HA_ERR_KEY_NOT_FOUND:
   case HA_ERR_NO_ACTIVE_RECORD:
   case HA_ERR_END_OF_FILE:
@@ -2716,6 +2796,12 @@ void handler::print_error(int error, myf errflag)
     SET_FATAL_ERROR;
     textno=ER_KEY_NOT_FOUND;
     break;
+  case HA_ERR_ABORTED_BY_USER:
+  {
+    DBUG_ASSERT(table->in_use->killed);
+    table->in_use->send_kill_message();
+    DBUG_VOID_RETURN;
+  }
   case HA_ERR_WRONG_MRG_TABLE_DEF:
     textno=ER_WRONG_MRG_TABLE;
     break;
@@ -2767,15 +2853,15 @@ void handler::print_error(int error, myf errflag)
         str.length(max_length-4);
         str.append(STRING_WITH_LEN("..."));
       }
-      my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str,
-        str.c_ptr_safe(), key_nr+1);
+      my_error(ER_FOREIGN_DUPLICATE_KEY, errflag, table_share->table_name.str,
+               str.c_ptr_safe(), key_nr+1);
       DBUG_VOID_RETURN;
     }
     textno= ER_DUP_KEY;
     break;
   }
   case HA_ERR_NULL_IN_SPATIAL:
-    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
+    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
     DBUG_VOID_RETURN;
   case HA_ERR_FOUND_DUPP_UNIQUE:
     textno=ER_DUP_UNIQUE;
@@ -2845,21 +2931,21 @@ void handler::print_error(int error, myf errflag)
   {
     String str;
     get_error_message(error, &str);
-    my_error(ER_ROW_IS_REFERENCED_2, MYF(0), str.c_ptr_safe());
+    my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
     DBUG_VOID_RETURN;
   }
   case HA_ERR_NO_REFERENCED_ROW:
   {
     String str;
     get_error_message(error, &str);
-    my_error(ER_NO_REFERENCED_ROW_2, MYF(0), str.c_ptr_safe());
+    my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
     DBUG_VOID_RETURN;
   }
   case HA_ERR_TABLE_DEF_CHANGED:
     textno=ER_TABLE_DEF_CHANGED;
     break;
   case HA_ERR_NO_SUCH_TABLE:
-    my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
+    my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
              table_share->table_name.str);
     DBUG_VOID_RETURN;
   case HA_ERR_RBR_LOGGING_FAILED:
@@ -2871,7 +2957,7 @@ void handler::print_error(int error, myf errflag)
     uint key_nr= get_dup_key(error);
     if ((int) key_nr >= 0)
       ptr= table->key_info[key_nr].name;
-    my_error(ER_DROP_INDEX_FK, MYF(0), ptr);
+    my_error(ER_DROP_INDEX_FK, errflag, ptr);
     DBUG_VOID_RETURN;
   }
   case HA_ERR_TABLE_NEEDS_UPGRADE:
@@ -2900,12 +2986,12 @@ void handler::print_error(int error, myf errflag)
       {
 	const char* engine= table_type();
 	if (temporary)
-	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.c_ptr(),
+	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
                    engine);
 	else
         {
           SET_FATAL_ERROR;
-	  my_error(ER_GET_ERRMSG, MYF(0), error, str.c_ptr(), engine);
+	  my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
         }
       }
       else
@@ -2913,15 +2999,19 @@ void handler::print_error(int error, myf errflag)
       DBUG_VOID_RETURN;
     }
   }
-  if (fatal_error && (debug_assert_if_crashed_table ||
-                      global_system_variables.log_warnings > 1))
+  if (fatal_error)
   {
-    /*
-      Log error to log before we crash or if extended warnings are requested
-    */
-    errflag|= ME_NOREFRESH;
-  }
-    
+    /* Ensure this becomes a true error */
+    errflag&= ~(ME_JUST_WARNING | ME_JUST_INFO);
+    if ((debug_assert_if_crashed_table ||
+                      global_system_variables.log_warnings > 1))
+    {
+      /*
+        Log error to log before we crash or if extended warnings are requested
+      */
+      errflag|= ME_NOREFRESH;
+    }
+  }    
   my_error(textno, errflag, table_share->table_name.str, error);
   DBUG_ASSERT(!fatal_error || !debug_assert_if_crashed_table);
   DBUG_VOID_RETURN;
@@ -3184,7 +3274,7 @@ int handler::rename_table(const char * from, const char * to)
 
 void handler::drop_table(const char *name)
 {
-  close();
+  ha_close();
   delete_table(name);
 }
 
@@ -3496,6 +3586,9 @@ handler::ha_delete_table(const char *name)
   Drop table in the engine: public interface.
 
   @sa handler::drop_table()
+
+  The difference between this and delete_table() is that the table is open in
+  drop_table().
 */
 
 void
@@ -3700,6 +3793,7 @@ void handler::update_global_table_stats()
   TABLE_STATS * table_stats;
 
   status_var_add(table->in_use->status_var.rows_read, rows_read);
+  DBUG_ASSERT(rows_tmp_read == 0);
 
   if (!table->in_use->userstat_running)
   {
@@ -4332,133 +4426,6 @@ void ha_binlog_log_query(THD *thd, handlerton *hton,
 }
 #endif
 
-/**
-  Read the first row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-  @param ranges              An array of KEY_MULTI_RANGE range descriptions.
-  @param range_count         Number of ranges in 'ranges'.
-  @param sorted	      If result should be sorted per key.
-  @param buffer              A HANDLER_BUFFER for internal handler usage.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_first()
-    returns 0.
-    - Sorting is done within each range. If you want an overall sort, enter
-    'ranges' with sorted ranges.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                    KEY_MULTI_RANGE *ranges, uint range_count,
-                                    bool sorted, HANDLER_BUFFER *buffer)
-{
-  int result= HA_ERR_END_OF_FILE;
-  DBUG_ENTER("handler::read_multi_range_first");
-  multi_range_sorted= sorted;
-  multi_range_buffer= buffer;
-
-  table->mark_columns_used_by_index_no_reset(active_index, table->read_set);
-  table->column_bitmaps_set(table->read_set, table->write_set);
-
-  for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
-       multi_range_curr < multi_range_end;
-       multi_range_curr++)
-  {
-    result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                             &multi_range_curr->start_key : 0,
-                             multi_range_curr->end_key.keypart_map ?
-                             &multi_range_curr->end_key : 0,
-                             test(multi_range_curr->range_flag & EQ_RANGE),
-                             multi_range_sorted);
-    if (result != HA_ERR_END_OF_FILE)
-      break;
-  }
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("result %d", result));
-  DBUG_RETURN(result);
-}
-
-
-/**
-  Read the next row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_next()
-    returns 0.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No (more) rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
-{
-  int result= 0;
-  DBUG_ENTER("handler::read_multi_range_next");
-
-  /* We should not be called after the last call returned EOF. */
-  DBUG_ASSERT(multi_range_curr < multi_range_end);
-
-  do
-  {
-    /* Save a call if there can be only one row in range. */
-    if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
-    {
-      result= read_range_next();
-
-      /* On success or non-EOF errors jump to the end. */
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-    else
-    {
-      if (was_semi_consistent_read())
-        goto scan_it_again;
-      /*
-        We need to set this for the last range only, but checking this
-        condition is more expensive than just setting the result code.
-      */
-      result= HA_ERR_END_OF_FILE;
-    }
-
-    multi_range_curr++;
-scan_it_again:
-    /* Try the next range(s) until one matches a record. */
-    for (; multi_range_curr < multi_range_end; multi_range_curr++)
-    {
-      result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                               &multi_range_curr->start_key : 0,
-                               multi_range_curr->end_key.keypart_map ?
-                               &multi_range_curr->end_key : 0,
-                               test(multi_range_curr->range_flag & EQ_RANGE),
-                               multi_range_sorted);
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-  }
-  while ((result == HA_ERR_END_OF_FILE) &&
-         (multi_range_curr < multi_range_end));
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
-  DBUG_RETURN(result);
-}
-
 
 /**
   Read first row between two ranges.
@@ -4562,7 +4529,7 @@ int handler::read_range_next()
 int handler::compare_key(key_range *range)
 {
   int cmp;
-  if (!range)
+  if (!range || in_range_check_pushed_down)
     return 0;					// No max range
   cmp= key_cmp(range_key_part, range->key, range->length);
   if (!cmp)
@@ -4571,6 +4538,44 @@ int handler::compare_key(key_range *range)
 }
 
 
+/*
+  Same as compare_key() but doesn't check have in_range_check_pushed_down.
+  This is used by index condition pushdown implementation.
+*/
+
+int handler::compare_key2(key_range *range)
+{
+  int cmp;
+  if (!range)
+    return 0;					// no max range
+  cmp= key_cmp(range_key_part, range->key, range->length);
+  if (!cmp)
+    cmp= key_compare_result_on_equal;
+  return cmp;
+}
+
+
+/**
+  ICP callback - to be called by an engine to check the pushed condition
+*/
+extern "C" enum icp_result handler_index_cond_check(void* h_arg)
+{
+  handler *h= (handler*)h_arg;
+  THD *thd= h->table->in_use;
+  enum icp_result res;
+
+  if (thd_killed(thd))
+    return ICP_ABORTED_BY_USER;
+
+  if (h->end_range && h->compare_key2(h->end_range) > 0)
+    return ICP_OUT_OF_RANGE;
+  h->increment_statistics(&SSV::ha_icp_attempts);
+  if ((res= h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH) ==
+      ICP_MATCH)
+    h->increment_statistics(&SSV::ha_icp_match);
+  return res;
+}
+
 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
@@ -4757,7 +4762,8 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table)
 
 /** @brief
    Write table maps for all (manually or automatically) locked tables
-   to the binary log.
+   to the binary log. Also, if binlog_annotate_row_events is ON,
+   write Annotate_rows event before the first table map.
 
    SYNOPSIS
      write_locked_table_maps()
@@ -4794,6 +4800,9 @@ static int write_locked_table_maps(THD *thd)
     locks[0]= thd->extra_lock;
     locks[1]= thd->lock;
     locks[2]= thd->locked_tables;
+    my_bool with_annotate= thd->variables.binlog_annotate_row_events &&
+                           thd->query() && thd->query_length();
+
     for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
     {
       MYSQL_LOCK const *const lock= locks[i];
@@ -4811,7 +4820,8 @@ static int write_locked_table_maps(THD *thd)
             check_table_binlog_row_based(thd, table))
         {
           int const has_trans= table->file->has_transactions();
-          int const error= thd->binlog_write_table_map(table, has_trans);
+          int const error= thd->binlog_write_table_map(table, has_trans,
+                                                       &with_annotate);
           /*
             If an error occurs, it is the responsibility of the caller to
             roll back the transaction.
@@ -4910,6 +4920,9 @@ int handler::ha_reset()
   /* reset the bitmaps to point to defaults */
   table->default_column_bitmaps();
   pushed_cond= NULL;
+  /* Reset information about pushed engine conditions */
+  cancel_pushed_idx_cond();
+  /* Reset information about pushed index conditions */
   DBUG_RETURN(reset());
 }
 
diff --git a/sql/handler.h b/sql/handler.h
index 79e838d2ca6..28d8a96a895 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1,5 +1,6 @@
 /*
    Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+   Copyright 2009-2011 Monty Program Ab
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
@@ -140,6 +141,9 @@
 #define HA_HAS_NEW_CHECKSUM    (LL(1) << 36)
 #define HA_CAN_VIRTUAL_COLUMNS (LL(1) << 37)
 
+#define HA_MRR_CANT_SORT       (LL(1) << 37)
+#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (LL(1) << 38)
+
 /*
   Set of all binlog flags. Currently only contain the capabilities
   flags.
@@ -155,6 +159,18 @@
 #define HA_KEYREAD_ONLY         64	/* Support HA_EXTRA_KEYREAD */
 
 /*
+  Index scan will not return records in rowid order. Not guaranteed to be
+  set for unordered (e.g. HASH) indexes.
+*/
+#define HA_KEY_SCAN_NOT_ROR     128 
+#define HA_DO_INDEX_COND_PUSHDOWN  256 /* Supports Index Condition Pushdown */
+/*
+  Data is clustered on this key. This means that when you read the key
+  you also get the row data without any additional disk reads.
+*/
+#define HA_CLUSTERED_INDEX      512
+
+/*
   bits in alter_table_flags:
 */
 /*
@@ -207,12 +223,6 @@
 #define HA_FAST_CHANGE_PARTITION                (1L << 13)
 #define HA_PARTITION_ONE_PHASE                  (1L << 14)
 
-/*
-  Index scan will not return records in rowid order. Not guaranteed to be
-  set for unordered (e.g. HASH) indexes.
-*/
-#define HA_KEY_SCAN_NOT_ROR     128 
-
 /* operations for disable/enable indexes */
 #define HA_KEY_SWITCH_NONUNIQ      0
 #define HA_KEY_SWITCH_ALL          1
@@ -771,12 +781,113 @@ struct handlerton
      NOTE 'all' is also false in auto-commit mode where 'end of statement'
      and 'real commit' mean the same event.
    */
-   int  (*commit)(handlerton *hton, THD *thd, bool all);
+   int (*commit)(handlerton *hton, THD *thd, bool all);
+   /*
+     The commit_ordered() method is called prior to the commit() method, after
+     the transaction manager has decided to commit (not rollback) the
+     transaction. Unlike commit(), commit_ordered() is called only when the
+     full transaction is committed, not for each commit of statement
+     transaction in a multi-statement transaction.
+
+     Not that like prepare(), commit_ordered() is only called when 2-phase
+     commit takes place. Ie. when no binary log and only a single engine
+     participates in a transaction, one commit() is called, no
+     commit_ordered(). So engines must be prepared for this.
+
+     The calls to commit_ordered() in multiple parallel transactions is
+     guaranteed to happen in the same order in every participating
+     handler. This can be used to ensure the same commit order among multiple
+     handlers (eg. in table handler and binlog). So if transaction T1 calls
+     into commit_ordered() of handler A before T2, then T1 will also call
+     commit_ordered() of handler B before T2.
+
+     Engines that implement this method should during this call make the
+     transaction visible to other transactions, thereby making the order of
+     transaction commits be defined by the order of commit_ordered() calls.
+
+     The intention is that commit_ordered() should do the minimal amount of
+     work that needs to happen in consistent commit order among handlers. To
+     preserve ordering, calls need to be serialised on a global mutex, so
+     doing any time-consuming or blocking operations in commit_ordered() will
+     limit scalability.
+
+     Handlers can rely on commit_ordered() calls to be serialised (no two
+     calls can run in parallel, so no extra locking on the handler part is
+     required to ensure this).
+
+     Note that commit_ordered() can be called from a different thread than the
+     one handling the transaction! So it can not do anything that depends on
+     thread local storage, in particular it can not call my_error() and
+     friends (instead it can store the error code and delay the call of
+     my_error() to the commit() method).
+
+     Similarly, since commit_ordered() returns void, any return error code
+     must be saved and returned from the commit() method instead.
+
+     The commit_ordered method is optional, and can be left unset if not
+     needed in a particular handler (then there will be no ordering guarantees
+     wrt. other engines and binary log).
+   */
+   void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
    int  (*rollback)(handlerton *hton, THD *thd, bool all);
    int  (*prepare)(handlerton *hton, THD *thd, bool all);
+   /*
+     The prepare_ordered method is optional. If set, it will be called after
+     successful prepare() in all handlers participating in 2-phase
+     commit. Like commit_ordered(), it is called only when the full
+     transaction is committed, not for each commit of statement transaction.
+
+     The calls to prepare_ordered() among multiple parallel transactions are
+     ordered consistently with calls to commit_ordered(). This means that
+     calls to prepare_ordered() effectively define the commit order, and that
+     each handler will see the same sequence of transactions calling into
+     prepare_ordered() and commit_ordered().
+
+     Thus, prepare_ordered() can be used to define commit order for handlers
+     that need to do this in the prepare step (like binlog). It can also be
+     used to release transaction's locks early in an order consistent with the
+     order transactions will be eventually committed.
+
+     Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+     ordering, so the intention is that they should execute fast, with only
+     the minimal amount of work needed to define commit order. Handlers can
+     rely on this serialisation, and do not need to do any extra locking to
+     avoid two prepare_ordered() calls running in parallel.
+
+     Like commit_ordered(), prepare_ordered() is not guaranteed to be called
+     in the context of the thread handling the rest of the transaction. So it
+     cannot invoke code that relies on thread local storage, in particular it
+     cannot call my_error().
+
+     prepare_ordered() cannot cause a rollback by returning an error, all
+     possible errors must be handled in prepare() (the prepare_ordered()
+     method returns void). In case of some fatal error, a record of the error
+     must be made internally by the engine and returned from commit() later.
+
+     Note that for user-level XA SQL commands, no consistent ordering among
+     prepare_ordered() and commit_ordered() is guaranteed (as that would
+     require blocking all other commits for an indefinite time).
+
+     When 2-phase commit is not used (eg. only one engine (and no binlog) in
+     transaction), neither prepare() nor prepare_ordered() is called.
+   */
+   void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
    int  (*recover)(handlerton *hton, XID *xid_list, uint len);
    int  (*commit_by_xid)(handlerton *hton, XID *xid);
    int  (*rollback_by_xid)(handlerton *hton, XID *xid);
+  /*
+    "Disable or enable checkpointing internal to the storage engine. This is
+    used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
+    the engine will never start any recovery from a time between
+    FLUSH TABLES ... ; UNLOCK TABLES.
+
+    While checkpointing is disabled, the engine should pause any background
+    write activity (such as tablespace checkpointing) that require consistency
+    between different files (such as transaction log and tablespace files) for
+    crash recovery to succeed. The idea is to use this to make safe
+    multi-volume LVM snapshot backups.
+  */
+   int  (*checkpoint_state)(handlerton *hton, bool disabled);
    void *(*create_cursor_read_view)(handlerton *hton, THD *thd);
    void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
    void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
@@ -1149,6 +1260,244 @@ typedef struct st_ha_check_opt
 } HA_CHECK_OPT;
 
 
+/********************************************************************************
+ * MRR
+ ********************************************************************************/
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+  /*
+    Get key information
+ 
+    SYNOPSIS
+      get_key_info()
+        init_params  The seq_init_param parameter 
+        length       OUT length of the keys in this range sequence
+        map          OUT key_part_map of the keys in this range sequence
+
+    DESCRIPTION
+      This function is set only when using HA_MRR_FIXED_KEY mode. In that mode, 
+      all ranges are single-point equality ranges that use the same set of key
+      parts. This function allows the MRR implementation to get the length of
+      a key, and which keyparts it uses.
+  */
+  void (*get_key_info)(void *init_params, uint *length, key_part_map *map);
+
+  /*
+    Initialize the traversal of range sequence
+    
+    SYNOPSIS
+      init()
+        init_params  The seq_init_param parameter 
+        n_ranges     The number of ranges obtained 
+        flags        A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+    RETURN
+      An opaque value to be used as RANGE_SEQ_IF::next() parameter
+  */
+  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+  /*
+    Get the next range in the range sequence
+
+    SYNOPSIS
+      next()
+        seq    The value returned by RANGE_SEQ_IF::init()
+        range  OUT Information about the next range
+    
+    RETURN
+      FALSE - Ok, the range structure filled with info about the next range
+      TRUE  - No more ranges
+  */
+  bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+
+  /*
+    Check whether range_info orders to skip the next record
+
+    SYNOPSIS
+      skip_record()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range 
+                    (Ignored if MRR_NO_ASSOCIATION is set)
+        rowid       Rowid of the record to be checked (ignored if set to 0)
+    
+    RETURN
+      1 - Record with this range_info and/or this rowid shall be filtered
+          out from the stream of records returned by multi_range_read_next()
+      0 - The record shall be left in the stream
+  */ 
+  bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);
+
+  /*
+    Check if the record combination matches the index condition
+    SYNOPSIS
+      skip_index_tuple()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range 
+    
+    RETURN
+      0 - The record combination satisfies the index condition
+      1 - Otherwise
+  */ 
+  bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
+} RANGE_SEQ_IF;
+
+typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
+
+class COST_VECT
+{ 
+public:
+  double io_count;     /* number of I/O                 */
+  double avg_io_cost;  /* cost of an average I/O oper.  */
+  double cpu_cost;     /* cost of operations in CPU     */
+  double mem_cost;     /* cost of used memory           */ 
+  double import_cost;  /* cost of remote operations     */
+  
+  enum { IO_COEFF=1 };
+  enum { CPU_COEFF=1 };
+  enum { MEM_COEFF=1 };
+  enum { IMPORT_COEFF=1 };
+
+  COST_VECT() {}                              // keep gcc happy
+
+  double total_cost() 
+  {
+    return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+  }
+
+  void zero()
+  {
+    avg_io_cost= 1.0;
+    io_count= cpu_cost= mem_cost= import_cost= 0.0;
+  }
+
+  void multiply(double m)
+  {
+    io_count *= m;
+    cpu_cost *= m;
+    import_cost *= m;
+    /* Don't multiply mem_cost */
+  }
+
+  void add(const COST_VECT* cost)
+  {
+    double io_count_sum= io_count + cost->io_count;
+    add_io(cost->io_count, cost->avg_io_cost);
+    io_count= io_count_sum;
+    cpu_cost += cost->cpu_cost;
+  }
+  void add_io(double add_io_cnt, double add_avg_cost)
+  {
+    /* In edge cases add_io_cnt may be zero */
+    if (add_io_cnt > 0)
+    {
+      double io_count_sum= io_count + add_io_cnt;
+      avg_io_cost= (io_count * avg_io_cost + 
+                    add_io_cnt * add_avg_cost) / io_count_sum;
+      io_count= io_count_sum;
+    }
+  }
+
+  /*
+    To be used when we go from old single value-based cost calculations to
+    the new COST_VECT-based.
+  */
+  void convert_from_cost(double cost)
+  {
+    zero();
+    avg_io_cost= 1.0;
+    io_count= cost;
+  }
+};
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost);
+
+/*
+  Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+  The ranges may not use the full key but all of them will use the same number
+  of key parts.
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY  2
+
+/* 
+  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+  'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/* 
+  The MRR user will provide ranges in key order, and MRR implementation
+  must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/* 
+  The passed memory buffer is of maximum possible size, the caller can't
+  assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+  Flag set <=> default MRR implementation is used
+  (The choice is made by **_info[_const]() function which may set this
+   flag. SQL layer remembers the flag value and then passes it to
+   multi_read_range_init().
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
+/*
+  Used only as parameter to multi_range_read_info():
+  Flag set <=> the caller guarantees that the bounds of the scanned ranges
+  will not have NULL values.
+*/
+#define HA_MRR_NO_NULL_ENDPOINTS 128
+
+/*
+  The MRR user has materialized range keys somewhere in the user's buffer.
+  This can be used for optimization of the procedure that sorts these keys
+  since in this case key values don't have to be copied into the MRR buffer.
+
+  In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the 
+  pointer in range->start_key.key will point to a key value that will remain 
+  there until the end of the MRR scan.
+*/
+#define HA_MRR_MATERIALIZED_KEYS 256
+
+/*
+  The following bits are reserved for use by MRR implementation. The intended
+  use scenario:
+
+  * sql layer calls handler->multi_range_read_info[_const]() 
+    - MRR implementation figures out what kind of scan it will perform, saves
+      the result in *mrr_mode parameter.
+  * sql layer remembers what was returned in *mrr_mode
+
+  * the optimizer picks the query plan (which may or may not include the MRR 
+    scan that was estimated by the multi_range_read_info[_const] call)
+
+  * if the query is an EXPLAIN statement, sql layer will call 
+    handler->multi_range_read_explain_info(mrr_mode) to get a text description
+    of the picked MRR scan; the description will be a part of EXPLAIN output.
+*/
+#define HA_MRR_IMPLEMENTATION_FLAG1 512
+#define HA_MRR_IMPLEMENTATION_FLAG2 1024
+#define HA_MRR_IMPLEMENTATION_FLAG3 2048
+#define HA_MRR_IMPLEMENTATION_FLAG4 4096
+#define HA_MRR_IMPLEMENTATION_FLAG5 8192
+#define HA_MRR_IMPLEMENTATION_FLAG6 16384
+
+#define HA_MRR_IMPLEMENTATION_FLAGS \
+  (512 | 1024 | 2048 | 4096 | 8192 | 16384)
 
 /*
   This is a buffer area that the handler can use to store rows.
@@ -1159,8 +1508,8 @@ typedef struct st_ha_check_opt
 
 typedef struct st_handler_buffer
 {
-  const uchar *buffer;         /* Buffer one can start using */
-  const uchar *buffer_end;     /* End of buffer */
+  /* const? */uchar *buffer;         /* Buffer one can start using */
+  /* const? */uchar *buffer_end;     /* End of buffer */
   uchar *end_of_used_area;     /* End of area that was used by handler */
 } HANDLER_BUFFER;
 
@@ -1191,14 +1540,21 @@ public:
   time_t update_time;
   uint block_size;			/* index block size */
 
+  /*
+    number of buffer bytes that native mrr implementation needs,
+  */
+  uint mrr_length_per_rec; 
+
   ha_statistics():
     data_file_length(0), max_data_file_length(0),
     index_file_length(0), delete_length(0), auto_increment_value(0),
     records(0), deleted(0), mean_rec_length(0), create_time(0),
-    check_time(0), update_time(0), block_size(0)
+    check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
   {}
 };
 
+extern "C" enum icp_result handler_index_cond_check(void* h_arg);
+
 uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map);
 /*
   bitmap with first N+1 bits set
@@ -1234,17 +1590,31 @@ public:
 
   ha_statistics stats;
 
-  /** The following are for read_multi_range */
-  bool multi_range_sorted;
-  KEY_MULTI_RANGE *multi_range_curr;
-  KEY_MULTI_RANGE *multi_range_end;
-  HANDLER_BUFFER *multi_range_buffer;
+  /** MultiRangeRead-related members: */
+  range_seq_t mrr_iter;    /* Interator to traverse the range sequence */
+  RANGE_SEQ_IF mrr_funcs;  /* Range sequence traversal functions */
+  HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
+  uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+  /* TRUE <=> source MRR ranges and the output are ordered */
+  bool mrr_is_output_sorted;
+
+  /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
+  bool mrr_have_range;
+  /** Current range (the one we're now returning rows from) */
+  KEY_MULTI_RANGE mrr_cur_range;
 
   /** The following are for read_range() */
   key_range save_end_range, *end_range;
   KEY_PART_INFO *range_key_part;
   int key_compare_result_on_equal;
   bool eq_range;
+  bool internal_tmp_table;                      /* If internal tmp table */
+
+  /* 
+    TRUE <=> the engine guarantees that returned records are within the range
+    being scanned.
+  */
+  bool in_range_check_pushed_down;
 
   uint errkey;				/* Last dup key */
   uint key_used_on_scan;
@@ -1258,6 +1628,8 @@ public:
   bool mark_trx_done;
   bool cloned;                          /* 1 if this was created with clone */
   const COND *pushed_cond;
+  Item *pushed_idx_cond;
+  uint pushed_idx_cond_keyno;  /* The index which the above condition is for */
   /**
     next_insert_id is the next value which should be inserted into the
     auto_increment column: in a inserting-multi-row statement (like INSERT
@@ -1281,6 +1653,7 @@ public:
   */
   /* Statistics  variables */
   ulonglong rows_read;
+  ulonglong rows_tmp_read;
   ulonglong rows_changed;
   /* One bigger than needed to avoid to test if key == MAX_KEY */
   ulonglong index_rows_read[MAX_KEY+1];
@@ -1297,11 +1670,14 @@ public:
   handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
     :table_share(share_arg), table(0),
     estimation_rows_to_insert(0), ht(ht_arg),
-    ref(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+    ref(0), end_range(NULL), in_range_check_pushed_down(FALSE),
+    key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
     ref_length(sizeof(my_off_t)),
     ft_handler(0), inited(NONE),
     locked(FALSE), implicit_emptied(FALSE), mark_trx_done(FALSE), cloned(0),
-    pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+    pushed_cond(0), pushed_idx_cond(NULL),
+    pushed_idx_cond_keyno(MAX_KEY),
+    next_insert_id(0), insert_id_for_cur_row(0),
     auto_inc_intervals_count(0)
     {
       reset_statistics();
@@ -1320,7 +1696,7 @@ public:
   }
   /* ha_ methods: pubilc wrappers for private virtual API */
 
-  int ha_open(TABLE *table, const char *name, int mode, int test_if_locked);
+  int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked);
   int ha_index_init(uint idx, bool sorted)
   {
     int result;
@@ -1330,6 +1706,7 @@ public:
     {
       inited=       INDEX;
       active_index= idx;
+      end_range= NULL;
     }
     DBUG_RETURN(result);
   }
@@ -1339,6 +1716,7 @@ public:
     DBUG_ASSERT(inited==INDEX);
     inited=       NONE;
     active_index= MAX_KEY;
+    end_range=    NULL;
     DBUG_RETURN(index_end());
   }
   /* This is called after index_init() if we need to do a index scan */
@@ -1349,6 +1727,7 @@ public:
     DBUG_ENTER("ha_rnd_init");
     DBUG_ASSERT(inited==NONE || (inited==RND && scan));
     inited= (result= rnd_init(scan)) ? NONE: RND;
+    end_range= NULL;
     DBUG_RETURN(result);
   }
   int ha_rnd_end()
@@ -1356,6 +1735,7 @@ public:
     DBUG_ENTER("ha_rnd_end");
     DBUG_ASSERT(inited==RND);
     inited=NONE;
+    end_range= NULL;
     DBUG_RETURN(rnd_end());
   }
   int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
@@ -1442,7 +1822,7 @@ public:
   uint get_dup_key(int error);
   void reset_statistics()
   {
-    rows_read= rows_changed= 0;
+    rows_read= rows_changed= rows_tmp_read= 0;
     bzero(index_rows_read, sizeof(index_rows_read));
   }
   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
@@ -1527,7 +1907,7 @@ public:
   */
   uint get_index(void) const
   { return inited == INDEX ? active_index : MAX_KEY; }
-  virtual int close(void)=0;
+  int ha_close(void);
 
   /**
     @retval  0   Bulk update used by handler
@@ -1603,10 +1983,18 @@ protected:
   virtual int index_last(uchar * buf)
    { return  HA_ERR_WRONG_COMMAND; }
   virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
+  virtual int close(void)=0;
+  inline void update_rows_read()
+  {
+    if (likely(!internal_tmp_table))
+      rows_read++;
+    else
+      rows_tmp_read++;
+  }
   inline void update_index_statistics()
   {
     index_rows_read[active_index]++;
-    rows_read++;
+    update_rows_read();
   }
 public:
 
@@ -1622,16 +2010,53 @@ public:
   inline int ha_index_first(uchar * buf);
   inline int ha_index_last(uchar * buf);
   inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
+  /*
+    TODO: should we make for those functions non-virtual ha_func_name wrappers,
+    too?
+  */
+  virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                              void *seq_init_param, 
+                                              uint n_ranges, uint *bufsz,
+                                              uint *mrr_mode, COST_VECT *cost);
+  virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                        uint key_parts, uint *bufsz, 
+                                        uint *mrr_mode, COST_VECT *cost);
+  virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                                    uint n_ranges, uint mrr_mode, 
+                                    HANDLER_BUFFER *buf);
+  virtual int multi_range_read_next(range_id_t *range_info);
+  /*
+    Return string representation of the MRR plan.
+
+    This is intended to be used for EXPLAIN, via the following scenario:
+    1. SQL layer calls handler->multi_range_read_info().
+    1.1. Storage engine figures out whether it will use some non-default
+         MRR strategy, sets appropritate bits in *mrr_mode, and returns 
+         control to SQL layer
+    2. SQL layer remembers the returned mrr_mode
+    3. SQL layer compares various options and choses the final query plan. As
+       a part of that, it makes a choice of whether to use the MRR strategy
+       picked in 1.1
+    4. EXPLAIN code converts the query plan to its text representation. If MRR
+       strategy is part of the plan, it calls
+       multi_range_read_explain_info(mrr_mode) to get a text representation of
+       the picked MRR strategy.
+
+    @param mrr_mode   Mode which was returned by multi_range_read_info[_const]
+    @param str        INOUT string to be printed for EXPLAIN
+    @param str_end    End of the string buffer. The function is free to put the 
+                      string into [str..str_end] memory range.
+  */
+  virtual int multi_range_read_explain_info(uint mrr_mode, char *str, 
+                                            size_t size)
+  { return 0; }
 
-  virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                     KEY_MULTI_RANGE *ranges, uint range_count,
-                                     bool sorted, HANDLER_BUFFER *buffer);
-  virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
   virtual int read_range_first(const key_range *start_key,
                                const key_range *end_key,
                                bool eq_range, bool sorted);
   virtual int read_range_next();
   int compare_key(key_range *range);
+  int compare_key2(key_range *range);
   virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
   void ft_end() { ft_handler=NULL; }
   virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
@@ -1830,7 +2255,6 @@ public:
   virtual uint max_supported_key_part_length() const { return 255; }
   virtual uint min_record_length(uint options) const { return 1; }
 
-  virtual bool low_byte_first() const { return 1; }
   virtual uint checksum() const { return 0; }
   virtual bool is_crashed() const  { return 0; }
   virtual bool auto_repair() const { return 0; }
@@ -1910,9 +2334,28 @@ public:
 
 
  /*
-   @retval TRUE   Primary key (if there is one) is clustered
-                  key covering all fields
-   @retval FALSE  otherwise
+   Check if the primary key (if there is one) is a clustered and a
+   reference key. This means:
+
+   - Data is stored together with the primary key (no secondary lookup
+     needed to find the row data). The optimizer uses this to find out
+     the cost of fetching data.
+   - The primary key is part of each secondary key and is used
+     to find the row data in the primary index when reading trough
+     secondary indexes.
+   - When doing a HA_KEYREAD_ONLY we get also all the primary key parts
+     into the row. This is critical property used by index_merge.
+
+   All the above is usually true for engines that store the row
+   data in the primary key index (e.g. in a b-tree), and use the primary
+   key value as a position().  InnoDB is an example of such an engine.
+
+   For such a clustered primary key, the following should also hold:
+   index_flags() should contain HA_CLUSTERED_INDEX
+   table_flags() should contain HA_TABLE_SCAN_ON_INDEX
+
+   @retval TRUE   yes
+   @retval FALSE  No.
  */
  virtual bool primary_key_is_clustered() { return FALSE; }
  virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
@@ -1953,6 +2396,14 @@ public:
    Pops the top if condition stack, if stack is not empty.
  */
  virtual void cond_pop() { return; };
+ virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; }
+ /** Reset information about pushed index conditions */
+ virtual void cancel_pushed_idx_cond()
+ {
+   pushed_idx_cond= NULL;
+   pushed_idx_cond_keyno= MAX_KEY;
+   in_range_check_pushed_down= false;
+ }
  virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
 					 uint table_changes)
  { return COMPATIBLE_DATA_NO; }
@@ -1972,6 +2423,18 @@ public:
 
   LEX_STRING *engine_name() { return hton_name(ht); }
 
+  /*
+    @brief
+    Check whether the engine supports virtual columns
+    
+    @retval
+      FALSE   if the engine does not support virtual columns    
+    @retval
+      TRUE    if the engine supports virtual columns
+  */
+  virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
+  
+  TABLE* get_table() { return table; }
 protected:
   /* deprecated, don't use in new engines */
   inline void ha_statistic_increment(ulong SSV::*offset) const { }
@@ -2012,6 +2475,7 @@ private:
   */
 
   virtual int open(const char *name, int mode, uint test_if_locked)=0;
+  /* Note: ha_index_read_idx_map() may buypass index_init() */
   virtual int index_init(uint idx, bool sorted) { return 0; }
   virtual int index_end() { return 0; }
   /**
@@ -2168,8 +2632,15 @@ public:
   /* XXX to be removed, see ha_partition::partition_ht() */
   virtual handlerton *partition_ht() const
   { return ht; }
+  inline int ha_write_tmp_row(uchar *buf);
+  inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data);
+
+  friend enum icp_result handler_index_cond_check(void* h_arg);
 };
 
+#include "multi_range_read.h"
+
+bool key_uses_partial_cols(TABLE *table, uint keyno);
 
 	/* Some extern variables used with handlers */
 
@@ -2228,6 +2699,7 @@ int ha_panic(enum ha_panic_function flag);
 void ha_close_connection(THD* thd);
 bool ha_flush_logs(handlerton *db_type);
 void ha_drop_database(char* path);
+void ha_checkpoint_state(bool disable);
 int ha_create_table(THD *thd, const char *path,
                     const char *db, const char *table_name,
                     HA_CREATE_INFO *create_info,
diff --git a/sql/item.cc b/sql/item.cc
index 72adb9d0b52..f9bbc4aeead 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -30,6 +30,19 @@
 
 const String my_null_string("NULL", 4, default_charset_info);
 
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions);
+
+
+/**
+  Compare two Items for List<Item>::add_unique()
+*/
+
+bool cmp_items(Item *a, Item *b)
+{
+  return a->eq(b, FALSE);
+}
+
 /****************************************************************************/
 
 /* Hybrid_type_traits {_real} */
@@ -196,10 +209,12 @@ bool Item::val_bool()
   case STRING_RESULT:
     return val_real() != 0.0;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return 0;                                   // Wrong (but safe)
   }
+  return 0;                                   // Wrong (but safe)
 }
 
 
@@ -234,6 +249,21 @@ String *Item::val_string_from_decimal(String *str)
 }
 
 
+String *Item::val_string_from_date(String *str)
+{
+  MYSQL_TIME ltime;
+  if (get_date(&ltime, TIME_FUZZY_DATE) ||
+      str->alloc(MAX_DATE_STRING_REP_LENGTH))
+  {
+    null_value= 1;
+    return (String *) 0;
+  }
+  str->length(my_TIME_to_str(&ltime, const_cast<char*>(str->ptr()), decimals));
+  str->set_charset(&my_charset_bin);
+  return str;
+}
+
+
 my_decimal *Item::val_decimal_from_real(my_decimal *decimal_value)
 {
   double nr= val_real();
@@ -330,7 +360,7 @@ int Item::save_time_in_field(Field *field)
   if (get_time(&ltime))
     return set_field_to_null_with_conversions(field, 0);
   field->set_notnull();
-  return field->store_time(&ltime, MYSQL_TIMESTAMP_TIME);
+  return field->store_time_dec(&ltime, decimals);
 }
 
 
@@ -340,7 +370,7 @@ int Item::save_date_in_field(Field *field)
   if (get_date(&ltime, TIME_FUZZY_DATE))
     return set_field_to_null_with_conversions(field, 0);
   field->set_notnull();
-  return field->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
+  return field->store_time_dec(&ltime, decimals);
 }
 
 
@@ -376,16 +406,18 @@ int Item::save_str_value_in_field(Field *field, String *result)
 
 
 Item::Item():
-  rsize(0), name(0), orig_name(0), name_length(0), fixed(0),
-  is_autogenerated_name(TRUE),
+  is_expensive_cache(-1), rsize(0), name(0), orig_name(0), name_length(0),
+  fixed(0), is_autogenerated_name(TRUE),
   collation(&my_charset_bin, DERIVATION_COERCIBLE)
 {
   marker= 0;
-  maybe_null=null_value=with_sum_func=unsigned_flag=0;
+  maybe_null=null_value=with_sum_func=with_field=unsigned_flag=0;
   in_rollup= 0;
   decimals= 0; max_length= 0;
   with_subselect= 0;
   cmp_context= IMPOSSIBLE_RESULT;
+   /* Initially this item is not attached to any JOIN_TAB. */
+  join_tab_idx= MAX_TABLES;
 
   /* Put item in free list so that we can free all items at end */
   THD *thd= current_thd;
@@ -414,6 +446,8 @@ Item::Item():
   tables.
 */
 Item::Item(THD *thd, Item *item):
+  join_tab_idx(item->join_tab_idx),
+  is_expensive_cache(-1),
   rsize(0),
   str_value(item->str_value),
   name(item->name),
@@ -427,6 +461,7 @@ Item::Item(THD *thd, Item *item):
   null_value(item->null_value),
   unsigned_flag(item->unsigned_flag),
   with_sum_func(item->with_sum_func),
+  with_field(item->with_field),
   fixed(item->fixed),
   is_autogenerated_name(item->is_autogenerated_name),
   with_subselect(item->with_subselect),
@@ -465,11 +500,40 @@ void Item::print_item_w_name(String *str, enum_query_type query_type)
 }
 
 
+void Item::print_value(String *str)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String *ptr, tmp(buff,sizeof(buff),str->charset());
+  ptr= val_str(&tmp);
+  if (!ptr)
+    str->append("NULL");
+  else
+  {
+    switch (result_type()) {
+    case STRING_RESULT:
+      append_unescaped(str, ptr->ptr(), ptr->length());
+      break;
+    case DECIMAL_RESULT:
+    case REAL_RESULT:
+    case INT_RESULT:
+      str->append(*ptr);
+      break;
+    case ROW_RESULT:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);
+    }
+  }
+}
+
+
 void Item::cleanup()
 {
   DBUG_ENTER("Item::cleanup");
+  DBUG_PRINT("enter", ("this: %p", this));
   fixed=0;
   marker= 0;
+  join_tab_idx= MAX_TABLES;
   if (orig_name)
     name= orig_name;
   DBUG_VOID_RETURN;
@@ -507,6 +571,45 @@ void Item::rename(char *new_name)
   name= new_name;
 }
 
+Item_result Item::cmp_type() const
+{
+  switch (field_type()) {
+  case MYSQL_TYPE_DECIMAL:
+  case MYSQL_TYPE_NEWDECIMAL:
+                           return DECIMAL_RESULT;
+  case MYSQL_TYPE_TINY:
+  case MYSQL_TYPE_SHORT:
+  case MYSQL_TYPE_LONG:
+  case MYSQL_TYPE_LONGLONG:
+  case MYSQL_TYPE_INT24:
+  case MYSQL_TYPE_YEAR:
+  case MYSQL_TYPE_BIT:
+                           return INT_RESULT;
+  case MYSQL_TYPE_FLOAT:
+  case MYSQL_TYPE_DOUBLE:
+                           return REAL_RESULT;
+  case MYSQL_TYPE_NULL:
+  case MYSQL_TYPE_VARCHAR:
+  case MYSQL_TYPE_TINY_BLOB:
+  case MYSQL_TYPE_MEDIUM_BLOB:
+  case MYSQL_TYPE_LONG_BLOB:
+  case MYSQL_TYPE_BLOB:
+  case MYSQL_TYPE_VAR_STRING:
+  case MYSQL_TYPE_STRING:
+  case MYSQL_TYPE_ENUM:
+  case MYSQL_TYPE_SET:
+  case MYSQL_TYPE_GEOMETRY:
+                           return STRING_RESULT;
+  case MYSQL_TYPE_TIMESTAMP:
+  case MYSQL_TYPE_DATE:
+  case MYSQL_TYPE_TIME:
+  case MYSQL_TYPE_DATETIME:
+  case MYSQL_TYPE_NEWDATE:
+                           return TIME_RESULT;
+  };
+  DBUG_ASSERT(0);
+  return (Item_result)-1;
+}
 
 /**
   Traverse item tree possibly transforming it (replacing items).
@@ -545,6 +648,36 @@ Item* Item::transform(Item_transformer transformer, uchar *arg)
 }
 
 
+/**
+  Create and set up an expression cache for this item
+
+  @param thd             Thread handle
+  @param depends_on      List of the expression parameters
+
+  @details
+  The function creates an expression cache for an item and its parameters
+  specified by the 'depends_on' list. Then the expression cache is placed
+  into a cache wrapper that is returned as the result of the function.
+
+  @returns
+  A pointer to created wrapper item if successful, NULL - otherwise
+*/
+
+Item* Item::set_expr_cache(THD *thd)
+{
+  DBUG_ENTER("Item::set_expr_cache");
+  Item_cache_wrapper *wrapper;
+  if ((wrapper= new Item_cache_wrapper(this)) &&
+      !wrapper->fix_fields(thd, (Item**)&wrapper))
+  {
+    if (wrapper->set_cache(thd))
+      DBUG_RETURN(NULL);
+    DBUG_RETURN(wrapper);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
 Item_ident::Item_ident(Name_resolution_context *context_arg,
                        const char *db_name_arg,const char *table_name_arg,
 		       const char *field_name_arg)
@@ -553,7 +686,7 @@ Item_ident::Item_ident(Name_resolution_context *context_arg,
    db_name(db_name_arg), table_name(table_name_arg),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(0), depended_from(0)
+   cached_table(0), depended_from(0), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -565,7 +698,7 @@ Item_ident::Item_ident(TABLE_LIST *view_arg, const char *field_name_arg)
    db_name(NullS), table_name(view_arg->alias),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(NULL), depended_from(NULL)
+   cached_table(NULL), depended_from(NULL), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -587,7 +720,8 @@ Item_ident::Item_ident(THD *thd, Item_ident *item)
    alias_name_used(item->alias_name_used),
    cached_field_index(item->cached_field_index),
    cached_table(item->cached_table),
-   depended_from(item->depended_from)
+   depended_from(item->depended_from),
+   can_be_depended(item->can_be_depended)
 {}
 
 void Item_ident::cleanup()
@@ -605,20 +739,32 @@ void Item_ident::cleanup()
   db_name= orig_db_name; 
   table_name= orig_table_name;
   field_name= orig_field_name;
-  depended_from= 0;
+  /* Store if this Item was depended */
+  can_be_depended= test(depended_from);
   DBUG_VOID_RETURN;
 }
 
 bool Item_ident::remove_dependence_processor(uchar * arg)
 {
   DBUG_ENTER("Item_ident::remove_dependence_processor");
-  if (depended_from == (st_select_lex *) arg)
+  if (get_depended_from() == (st_select_lex *) arg)
     depended_from= 0;
   context= &((st_select_lex *) arg)->context;
   DBUG_RETURN(0);
 }
 
 
+bool Item_ident::collect_outer_ref_processor(uchar *param)
+{
+  Collect_deps_prm *prm= (Collect_deps_prm *)param;
+  if (depended_from && 
+      depended_from->nest_level_base == prm->nest_level_base &&
+      depended_from->nest_level < prm->nest_level)
+    prm->parameters->add_unique(this, &cmp_items);
+  return FALSE;
+}
+
+
 /**
   Store the pointer to this item field into a list if not already there.
 
@@ -654,6 +800,16 @@ bool Item_field::collect_item_field_processor(uchar *arg)
 }
 
 
+bool Item_field::add_field_to_set_processor(uchar *arg)
+{
+  DBUG_ENTER("Item_field::add_field_to_set_processor");
+  DBUG_PRINT("info", ("%s", field->field_name ? field->field_name : "noname"));
+  TABLE *table= (TABLE *) arg;
+  if (field->table == table)
+    bitmap_set_bit(&table->tmp_set, field->field_index);
+  DBUG_RETURN(FALSE);
+}
+
 /**
   Check if an Item_field references some field from a list of fields.
 
@@ -717,6 +873,23 @@ bool Item_field::register_field_in_bitmap(uchar *arg)
   return 0;
 }
 
+
+/*
+  Mark field in write_map
+
+  NOTES
+    This is used by UPDATE to register underlying fields of used view fields.
+*/
+
+bool Item_field::register_field_in_write_map(uchar *arg)
+{
+  TABLE *table= (TABLE *) arg;
+  if (field->table == table || !table)
+    bitmap_set_bit(field->table->write_set, field->field_index);
+  return 0;
+}
+
+
 bool Item::check_cols(uint c)
 {
   if (c != 1)
@@ -939,7 +1112,47 @@ bool Item_string::eq(const Item *item, bool binary_cmp) const
 
 bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 {
-  if (result_type() == STRING_RESULT)
+  if (field_type() == MYSQL_TYPE_TIME)
+    fuzzydate|= TIME_TIME_ONLY;
+
+  switch (result_type()) {
+  case INT_RESULT:
+  {
+    longlong value= val_int();
+    if (field_type() == MYSQL_TYPE_YEAR)
+    {
+      if (max_length == 2)
+      {
+        if (value < 70)
+          value+= 2000;
+        else if (value <= 1900)
+          value+= 1900;
+      }
+      value*= 10000; /* make it YYYYMMHH */
+    }
+    if (null_value || int_to_datetime_with_warn(value, ltime, fuzzydate,
+                                                field_name_or_null()))
+      goto err;
+    break;
+  }
+  case REAL_RESULT:
+  {
+    double value= val_real();
+    if (null_value || double_to_datetime_with_warn(value, ltime, fuzzydate,
+                                                   field_name_or_null()))
+      goto err;
+    break;
+  }
+  case DECIMAL_RESULT:
+  {
+    my_decimal value, *res;
+    if (!(res= val_decimal(&value)) ||
+        decimal_to_datetime_with_warn(res, ltime, fuzzydate,
+                                      field_name_or_null()))
+      goto err;
+    break;
+  }
+  case STRING_RESULT:
   {
     char buff[40];
     String tmp(buff,sizeof(buff), &my_charset_bin),*res;
@@ -947,25 +1160,12 @@ bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate)
         str_to_datetime_with_warn(res->ptr(), res->length(),
                                   ltime, fuzzydate) <= MYSQL_TIMESTAMP_ERROR)
       goto err;
+    break;
   }
-  else
-  {
-    int was_cut;
-    longlong value= val_int();
-
-    if (null_value)
-      goto err;
-
-    if (number_to_datetime(value, ltime, fuzzydate, &was_cut) == LL(-1))
-    {
-      char buff[22], *end;
-      end= longlong10_to_str(value, buff, -10);
-      make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                   buff, (int) (end-buff), MYSQL_TIMESTAMP_NONE,
-                                   NullS);
-      goto err;
-    }
+  default:
+    DBUG_ASSERT(0);
   }
+
   return 0;
 
 err:
@@ -973,23 +1173,20 @@ err:
   return 1;
 }
 
-/**
-  Get time of first argument.\
-
-  As a extra convenience the time structure is reset on error!
-*/
-
-bool Item::get_time(MYSQL_TIME *ltime)
+bool Item::get_seconds(ulonglong *sec, ulong *sec_part)
 {
-  char buff[40];
-  String tmp(buff,sizeof(buff),&my_charset_bin),*res;
-  if (!(res=val_str(&tmp)) ||
-      str_to_time_with_warn(res->ptr(), res->length(), ltime))
-  {
-    bzero((char*) ltime,sizeof(*ltime));
-    return 1;
+  if (result_type() == INT_RESULT)
+  { // optimize for an important special case
+    longlong val= val_int();
+    bool neg= val < 0 && !unsigned_flag;
+    *sec= neg ? -val : val;
+    *sec_part= 0;
+    return neg;
   }
-  return 0;
+  my_decimal tmp, *dec= val_decimal(&tmp);
+  if (!dec)
+    return 0;
+  return my_decimal2seconds(dec, sec, sec_part);
 }
 
 CHARSET_INFO *Item::default_charset()
@@ -1015,6 +1212,7 @@ int Item::save_in_field_no_warnings(Field *field, bool no_conversions)
   my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set);
   ulong sql_mode= thd->variables.sql_mode;
   thd->variables.sql_mode&= ~(MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE);
+  thd->variables.sql_mode|= MODE_INVALID_DATES;
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;
   res= save_in_field(field, no_conversions);
   thd->count_cuted_fields= tmp;
@@ -1427,7 +1625,7 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
   /* An item of type Item_sum  is registered <=> ref_by != 0 */ 
   if (type() == SUM_FUNC_ITEM && skip_registered && 
       ((Item_sum *) this)->ref_by)
-    return;                                                 
+    return;
   if ((type() != SUM_FUNC_ITEM && with_sum_func) ||
       (type() == FUNC_ITEM &&
        (((Item_func *) this)->functype() == Item_func::ISNOTNULLTEST_FUNC ||
@@ -1453,6 +1651,11 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
     */
     Item_aggregate_ref *item_ref;
     uint el= fields.elements;
+    /*
+      If this is an item_ref, get the original item
+      This is a safety measure if this is called for things that is
+      already a reference.
+    */
     Item *real_itm= real_item();
 
     ref_pointer_array[el]= real_itm;
@@ -1850,6 +2053,7 @@ Item_field::Item_field(Field *f)
     if this item is to be reused
   */
   orig_table_name= orig_field_name= "";
+  with_field= 1;
 }
 
 
@@ -1898,6 +2102,7 @@ Item_field::Item_field(THD *thd, Name_resolution_context *context_arg,
     name= (char*) orig_field_name;
   }
   set_field(f);
+  with_field= 1;
 }
 
 
@@ -1912,6 +2117,7 @@ Item_field::Item_field(Name_resolution_context *context_arg,
   collation.set(DERIVATION_IMPLICIT);
   if (select && select->parsing_place != IN_HAVING)
       select->select_n_where_fields++;
+  with_field= 1;
 }
 
 /**
@@ -1928,6 +2134,7 @@ Item_field::Item_field(THD *thd, Item_field *item)
    any_privileges(item->any_privileges)
 {
   collation.set(DERIVATION_IMPLICIT);
+  with_field= 1;
 }
 
 void Item_field::set_field(Field *field_par)
@@ -1965,10 +2172,15 @@ void Item_field::reset_field(Field *f)
 bool Item_field::enumerate_field_refs_processor(uchar *arg)
 {
   Field_enumerator *fe= (Field_enumerator*)arg;
-  fe->visit_field(field);
+  fe->visit_field(this);
   return FALSE;
 }
 
+bool Item_field::update_table_bitmaps_processor(uchar *arg)
+{
+  update_table_bitmaps();
+  return FALSE;
+}
 
 const char *Item_ident::full_name() const
 {
@@ -2115,16 +2327,12 @@ bool Item_field::get_date_result(MYSQL_TIME *ltime,uint fuzzydate)
   return 0;
 }
 
-bool Item_field::get_time(MYSQL_TIME *ltime)
+void Item_field::save_result(Field *to)
 {
-  if ((null_value=field->is_null()) || field->get_time(ltime))
-  {
-    bzero((char*) ltime,sizeof(*ltime));
-    return 1;
-  }
-  return 0;
+  save_field_in_field(result_field, &null_value, to, TRUE);
 }
 
+
 double Item_field::val_result()
 {
   if ((null_value=result_field->is_null()))
@@ -2167,10 +2375,12 @@ bool Item_field::val_bool_result()
   case STRING_RESULT:
     return result_field->val_real() != 0.0;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return 0;                                   // Shut up compiler
   }
+  return 0;
 }
 
 
@@ -2214,7 +2424,29 @@ table_map Item_field::used_tables() const
 {
   if (field->table->const_table)
     return 0;					// const item
-  return (depended_from ? OUTER_REF_TABLE_BIT : field->table->map);
+  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
+}
+
+table_map Item_field::all_used_tables() const
+{
+  return (get_depended_from() ? OUTER_REF_TABLE_BIT : field->table->map);
+}
+
+void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (new_parent == get_depended_from())
+    depended_from= NULL;
+  Name_resolution_context *ctx= new Name_resolution_context();
+  ctx->outer_context= NULL; // We don't build a complete name resolver
+  ctx->table_list= NULL;    // We rely on first_name_resolution_table instead
+  ctx->select_lex= new_parent;
+  ctx->first_name_resolution_table= context->first_name_resolution_table;
+  ctx->last_name_resolution_table=  context->last_name_resolution_table;
+  ctx->error_processor=             context->error_processor;
+  ctx->error_processor_data=        context->error_processor_data;
+  ctx->resolve_in_select_list=      context->resolve_in_select_list;
+  ctx->security_ctx=                context->security_ctx;
+  this->context=ctx;
 }
 
 
@@ -2451,7 +2683,7 @@ my_decimal *Item_float::val_decimal(my_decimal *decimal_value)
 
 void Item_string::print(String *str, enum_query_type query_type)
 {
-  if (query_type == QT_ORDINARY && is_cs_specified())
+  if (query_type != QT_IS && is_cs_specified())
   {
     str->append('_');
     str->append(collation.collation->csname);
@@ -2459,7 +2691,7 @@ void Item_string::print(String *str, enum_query_type query_type)
 
   str->append('\'');
 
-  if (query_type == QT_ORDINARY ||
+  if (query_type != QT_IS ||
       my_charset_same(str_value.charset(), system_charset_info))
   {
     str_value.print(str);
@@ -2487,18 +2719,19 @@ void Item_string::print(String *str, enum_query_type query_type)
 
 
 double 
-double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
+double_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                              const char *end)
 {
   int error;
-  char *org_end;
+  char *end_of_num= (char*) end;
   double tmp;
 
-  org_end= end;
-  tmp= my_strntod(cs, (char*) cptr, end - cptr, &end, &error);
-  if (error || (end != org_end && !check_if_only_end_space(cs, end, org_end)))
+  tmp= my_strntod(cs, (char*) cptr, end - cptr, &end_of_num, &error);
+  if (error || (end != end_of_num &&
+                !check_if_only_end_space(cs, end_of_num, end)))
   {
     char buff[80];
-    strmake(buff, cptr, min(sizeof(buff)-1, (size_t) (org_end-cptr)));
+    strmake(buff, cptr, min(sizeof(buff)-1, (size_t) (end-cptr)));
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_TRUNCATED_WRONG_VALUE,
                         ER(ER_TRUNCATED_WRONG_VALUE), "DOUBLE",
@@ -2513,26 +2746,27 @@ double Item_string::val_real()
   DBUG_ASSERT(fixed == 1);
   return double_from_string_with_check(str_value.charset(),
                                        str_value.ptr(), 
-                                       (char *) str_value.ptr() +
+                                       str_value.ptr() +
                                        str_value.length());
 }
 
 
 longlong 
-longlong_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end)
+longlong_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                                const char *end)
 {
   int err;
   longlong tmp;
-  char *org_end= end;
+  char *end_of_num= (char*) end;
 
-  tmp= (*(cs->cset->strtoll10))(cs, cptr, &end, &err);
+  tmp= (*(cs->cset->strtoll10))(cs, cptr, &end_of_num, &err);
   /*
     TODO: Give error if we wanted a signed integer and we got an unsigned
     one
   */
   if (!current_thd->no_errors &&
       (err > 0 ||
-       (end != org_end && !check_if_only_end_space(cs, end, org_end))))
+       (end != end_of_num && !check_if_only_end_space(cs, end_of_num, end))))
   {
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_TRUNCATED_WRONG_VALUE,
@@ -2551,7 +2785,7 @@ longlong Item_string::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   return longlong_from_string_with_check(str_value.charset(), str_value.ptr(),
-                             (char *) str_value.ptr()+ str_value.length());
+                                         str_value.ptr()+ str_value.length());
 }
 
 
@@ -2731,19 +2965,19 @@ void Item_param::set_time(MYSQL_TIME *tm, timestamp_type time_type,
   if (value.time.year > 9999 || value.time.month > 12 ||
       value.time.day > 31 ||
       (time_type != MYSQL_TIMESTAMP_TIME && value.time.hour > 23) ||
-      value.time.minute > 59 || value.time.second > 59)
+      value.time.minute > 59 || value.time.second > 59 ||
+      value.time.second_part > TIME_MAX_SECOND_PART)
   {
-    char buff[MAX_DATE_STRING_REP_LENGTH];
-    uint length= my_TIME_to_str(&value.time, buff);
+    Lazy_string_time str(&value.time);
     make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 buff, length, time_type, 0);
+                                 &str, time_type, 0);
     set_zero_time(&value.time, MYSQL_TIMESTAMP_ERROR);
   }
 
   state= TIME_VALUE;
   maybe_null= 0;
   max_length= max_length_arg;
-  decimals= 0;
+  decimals= tm->second_part > 0 ? TIME_SECOND_PART_DIGITS : 0;
   DBUG_VOID_RETURN;
 }
 
@@ -2830,10 +3064,12 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
     case REAL_RESULT:
       set_double(*(double*)entry->value);
       item_type= Item::REAL_ITEM;
+      param_type= MYSQL_TYPE_DOUBLE;
       break;
     case INT_RESULT:
       set_int(*(longlong*)entry->value, MY_INT64_NUM_DECIMAL_DIGITS);
       item_type= Item::INT_ITEM;
+      param_type= MYSQL_TYPE_LONGLONG;
       break;
     case STRING_RESULT:
     {
@@ -2856,6 +3092,7 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
         charset of connection, so we have to set it later.
       */
       item_type= Item::STRING_ITEM;
+      param_type= MYSQL_TYPE_VARCHAR;
 
       if (set_str((const char *)entry->value, entry->length))
         DBUG_RETURN(1);
@@ -2871,9 +3108,12 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
         my_decimal_precision_to_length_no_truncation(ent_value->precision(),
                                                      decimals, unsigned_flag);
       item_type= Item::DECIMAL_ITEM;
+      param_type= MYSQL_TYPE_NEWDECIMAL;
       break;
     }
-    default:
+    case ROW_RESULT:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
       DBUG_ASSERT(0);
       set_null();
     }
@@ -2935,7 +3175,7 @@ int Item_param::save_in_field(Field *field, bool no_conversions)
   case DECIMAL_VALUE:
     return field->store_decimal(&decimal_value);
   case TIME_VALUE:
-    field->store_time(&value.time, value.time.time_type);
+    field->store_time_dec(&value.time, decimals);
     return 0;
   case STRING_VALUE:
   case LONG_DATA_VALUE:
@@ -2951,21 +3191,6 @@ int Item_param::save_in_field(Field *field, bool no_conversions)
 }
 
 
-bool Item_param::get_time(MYSQL_TIME *res)
-{
-  if (state == TIME_VALUE)
-  {
-    *res= value.time;
-    return 0;
-  }
-  /*
-    If parameter value isn't supplied assertion will fire in val_str()
-    which is called from Item::get_time().
-  */
-  return Item::get_time(res);
-}
-
-
 bool Item_param::get_date(MYSQL_TIME *res, uint fuzzydate)
 {
   if (state == TIME_VALUE)
@@ -3095,7 +3320,8 @@ String *Item_param::val_str(String* str)
   {
     if (str->reserve(MAX_DATE_STRING_REP_LENGTH))
       break;
-    str->length((uint) my_TIME_to_str(&value.time, (char*) str->ptr()));
+    str->length((uint) my_TIME_to_str(&value.time, (char*) str->ptr(),
+                decimals));
     str->set_charset(&my_charset_bin);
     return str;
   }
@@ -3147,7 +3373,7 @@ const String *Item_param::query_val_str(String* str) const
       buf= str->c_ptr_quick();
       ptr= buf;
       *ptr++= '\'';
-      ptr+= (uint) my_TIME_to_str(&value.time, ptr);
+      ptr+= (uint) my_TIME_to_str(&value.time, ptr, decimals);
       *ptr++= '\'';
       str->length((uint32) (ptr - buf));
       break;
@@ -3341,6 +3567,7 @@ Item_param::set_param_type_and_swap_value(Item_param *src)
 /****************************************************************************
   Item_copy
 ****************************************************************************/
+
 Item_copy *Item_copy::create (Item *item)
 {
   switch (item->result_type())
@@ -3354,7 +3581,9 @@ Item_copy *Item_copy::create (Item *item)
         new Item_copy_uint (item) : new Item_copy_int (item);
     case DECIMAL_RESULT:
       return new Item_copy_decimal (item);
-    default:
+    case TIME_RESULT:
+    case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
       DBUG_ASSERT (0);
   }
   /* should not happen */
@@ -3427,8 +3656,7 @@ void Item_copy_int::copy()
   null_value=item->null_value;
 }
 
-static int save_int_value_in_field (Field *field, longlong nr, 
-                                    bool null_value, bool unsigned_flag);
+static int save_int_value_in_field (Field *, longlong, bool, bool);
 
 int Item_copy_int::save_in_field(Field *field, bool no_conversions)
 {
@@ -3582,6 +3810,15 @@ bool Item::fix_fields(THD *thd, Item **ref)
   return FALSE;
 }
 
+
+void Item_ref_null_helper::save_val(Field *to)
+{
+  DBUG_ASSERT(fixed == 1);
+  (*ref)->save_val(to);
+  owner->was_null|= null_value= (*ref)->null_value;
+}
+
+
 double Item_ref_null_helper::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -3629,7 +3866,7 @@ String* Item_ref_null_helper::val_str(String* s)
 
 bool Item_ref_null_helper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
 {  
-  return (owner->was_null|= null_value= (*ref)->get_date(ltime, fuzzydate));
+  return (owner->was_null|= null_value= (*ref)->get_date_result(ltime, fuzzydate));
 }
 
 
@@ -3645,7 +3882,7 @@ bool Item_ref_null_helper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
                          substitution)
 */
 
-static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
+static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                               Item_ident *resolved_item,
                               Item_ident *mark_item)
 {
@@ -3654,9 +3891,11 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
   const char *table_name= (resolved_item->table_name ?
                            resolved_item->table_name : "");
   /* store pointer on SELECT_LEX from which item is dependent */
-  if (mark_item)
+  if (mark_item && mark_item->can_be_depended)
     mark_item->depended_from= last;
-  current->mark_as_dependent(last, resolved_item);
+  if (current->mark_as_dependent(thd, last, /** resolved_item psergey-thu
+    **/mark_item))
+    return TRUE;
   if (thd->lex->describe & DESCRIBE_EXTENDED)
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
@@ -3666,6 +3905,7 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                  resolved_item->field_name,
                  current->select_number, last->select_number);
   }
+  return FALSE;
 }
 
 
@@ -3949,6 +4189,34 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select)
 }
 
 
+/*
+  @brief
+  Whether a table belongs to an outer select.
+
+  @param table table to check
+  @param select current select
+
+  @details
+  Try to find select the table belongs to by ascending the derived tables chain.
+*/
+
+static
+bool is_outer_table(TABLE_LIST *table, SELECT_LEX *select)
+{
+  DBUG_ASSERT(table->select_lex != select);
+  TABLE_LIST *tl;
+
+  for (tl= select->master_unit()->derived;
+       tl && tl->is_merged_derived();
+       select= tl->select_lex, tl= select->master_unit()->derived)
+  {
+    if (tl->select_lex == table->select_lex)
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
 /**
   Resolve the name of an outer select column reference.
 
@@ -4215,8 +4483,9 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
       return -1;
 
     mark_as_dependent(thd, last_checked_context->select_lex,
-                      context->select_lex, this,
+                      context->select_lex, rf,
                       rf);
+
     return 0;
   }
   else
@@ -4387,7 +4656,9 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
 
     if (!outer_fixed && cached_table && cached_table->select_lex &&
         context->select_lex &&
-        cached_table->select_lex != context->select_lex)
+        cached_table->select_lex != context->select_lex &&
+        !context->select_lex->is_merged_child_of(cached_table->select_lex) &&
+        is_outer_table(cached_table, context->select_lex))
     {
       int ret;
       if ((ret= fix_outer_field(thd, &from_field, reference)) < 0)
@@ -4530,6 +4801,7 @@ void Item_field::cleanup()
 {
   DBUG_ENTER("Item_field::cleanup");
   Item_ident::cleanup();
+  depended_from= NULL;
   /*
     Even if this object was created by direct link to field in setup_wild()
     it will be linked correctly next time by name of field and table alias.
@@ -4581,13 +4853,14 @@ Item_equal *Item_field::find_item_equal(COND_EQUAL *cond_equal)
 
 
 /**
-  Check whether a field can be substituted by an equal item.
+  Check whether a field item can be substituted for an equal item
 
-  The function checks whether a substitution of the field
-  occurrence for an equal item is valid.
+  @details
+  The function checks whether a substitution of a field item for
+  an equal item is valid.
 
-  @param arg   *arg != NULL <-> the field is in the context where
-               substitution for an equal item is valid
+  @param arg   *arg != NULL <-> the field is in the context
+               where substitution for an equal item is valid
 
   @note
     The following statement is not always true:
@@ -4612,7 +4885,10 @@ Item_equal *Item_field::find_item_equal(COND_EQUAL *cond_equal)
 
 bool Item_field::subst_argument_checker(uchar **arg)
 {
-  return (result_type() != STRING_RESULT) || (*arg);
+  return *arg &&
+         (*arg == (uchar *) Item::ANY_SUBST ||
+          result_type() != STRING_RESULT || 
+          (field->flags & BINARY_FLAG));
 }
 
 
@@ -4649,6 +4925,7 @@ static void convert_zerofill_number_to_string(Item **item, Field_num *field)
   Set a pointer to the multiple equality the field reference belongs to
   (if any).
 
+  @details
   The function looks for a multiple equality containing the field item
   among those referenced by arg.
   In the case such equality exists the function does the following.
@@ -4693,12 +4970,8 @@ Item *Item_field::equal_fields_propagator(uchar *arg)
     item= this;
   else if (field && (field->flags & ZEROFILL_FLAG) && IS_NUM(field->type()))
   {
-    /*
-      We don't need to zero-fill timestamp columns here because they will be 
-      first converted to a string (in date/time format) and compared as such if
-      compared with another string.
-    */
-    if (item && field->type() != FIELD_TYPE_TIMESTAMP && cmp_context != INT_RESULT)
+    if (item && field->type() != FIELD_TYPE_TIMESTAMP &&
+        cmp_context != INT_RESULT)
       convert_zerofill_number_to_string(&item, (Field_num *)field);
     else
       item= this;
@@ -4725,7 +4998,9 @@ bool Item_field::set_no_const_sub(uchar *arg)
   Replace an Item_field for an equal Item_field that evaluated earlier
   (if any).
 
-  The function returns a pointer to an item that is taken from
+  @details
+  If this->item_equal points to some item and coincides with arg then
+  the function returns a pointer to an item that is taken from
   the very beginning of the item_equal list which the Item_field
   object refers to (belongs to) unless item_equal contains  a constant
   item. In this case the function returns this constant item, 
@@ -4733,12 +5008,12 @@ bool Item_field::set_no_const_sub(uchar *arg)
   If the Item_field object does not refer any Item_equal object
   'this' is returned .
 
-  @param arg   a dummy parameter, is not used here
+  @param arg   NULL or points to so some item of the Item_equal type  
 
 
   @note
     This function is supposed to be called as a callback parameter in calls
-    of the thransformer method.
+    of the transformer method.
 
   @return
     - pointer to a replacement Item_field if there is a better equal item or
@@ -4748,7 +5023,8 @@ bool Item_field::set_no_const_sub(uchar *arg)
 
 Item *Item_field::replace_equal_field(uchar *arg)
 {
-  if (item_equal)
+  REPLACE_EQUAL_FIELD_ARG* param= (REPLACE_EQUAL_FIELD_ARG*)arg;
+  if (item_equal && item_equal == param->item_equal)
   {
     Item *const_item= item_equal->get_const();
     if (const_item)
@@ -4758,8 +5034,11 @@ Item *Item_field::replace_equal_field(uchar *arg)
         return this;
       return const_item;
     }
-    Item_field *subst= item_equal->get_first();
-    if (subst && field->table != subst->field->table && !field->eq(subst->field))
+    Item_field *subst= 
+      (Item_field *)(item_equal->get_first(param->context_tab, this));
+    if (subst)
+      subst= (Item_field *) (subst->real_item());
+    if (subst && !field->eq(subst->field))
       return subst;
   }
   return this;
@@ -4817,25 +5096,12 @@ enum_field_types Item::field_type() const
   case DECIMAL_RESULT: return MYSQL_TYPE_NEWDECIMAL;
   case REAL_RESULT:    return MYSQL_TYPE_DOUBLE;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     return MYSQL_TYPE_VARCHAR;
   }
-}
-
-
-bool Item::is_datetime()
-{
-  switch (field_type())
-  {
-    case MYSQL_TYPE_DATE:
-    case MYSQL_TYPE_DATETIME:
-    case MYSQL_TYPE_TIMESTAMP:
-      return TRUE;
-    default:
-      break;
-  }
-  return FALSE;
+  return MYSQL_TYPE_VARCHAR;
 }
 
 
@@ -4937,6 +5203,10 @@ Field *Item::make_string_field(TABLE *table)
 {
   Field *field;
   DBUG_ASSERT(collation.collation);
+  /* 
+    Note: the following check is repeated in 
+    subquery_types_allow_materialization():
+  */
   if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB)
     field= new Field_blob(max_length, maybe_null, name,
                           collation.collation);
@@ -5015,16 +5285,19 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length)
     break;
   case MYSQL_TYPE_NEWDATE:
   case MYSQL_TYPE_DATE:
-    field= new Field_newdate(maybe_null, name, &my_charset_bin);
+    field= new Field_newdate(0, null_ptr, 0, Field::NONE, name, &my_charset_bin);
     break;
   case MYSQL_TYPE_TIME:
-    field= new Field_time(maybe_null, name, &my_charset_bin);
+    field= new_Field_time(0, null_ptr, 0, Field::NONE, name,
+                              decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_TIMESTAMP:
-    field= new Field_timestamp(maybe_null, name, &my_charset_bin);
+    field= new_Field_timestamp(0, null_ptr, 0,
+                               Field::NONE, name, 0, decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_DATETIME:
-    field= new Field_datetime(maybe_null, name, &my_charset_bin);
+    field= new_Field_datetime(0, null_ptr, 0, Field::NONE, name,
+                              decimals, &my_charset_bin);
     break;
   case MYSQL_TYPE_YEAR:
     field= new Field_year((uchar*) 0, max_length, null_ptr, 0, Field::NONE,
@@ -5088,47 +5361,69 @@ void Item_field::make_field(Send_field *tmp_field)
 
 
 /**
-  Set a field's value from a item.
-*/
+  Save a field value in another field
 
-void Item_field::save_org_in_field(Field *to)
-{
-  if (field->is_null())
-  {
-    null_value=1;
-    set_field_to_null_with_conversions(to, 1);
-  }
-  else
-  {
-    to->set_notnull();
-    field_conv(to,field);
-    null_value=0;
-  }
-}
+  @param from             Field to take the value from
+  @param [out] null_value Pointer to the null_value flag to set
+  @param to               Field to save the value in
+  @param no_conversions   How to deal with NULL value
 
-int Item_field::save_in_field(Field *to, bool no_conversions)
+  @details
+  The function takes the value of the field 'from' and, if this value
+  is not null, it saves in the field 'to' setting off the flag referenced
+  by 'null_value'. Otherwise this flag is set on and field 'to' is
+  also set to null possibly with conversion.
+
+  @note
+  This function is used by the functions Item_field::save_in_field,
+  Item_field::save_org_in_field and Item_ref::save_in_field
+
+  @retval FALSE OK
+  @retval TRUE  Error
+
+*/
+
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions)
 {
   int res;
-  if (result_field->is_null())
+  DBUG_ENTER("save_field_in_field");
+  if (from->is_null())
   {
-    null_value=1;
-    return set_field_to_null_with_conversions(to, no_conversions);
+    (*null_value)= 1;
+    DBUG_RETURN(set_field_to_null_with_conversions(to, no_conversions));
   }
   to->set_notnull();
 
   /*
     If we're setting the same field as the one we're reading from there's 
     nothing to do. This can happen in 'SET x = x' type of scenarios.
-  */  
-  if (to == result_field)
+  */
+  if (to == from)
   {
-    null_value=0;
-    return 0;
+    (*null_value)= 0;
+    DBUG_RETURN(0);
   }
 
-  res= field_conv(to,result_field);
-  null_value=0;
-  return res;
+  res= field_conv(to, from);
+  (*null_value)= 0;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Set a field's value from a item.
+*/
+
+void Item_field::save_org_in_field(Field *to)
+{
+  save_field_in_field(field, &null_value, to, TRUE);
+}
+
+
+int Item_field::save_in_field(Field *to, bool no_conversions)
+{
+  return save_field_in_field(result_field, &null_value, to, no_conversions);
 }
 
 
@@ -5239,12 +5534,6 @@ int Item_string::save_in_field(Field *field, bool no_conversions)
 }
 
 
-int Item_uint::save_in_field(Field *field, bool no_conversions)
-{
-  /* Item_int::save_in_field handles both signed and unsigned. */
-  return Item_int::save_in_field(field, no_conversions);
-}
-
 static int save_int_value_in_field (Field *field, longlong nr, 
                                     bool null_value, bool unsigned_flag)
 {
@@ -5261,6 +5550,22 @@ int Item_int::save_in_field(Field *field, bool no_conversions)
 }
 
 
+void Item_datetime::set(longlong packed)
+{
+  unpack_time(packed, &ltime);
+}
+
+int Item_datetime::save_in_field(Field *field, bool no_conversions)
+{
+  field->set_notnull();
+  return field->store_time_dec(&ltime, decimals);
+}
+
+longlong Item_datetime::val_int()
+{
+  return TIME_to_ulonglong(&ltime);
+}
+
 int Item_decimal::save_in_field(Field *field, bool no_conversions)
 {
   field->set_notnull();
@@ -5278,7 +5583,9 @@ bool Item_int::eq(const Item *arg, bool binary_cmp) const
       a basic constant.
     */
     Item *item= (Item*) arg;
-    return item->val_int() == value && item->unsigned_flag == unsigned_flag;
+    return (item->val_int() == value &&
+            ((longlong) value >= 0 ||
+             (item->unsigned_flag == unsigned_flag)));
   }
   return FALSE;
 }
@@ -5622,7 +5929,10 @@ bool Item::send(Protocol *protocol, String *buffer)
   {
     String *res;
     if ((res=val_str(buffer)))
+    {
+      DBUG_ASSERT(!null_value);
       result= protocol->store(res->ptr(),res->length(),res->charset());
+    }
     else
     {
       DBUG_ASSERT(null_value);
@@ -5683,13 +5993,13 @@ bool Item::send(Protocol *protocol, String *buffer)
   case MYSQL_TYPE_TIMESTAMP:
   {
     MYSQL_TIME tm;
-    get_date(&tm, TIME_FUZZY_DATE);
+    get_date(&tm, TIME_FUZZY_DATE | sql_mode_for_dates());
     if (!null_value)
     {
       if (f_type == MYSQL_TYPE_DATE)
 	return protocol->store_date(&tm);
       else
-	result= protocol->store(&tm);
+	result= protocol->store(&tm, decimals);
     }
     break;
   }
@@ -5698,7 +6008,7 @@ bool Item::send(Protocol *protocol, String *buffer)
     MYSQL_TIME tm;
     get_time(&tm);
     if (!null_value)
-      result= protocol->store_time(&tm);
+      result= protocol->store_time(&tm, decimals);
     break;
   }
   }
@@ -5779,17 +6089,7 @@ void Item_field::print(String *str, enum_query_type query_type)
 {
   if (field && field->table->const_table)
   {
-    char buff[MAX_FIELD_WIDTH];
-    String tmp(buff,sizeof(buff),str->charset());
-    field->val_str(&tmp);
-    if (field->is_null())
-      str->append("NULL");
-    else
-    {
-      str->append('\'');
-      str->append(tmp);
-      str->append('\'');
-    }
+    print_value(str);
     return;
   }
   Item_ident::print(str, query_type);
@@ -5801,7 +6101,7 @@ Item_ref::Item_ref(Name_resolution_context *context_arg,
                    const char *field_name_arg,
                    bool alias_name_used_arg)
   :Item_ident(context_arg, NullS, table_name_arg, field_name_arg),
-   result_field(0), ref(item)
+   result_field(0), ref(item), reference_trough_name(0)
 {
   alias_name_used= alias_name_used_arg;
   /*
@@ -5811,11 +6111,41 @@ Item_ref::Item_ref(Name_resolution_context *context_arg,
     set_properties();
 }
 
+/*
+  A Field_enumerator-compatible class that invokes mark_as_dependent() for
+  each field that is a reference to some ancestor of current_select.
+*/
+class Dependency_marker: public Field_enumerator
+{
+public:
+  THD *thd;
+  st_select_lex *current_select;
+  virtual void visit_field(Item_field *item)
+  {
+    // Find which select the field is in. This is achieved by walking up 
+    // the select tree and looking for the table of interest.
+    st_select_lex *sel;
+    for (sel= current_select; sel; sel= sel->outer_select())
+    {
+      List_iterator<TABLE_LIST> li(sel->leaf_tables);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl->table == item->field->table)
+        {
+          if (sel != current_select)
+            mark_as_dependent(thd, sel, current_select, item, item);
+          return;
+        }
+      }
+    }
+  }
+};
 
 Item_ref::Item_ref(TABLE_LIST *view_arg, Item **item,
                    const char *field_name_arg, bool alias_name_used_arg)
   :Item_ident(view_arg, field_name_arg),
-   result_field(NULL), ref(item)
+   result_field(NULL), ref(item), reference_trough_name(0)
 {
   alias_name_used= alias_name_used_arg;
   /*
@@ -5898,6 +6228,7 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
 
   if (!ref || ref == not_found_item)
   {
+    DBUG_ASSERT(reference_trough_name != 0);
     if (!(ref= resolve_ref_in_select_and_group(thd, this,
                                                context->select_lex)))
       goto error;             /* Some error occurred (e.g. ambiguous names). */
@@ -6048,7 +6379,7 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
           goto error;
         thd->change_item_tree(reference, fld);
         mark_as_dependent(thd, last_checked_context->select_lex,
-                          thd->lex->current_select, this, fld);
+                          thd->lex->current_select, fld, fld);
         /*
           A reference is resolved to a nest level that's outer or the same as
           the nest level of the enclosing set function : adjust the value of
@@ -6084,6 +6415,19 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
                       last_checked_context->select_lex->nest_level);
     }
   }
+  else if (ref_type() != VIEW_REF)
+  {
+    /*
+      It could be that we're referring to something that's in ancestor selects.
+      We must make an appropriate mark_as_dependent() call for each such
+      outside reference.
+    */
+    Dependency_marker dep_marker;
+    dep_marker.current_select= current_sel;
+    dep_marker.thd= thd;
+    (*ref)->walk(&Item::enumerate_field_refs_processor, FALSE,
+                 (uchar*)&dep_marker);
+  }
 
   DBUG_ASSERT(*ref);
   /*
@@ -6129,6 +6473,7 @@ void Item_ref::set_properties()
     split_sum_func() doesn't try to change the reference.
   */
   with_sum_func= (*ref)->with_sum_func;
+  with_field= (*ref)->with_field;
   unsigned_flag= (*ref)->unsigned_flag;
   fixed= 1;
   if (alias_name_used)
@@ -6145,10 +6490,100 @@ void Item_ref::cleanup()
   DBUG_ENTER("Item_ref::cleanup");
   Item_ident::cleanup();
   result_field= 0;
+  if (reference_trough_name)
+  {
+    /* We have to reset the reference as it may been freed */
+    ref= 0;
+  }
   DBUG_VOID_RETURN;
 }
 
 
+/**
+  Transform an Item_ref object with a transformer callback function.
+
+  The function first applies the transform method to the item
+  referenced by this Item_reg object. If this returns a new item the
+  old item is substituted for a new one. After this the transformer
+  is applied to the Item_ref object.
+
+  @param transformer   the transformer callback function to be applied to
+                       the nodes of the tree of the object
+  @param argument      parameter to be passed to the transformer
+
+  @return Item returned as the result of transformation of the Item_ref object
+    @retval !NULL The transformation was successful
+    @retval NULL  Out of memory error
+*/
+
+Item* Item_ref::transform(Item_transformer transformer, uchar *arg)
+{
+  DBUG_ASSERT(!current_thd->is_stmt_prepare());
+  DBUG_ASSERT((*ref) != NULL);
+
+  /* Transform the object we are referencing. */
+  Item *new_item= (*ref)->transform(transformer, arg);
+  if (!new_item)
+    return NULL;
+
+  /*
+    THD::change_item_tree() should be called only if the tree was
+    really transformed, i.e. when a new item has been created.
+    Otherwise we'll be allocating a lot of unnecessary memory for
+    change records at each execution.
+  */
+  if (*ref != new_item)
+    current_thd->change_item_tree(ref, new_item);
+
+  /* Transform the item ref object. */
+  return (this->*transformer)(arg);
+}
+
+
+/**
+  Compile an Item_ref object with a processor and a transformer
+  callback functions.
+
+  First the function applies the analyzer to the Item_ref object. Then
+  if the analizer succeeeds we first applies the compile method to the
+  object the Item_ref object is referencing. If this returns a new
+  item the old item is substituted for a new one.  After this the
+  transformer is applied to the Item_ref object itself.
+  The compile function is not called if the analyzer returns NULL
+  in the parameter arg_p. 
+
+  @param analyzer      the analyzer callback function to be applied to the
+                       nodes of the tree of the object
+  @param[in,out] arg_p parameter to be passed to the processor
+  @param transformer   the transformer callback function to be applied to the
+                       nodes of the tree of the object
+  @param arg_t         parameter to be passed to the transformer
+
+  @return Item returned as the result of transformation of the Item_ref object
+*/
+
+Item* Item_ref::compile(Item_analyzer analyzer, uchar **arg_p,
+                        Item_transformer transformer, uchar *arg_t)
+{
+  /* Analyze this Item object. */
+  if (!(this->*analyzer)(arg_p))
+    return NULL;
+
+  /* Compile the Item we are referencing. */
+  DBUG_ASSERT((*ref) != NULL);
+  if (*arg_p)
+  {
+    uchar *arg_v= *arg_p;
+    Item *new_item= (*ref)->compile(analyzer, &arg_v, transformer, arg_t);
+    if (new_item && *ref != new_item)
+      current_thd->change_item_tree(ref, new_item);
+  }
+
+  /* Transform this Item object. */
+  return (this->*transformer)(arg_t);
+}
+
+
 void Item_ref::print(String *str, enum_query_type query_type)
 {
   if (ref)
@@ -6255,7 +6690,8 @@ bool Item_ref::val_bool_result()
     case STRING_RESULT:
       return result_field->val_real() != 0.0;
     case ROW_RESULT:
-    default:
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
       DBUG_ASSERT(0);
     }
   }
@@ -6263,6 +6699,25 @@ bool Item_ref::val_bool_result()
 }
 
 
+void Item_ref::save_result(Field *to)
+{
+  if (result_field)
+  {
+    save_field_in_field(result_field, &null_value, to, TRUE);
+    return;
+  }
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
+void Item_ref::save_val(Field *to)
+{
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
 double Item_ref::val_real()
 {
   DBUG_ASSERT(fixed);
@@ -6324,7 +6779,19 @@ my_decimal *Item_ref::val_decimal(my_decimal *decimal_value)
 int Item_ref::save_in_field(Field *to, bool no_conversions)
 {
   int res;
-  DBUG_ASSERT(!result_field);
+  if (result_field)
+  {
+    if (result_field->is_null())
+    {
+      null_value= 1;
+      res= set_field_to_null_with_conversions(to, no_conversions);
+      return res;
+    }
+    to->set_notnull();
+    res= field_conv(to, result_field);
+    null_value= 0;
+    return res;
+  }
   res= (*ref)->save_in_field(to, no_conversions);
   null_value= (*ref)->null_value;
   return res;
@@ -6380,6 +6847,13 @@ void Item_ref_null_helper::print(String *str, enum_query_type query_type)
 }
 
 
+void Item_direct_ref::save_val(Field *to)
+{
+  (*ref)->save_val(to);
+  null_value=(*ref)->null_value;
+}
+
+
 double Item_direct_ref::val_real()
 {
   double tmp=(*ref)->val_real();
@@ -6432,6 +6906,390 @@ bool Item_direct_ref::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 }
 
 
+Item_cache_wrapper::~Item_cache_wrapper()
+{
+  DBUG_ASSERT(expr_cache == 0);
+}
+
+
+Item_cache_wrapper::Item_cache_wrapper(Item *item_arg)
+:orig_item(item_arg), expr_cache(NULL), expr_value(NULL)
+{
+  DBUG_ASSERT(orig_item->fixed);
+  max_length= orig_item->max_length;
+  maybe_null= orig_item->maybe_null;
+  decimals=   orig_item->decimals;
+  collation.set(orig_item->collation);
+  with_sum_func= orig_item->with_sum_func;
+  with_field= orig_item->with_field;
+  unsigned_flag= orig_item->unsigned_flag;
+  name= item_arg->name;
+  name_length= item_arg->name_length;
+  with_subselect=  orig_item->with_subselect;
+
+  if ((expr_value= Item_cache::get_cache(orig_item)))
+    expr_value->setup(orig_item);
+
+  fixed= 1;
+}
+
+
+/**
+  Initialize the cache if it is needed
+*/
+
+void Item_cache_wrapper::init_on_demand()
+{
+    if (!expr_cache->is_inited())
+    {
+      orig_item->get_cache_parameters(parameters);
+      expr_cache->init();
+    }
+}
+
+
+void Item_cache_wrapper::print(String *str, enum_query_type query_type)
+{
+  str->append(func_name());
+  if (expr_cache)
+  {
+    init_on_demand();
+    expr_cache->print(str, query_type);
+  }
+  else
+    str->append(STRING_WITH_LEN("<<DISABLED>>"));
+  str->append('(');
+  orig_item->print(str, query_type);
+  str->append(')');
+}
+
+
+/**
+  Prepare the expression cache wrapper (do nothing)
+
+  @retval FALSE OK
+*/
+
+bool Item_cache_wrapper::fix_fields(THD *thd  __attribute__((unused)),
+                                    Item **it __attribute__((unused)))
+{
+  DBUG_ASSERT(orig_item->fixed);
+  DBUG_ASSERT(fixed);
+  return FALSE;
+}
+
+
+/**
+  Clean the expression cache wrapper up before reusing it.
+*/
+
+void Item_cache_wrapper::cleanup()
+{
+  DBUG_ENTER("Item_cache_wrapper::cleanup");
+  Item_result_field::cleanup();
+  delete expr_cache;
+  expr_cache= 0;
+  /* expr_value is Item so it will be destroyed from list of Items */
+  expr_value= 0;
+  parameters.empty();
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Create an expression cache that uses a temporary table
+
+  @param thd           Thread handle
+  @param depends_on    Parameters of the expression to create cache for
+
+  @details
+  The function takes 'depends_on' as the list of all parameters for
+  the expression wrapped into this object and creates an expression
+  cache in a temporary table containing the field for the parameters
+  and the result of the expression.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool Item_cache_wrapper::set_cache(THD *thd)
+{
+  DBUG_ENTER("Item_cache_wrapper::set_cache");
+  DBUG_ASSERT(expr_cache == 0);
+  expr_cache= new Expression_cache_tmptable(thd, parameters, expr_value);
+  DBUG_RETURN(expr_cache == NULL);
+}
+
+
+/**
+  Check if the current values of the parameters are in the expression cache
+
+  @details
+  The function checks whether the current set of the parameters of the
+  referenced item can be found in the expression cache. If so the function
+  returns the item by which the result of the expression can be easily
+  extracted from the cache with the corresponding val_* method.
+
+  @retval NULL    - parameters are not in the cache
+  @retval <item*> - item providing the result of the expression found in cache
+*/
+
+Item *Item_cache_wrapper::check_cache()
+{
+  DBUG_ENTER("Item_cache_wrapper::check_cache");
+  if (expr_cache)
+  {
+    Expression_cache_tmptable::result res;
+    Item *cached_value;
+    init_on_demand();
+    res= expr_cache->check_value(&cached_value);
+    if (res == Expression_cache_tmptable::HIT)
+      DBUG_RETURN(cached_value);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Get the value of the cached expression and put it in the cache
+*/
+
+inline void Item_cache_wrapper::cache()
+{
+  expr_value->store(orig_item);
+  expr_value->cache_value();
+  expr_cache->put_value(expr_value); // put in expr_cache
+}
+
+
+/**
+  Get the value of the possibly cached item into the field.
+*/
+
+void Item_cache_wrapper::save_val(Field *to)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_int");
+  if (!expr_cache)
+  {
+    orig_item->save_val(to);
+    null_value= orig_item->null_value;
+    DBUG_VOID_RETURN;
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    cached_value->save_val(to);
+    null_value= cached_value->null_value;
+    DBUG_VOID_RETURN;
+  }
+  cache();
+  null_value= expr_value->null_value;
+  expr_value->save_val(to);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Get the integer value of the possibly cached item.
+*/
+
+longlong Item_cache_wrapper::val_int()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_int");
+  if (!expr_cache)
+  {
+    longlong tmp= orig_item->val_int();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    longlong tmp= cached_value->val_int();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_int());
+}
+
+
+/**
+  Get the real value of the possibly cached item
+*/
+
+double Item_cache_wrapper::val_real()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_real");
+  if (!expr_cache)
+  {
+    double tmp= orig_item->val_real();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    double tmp= cached_value->val_real();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_real());
+}
+
+
+/**
+  Get the string value of the possibly cached item
+*/
+
+String *Item_cache_wrapper::val_str(String* str)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_str");
+  if (!expr_cache)
+  {
+    String *tmp= orig_item->val_str(str);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    String *tmp= cached_value->val_str(str);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_str(str));
+}
+
+
+/**
+  Get the decimal value of the possibly cached item
+*/
+
+my_decimal *Item_cache_wrapper::val_decimal(my_decimal* decimal_value)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_decimal");
+  if (!expr_cache)
+  {
+    my_decimal *tmp= orig_item->val_decimal(decimal_value);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    my_decimal *tmp= cached_value->val_decimal(decimal_value);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_decimal(decimal_value));
+}
+
+
+/**
+  Get the boolean value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::val_bool()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_bool");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->val_bool();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->val_bool();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_bool());
+}
+
+
+/**
+  Check for NULL the value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::is_null()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::is_null");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->is_null();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->is_null();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  DBUG_RETURN((null_value= expr_value->null_value));
+}
+
+
+/**
+  Get the date value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::get_date");
+  if (!expr_cache)
+    DBUG_RETURN((null_value= orig_item->get_date(ltime, fuzzydate)));
+
+  if ((cached_value= check_cache()))
+    DBUG_RETURN((null_value= cached_value->get_date(ltime, fuzzydate)));
+
+  cache();
+  DBUG_RETURN((null_value= expr_value->get_date(ltime, fuzzydate)));
+}
+
+
+int Item_cache_wrapper::save_in_field(Field *to, bool no_conversions)
+{
+  int res;
+  DBUG_ASSERT(!result_field);
+  res= orig_item->save_in_field(to, no_conversions);
+  null_value= orig_item->null_value;
+  return res;
+}
+
+
+Item* Item_cache_wrapper::get_tmp_table_item(THD *thd_arg)
+{
+  if (!orig_item->with_sum_func && !orig_item->const_item())
+    return new Item_field(result_field);
+  return copy_or_same(thd_arg);
+}
+
+
 /**
   Prepare referenced field then call usual Item_direct_ref::fix_fields .
 
@@ -6469,7 +7327,11 @@ bool Item_direct_view_ref::fix_fields(THD *thd, Item **reference)
            ((*ref)->fix_fields(thd, ref)))
     return TRUE;
 
-  return Item_direct_ref::fix_fields(thd, reference);
+  if (Item_direct_ref::fix_fields(thd, reference))
+    return TRUE;
+  if (view->table && view->table->maybe_null)
+    maybe_null= TRUE;
+  return FALSE;
 }
 
 /*
@@ -6500,6 +7362,23 @@ bool Item_outer_ref::fix_fields(THD *thd, Item **reference)
 }
 
 
+void Item_outer_ref::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (get_depended_from() == new_parent)
+  {
+    *ref= outer_ref;
+    (*ref)->fix_after_pullout(new_parent, ref);
+  }
+}
+
+void Item_ref::fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+{
+  (*ref)->fix_after_pullout(new_parent, ref);
+  if (get_depended_from() == new_parent)
+    depended_from= NULL;
+}
+
+
 /**
   Mark references from inner selects used in group by clause
 
@@ -6567,6 +7446,138 @@ bool Item_direct_view_ref::eq(const Item *item, bool binary_cmp) const
   return FALSE;
 }
 
+
+Item_equal *Item_direct_view_ref::find_item_equal(COND_EQUAL *cond_equal)
+{
+  Item* field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return NULL;
+  return ((Item_field *) field_item)->find_item_equal(cond_equal);  
+}
+
+
+/**
+  Check whether a reference to field item can be substituted for an equal item
+
+  @details
+  The function checks whether a substitution of a reference to field item for
+  an equal item is valid.
+
+  @param arg   *arg != NULL <-> the reference is in the context
+               where substitution for an equal item is valid
+
+  @note
+    See also the note for Item_field::subst_argument_checker
+
+  @retval
+    TRUE   substitution is valid
+  @retval
+    FALSE  otherwise
+*/
+bool Item_direct_view_ref::subst_argument_checker(uchar **arg)
+{
+  bool res= FALSE;
+  if (*arg)
+  { 
+    Item *item= real_item();
+    if (item->type() == FIELD_ITEM &&
+        (*arg == (uchar *) Item::ANY_SUBST || 
+         result_type() != STRING_RESULT ||
+         (((Item_field *) item)->field->flags & BINARY_FLAG)))
+      res= TRUE;
+  }
+  /* Block any substitution into the wrapped object */
+  if (*arg)
+    *arg= NULL; 
+  return res; 
+}
+
+
+/**
+  Set a pointer to the multiple equality the view field reference belongs to
+  (if any).
+
+  @details
+  The function looks for a multiple equality containing this item of the type
+  Item_direct_view_ref among those referenced by arg.
+  In the case such equality exists the function does the following.
+  If the found multiple equality contains a constant, then the item
+  is substituted for this constant, otherwise the function sets a pointer
+  to the multiple equality in the item.
+
+  @param arg    reference to list of multiple equalities where
+                the item (this object) is to be looked for
+
+  @note
+    This function is supposed to be called as a callback parameter in calls
+    of the compile method.
+
+  @note 
+    The function calls Item_field::equal_fields_propagator for the field item
+    this->real_item() to do the job. Then it takes the pointer to equal_item
+    from this field item and assigns it to this->item_equal.
+
+  @return
+    - pointer to the replacing constant item, if the field item was substituted
+    - pointer to the field item, otherwise.
+*/
+
+Item *Item_direct_view_ref::equal_fields_propagator(uchar *arg)
+{
+  Item *field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return this;
+  Item *item= field_item->equal_fields_propagator(arg);
+  set_item_equal(field_item->get_item_equal());
+  field_item->set_item_equal(NULL);
+  if (item != field_item)
+    return item;
+  return this;
+}
+
+
+/**
+  Replace an Item_direct_view_ref for an equal Item_field evaluated earlier
+  (if any).
+
+  @details
+  If this->item_equal points to some item and coincides with arg then
+  the function returns a pointer to a field item that is referred to by the 
+  first element of the item_equal list which the Item_direct_view_ref
+  object belongs to unless item_equal contains  a constant item. In this
+  case the function returns this constant item (if the substitution does
+   not require conversion).   
+  If the Item_direct_view_item object does not refer any Item_equal object
+  'this' is returned .
+
+  @param arg   NULL or points to so some item of the Item_equal type  
+
+  @note
+    This function is supposed to be called as a callback parameter in calls
+    of the transformer method.
+
+  @note 
+    The function calls Item_field::replace_equal_field for the field item
+    this->real_item() to do the job.
+
+  @return
+    - pointer to a replacement Item_field if there is a better equal item or
+      a pointer to a constant equal item;
+    - this - otherwise.
+*/
+
+Item *Item_direct_view_ref::replace_equal_field(uchar *arg)
+{
+  Item *field_item= real_item();
+  if (field_item->type() != FIELD_ITEM)
+    return this;
+  field_item->set_item_equal(item_equal);
+  Item *item= field_item->replace_equal_field(arg);
+  field_item->set_item_equal(0);
+  return item != field_item ? item : this;
+}
+
+
 bool Item_default_value::eq(const Item *item, bool binary_cmp) const
 {
   return item->type() == DEFAULT_VALUE_ITEM && 
@@ -6921,6 +7932,8 @@ Item_result item_cmp_type(Item_result a,Item_result b)
     return INT_RESULT;
   else if (a == ROW_RESULT || b == ROW_RESULT)
     return ROW_RESULT;
+  else if (a == TIME_RESULT || b == TIME_RESULT)
+    return TIME_RESULT;
   if ((a == INT_RESULT || a == DECIMAL_RESULT) &&
       (b == INT_RESULT || b == DECIMAL_RESULT))
     return DECIMAL_RESULT;
@@ -6934,11 +7947,20 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
   Item *new_item= NULL;
   if (item->basic_const_item())
     return;                                     // Can't be better
-  Item_result res_type=item_cmp_type(comp_item->result_type(),
-				     item->result_type());
+  Item_result res_type=item_cmp_type(comp_item->cmp_type(), item->cmp_type());
   char *name=item->name;			// Alloced by sql_alloc
 
   switch (res_type) {
+  case TIME_RESULT:
+  {
+    bool is_null;
+    Item **ref_copy= ref;
+    /* the following call creates a constant and puts it in new_item */
+    get_datetime_value(thd, &ref_copy, &new_item, comp_item, &is_null);
+    if (is_null)
+      new_item= new Item_null(name);
+    break;
+  }
   case STRING_RESULT:
   {
     char buff[MAX_FIELD_WIDTH];
@@ -7013,8 +8035,9 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
                (Item*) new Item_decimal(name, result, length, decimals));
     break;
   }
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
+    break;
   }
   if (new_item)
     thd->change_item_tree(ref, new_item);
@@ -7034,6 +8057,9 @@ void resolve_const_item(THD *thd, Item **ref, Item *comp_item)
   @note We only use this on the range optimizer/partition pruning,
         because in some cases we can't store the value in the field
         without some precision/character loss.
+
+  @todo rewrite it to use Arg_comparator (currently it's a simplified and
+        incomplete version of it)
 */
 
 int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
@@ -7089,6 +8115,25 @@ int stored_field_cmp_to_item(THD *thd, Field *field, Item *item)
     field_val= field->val_decimal(&field_buf);
     return my_decimal_cmp(item_val, field_val);
   }
+  /*
+    We have to check field->cmp_type() instead of res_type,
+    as result_type() - and thus res_type - can never be TIME_RESULT (yet).
+  */
+  if (field->cmp_type() == TIME_RESULT)
+  {
+    MYSQL_TIME field_time, item_time;
+    if (field->type() == MYSQL_TYPE_TIME)
+    {
+      field->get_time(&field_time);
+      item->get_time(&item_time);
+    }
+    else
+    {
+      field->get_date(&field_time, TIME_FUZZY_DATE | TIME_INVALID_DATES);
+      item->get_date(&item_time, TIME_FUZZY_DATE | TIME_INVALID_DATES);
+    }
+    return my_time_compare(&field_time, &item_time);
+  }
   double result= item->val_real();
   if (item->null_value)
     return 0;
@@ -7128,11 +8173,13 @@ Item_cache* Item_cache::get_cache(const Item *item, const Item_result type)
     return new Item_cache_str(item);
   case ROW_RESULT:
     return new Item_cache_row();
-  default:
-    // should never be in real life
+  case TIME_RESULT:
+    return new Item_cache_temporal(item->field_type());
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
-    return 0;
+    break;
   }
+  return 0;                                     // Impossible
 }
 
 void Item_cache::store(Item *item)
@@ -7145,6 +8192,11 @@ void Item_cache::store(Item *item)
 
 void Item_cache::print(String *str, enum_query_type query_type)
 {
+  if (value_cached)
+  {
+    print_value(str);
+    return;
+  }
   str->append(STRING_WITH_LEN("<cache>("));
   if (example)
     example->print(str, query_type);
@@ -7153,6 +8205,20 @@ void Item_cache::print(String *str, enum_query_type query_type)
   str->append(')');
 }
 
+/**
+  Assign to this cache NULL value if it is possible
+*/
+
+void Item_cache::set_null()
+{
+  if (maybe_null)
+  {
+    null_value= TRUE;
+    value_cached= TRUE;
+  }
+}
+
+
 bool  Item_cache_int::cache_value()
 {
   if (!example)
@@ -7165,21 +8231,14 @@ bool  Item_cache_int::cache_value()
 }
 
 
-void Item_cache_int::store_longlong(Item *item, longlong val_arg)
-{
-  /* An explicit values is given, save it. */
-  value_cached= TRUE;
-  value= val_arg;
-  null_value= item->null_value;
-  unsigned_flag= item->unsigned_flag;
-}
-
-
 String *Item_cache_int::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   str->set_int(value, unsigned_flag, default_charset());
   return str;
 }
@@ -7188,8 +8247,11 @@ String *Item_cache_int::val_str(String *str)
 my_decimal *Item_cache_int::val_decimal(my_decimal *decimal_val)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   int2my_decimal(E_DEC_FATAL_ERROR, value, unsigned_flag, decimal_val);
   return decimal_val;
 }
@@ -7197,19 +8259,116 @@ my_decimal *Item_cache_int::val_decimal(my_decimal *decimal_val)
 double Item_cache_int::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0.0;
+  }
   return (double) value;
 }
 
 longlong Item_cache_int::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0;
+  }
   return value;
 }
 
+
+int Item_cache_int::save_in_field(Field *field, bool no_conversions)
+{
+  int error;
+  if ((!value_cached && !cache_value()) || null_value)
+    return set_field_to_null_with_conversions(field, no_conversions);
+
+  field->set_notnull();
+  error= field->store(value, unsigned_flag);
+
+  return error ? error : field->table->in_use->is_error() ? 1 : 0;
+}
+
+
+String *Item_cache_temporal::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= true;
+    return NULL;
+  }
+  return val_string_from_date(str);
+}
+
+
+bool  Item_cache_temporal::cache_value()
+{
+  if (!example)
+    return false;
+  value_cached= true;
+ 
+  MYSQL_TIME ltime;
+  if (example->get_date(&ltime, TIME_FUZZY_DATE))
+    value=0;
+  else
+    value= pack_time(&ltime);
+  null_value= example->null_value;
+  return true;
+}
+
+
+bool Item_cache_temporal::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  Lazy_string_num str(value);
+
+  if (!value_cached && !cache_value())
+  {
+    bzero((char*) ltime,sizeof(*ltime));
+    return 1;
+  }
+
+  unpack_time(value, ltime);
+  ltime->time_type= mysql_type_to_time_type(field_type());
+  if (ltime->time_type == MYSQL_TIMESTAMP_TIME)
+  {
+    ltime->hour+= (ltime->month*32+ltime->day)*24;
+    ltime->month= ltime->day= 0;
+  }
+  return 0;
+ 
+}
+
+
+int Item_cache_temporal::save_in_field(Field *field, bool no_conversions)
+{
+  int error;
+  if ((!value_cached && !cache_value()) || null_value)
+    return set_field_to_null_with_conversions(field, no_conversions);
+
+  field->set_notnull();
+ 
+  MYSQL_TIME ltime;
+  unpack_time(value, &ltime);
+  ltime.time_type= mysql_type_to_time_type(field_type());
+  error= field->store_time_dec(&ltime, decimals);
+ 
+
+  return error ? error : field->table->in_use->is_error() ? 1 : 0;
+}
+
+
+void Item_cache_temporal::store_packed(longlong val_arg)
+{
+  /* An explicit values is given, save it. */
+  value_cached= true;
+  value= val_arg;
+  null_value= false;
+}
+
+
 bool Item_cache_real::cache_value()
 {
   if (!example)
@@ -7224,16 +8383,22 @@ bool Item_cache_real::cache_value()
 double Item_cache_real::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0.0;
+  }
   return value;
 }
 
 longlong Item_cache_real::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0;
+  }
   return (longlong) rint(value);
 }
 
@@ -7241,8 +8406,11 @@ longlong Item_cache_real::val_int()
 String* Item_cache_real::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   str->set_real(value, decimals, default_charset());
   return str;
 }
@@ -7251,8 +8419,11 @@ String* Item_cache_real::val_str(String *str)
 my_decimal *Item_cache_real::val_decimal(my_decimal *decimal_val)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   double2my_decimal(E_DEC_FATAL_ERROR, value, decimal_val);
   return decimal_val;
 }
@@ -7273,8 +8444,11 @@ double Item_cache_decimal::val_real()
 {
   DBUG_ASSERT(fixed);
   double res;
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0.0;
+  }
   my_decimal2double(E_DEC_FATAL_ERROR, &decimal_value, &res);
   return res;
 }
@@ -7283,8 +8457,11 @@ longlong Item_cache_decimal::val_int()
 {
   DBUG_ASSERT(fixed);
   longlong res;
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0;
+  }
   my_decimal2int(E_DEC_FATAL_ERROR, &decimal_value, unsigned_flag, &res);
   return res;
 }
@@ -7292,8 +8469,11 @@ longlong Item_cache_decimal::val_int()
 String* Item_cache_decimal::val_str(String *str)
 {
   DBUG_ASSERT(fixed);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   my_decimal_round(E_DEC_FATAL_ERROR, &decimal_value, decimals, FALSE,
                    &decimal_value);
   my_decimal2string(E_DEC_FATAL_ERROR, &decimal_value, 0, 0, 0, str);
@@ -7303,8 +8483,11 @@ String* Item_cache_decimal::val_str(String *str)
 my_decimal *Item_cache_decimal::val_decimal(my_decimal *val)
 {
   DBUG_ASSERT(fixed);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
+  }
   return &decimal_value;
 }
 
@@ -7339,12 +8522,13 @@ double Item_cache_str::val_real()
   DBUG_ASSERT(fixed == 1);
   int err_not_used;
   char *end_not_used;
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0.0;
-  if (value)
-    return my_strntod(value->charset(), (char*) value->ptr(),
-		      value->length(), &end_not_used, &err_not_used);
-  return (double) 0;
+  }
+  return my_strntod(value->charset(), (char*) value->ptr(),
+                    value->length(), &end_not_used, &err_not_used);
 }
 
 
@@ -7352,21 +8536,24 @@ longlong Item_cache_str::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   int err;
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0;
-  if (value)
-    return my_strntoll(value->charset(), value->ptr(),
-		       value->length(), 10, (char**) 0, &err);
-  else
-    return (longlong)0;
+  }
+  return my_strntoll(value->charset(), value->ptr(),
+                     value->length(), 10, (char**) 0, &err);
 }
 
 
 String* Item_cache_str::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return 0;
+  }
   return value;
 }
 
@@ -7374,20 +8561,21 @@ String* Item_cache_str::val_str(String *str)
 my_decimal *Item_cache_str::val_decimal(my_decimal *decimal_val)
 {
   DBUG_ASSERT(fixed == 1);
-  if (!value_cached && !cache_value())
+  if ((!value_cached && !cache_value()) || null_value)
+  {
+    null_value= TRUE;
     return NULL;
-  if (value)
-    string2my_decimal(E_DEC_FATAL_ERROR, value, decimal_val);
-  else
-    decimal_val= 0;
+  }
+  string2my_decimal(E_DEC_FATAL_ERROR, value, decimal_val);
   return decimal_val;
 }
 
 
 int Item_cache_str::save_in_field(Field *field, bool no_conversions)
 {
-  if (!value_cached && !cache_value())
-    return 0;
+  if ((!value_cached && !cache_value()) || null_value)
+    return set_field_to_null_with_conversions(field, no_conversions);
+
   int res= Item_cache::save_in_field(field, no_conversions);
   return (is_varbinary && field->type() == MYSQL_TYPE_STRING &&
           value->length() < field->field_length) ? 1 : res;
@@ -7501,6 +8689,20 @@ void Item_cache_row::bring_value()
 }
 
 
+/**
+  Assign to this cache NULL value if it is possible
+*/
+
+void Item_cache_row::set_null()
+{
+  Item_cache::set_null();
+  if (!values)
+    return;
+  for (uint i= 0; i < item_count; i++)
+    values[i]->set_null();
+};
+
+
 Item_type_holder::Item_type_holder(THD *thd, Item *item)
   :Item(thd, item), enum_set_typelib(0), fld_type(get_real_type(item))
 {
@@ -7541,12 +8743,14 @@ Item_result Item_type_holder::result_type() const
 
 enum_field_types Item_type_holder::get_real_type(Item *item)
 {
+  if (item->type() == REF_ITEM)
+    item= item->real_item();
   switch(item->type())
   {
   case FIELD_ITEM:
   {
     /*
-      Item_fields::field_type ask Field_type() but sometimes field return
+      Item_field::field_type ask Field_type() but sometimes field return
       a different type, like for enum/set, so we need to ask real type.
     */
     Field *field= ((Item_field *) item)->field;
@@ -7588,7 +8792,8 @@ enum_field_types Item_type_holder::get_real_type(Item *item)
       case DECIMAL_RESULT:
         return MYSQL_TYPE_NEWDECIMAL;
       case ROW_RESULT:
-      default:
+      case TIME_RESULT:
+      case IMPOSSIBLE_RESULT:
         DBUG_ASSERT(0);
         return MYSQL_TYPE_VAR_STRING;
       }
@@ -7847,13 +9052,13 @@ void Item_type_holder::get_full_info(Item *item)
     DBUG_ASSERT((enum_set_typelib &&
                  get_real_type(item) == MYSQL_TYPE_NULL) ||
                 (!enum_set_typelib &&
-                 item->type() == Item::FIELD_ITEM &&
-                 (get_real_type(item) == MYSQL_TYPE_ENUM ||
-                  get_real_type(item) == MYSQL_TYPE_SET) &&
-                 ((Field_enum*)((Item_field *) item)->field)->typelib));
+                 item->real_item()->type() == Item::FIELD_ITEM &&
+                 (get_real_type(item->real_item()) == MYSQL_TYPE_ENUM ||
+                  get_real_type(item->real_item()) == MYSQL_TYPE_SET) &&
+                 ((Field_enum*)((Item_field *) item->real_item())->field)->typelib));
     if (!enum_set_typelib)
     {
-      enum_set_typelib= ((Field_enum*)((Item_field *) item)->field)->typelib;
+      enum_set_typelib= ((Field_enum*)((Item_field *) item->real_item())->field)->typelib;
     }
   }
 }
@@ -7915,6 +9120,58 @@ void view_error_processor(THD *thd, void *data)
   ((TABLE_LIST *)data)->hide_view_error(thd);
 }
 
+
+st_select_lex *Item_ident::get_depended_from() const
+{
+  st_select_lex *dep;
+  if ((dep= depended_from))
+    for ( ; dep->merged_into; dep= dep->merged_into) ;
+  return dep;
+}
+
+
+table_map Item_ref::used_tables() const		
+{
+  return get_depended_from() ? OUTER_REF_TABLE_BIT : (*ref)->used_tables(); 
+}
+
+
+void Item_ref::update_used_tables() 
+{ 
+  if (!get_depended_from())
+    (*ref)->update_used_tables(); 
+}
+
+
+table_map Item_direct_view_ref::used_tables() const
+{
+  return get_depended_from() ?
+         OUTER_REF_TABLE_BIT :
+         ((view->merged || !view->table) ?
+          (*ref)->used_tables() :
+          view->table->map);
+}
+
+table_map Item_direct_view_ref::not_null_tables() const
+{
+  return get_depended_from() ?
+         0 :
+         ((view->merged || !view->table) ?
+          (*ref)->not_null_tables() :
+          view->table->map);
+}
+
+/*
+  we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE
+*/
+table_map Item_ref_null_helper::used_tables() const
+{
+  return (get_depended_from() ?
+          OUTER_REF_TABLE_BIT :
+          (*ref)->used_tables() | RAND_TABLE_BIT);
+}
+
+
 #ifndef DBUG_OFF
 
 /* Debugger help function */
diff --git a/sql/item.h b/sql/item.h
index 0445cd4a9d5..aeae1b1faf2 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates.
-
+   Copyright (c) 2009-2011 Monty Program Ab
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; version 2 of the License.
@@ -18,6 +18,10 @@
 #pragma interface			/* gcc class implementation */
 #endif
 
+C_MODE_START
+#include <ma_dyncol.h>
+C_MODE_END
+
 inline
 bool trace_unsupported_func(const char *where, const char *processor_name)
 {
@@ -436,6 +440,16 @@ typedef enum monotonicity_info
 class sp_rcontext;
 
 
+class Item_equal;
+
+struct st_join_table* const NO_PARTICULAR_TAB= (struct st_join_table*)0x1;
+
+typedef struct replace_equal_field_arg 
+{
+  Item_equal *item_equal;
+  struct st_join_table *context_tab;
+} REPLACE_EQUAL_FIELD_ARG;
+
 class Settable_routine_parameter
 {
 public:
@@ -471,6 +485,17 @@ public:
 };
 
 
+struct st_dyncall_create_def
+{
+  Item  *num, *value;
+  CHARSET_INFO *cs;
+  uint len, frac;
+  DYNAMIC_COLUMN_TYPE type;
+};
+
+typedef struct st_dyncall_create_def DYNCALL_CREATE_DEF;
+
+
 typedef bool (Item::*Item_processor) (uchar *arg);
 /*
   Analyzer function
@@ -487,10 +512,25 @@ typedef bool (Item::*Item_analyzer) (uchar **argp);
 typedef Item* (Item::*Item_transformer) (uchar *arg);
 typedef void (*Cond_traverser) (const Item *item, void *arg);
 
+class Item_equal;
+class COND_EQUAL;
+
+class st_select_lex_unit;
 
 class Item {
   Item(const Item &);			/* Prevent use of these */
   void operator=(Item &);
+  /**
+    The index in the JOIN::join_tab array of the JOIN_TAB this Item is attached
+    to. Items are attached (or 'pushed') to JOIN_TABs during optimization by the
+    make_cond_for_table procedure. During query execution, this item is
+    evaluated when the join loop reaches the corresponding JOIN_TAB.
+
+    If the value of join_tab_idx >= MAX_TABLES, this means that there is no
+    corresponding JOIN_TAB.
+  */
+  uint join_tab_idx;
+
 public:
   static void *operator new(size_t size) throw ()
   { return sql_alloc(size); }
@@ -506,12 +546,16 @@ public:
 	     FIELD_VARIANCE_ITEM, INSERT_VALUE_ITEM,
              SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER,
              PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM,
-             XPATH_NODESET, XPATH_NODESET_CMP};
+             XPATH_NODESET, XPATH_NODESET_CMP,
+             VIEW_FIXER_ITEM, EXPR_CACHE_ITEM};
 
   enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE };
 
   enum traverse_order { POSTFIX, PREFIX };
   
+  /* Cache of the result of is_expensive(). */
+  int8 is_expensive_cache;
+  
   /* Reuse size, only used by SP local variable assignment, otherwize 0 */
   uint rsize;
 
@@ -545,7 +589,11 @@ public:
                                            of a query with ROLLUP */ 
   bool null_value;			/* if item is null */
   bool unsigned_flag;
-  bool with_sum_func;
+  bool with_sum_func;                   /* True if item contains a sum func */
+  /**
+    True if any item except Item_sum_func contains a field. Set during parsing.
+  */
+  bool with_field;
   bool fixed;                           /* If item fixed with fix_fields */
   bool is_autogenerated_name;           /* indicate was name of this Item
                                            autogenerated or set by user */
@@ -579,6 +627,12 @@ public:
   Field *make_string_field(TABLE *table);
   virtual bool fix_fields(THD *, Item **);
   /*
+    Fix after some tables has been pulled out. Basically re-calculate all
+    attributes that are dependent on the tables.
+  */
+  virtual void fix_after_pullout(st_select_lex *new_parent, Item **ref) {};
+
+  /*
     This method should be used in case where we are sure that we do not need
     complete fix_fields() procedure.
     Usually this method is used by the optimizer when it has to create a new
@@ -599,11 +653,20 @@ public:
   { return save_in_field(field, 1); }
   virtual bool send(Protocol *protocol, String *str);
   virtual bool eq(const Item *, bool binary_cmp) const;
+  /* result_type() of an item specifies how the value should be returned */
   virtual Item_result result_type() const { return REAL_RESULT; }
-  virtual Item_result cast_to_int_type() const { return result_type(); }
+  /* ... while cmp_type() specifies how it should be compared */
+  virtual Item_result cmp_type() const;
+  virtual Item_result cast_to_int_type() const { return cmp_type(); }
   virtual enum_field_types string_field_type() const;
   virtual enum_field_types field_type() const;
   virtual enum Type type() const =0;
+  /*
+    real_type() is the type of base item.  This is same as type() for
+    most items, except Item_ref() and Item_cache_wrapper() where it
+    shows the type for the underlaying item.
+  */
+  virtual enum Type real_type() const { return type(); }
   
   /*
     Return information about function monotonicity. See comment for
@@ -741,10 +804,22 @@ public:
   */
   virtual bool val_bool();
   virtual String *val_nodeset(String*) { return 0; }
+
+  /*
+    save_val() is method of val_* family which stores value in the given
+    field.
+  */
+  virtual void save_val(Field *to) { save_org_in_field(to); }
+  /*
+    save_result() is method of val*result() family which stores value in
+    the given field.
+  */
+  virtual void save_result(Field *to) { save_val(to); }
   /* Helper functions, see item_sum.cc */
   String *val_string_from_real(String *str);
   String *val_string_from_int(String *str);
   String *val_string_from_decimal(String *str);
+  String *val_string_from_date(String *str);
   my_decimal *val_decimal_from_real(my_decimal *decimal_value);
   my_decimal *val_decimal_from_int(my_decimal *decimal_value);
   my_decimal *val_decimal_from_string(my_decimal *decimal_value);
@@ -761,6 +836,8 @@ public:
   /* This is also used to create fields in CREATE ... SELECT: */
   virtual Field *tmp_table_field(TABLE *t_arg) { return 0; }
   virtual const char *full_name() const { return name ? name : "???"; }
+  const char *field_name_or_null()
+  { return real_item()->type() == Item::FIELD_ITEM ? name : NULL; }
 
   /*
     *result* family of methods is analog of *val* family (see above) but
@@ -775,13 +852,18 @@ public:
   { return val_decimal(val); }
   virtual bool val_bool_result() { return val_bool(); }
   virtual bool is_null_result() { return is_null(); }
-
+  /*
+    Returns 1 if result type and collation for val_str() can change between
+    calls
+  */
+  virtual bool dynamic_result() { return 0; }
   /* 
     Bitmap of tables used by item
     (note: if you need to check dependencies on individual columns, check out
      class Field_enumerator)
   */
   virtual table_map used_tables() const { return (table_map) 0L; }
+  virtual table_map all_used_tables() const { return used_tables(); }
   /*
     Return table map of tables that can't be NULL tables (tables that are
     used in a context where if they would contain a NULL row generated
@@ -836,6 +918,7 @@ public:
   }
 
   void print_item_w_name(String *, enum_query_type query_type);
+  void print_value(String *);
   virtual void update_used_tables() {}
   virtual void split_sum_func(THD *thd, Item **ref_pointer_array,
                               List<Item> &fields) {}
@@ -843,7 +926,9 @@ public:
   void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields,
                        Item **ref, bool skip_registered);
   virtual bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
-  virtual bool get_time(MYSQL_TIME *ltime);
+  bool get_time(MYSQL_TIME *ltime)
+  { return get_date(ltime, TIME_TIME_ONLY | TIME_FUZZY_DATE); }
+  bool get_seconds(ulonglong *sec, ulong *sec_part);
   virtual bool get_date_result(MYSQL_TIME *ltime,uint fuzzydate)
   { return get_date(ltime,fuzzydate); }
   /*
@@ -933,13 +1018,25 @@ public:
   virtual bool remove_fixed(uchar * arg) { fixed= 0; return 0; }
   virtual bool cleanup_processor(uchar *arg);
   virtual bool collect_item_field_processor(uchar * arg) { return 0; }
+  virtual bool add_field_to_set_processor(uchar * arg) { return 0; }
   virtual bool find_item_in_field_list_processor(uchar *arg) { return 0; }
   virtual bool change_context_processor(uchar *context) { return 0; }
   virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
   virtual bool is_expensive_processor(uchar *arg) { return 0; }
   virtual bool register_field_in_read_map(uchar *arg) { return 0; }
+  virtual bool register_field_in_write_map(uchar *arg) { return 0; }
   virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
   virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
+  virtual bool eliminate_subselect_processor(uchar *arg) { return 0; }
+  virtual bool set_fake_select_as_master_processor(uchar *arg) { return 0; }
+  virtual bool update_table_bitmaps_processor(uchar *arg) { return 0; }
+  virtual bool view_used_tables_processor(uchar *arg) { return 0; }
+  virtual bool eval_not_null_tables(uchar *opt_arg) { return 0; }
+  virtual bool is_subquery_processor (uchar *opt_arg) { return 0; }
+  virtual bool limit_index_condition_pushdown_processor(uchar *opt_arg)
+  { 
+    return FALSE;
+  }
 
   /* To call bool function for all arguments */
   struct bool_func_call_args
@@ -955,6 +1052,7 @@ public:
       (this->*(info->bool_function))();
     return FALSE;
   }
+
   /*
     The next function differs from the previous one that a bitmap to be updated
     is passed as uchar *arg.
@@ -1027,12 +1125,23 @@ public:
     return FALSE;
   }
 
+  /*
+    The enumeration Subst_constraint is currently used only in implementations
+    of the virtual function subst_argument_checker.
+  */ 
+  enum Subst_constraint 
+  { 
+    NO_SUBST= 0,         /* No substitution for a field is allowed   */
+    ANY_SUBST,           /* Any substitution for a field is allowed  */ 
+    IDENTITY_SUBST       /* Substitution for a field is allowed if any two
+                            different values of the field type are not equal */
+  };
+
   virtual bool subst_argument_checker(uchar **arg)
-  {
-    if (*arg)
-      *arg= NULL;
-    return TRUE;
+  { 
+    return (*arg != NULL); 
   }
+
   /*
     @brief
     Processor used to check acceptability of an item in the defining
@@ -1053,6 +1162,7 @@ public:
 
   virtual Item *equal_fields_propagator(uchar * arg) { return this; }
   virtual bool set_no_const_sub(uchar *arg) { return FALSE; }
+  /* arg points to REPLACE_EQUAL_FIELD_ARG object */
   virtual Item *replace_equal_field(uchar * arg) { return this; }
   /*
     Check if an expression value has allowed arguments, like DATE/DATETIME
@@ -1063,6 +1173,17 @@ public:
   {
     return FALSE;
   }
+  struct Collect_deps_prm
+  {
+    List<Item> *parameters;
+    /* unit from which we count nest_level */
+    st_select_lex_unit *nest_level_base;
+    int nest_level;
+  };
+  /**
+    Collect outer references
+  */
+  virtual bool collect_outer_ref_processor(uchar *arg) {return FALSE; }
 
   /**
     Find a function of a given type
@@ -1111,6 +1232,8 @@ public:
 
   virtual Item *neg_transformer(THD *thd) { return NULL; }
   virtual Item *update_value_transformer(uchar *select_arg) { return this; }
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; }
+  virtual bool expr_cache_is_needed(THD *) { return FALSE; }
   virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
   void delete_self()
   {
@@ -1128,24 +1251,82 @@ public:
   {
     return 0;
   }
+
   /*
-    result_as_longlong() must return TRUE for Items representing DATE/TIME
-    functions and DATE/TIME table fields.
-    Those Items have result_type()==STRING_RESULT (and not INT_RESULT), but
-    their values should be compared as integers (because the integer
-    representation is more precise than the string one).
+    Test whether an expression is expensive to compute. Used during
+    optimization to avoid computing expensive expressions during this
+    phase. Also used to force temp tables when sorting on expensive
+    functions.
+    TODO:
+    Normally we should have a method:
+      cost Item::execution_cost(),
+    where 'cost' is either 'double' or some structure of various cost
+    parameters.
+
+    NOTE
+      This function is now used to prevent evaluation of materialized IN
+      subquery predicates before it is allowed. grep for 
+      DontEvaluateMaterializedSubqueryTooEarly to see the uses.
   */
-  virtual bool result_as_longlong() { return FALSE; }
-  bool is_datetime();
+  virtual bool is_expensive()
+  {
+    if (is_expensive_cache < 0)
+      is_expensive_cache= walk(&Item::is_expensive_processor, 0, (uchar*)0);
+    return test(is_expensive_cache);
+  }
   virtual Field::geometry_type get_geometry_type() const
     { return Field::GEOM_GEOMETRY; };
   String *check_well_formed_result(String *str, bool send_error= 0);
   bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); 
+  Item* set_expr_cache(THD *thd);
+  virtual Item *get_cached_item() { return NULL; }
+
+  virtual Item_equal *get_item_equal() { return NULL; }
+  virtual void set_item_equal(Item_equal *item_eq) {};
+  virtual Item_equal *find_item_equal(COND_EQUAL *cond_equal) { return NULL; }
+  /**
+    Set the join tab index to the minimal (left-most) JOIN_TAB to which this
+    Item is attached. The number is an index is depth_first_tab() traversal
+    order.
+  */
+  virtual void set_join_tab_idx(uint join_tab_idx_arg)
+  {
+    if (join_tab_idx_arg < join_tab_idx)
+      join_tab_idx= join_tab_idx_arg;
+  }
+  virtual uint get_join_tab_idx() { return join_tab_idx; }
+
+  table_map view_used_tables(TABLE_LIST *view)
+  {
+    view->view_used_tables= 0;
+    walk(&Item::view_used_tables_processor, 0, (uchar *) view);
+    return view->view_used_tables;
+  }
+
+  /**
+    Collect and add to the list cache parameters for this Item.
+
+    @note Now implemented only for subqueries and in_optimizer,
+    if we need it for general function then this method should
+    be defined for Item_func.
+  */
+  virtual void get_cache_parameters(List<Item> &parameters) { };
+
+  virtual void mark_as_condition_AND_part(TABLE_LIST *embedding) {};
 };
 
 
+/**
+  Compare two Items for List<Item>::add_unique()
+*/
+
+bool cmp_items(Item *a, Item *b);
+
+
 /*
-  Class to be used to enumerate all field references in an item tree.
+  Class to be used to enumerate all field references in an item tree. This
+  includes references to outside but not fields of the tables within a
+  subquery.
   Suggested usage:
 
     class My_enumerator : public Field_enumerator 
@@ -1162,7 +1343,7 @@ public:
 class Field_enumerator
 {
 public:
-  virtual void visit_field(Field *field)= 0;
+  virtual void visit_field(Item_field *field)= 0;
   virtual ~Field_enumerator() {};             /* purecov: inspected */
   Field_enumerator() {}                       /* Remove gcc warning */
 };
@@ -1518,6 +1699,19 @@ public:
   */
   TABLE_LIST *cached_table;
   st_select_lex *depended_from;
+  /*
+    Some Items resolved in another select should not be marked as dependency
+    of the subquery where they are. During normal name resolution, we check
+    this. Stored procedures and prepared statements first try to resolve an
+    ident item using a cached table reference and field position from the
+    previous query execution (cached_table/cached_field_index). If the
+    tables were not changed, the ident matches the table/field, and we have
+    faster resolution of the ident without looking through all tables and
+    fields in the query. But in this case, we can not check all conditions
+    about this ident item dependency, so we should cache the condition in
+    this variable.
+  */
+  bool can_be_depended;
   Item_ident(Name_resolution_context *context_arg,
              const char *db_name_arg, const char *table_name_arg,
              const char *field_name_arg);
@@ -1525,10 +1719,15 @@ public:
   Item_ident(TABLE_LIST *view_arg, const char *field_name_arg);
   const char *full_name() const;
   void cleanup();
+  st_select_lex *get_depended_from() const;
   bool remove_dependence_processor(uchar * arg);
   virtual void print(String *str, enum_query_type query_type);
   virtual bool change_context_processor(uchar *cntx)
     { context= (Name_resolution_context *)cntx; return FALSE; }
+  /**
+    Collect outer references
+  */
+  virtual bool collect_outer_ref_processor(uchar *arg);
   friend bool insert_fields(THD *thd, Name_resolution_context *context,
                             const char *db_name,
                             const char *table_name, List_iterator<Item> *it,
@@ -1557,9 +1756,6 @@ public:
 };
 
 
-class Item_equal;
-class COND_EQUAL;
-
 class Item_field :public Item_ident
 {
 protected:
@@ -1600,6 +1796,7 @@ public:
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
   String *val_str(String*);
+  void save_result(Field *to);
   double val_result();
   longlong val_int_result();
   String *str_result(String* tmp);
@@ -1609,17 +1806,19 @@ public:
   bool send(Protocol *protocol, String *str_arg);
   void reset_field(Field *f);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void make_field(Send_field *tmp_field);
   int save_in_field(Field *field,bool no_conversions);
   void save_org_in_field(Field *field);
   table_map used_tables() const;
+  table_map all_used_tables() const; 
   enum Item_result result_type () const
   {
     return field->result_type();
   }
   Item_result cast_to_int_type() const
   {
-    return field->cast_to_int_type();
+    return field->cmp_type();
   }
   enum_field_types field_type() const
   {
@@ -1634,23 +1833,37 @@ public:
   Field *tmp_table_field(TABLE *t_arg) { return result_field; }
   bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
   bool get_date_result(MYSQL_TIME *ltime,uint fuzzydate);
-  bool get_time(MYSQL_TIME *ltime);
   bool is_null() { return field->is_null(); }
   void update_null_value();
+  void update_table_bitmaps()
+  {
+    if (field && field->table)
+    {
+      TABLE *tab= field->table;
+      tab->covering_keys.intersect(field->part_of_key);
+      tab->merge_keys.merge(field->part_of_key);
+      if (tab->read_set)
+        bitmap_fast_test_and_set(tab->read_set, field->field_index);
+      if (field->vcol_info)
+        tab->mark_virtual_col(field);
+    }  
+  }
+  void update_used_tables() { update_table_bitmaps(); }
   Item *get_tmp_table_item(THD *thd);
   bool collect_item_field_processor(uchar * arg);
+  bool add_field_to_set_processor(uchar * arg);
   bool find_item_in_field_list_processor(uchar *arg);
   bool register_field_in_read_map(uchar *arg);
+  bool register_field_in_write_map(uchar *arg);
   bool register_field_in_bitmap(uchar *arg);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
   bool vcol_in_partition_func_processor(uchar *bool_arg);
   bool check_vcol_func_processor(uchar *arg) { return FALSE;}
   bool enumerate_field_refs_processor(uchar *arg);
+  bool update_table_bitmaps_processor(uchar *arg);
   void cleanup();
-  bool result_as_longlong()
-  {
-    return field->can_be_compared_as_longlong();
-  }
+  Item_equal *get_item_equal() { return item_equal; }
+  void set_item_equal(Item_equal *item_eq) { item_equal= item_eq; }
   Item_equal *find_item_equal(COND_EQUAL *cond_equal);
   bool subst_argument_checker(uchar **arg);
   Item *equal_fields_propagator(uchar *arg);
@@ -1800,6 +2013,7 @@ public:
   Item_param(uint pos_in_query_arg);
 
   enum Item_result result_type () const { return item_result_type; }
+  enum Item_result cast_to_int_type() const { return item_result_type; }
   enum Type type() const { return item_type; }
   enum_field_types field_type() const { return param_type; }
 
@@ -1807,7 +2021,6 @@ public:
   longlong val_int();
   my_decimal *val_decimal(my_decimal*);
   String *val_str(String*);
-  bool get_time(MYSQL_TIME *tm);
   bool get_date(MYSQL_TIME *tm, uint fuzzydate);
   int  save_in_field(Field *field, bool no_conversions);
 
@@ -1908,18 +2121,28 @@ class Item_uint :public Item_int
 {
 public:
   Item_uint(const char *str_arg, uint length);
-  Item_uint(ulonglong i) :Item_int((ulonglong) i, 10) {}
+  Item_uint(ulonglong i) :Item_int(i, 10) {}
   Item_uint(const char *str_arg, longlong i, uint length);
   double val_real()
     { DBUG_ASSERT(fixed == 1); return ulonglong2double((ulonglong)value); }
   String *val_str(String*);
   Item *clone_item() { return new Item_uint(name, value, max_length); }
-  int save_in_field(Field *field, bool no_conversions);
   virtual void print(String *str, enum_query_type query_type);
   Item_num *neg ();
   uint decimal_precision() const { return max_length; }
-  bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
-  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
+};
+
+
+class Item_datetime :public Item_int
+{
+protected:
+  MYSQL_TIME ltime;
+public:
+  Item_datetime() :Item_int(0) { unsigned_flag=0; }
+  int save_in_field(Field *field, bool no_conversions);
+  longlong val_int();
+  double val_real() { return (double)val_int(); }
+  void set(longlong packed);
 };
 
 
@@ -2166,9 +2389,11 @@ private:
 
 
 longlong 
-longlong_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end);
+longlong_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                                const char *end);
 double 
-double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end);
+double_from_string_with_check(CHARSET_INFO *cs, const char *cptr,
+                              const char *end);
 
 class Item_static_string_func :public Item_string
 {
@@ -2213,10 +2438,11 @@ class Item_return_date_time :public Item_partition_func_safe_string
 {
   enum_field_types date_time_field_type;
 public:
-  Item_return_date_time(const char *name_arg, enum_field_types field_type_arg)
-    :Item_partition_func_safe_string(name_arg, 0, &my_charset_bin),
+  Item_return_date_time(const char *name_arg, uint length_arg,
+                        enum_field_types field_type_arg)
+    :Item_partition_func_safe_string(name_arg, length_arg, &my_charset_bin),
      date_time_field_type(field_type_arg)
-  { }
+  { decimals= 0; }
   enum_field_types field_type() const { return date_time_field_type; }
 };
 
@@ -2347,11 +2573,12 @@ public:
   enum Ref_Type { REF, DIRECT_REF, VIEW_REF, OUTER_REF, AGGREGATE_REF };
   Field *result_field;			 /* Save result here */
   Item **ref;
+  bool reference_trough_name;
   Item_ref(Name_resolution_context *context_arg,
            const char *db_arg, const char *table_name_arg,
            const char *field_name_arg)
     :Item_ident(context_arg, db_arg, table_name_arg, field_name_arg),
-     result_field(0), ref(0) {}
+    result_field(0), ref(0), reference_trough_name(1) {}
   /*
     This constructor is used in two scenarios:
     A) *item = NULL
@@ -2376,11 +2603,15 @@ public:
   Item_ref(THD *thd, Item_ref *item)
     :Item_ident(thd, item), result_field(item->result_field), ref(item->ref) {}
   enum Type type() const		{ return REF_ITEM; }
+  enum Type real_type() const           { return ref ? (*ref)->type() :
+                                          REF_ITEM; }
   bool eq(const Item *item, bool binary_cmp) const
   { 
     Item *it= ((Item *) item)->real_item();
     return ref && (*ref)->eq(it, binary_cmp);
   }
+  void save_val(Field *to);
+  void save_result(Field *to);
   double val_real();
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
@@ -2397,6 +2628,7 @@ public:
   bool send(Protocol *prot, String *tmp);
   void make_field(Send_field *field);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   int save_in_field(Field *field, bool no_conversions);
   void save_org_in_field(Field *field);
   enum Item_result result_type () const { return (*ref)->result_type(); }
@@ -2404,20 +2636,16 @@ public:
   Field *get_tmp_table_field()
   { return result_field ? result_field : (*ref)->get_tmp_table_field(); }
   Item *get_tmp_table_item(THD *thd);
-  table_map used_tables() const		
-  {
-    return depended_from ? OUTER_REF_TABLE_BIT : (*ref)->used_tables(); 
-  }
-  void update_used_tables() 
-  { 
-    if (!depended_from) 
-      (*ref)->update_used_tables(); 
-  }
+  table_map used_tables() const;		
+  void update_used_tables(); 
   bool const_item() const 
   {
     return (*ref)->const_item();
   }
-  table_map not_null_tables() const { return (*ref)->not_null_tables(); }
+  table_map not_null_tables() const 
+  { 
+    return depended_from ? 0 : (*ref)->not_null_tables();
+  }
   void set_result_field(Field *field)	{ result_field= field; }
   bool is_result_field() { return 1; }
   void save_in_result_field(bool no_conversions)
@@ -2429,10 +2657,18 @@ public:
     return ref ? (*ref)->real_item() : this;
   }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
-  {
-    return (*ref)->walk(processor, walk_subquery, arg) ||
-           (this->*processor)(arg);
+  { 
+    if (ref && *ref)
+      return (*ref)->walk(processor, walk_subquery, arg) ||
+             (this->*processor)(arg); 
+    else
+      return FALSE;
   }
+  Item* transform(Item_transformer, uchar *arg);
+  Item* compile(Item_analyzer analyzer, uchar **arg_p,
+                Item_transformer transformer, uchar *arg_t);
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return (*ref)->enumerate_field_refs_processor(arg); }
   void no_rows_in_result()
   {
     (*ref)->no_rows_in_result();
@@ -2442,10 +2678,6 @@ public:
     (*ref)->restore_to_before_no_rows_in_result();
   }
   virtual void print(String *str, enum_query_type query_type);
-  bool result_as_longlong()
-  {
-    return (*ref)->result_as_longlong();
-  }
   void cleanup();
   Item_field *filed_for_view_update()
     { return (*ref)->filed_for_view_update(); }
@@ -2482,11 +2714,6 @@ public:
   {
     return trace_unsupported_by_check_vcol_func_processor("ref");
   }
-  bool get_time(MYSQL_TIME *ltime)
-  {
-    DBUG_ASSERT(fixed);
-    return (*ref)->get_time(ltime);
-  }
 };
 
 
@@ -2513,6 +2740,7 @@ public:
               alias_name_used_arg)
   {}
 
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String *val_str(String* tmp);
@@ -2523,23 +2751,190 @@ public:
   virtual Ref_Type ref_type() { return DIRECT_REF; }
 };
 
+
+/**
+  This class is the same as Item_direct_ref but created to wrap Item_ident
+  before fix_fields() call
+*/
+
+class Item_direct_ref_to_ident :public Item_direct_ref
+{
+  Item_ident *ident;
+public:
+  Item_direct_ref_to_ident(Item_ident *item)
+    :Item_direct_ref(item->context, (Item**)&item, item->table_name, item->field_name,
+                     FALSE)
+  {
+    ident= item;
+    ref= (Item**)&ident;
+  }
+
+  bool fix_fields(THD *thd, Item **it)
+  {
+    DBUG_ASSERT(ident->type() == FIELD_ITEM || ident->type() == REF_ITEM);
+    if ((!ident->fixed && ident->fix_fields(thd, ref)) ||
+        ident->check_cols(1))
+      return TRUE;
+    set_properties();
+    return FALSE;
+  }
+
+  virtual void print(String *str, enum_query_type query_type)
+  { ident->print(str, query_type); }
+
+};
+
+
+class Expression_cache;
+class Item_cache;
+
+
+/**
+  The objects of this class can store its values in an expression cache.
+*/
+
+class Item_cache_wrapper :public Item_result_field
+{
+private:
+  /* Pointer on the cached expression */
+  Item *orig_item;
+  Expression_cache *expr_cache;
+  /*
+    In order to put the expression into the expression cache and return
+    value of val_*() method, we will need to get the expression value twice
+    (probably in different types).  In order to avoid making two
+    (potentially costly) orig_item->val_*() calls, we store expression value
+    in this Item_cache object.
+  */
+  Item_cache *expr_value;
+
+  List<Item> parameters;
+
+  Item *check_cache();
+  void cache();
+  void init_on_demand();
+
+public:
+  Item_cache_wrapper(Item *item_arg);
+  ~Item_cache_wrapper();
+
+  const char *func_name() const { return "<expr_cache>"; }
+  enum Type type() const { return EXPR_CACHE_ITEM; }
+  enum Type real_type() const { return orig_item->type(); }
+
+  bool set_cache(THD *thd);
+
+  bool fix_fields(THD *thd, Item **it);
+  void fix_length_and_dec() {}
+  void cleanup();
+
+  /* Methods of getting value which should be cached in the cache */
+  void save_val(Field *to);
+  double val_real();
+  longlong val_int();
+  String *val_str(String* tmp);
+  my_decimal *val_decimal(my_decimal *);
+  bool val_bool();
+  bool is_null();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  bool send(Protocol *protocol, String *buffer)
+  {
+    if (result_field)
+      return protocol->store(result_field);
+    return Item::send(protocol, buffer);
+  }
+  void save_org_in_field(Field *field)
+  {
+    save_val(field);
+  }
+  void save_in_result_field(bool no_conversions)
+  {
+    save_val(result_field);
+  }
+  Item* get_tmp_table_item(THD *thd_arg);
+
+  /* Following methods make this item transparent as much as possible */
+
+  virtual void print(String *str, enum_query_type query_type);
+  virtual const char *full_name() const { return orig_item->full_name(); }
+  virtual void make_field(Send_field *field) { orig_item->make_field(field); }
+  bool eq(const Item *item, bool binary_cmp) const
+  {
+    Item *it= ((Item *) item)->real_item();
+    return orig_item->eq(it, binary_cmp);
+  }
+  void fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+  {
+    orig_item->fix_after_pullout(new_parent, &orig_item);
+  }
+  int save_in_field(Field *to, bool no_conversions);
+  enum Item_result result_type () const { return orig_item->result_type(); }
+  enum_field_types field_type() const   { return orig_item->field_type(); }
+  table_map used_tables() const { return orig_item->used_tables(); }
+  void update_used_tables() { orig_item->update_used_tables(); }
+  bool const_item() const { return orig_item->const_item(); }
+  table_map not_null_tables() const { return orig_item->not_null_tables(); }
+  bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  {
+    return orig_item->walk(processor, walk_subquery, arg) ||
+      (this->*processor)(arg);
+  }
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return orig_item->enumerate_field_refs_processor(arg); }
+  Item_field *filed_for_view_update()
+  { return orig_item->filed_for_view_update(); }
+
+  /* Row emulation: forwarding of ROW-related calls to orig_item */
+  uint cols()
+  { return result_type() == ROW_RESULT ? orig_item->cols() : 1; }
+  Item* element_index(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->element_index(i) : this; }
+  Item** addr(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->addr(i) : 0; }
+  bool check_cols(uint c)
+  {
+    return (result_type() == ROW_RESULT ?
+            orig_item->check_cols(c) :
+            Item::check_cols(c));
+  }
+  bool null_inside()
+  { return result_type() == ROW_RESULT ? orig_item->null_inside() : 0; }
+  void bring_value()
+  {
+    if (result_type() == ROW_RESULT)
+      orig_item->bring_value();
+  }
+  virtual bool is_expensive() { return orig_item->is_expensive(); }
+  bool is_expensive_processor(uchar *arg)
+  { return orig_item->is_expensive_processor(arg); }
+  bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor("cache");
+  }
+};
+
+
 /*
   Class for view fields, the same as Item_direct_ref, but call fix_fields
   of reference if it is not called yet
 */
 class Item_direct_view_ref :public Item_direct_ref
 {
+  Item_equal *item_equal;
+  TABLE_LIST *view;
 public:
   Item_direct_view_ref(Name_resolution_context *context_arg, Item **item,
-                  const char *table_name_arg,
-                  const char *field_name_arg)
-    :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg) {}
+                       const char *table_name_arg,
+                       const char *field_name_arg,
+                       TABLE_LIST *view_arg)
+    :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg),
+    item_equal(0), view(view_arg) {}
   /* Constructor need to process subselect with temporary tables (see Item) */
   Item_direct_view_ref(THD *thd, Item_direct_ref *item)
-    :Item_direct_ref(thd, item) {}
+    :Item_direct_ref(thd, item), item_equal(0) {}
   Item_direct_view_ref(TABLE_LIST *view_arg, Item **item,
                        const char *field_name_arg)
-    :Item_direct_ref(view_arg, item, field_name_arg)
+    :Item_direct_ref(view_arg, item, field_name_arg), item_equal(0)
   {}
 
   bool fix_fields(THD *, Item **);
@@ -2551,6 +2946,26 @@ public:
     return item;
   }
   virtual Ref_Type ref_type() { return VIEW_REF; }
+  Item_equal *get_item_equal() { return item_equal; }
+  void set_item_equal(Item_equal *item_eq) { item_equal= item_eq; }
+  Item_equal *find_item_equal(COND_EQUAL *cond_equal);
+  bool subst_argument_checker(uchar **arg);
+  Item *equal_fields_propagator(uchar *arg);
+  Item *replace_equal_field(uchar *arg);
+  table_map used_tables() const;	
+  table_map not_null_tables() const;
+  bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  { 
+    return (*ref)->walk(processor, walk_subquery, arg) ||
+           (this->*processor)(arg);
+  }
+   bool view_used_tables_processor(uchar *arg) 
+  {
+    TABLE_LIST *view_arg= (TABLE_LIST *) arg;
+    if (view_arg == view)
+      view_arg->view_used_tables|= (*ref)->used_tables();
+    return 0;
+  }
 };
 
 
@@ -2600,6 +3015,7 @@ public:
     outer_ref->save_org_in_field(result_field);
   }
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   table_map used_tables() const
   {
     return (*ref)->const_item() ? 0 : OUTER_REF_TABLE_BIT;
@@ -2632,6 +3048,7 @@ public:
 		       const char *table_name_arg, const char *field_name_arg)
     :Item_ref(context_arg, item, table_name_arg, field_name_arg),
      owner(master) {}
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String* val_str(String* s);
@@ -2639,15 +3056,7 @@ public:
   bool val_bool();
   bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
   virtual void print(String *str, enum_query_type query_type);
-  /*
-    we add RAND_TABLE_BIT to prevent moving this item from HAVING to WHERE
-  */
-  table_map used_tables() const
-  {
-    return (depended_from ?
-            OUTER_REF_TABLE_BIT :
-            (*ref)->used_tables() | RAND_TABLE_BIT);
-  }
+  table_map used_tables() const;
 };
 
 /*
@@ -2894,6 +3303,28 @@ public:
 };
 
 
+/*
+  Cached_item_XXX objects are not exactly caches. They do the following:
+
+  Each Cached_item_XXX object has
+   - its source item
+   - saved value of the source item
+   - cmp() method that compares the saved value with the current value of the
+     source item, and if they were not equal saves item's value into the saved
+     value.
+*/
+
+/*
+  Cached_item_XXX objects are not exactly caches. They do the following:
+
+  Each Cached_item_XXX object has
+   - its source item
+   - saved value of the source item
+   - cmp() method that compares the saved value with the current value of the
+     source item, and if they were not equal saves item's value into the saved
+     value.
+*/
+
 class Cached_item :public Sql_alloc
 {
 public:
@@ -2950,9 +3381,10 @@ class Cached_item_field :public Cached_item
   uint length;
 
 public:
-  Cached_item_field(Item_field *item)
+  Cached_item_field(Field *arg_field) : field(arg_field)
   {
-    field= item->field;
+    field= arg_field;
+    /* TODO: take the memory allocation below out of the constructor. */
     buff= (uchar*) sql_calloc(length=field->pack_length());
   }
   bool cmp(void);
@@ -3130,6 +3562,13 @@ private:
 };
 
 
+/**
+  @todo
+  Implement the is_null() method for this class. Currently calling is_null()
+  on any Item_cache object resolves to Item::is_null(), which reutns FALSE
+  for any value.
+*/
+
 class Item_cache: public Item_basic_constant
 {
 protected:
@@ -3155,7 +3594,8 @@ public:
     example(0), used_table_map(0), cached_field(0), cached_field_type(MYSQL_TYPE_STRING),
     value_cached(0)
   {
-    fixed= 1; 
+    fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
   Item_cache(enum_field_types field_type_arg):
@@ -3163,6 +3603,7 @@ public:
     value_cached(0)
   {
     fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
 
@@ -3209,6 +3650,22 @@ public:
 
   virtual void store(Item *item);
   virtual bool cache_value()= 0;
+  bool is_null() { return null_value; }
+  virtual bool is_expensive()
+  {
+    DBUG_ASSERT(example);
+    if (value_cached)
+      return false;
+    return example->is_expensive();
+  }
+  bool is_expensive_processor(uchar *arg)
+  {
+    DBUG_ASSERT(example);
+    if (value_cached)
+      return false;
+    return example->is_expensive_processor(arg);
+  }
+  virtual void set_null();
 };
 
 
@@ -3217,28 +3674,54 @@ class Item_cache_int: public Item_cache
 protected:
   longlong value;
 public:
-  Item_cache_int(): Item_cache(),
+  Item_cache_int(): Item_cache(MYSQL_TYPE_LONGLONG),
     value(0) {}
   Item_cache_int(enum_field_types field_type_arg):
     Item_cache(field_type_arg), value(0) {}
 
-  virtual void store(Item *item){ Item_cache::store(item); }
-  void store_longlong(Item *item, longlong val_arg);
   double val_real();
   longlong val_int();
   String* val_str(String *str);
   my_decimal *val_decimal(my_decimal *);
   enum Item_result result_type() const { return INT_RESULT; }
-  bool result_as_longlong() { return TRUE; }
   bool cache_value();
+  int save_in_field(Field *field, bool no_conversions);
 };
 
 
+class Item_cache_temporal: public Item_cache_int
+{
+public:
+  Item_cache_temporal(enum_field_types field_type_arg):
+    Item_cache_int(field_type_arg)
+  {
+    if (mysql_type_to_time_type(cached_field_type) == MYSQL_TIMESTAMP_ERROR)
+      cached_field_type= MYSQL_TYPE_DATETIME;
+  }
+
+  String* val_str(String *str);
+  bool cache_value();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  int save_in_field(Field *field, bool no_conversions);
+  void store_packed(longlong val_arg);
+  /*
+    Having a clone_item method tells optimizer that this object
+    is a constant and need not be optimized further.
+    Important when storing packed datetime values.
+  */
+  Item *clone_item()
+  {
+    Item_cache_temporal *item= new Item_cache_temporal(cached_field_type);
+    item->store_packed(value);
+    return item;
+  }
+};
+
 class Item_cache_real: public Item_cache
 {
   double value;
 public:
-  Item_cache_real(): Item_cache(),
+  Item_cache_real(): Item_cache(MYSQL_TYPE_DOUBLE),
     value(0) {}
 
   double val_real();
@@ -3255,7 +3738,7 @@ class Item_cache_decimal: public Item_cache
 protected:
   my_decimal decimal_value;
 public:
-  Item_cache_decimal(): Item_cache() {}
+  Item_cache_decimal(): Item_cache(MYSQL_TYPE_NEWDECIMAL) {}
 
   double val_real();
   longlong val_int();
@@ -3357,6 +3840,7 @@ public:
     DBUG_VOID_RETURN;
   }
   bool cache_value();
+  virtual void set_null();
 };
 
 
@@ -3403,7 +3887,8 @@ void mark_select_range_as_dependent(THD *thd,
                                     Field *found_field, Item *found_item,
                                     Item_ident *resolved_item);
 
-extern Cached_item *new_Cached_item(THD *thd, Item *item);
+extern Cached_item *new_Cached_item(THD *thd, Item *item,
+                                    bool pass_through_ref);
 extern Item_result item_cmp_type(Item_result a,Item_result b);
 extern void resolve_const_item(THD *thd, Item **ref, Item *cmp_item);
 extern int stored_field_cmp_to_item(THD *thd, Field *field, Item *item);
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index 798756ed9ef..f09ca08cc84 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -29,11 +29,15 @@
   Create right type of Cached_item for an item.
 */
 
-Cached_item *new_Cached_item(THD *thd, Item *item)
+Cached_item *new_Cached_item(THD *thd, Item *item, bool pass_through_ref)
 {
-  if (item->real_item()->type() == Item::FIELD_ITEM &&
+  if (pass_through_ref && item->real_item()->type() == Item::FIELD_ITEM &&
       !(((Item_field *) (item->real_item()))->field->flags & BLOB_FLAG))
-    return new Cached_item_field((Item_field *) (item->real_item()));
+  {
+    Item_field *real_item= (Item_field *) item->real_item();
+    Field *cached_field= real_item->field;
+    return new Cached_item_field(cached_field);
+  }
   switch (item->result_type()) {
   case STRING_RESULT:
     return new Cached_item_str(thd, (Item_field *) item);
@@ -119,14 +123,20 @@ bool Cached_item_int::cmp(void)
 
 bool Cached_item_field::cmp(void)
 {
-  bool tmp= field->cmp(buff) != 0;		// This is not a blob!
-  if (tmp)
-    field->get_image(buff,length,field->charset());
+  bool tmp= FALSE;                              // Value is identical
+  /* Note that field can't be a blob here ! */
   if (null_value != field->is_null())
   {
     null_value= !null_value;
-    tmp=TRUE;
+    tmp= TRUE;                                  // Value has changed
   }
+
+  /*
+    If value is not null and value changed (from null to not null or
+    becasue of value change), then copy the new value to buffer.
+    */
+  if (! null_value && (tmp || (tmp= (field->cmp(buff) != 0))))
+    field->get_image(buff,length,field->charset());
   return tmp;
 }
 
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index c4d56a93af6..1d9be2d60ec 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -29,11 +29,6 @@
 #include <m_ctype.h>
 #include "sql_select.h"
 
-static bool convert_constant_item(THD *, Item_field *, Item **);
-static longlong
-get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null);
-
 static Item_result item_store_type(Item_result a, Item *item,
                                    my_bool unsigned_flag)
 {
@@ -76,6 +71,30 @@ static void agg_result_type(Item_result *type, Item **items, uint nitems)
 }
 
 
+/**
+  find an temporal type (item) that others will be converted to
+  for the purpose of comparison.
+
+  this is the type that will be used in warnings like
+  "Incorrect <<TYPE>> value".
+*/
+Item *find_date_time_item(Item **args, uint nargs, uint col)
+{
+  Item *date_arg= 0, **arg, **arg_end;
+  for (arg= args, arg_end= args + nargs; arg != arg_end ; arg++)
+  {
+    Item *item= arg[0]->element_index(col);
+    if (item->cmp_type() != TIME_RESULT)
+      continue;
+    if (item->field_type() == MYSQL_TYPE_DATETIME)
+      return item;
+    if (!date_arg)
+      date_arg= item;
+  }
+  return date_arg;
+}
+
+
 /*
   Compare row signature of two expressions
 
@@ -138,10 +157,10 @@ static int cmp_row_type(Item* item1, Item* item2)
 static int agg_cmp_type(Item_result *type, Item **items, uint nitems)
 {
   uint i;
-  type[0]= items[0]->result_type();
+  type[0]= items[0]->cmp_type();
   for (i= 1 ; i < nitems ; i++)
   {
-    type[0]= item_cmp_type(type[0], items[i]->result_type());
+    type[0]= item_cmp_type(type[0], items[i]->cmp_type());
     /*
       When aggregating types of two row expressions we have to check
       that they have the same cardinality and that each component
@@ -207,7 +226,7 @@ static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE)
 {
   uint i;
   uint found_types;
-  Item_result left_result= items[0]->result_type();
+  Item_result left_result= items[0]->cmp_type();
   DBUG_ASSERT(nitems > 1);
   found_types= 0;
   for (i= 1; i < nitems ; i++)
@@ -215,11 +234,11 @@ static uint collect_cmp_types(Item **items, uint nitems, bool skip_nulls= FALSE)
     if (skip_nulls && items[i]->type() == Item::NULL_ITEM)
       continue; // Skip NULL constant items
     if ((left_result == ROW_RESULT || 
-         items[i]->result_type() == ROW_RESULT) &&
+         items[i]->cmp_type() == ROW_RESULT) &&
         cmp_row_type(items[0], items[i]))
       return 0;
     found_types|= 1<< (uint)item_cmp_type(left_result,
-                                           items[i]->result_type());
+                                           items[i]->cmp_type());
   }
   /*
    Even if all right-hand items are NULLs and we are skipping them all, we need
@@ -420,12 +439,29 @@ longlong Item_func_nop_all::val_int()
     1  Item was replaced with an integer version of the item
 */
 
-static bool convert_constant_item(THD *thd, Item_field *field_item,
+static bool convert_const_to_int(THD *thd, Item_field *field_item,
                                   Item **item)
 {
   Field *field= field_item->field;
   int result= 0;
 
+  /*
+    We don't need to convert an integer to an integer,
+    pretend it's already converted.
+
+    But we still convert it if it is compared with a Field_year,
+    as YEAR(2) may change the value of an integer when converting it
+    to an integer (say, 0 to 70).
+
+    As a special hack, to avoid reevaluation of stored routines
+    where 5.2 didn't reevaluate them, we "convert" for BIGINT too.
+    In 5.5 it isn't necessary, as it caches constant expressions correctly.
+  */
+  if ((*item)->cmp_type() == INT_RESULT &&
+      field_item->field_type() != MYSQL_TYPE_YEAR &&
+      field_item->field_type() != MYSQL_TYPE_LONGLONG)
+    return 1;
+
   if ((*item)->const_item())
   {
     TABLE *table= field->table;
@@ -447,14 +483,12 @@ static bool convert_constant_item(THD *thd, Item_field *field_item,
     thd->count_cuted_fields= CHECK_FIELD_IGNORE;
 
     /*
-      Store the value of the field/constant if it references an outer field
-      because the call to save_in_field below overrides that value.
-      Don't save field value if no data has been read yet.
-      Outer constant values are always saved.
+      Store the value of the field/constant because the call to save_in_field
+      below overrides that value. Don't save field value if no data has been
+      read yet.
     */
-    bool save_field_value= (field_item->depended_from &&
-                            (field_item->const_item() ||
-                             !(field->table->status & STATUS_NO_RECORD)));
+    bool save_field_value= (field_item->const_item() ||
+                            !(field->table->status & STATUS_NO_RECORD));
     if (save_field_value)
       orig_field_val= field->val_int();
     if (!(*item)->is_null() && !(*item)->save_in_field(field, 1))
@@ -484,7 +518,6 @@ static bool convert_constant_item(THD *thd, Item_field *field_item,
 void Item_bool_func2::fix_length_and_dec()
 {
   max_length= 1;				     // Function returns 0 or 1
-  THD *thd;
 
   /*
     As some compare functions are generated after sql_yacc,
@@ -505,7 +538,6 @@ void Item_bool_func2::fix_length_and_dec()
     to the collation of A.
   */
 
-  
   DTCollation coll;
   if (args[0]->result_type() == STRING_RESULT &&
       args[1]->result_type() == STRING_RESULT &&
@@ -514,48 +546,28 @@ void Item_bool_func2::fix_length_and_dec()
     
   args[0]->cmp_context= args[1]->cmp_context=
     item_cmp_type(args[0]->result_type(), args[1]->result_type());
-  // Make a special case of compare with fields to get nicer DATE comparisons
 
-  if (functype() == LIKE_FUNC)  // Disable conversion in case of LIKE function.
-  {
-    set_cmp_func();
-    return;
-  }
+  /*
+    Make a special case of compare with fields to get nicer comparisons
+    of bigint numbers with constant string.
+    This directly contradicts the manual (number and a string should
+    be compared as doubles), but seems to provide more
+    "intuitive" behavior in some cases (but less intuitive in others).
 
-  thd= current_thd;
-  if (!thd->lex->is_ps_or_view_context_analysis())
+    But disable conversion in case of LIKE function.
+  */
+  THD *thd= current_thd;
+  if (functype() != LIKE_FUNC && !thd->lex->is_ps_or_view_context_analysis())
   {
-    if (args[0]->real_item()->type() == FIELD_ITEM)
+    int field;
+    if (args[field= 0]->real_item()->type() == FIELD_ITEM ||
+        args[field= 1]->real_item()->type() == FIELD_ITEM)
     {
-      Item_field *field_item= (Item_field*) (args[0]->real_item());
-      if (field_item->field->can_be_compared_as_longlong() &&
-          !(field_item->is_datetime() &&
-            args[1]->result_type() == STRING_RESULT))
-      {
-        if (convert_constant_item(thd, field_item, &args[1]))
-        {
-          cmp.set_cmp_func(this, tmp_arg, tmp_arg+1,
-                           INT_RESULT);		// Works for all types.
-          args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
-          return;
-        }
-      }
-    }
-    if (args[1]->real_item()->type() == FIELD_ITEM)
-    {
-      Item_field *field_item= (Item_field*) (args[1]->real_item());
-      if (field_item->field->can_be_compared_as_longlong() &&
-          !(field_item->is_datetime() &&
-            args[0]->result_type() == STRING_RESULT))
-      {
-        if (convert_constant_item(thd, field_item, &args[0]))
-        {
-          cmp.set_cmp_func(this, tmp_arg, tmp_arg+1,
-                           INT_RESULT); // Works for all types.
-          args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
-          return;
-        }
-      }
+      Item_field *field_item= (Item_field*) (args[field]->real_item());
+      if ((field_item->field_type() ==  MYSQL_TYPE_LONGLONG ||
+           field_item->field_type() ==  MYSQL_TYPE_YEAR) &&
+          convert_const_to_int(thd, field_item, &args[!field]))
+        args[0]->cmp_context= args[1]->cmp_context= INT_RESULT;
     }
   }
   set_cmp_func();
@@ -569,6 +581,8 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
                          [is_owner_equal_func()];
 
   switch (type) {
+  case TIME_RESULT:
+    break;
   case ROW_RESULT:
   {
     uint n= (*a)->cols();
@@ -662,8 +676,9 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
     }
     break;
   }
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
+    break;
   }
   return 0;
 }
@@ -677,12 +692,14 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
   @param[in]   warn_name  Field name for issuing the warning
   @param[out]  l_time     The MYSQL_TIME objects is initialized.
 
-  Parses a date provided in the string str into a MYSQL_TIME object. If the
-  string contains an incorrect date or doesn't correspond to a date at all
-  then a warning is issued. The warn_type and the warn_name arguments are used
-  as the name and the type of the field when issuing the warning. If any input
-  was discarded (trailing or non-timestamp-y characters), return value will be
-  TRUE.
+  Parses a date provided in the string str into a MYSQL_TIME object.
+  The date is used for comparison, that is fuzzy dates are allowed
+  independently of sql_mode.
+  If the string contains an incorrect date or doesn't correspond to a date at
+  all then a warning is issued. The warn_type and the warn_name arguments are
+  used as the name and the type of the field when issuing the warning. If any
+  input was discarded (trailing or non-timestamp-y characters), return value
+  will be TRUE.
 
   @return Status flag
   @retval FALSE Success.
@@ -695,16 +712,15 @@ bool get_mysql_time_from_str(THD *thd, String *str, timestamp_type warn_type,
   bool value;
   int error;
   enum_mysql_timestamp_type timestamp_type;
+  int flags= TIME_FUZZY_DATE | MODE_INVALID_DATES;
+
+  if (warn_type == MYSQL_TIMESTAMP_TIME)
+    flags|= TIME_TIME_ONLY;
 
   timestamp_type= 
-    str_to_datetime(str->ptr(), str->length(), l_time,
-                    (TIME_FUZZY_DATE | MODE_INVALID_DATES |
-                     (thd->variables.sql_mode &
-                      (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE))),
-                    &error);
-
-  if (timestamp_type == MYSQL_TIMESTAMP_DATETIME || 
-      timestamp_type == MYSQL_TIMESTAMP_DATE)
+    str_to_datetime(str->ptr(), str->length(), l_time, flags, &error);
+
+  if (timestamp_type > MYSQL_TIMESTAMP_ERROR)
     /*
       Do not return yet, we may still want to throw a "trailing garbage"
       warning.
@@ -726,264 +742,31 @@ bool get_mysql_time_from_str(THD *thd, String *str, timestamp_type warn_type,
 
 
 /**
-  @brief Convert date provided in a string to the int representation.
-
-  @param[in]   thd        thread handle
-  @param[in]   str        a string to convert
-  @param[in]   warn_type  type of the timestamp for issuing the warning
-  @param[in]   warn_name  field name for issuing the warning
-  @param[out]  error_arg  could not extract a DATE or DATETIME
-
-  @details Convert date provided in the string str to the int
-    representation.  If the string contains wrong date or doesn't
-    contain it at all then a warning is issued.  The warn_type and
-    the warn_name arguments are used as the name and the type of the
-    field when issuing the warning.
-
-  @return
-    converted value. 0 on error and on zero-dates -- check 'failure'
-*/
-static ulonglong get_date_from_str(THD *thd, String *str, 
-                                   timestamp_type warn_type, 
-                                   const char *warn_name, bool *error_arg)
-{
-  MYSQL_TIME l_time;
-  *error_arg= get_mysql_time_from_str(thd, str, warn_type, warn_name, &l_time);
-
-  if (*error_arg)
-    return 0;
-  return TIME_to_ulonglong_datetime(&l_time);
-}
-
-
-/*
-  Check whether compare_datetime() can be used to compare items.
-
-  SYNOPSIS
-    Arg_comparator::can_compare_as_dates()
-    a, b          [in]  items to be compared
-    const_value   [out] converted value of the string constant, if any
-
-  DESCRIPTION
-    Check several cases when the DATE/DATETIME comparator should be used.
-    The following cases are checked:
-      1. Both a and b is a DATE/DATETIME field/function returning string or
-         int result.
-      2. Only a or b is a DATE/DATETIME field/function returning string or
-         int result and the other item (b or a) is an item with string result.
-         If the second item is a constant one then it's checked to be
-         convertible to the DATE/DATETIME type. If the constant can't be
-         converted to a DATE/DATETIME then the compare_datetime() comparator
-         isn't used and the warning about wrong DATE/DATETIME value is issued.
-      In all other cases (date-[int|real|decimal]/[int|real|decimal]-date)
-      the comparison is handled by other comparators.
-    If the datetime comparator can be used and one the operands of the
-    comparison is a string constant that was successfully converted to a
-    DATE/DATETIME type then the result of the conversion is returned in the
-    const_value if it is provided.  If there is no constant or
-    compare_datetime() isn't applicable then the *const_value remains
-    unchanged.
-
-  RETURN
-    the found type of date comparison
-*/
-
-enum Arg_comparator::enum_date_cmp_type
-Arg_comparator::can_compare_as_dates(Item *a, Item *b, ulonglong *const_value)
-{
-  enum enum_date_cmp_type cmp_type= CMP_DATE_DFLT;
-  Item *str_arg= 0, *date_arg= 0;
-
-  if (a->type() == Item::ROW_ITEM || b->type() == Item::ROW_ITEM)
-    return CMP_DATE_DFLT;
-
-  if (a->is_datetime())
-  {
-    if (b->is_datetime())
-      cmp_type= CMP_DATE_WITH_DATE;
-    else if (b->result_type() == STRING_RESULT)
-    {
-      cmp_type= CMP_DATE_WITH_STR;
-      date_arg= a;
-      str_arg= b;
-    }
-  }
-  else if (b->is_datetime() && a->result_type() == STRING_RESULT)
-  {
-    cmp_type= CMP_STR_WITH_DATE;
-    date_arg= b;
-    str_arg= a;
-  }
-
-  if (cmp_type != CMP_DATE_DFLT)
-  {
-    THD *thd= current_thd;
-    /*
-      Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-      for the current thread but it still may change during the execution.
-      Don't use cache while in the context analysis mode only (i.e. for 
-      EXPLAIN/CREATE VIEW and similar queries). Cache is useless in such 
-      cases and can cause problems. For example evaluating subqueries can 
-      confuse storage engines since in context analysis mode tables 
-      aren't locked.
-    */
-    if (!thd->lex->is_ps_or_view_context_analysis() &&
-        cmp_type != CMP_DATE_WITH_DATE && str_arg->const_item() &&
-        (str_arg->type() != Item::FUNC_ITEM ||
-        ((Item_func*)str_arg)->functype() != Item_func::GUSERVAR_FUNC))
-    {
-      ulonglong value;
-      bool error;
-      String tmp, *str_val= 0;
-      timestamp_type t_type= (date_arg->field_type() == MYSQL_TYPE_DATE ?
-                              MYSQL_TIMESTAMP_DATE : MYSQL_TIMESTAMP_DATETIME);
-
-      str_val= str_arg->val_str(&tmp);
-      if (str_arg->null_value)
-        return CMP_DATE_DFLT;
-      value= get_date_from_str(thd, str_val, t_type, date_arg->name, &error);
-      if (error)
-        return CMP_DATE_DFLT;
-      if (const_value)
-        *const_value= value;
-    }
-  }
-  return cmp_type;
-}
+  Prepare the comparator (set the comparison function) for comparing
+  items *a1 and *a2 in the context of 'type'.
 
-/*
-  Retrieves correct TIME value from the given item.
+  @param[in]      owner_arg  Item, peforming the comparison (e.g. Item_func_eq)
+  @param[in,out]  a1         first argument to compare
+  @param[in,out]  a2         second argument to compare
+  @param[in]      type       type context to compare in
 
-  SYNOPSIS
-    get_time_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve TIME value from
-    cache_arg  [in/out] pointer to place to store the cache item to
-    warn_item  [in]     unused
-    is_null    [out]    TRUE <=> the item_arg is null
-
-  DESCRIPTION
-    Retrieves the correct TIME value from given item for comparison by the
-    compare_datetime() function.
-    If item's result can be compared as longlong then its int value is used
-    and a value returned by get_time function is used otherwise.
-    If an item is a constant one then its value is cached and it isn't
-    get parsed again. An Item_cache_int object is used for for cached values.
-    It seamlessly substitutes the original item.  The cache item is marked as
-    non-constant to prevent re-caching it again.
-
-  RETURN
-    obtained value
+  Both *a1 and *a2 can be replaced by this method - typically by constant
+  items, holding the cached converted value of the original (constant) item.
 */
 
-longlong
-get_time_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null)
-{
-  longlong value;
-  Item *item= **item_arg;
-  MYSQL_TIME ltime;
-
-  if (item->result_as_longlong())
-  {
-    value= item->val_int();
-    *is_null= item->null_value;
-  }
-  else
-  {
-    *is_null= item->get_time(&ltime);
-    value= !*is_null ? (longlong) TIME_to_ulonglong_datetime(&ltime) : 0;
-  }
-  /*
-    Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-    for the current thread but it still may change during the execution.
-  */
-  if (item->const_item() && cache_arg && (item->type() != Item::FUNC_ITEM ||
-      ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
-  {
-    Query_arena backup;
-    Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
-    Item_cache_int *cache= new Item_cache_int();
-    if (save_arena)
-      thd->set_query_arena(save_arena);
-
-    /* Mark the cache as non-const to prevent re-caching. */
-    cache->set_used_tables(1);
-    cache->store_longlong(item, value);
-    *cache_arg= cache;
-    *item_arg= cache_arg;
-  }
-  return value;
-}
-
-
 int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
                                         Item **a1, Item **a2,
                                         Item_result type)
 {
-  ulonglong const_value= (ulonglong)-1;
   thd= current_thd;
   owner= owner_arg;
   set_null= set_null && owner_arg;
   a= a1;
   b= a2;
-  thd= current_thd;
-
-  if (can_compare_as_dates(*a, *b, &const_value))
-  {
-    a_type= (*a)->field_type();
-    b_type= (*b)->field_type();
-    a_cache= 0;
-    b_cache= 0;
 
-    if (const_value != (ulonglong)-1)
-    {
-      /*
-        cache_converted_constant can't be used here because it can't
-        correctly convert a DATETIME value from string to int representation.
-      */
-      Query_arena backup;
-      Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
-      Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME);
-      if (save_arena)
-        thd->set_query_arena(save_arena);
-
-      /* Mark the cache as non-const to prevent re-caching. */
-      cache->set_used_tables(1);
-      if (!(*a)->is_datetime())
-      {
-        cache->store_longlong((*a), const_value);
-        a_cache= cache;
-        a= (Item **)&a_cache;
-      }
-      else
-      {
-        cache->store_longlong((*b), const_value);
-        b_cache= cache;
-        b= (Item **)&b_cache;
-      }
-    }
-    is_nulls_eq= is_owner_equal_func();
-    func= &Arg_comparator::compare_datetime;
-    get_value_a_func= &get_datetime_value;
-    get_value_b_func= &get_datetime_value;
-    return 0;
-  }
-  else if (type == STRING_RESULT && (*a)->field_type() == MYSQL_TYPE_TIME &&
-           (*b)->field_type() == MYSQL_TYPE_TIME)
-  {
-    /* Compare TIME values as integers. */
-    a_cache= 0;
-    b_cache= 0;
-    is_nulls_eq= is_owner_equal_func();
-    func= &Arg_comparator::compare_datetime;
-    get_value_a_func= &get_time_value;
-    get_value_b_func= &get_time_value;
-    return 0;
-  }
-  else if (type == STRING_RESULT &&
-           (*a)->result_type() == STRING_RESULT &&
-           (*b)->result_type() == STRING_RESULT)
+  if (type == STRING_RESULT &&
+      (*a)->result_type() == STRING_RESULT &&
+      (*b)->result_type() == STRING_RESULT)
   {
     DTCollation coll;
     coll.set((*a)->collation.collation);
@@ -991,8 +774,10 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
                                b, 1, MY_COLL_CMP_CONV, 1))
       return 1;
   }
-  else if (try_year_cmp_func(type))
-    return 0;
+  if (type == INT_RESULT &&
+      (*a)->field_type() == MYSQL_TYPE_YEAR &&
+      (*b)->field_type() == MYSQL_TYPE_YEAR)
+    type= TIME_RESULT;
 
   a= cache_converted_constant(thd, a, &a_cache, type);
   b= cache_converted_constant(thd, b, &b_cache, type);
@@ -1000,45 +785,6 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
 }
 
 
-/*
-  Helper function to call from Arg_comparator::set_cmp_func()
-*/
-
-bool Arg_comparator::try_year_cmp_func(Item_result type)
-{
-  if (type == ROW_RESULT)
-    return FALSE;
-
-  bool a_is_year= (*a)->field_type() == MYSQL_TYPE_YEAR;
-  bool b_is_year= (*b)->field_type() == MYSQL_TYPE_YEAR;
-
-  if (!a_is_year && !b_is_year)
-    return FALSE;
-
-  if (a_is_year && b_is_year)
-  {
-    get_value_a_func= &get_year_value;
-    get_value_b_func= &get_year_value;
-  }
-  else if (a_is_year && (*b)->is_datetime())
-  {
-    get_value_a_func= &get_year_value;
-    get_value_b_func= &get_datetime_value;
-  }
-  else if (b_is_year && (*a)->is_datetime())
-  {
-    get_value_b_func= &get_year_value;
-    get_value_a_func= &get_datetime_value;
-  }
-  else
-    return FALSE;
-
-  is_nulls_eq= is_owner_equal_func();
-  func= &Arg_comparator::compare_datetime;
-
-  return TRUE;
-}
-
 /**
   Convert and cache a constant.
 
@@ -1060,9 +806,14 @@ Item** Arg_comparator::cache_converted_constant(THD *thd_arg, Item **value,
                                                 Item **cache_item,
                                                 Item_result type)
 {
-  /* Don't need cache if doing context analysis only. */
+  /*
+    Don't need cache if doing context analysis only.
+    Also, get_datetime_value creates Item_cache internally.
+    Unless fixed, we should not do it here.
+  */
   if (!thd->lex->is_ps_or_view_context_analysis() &&
-      (*value)->const_item() && type != (*value)->result_type())
+      (*value)->const_item() && type != (*value)->result_type() &&
+      type != TIME_RESULT)
   {
     Item_cache *cache= Item_cache::get_cache(*value, type);
     cache->setup(*value);
@@ -1080,112 +831,88 @@ void Arg_comparator::set_datetime_cmp_func(Item_result_field *owner_arg,
   owner= owner_arg;
   a= a1;
   b= b1;
-  a_type= (*a)->field_type();
-  b_type= (*b)->field_type();
   a_cache= 0;
   b_cache= 0;
-  is_nulls_eq= FALSE;
-  func= &Arg_comparator::compare_datetime;
-  get_value_a_func= &get_datetime_value;
-  get_value_b_func= &get_datetime_value;
+  func= comparator_matrix[TIME_RESULT][is_owner_equal_func()];
 }
 
-
-/*
+/**
   Retrieves correct DATETIME value from given item.
 
-  SYNOPSIS
-    get_datetime_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve DATETIME value from
-    cache_arg  [in/out] pointer to place to store the caching item to
-    warn_item  [in]     item for issuing the conversion warning
-    is_null    [out]    TRUE <=> the item_arg is null
+  @param[in]     thd         thread handle
+  @param[in,out] item_arg    item to retrieve DATETIME value from
+  @param[in,out] cache_arg   pointer to place to store the caching item to
+  @param[in]     warn_item   item for issuing the conversion warning
+  @param[out]    is_null     TRUE <=> the item_arg is null
 
-  DESCRIPTION
+  @details
     Retrieves the correct DATETIME value from given item for comparison by the
     compare_datetime() function.
-    If item's result can be compared as longlong then its int value is used
-    and its string value is used otherwise. Strings are always parsed and
-    converted to int values by the get_date_from_str() function.
-    This allows us to compare correctly string dates with missed insignificant
-    zeros. If an item is a constant one then its value is cached and it isn't
-    get parsed again. An Item_cache_int object is used for caching values. It
-    seamlessly substitutes the original item.  The cache item is marked as
-    non-constant to prevent re-caching it again.  In order to compare
-    correctly DATE and DATETIME items the result of the former are treated as
-    a DATETIME with zero time (00:00:00).
 
-  RETURN
-    obtained value
+    If the value should be compared as time (TIME_RESULT), it's retrieved as
+    MYSQL_TIME. Otherwise it's read as a number/string and converted to time.
+    Constant items are cached, so the convertion is only done once for them.
+
+    Note the f_type behavior: if the item can be compared as time, then
+    f_type is this item's field_type(). Otherwise it's field_type() of
+    warn_item (which is the other operand of the comparison operator).
+    This logic provides correct string/number to date/time conversion
+    depending on the other operand (when comparing a string with a date, it's
+    parsed as a date, when comparing a string with a time it's parsed as a time)
+
+    If the item is a constant it is replaced by the Item_cache_int, that
+    holds the packed datetime value.
+
+  @return
+    MYSQL_TIME value, packed in a longlong, suitable for comparison.
 */
 
 longlong
 get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
                    Item *warn_item, bool *is_null)
 {
-  longlong value= 0;
-  String buf, *str= 0;
+  longlong UNINIT_VAR(value);
   Item *item= **item_arg;
+  enum_field_types f_type= item->cmp_type() == TIME_RESULT ?
+                           item->field_type() : warn_item->field_type();
 
-  if (item->result_as_longlong())
+  if (item->result_type() == INT_RESULT && item->cmp_type() == TIME_RESULT)
   {
+    /* it's our Item_cache_int, as created below */
     value= item->val_int();
-    *is_null= item->null_value;
-    enum_field_types f_type= item->field_type();
-    /*
-      Item_date_add_interval may return MYSQL_TYPE_STRING as the result
-      field type. To detect that the DATE value has been returned we
-      compare it with 100000000L - any DATE value should be less than it.
-      Don't shift cached DATETIME values up for the second time.
-    */
-    if (f_type == MYSQL_TYPE_DATE ||
-        (f_type != MYSQL_TYPE_DATETIME && value < 100000000L))
-      value*= 1000000L;
   }
   else
   {
-    str= item->val_str(&buf);
-    *is_null= item->null_value;
+    MYSQL_TIME ltime;
+    uint fuzzydate= TIME_FUZZY_DATE | TIME_INVALID_DATES;
+    if (f_type == MYSQL_TYPE_TIME)
+      fuzzydate|= TIME_TIME_ONLY;
+    if (item->get_date(&ltime, fuzzydate))
+      value= 0; /* invalid date */
+    else
+      value= pack_time(&ltime);
   }
-  if (*is_null)
+  if ((*is_null= item->null_value))
     return ~(ulonglong) 0;
-  /*
-    Convert strings to the integer DATE/DATETIME representation.
-    Even if both dates provided in strings we can't compare them directly as
-    strings as there is no warranty that they are correct and do not miss
-    some insignificant zeros.
-  */
-  if (str)
+  if (cache_arg && item->const_item() && item->type() != Item::CACHE_ITEM)
   {
-    bool error;
-    enum_field_types f_type= warn_item->field_type();
-    timestamp_type t_type= f_type ==
-      MYSQL_TYPE_DATE ? MYSQL_TIMESTAMP_DATE : MYSQL_TIMESTAMP_DATETIME;
-    value= (longlong) get_date_from_str(thd, str, t_type, warn_item->name, &error);
     /*
-      If str did not contain a valid date according to the current
-      SQL_MODE, get_date_from_str() has already thrown a warning,
-      and we don't want to throw NULL on invalid date (see 5.2.6
-      "SQL modes" in the manual), so we're done here.
+      cache the packed datetime value in the Item_cache object.
+      Because the packed datetime value is longlong, we use Item_cache_int,
+      and it has result_type() == INT_RESULT.
+      But we create it to have field_type() == MYSQL_TYPE_TIME (or
+      MYSQL_TIMESTAMP_DATE or MYSQL_TYPE_DATETIME), and thus it will have
+      cmp_type() == TIME_RESULT.
+      As no other item can have this combination of cmp_type() and
+      result_type(), it allows us to identify our cache items.
     */
-  }
-  /*
-    Do not cache GET_USER_VAR() function as its const_item() may return TRUE
-    for the current thread but it still may change during the execution.
-  */
-  if (item->const_item() && cache_arg && (item->type() != Item::FUNC_ITEM ||
-      ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
-  {
     Query_arena backup;
     Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
-    Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME);
+    Item_cache_temporal *cache= new Item_cache_temporal(f_type);
     if (save_arena)
       thd->set_query_arena(save_arena);
-      
-    /* Mark the cache as non-const to prevent re-caching. */
-    cache->set_used_tables(1);
-    cache->store_longlong(item, value);
+
+    cache->store_packed(value);
     *cache_arg= cache;
     *item_arg= cache_arg;
   }
@@ -1194,67 +921,6 @@ get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
 
 
 /*
-  Retrieves YEAR value of 19XX-00-00 00:00:00 form from given item.
-
-  SYNOPSIS
-    get_year_value()
-    thd                 thread handle
-    item_arg   [in/out] item to retrieve YEAR value from
-    cache_arg  [in/out] pointer to place to store the caching item to
-    warn_item  [in]     item for issuing the conversion warning
-    is_null    [out]    TRUE <=> the item_arg is null
-
-  DESCRIPTION
-    Retrieves the YEAR value of 19XX form from given item for comparison by the
-    compare_datetime() function.
-    Converts year to DATETIME of form YYYY-00-00 00:00:00 for the compatibility
-    with the get_datetime_value function result.
-
-  RETURN
-    obtained value
-*/
-
-static longlong
-get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
-               Item *warn_item, bool *is_null)
-{
-  longlong value= 0;
-  Item *item= **item_arg;
-
-  value= item->val_int();
-  *is_null= item->null_value;
-  if (*is_null)
-    return ~(ulonglong) 0;
-
-  /*
-    Coerce value to the 19XX form in order to correctly compare
-    YEAR(2) & YEAR(4) types.
-    Here we are converting all item values but YEAR(4) fields since
-      1) YEAR(4) already has a regular YYYY form and
-      2) we don't want to convert zero/bad YEAR(4) values to the
-         value of 2000.
-  */
-  Item *real_item= item->real_item();
-  Field *field= NULL;
-  if (real_item->type() == Item::FIELD_ITEM)
-    field= ((Item_field *)real_item)->field;
-  else if (real_item->type() == Item::CACHE_ITEM)
-    field= ((Item_cache *)real_item)->field();
-  if (!(field && field->type() == MYSQL_TYPE_YEAR && field->field_length == 4))
-  {
-    if (value < 70)
-      value+= 100;
-    if (value <= 1900)
-      value+= 1900;
-  }
-  /* Convert year to DATETIME of form YYYY-00-00 00:00:00 (YYYY0000000000). */
-  value*= 10000000000LL;
-
-  return value;
-}
-
-
-/*
   Compare items values as dates.
 
   SYNOPSIS
@@ -1266,18 +932,9 @@ get_year_value(THD *thd, Item ***item_arg, Item **cache_arg,
     with help of the get_datetime_value() function.
 
   RETURN
-    If is_nulls_eq is TRUE:
-       1    if items are equal or both are null
-       0    otherwise
-    If is_nulls_eq is FALSE:
       -1   a < b or at least one item is null
        0   a == b
        1   a > b
-    See the table:
-    is_nulls_eq | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
-    a_is_null   | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
-    b_is_null   | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 |
-    result      | 1 | 0 | 0 |0/1|-1 |-1 |-1 |-1/0/1|
 */
 
 int Arg_comparator::compare_datetime()
@@ -1285,34 +942,40 @@ int Arg_comparator::compare_datetime()
   bool a_is_null, b_is_null;
   longlong a_value, b_value;
 
+  if (set_null)
+    owner->null_value= 1;
+
   /* Get DATE/DATETIME/TIME value of the 'a' item. */
-  a_value= (*get_value_a_func)(thd, &a, &a_cache, *b, &a_is_null);
-  if (!is_nulls_eq && a_is_null)
-  {
-    if (set_null)
-      owner->null_value= 1;
+  a_value= get_datetime_value(thd, &a, &a_cache, *b, &a_is_null);
+  if (a_is_null)
     return -1;
-  }
 
   /* Get DATE/DATETIME/TIME value of the 'b' item. */
-  b_value= (*get_value_b_func)(thd, &b, &b_cache, *a, &b_is_null);
-  if (a_is_null || b_is_null)
-  {
-    if (set_null)
-      owner->null_value= is_nulls_eq ? 0 : 1;
-    return is_nulls_eq ? (a_is_null == b_is_null) : -1;
-  }
+  b_value= get_datetime_value(thd, &b, &b_cache, *a, &b_is_null);
+  if (b_is_null)
+    return -1;
 
   /* Here we have two not-NULL values. */
   if (set_null)
     owner->null_value= 0;
 
   /* Compare values. */
-  if (is_nulls_eq)
-    return (a_value == b_value);
-  return a_value < b_value ? -1 : (a_value > b_value ? 1 : 0);
+  return a_value < b_value ? -1 : a_value > b_value ? 1 : 0;
 }
 
+int Arg_comparator::compare_e_datetime()
+{
+  bool a_is_null, b_is_null;
+  longlong a_value, b_value;
+
+  /* Get DATE/DATETIME/TIME value of the 'a' item. */
+  a_value= get_datetime_value(thd, &a, &a_cache, *b, &a_is_null);
+
+  /* Get DATE/DATETIME/TIME value of the 'b' item. */
+  b_value= get_datetime_value(thd, &b, &b_cache, *a, &b_is_null);
+  return a_is_null || b_is_null ? a_is_null == b_is_null
+                                : a_value == b_value;
+}
 
 int Arg_comparator::compare_string()
 {
@@ -1739,6 +1402,36 @@ longlong Item_func_truth::val_int()
 }
 
 
+bool Item_in_optimizer::is_top_level_item()
+{
+  return ((Item_in_subselect *)args[1])->is_top_level_item();
+}
+
+
+void Item_in_optimizer::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /* This will re-calculate attributes of our Item_in_subselect: */
+  Item_bool_func::fix_after_pullout(new_parent, ref);
+
+  /* Then, re-calculate not_null_tables_cache: */
+  eval_not_null_tables(NULL);
+}
+
+
+bool Item_in_optimizer::eval_not_null_tables(uchar *opt_arg)
+{
+  not_null_tables_cache= 0;
+  if (is_top_level_item())
+  {
+    /*
+      It is possible to determine NULL-rejectedness of the left arguments
+      of IN only if it is a top-level predicate.
+    */
+    not_null_tables_cache= args[0]->not_null_tables();
+  }
+  return FALSE;
+}
+
 bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
 {
   if ((!args[0]->fixed && args[0]->fix_fields(thd, args)) ||
@@ -1748,6 +1441,7 @@ bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
   cache->setup(args[0]);
   if (cache->cols() == 1)
   {
+    DBUG_ASSERT(args[0]->type() != ROW_ITEM);
     if ((used_tables_cache= args[0]->used_tables()))
       cache->set_used_tables(OUTER_REF_TABLE_BIT);
     else
@@ -1758,6 +1452,14 @@ bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
     uint n= cache->cols();
     for (uint i= 0; i < n; i++)
     {
+      /* Check that the expression (part of row) do not contain a subquery */
+      if (args[0]->element_index(i)->walk(&Item::is_subquery_processor,
+                                          FALSE, NULL))
+      {
+        my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+                 "SUBQUERY in ROW in left expression of IN/ALL/ANY");
+        return 1;
+      }
       if (args[0]->element_index(i)->used_tables())
 	((Item_cache *)cache->element_index(i))->set_used_tables(OUTER_REF_TABLE_BIT);
       else
@@ -1765,10 +1467,14 @@ bool Item_in_optimizer::fix_left(THD *thd, Item **ref)
     }
     used_tables_cache= args[0]->used_tables();
   }
-  not_null_tables_cache= args[0]->not_null_tables();
+  eval_not_null_tables(NULL);
   with_sum_func= args[0]->with_sum_func;
+  with_field= args[0]->with_field;
   if ((const_item_cache= args[0]->const_item()))
+  {
     cache->store(args[0]);
+    cache->cache_value();
+  }
   return 0;
 }
 
@@ -1792,8 +1498,8 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
   if (args[1]->maybe_null)
     maybe_null=1;
   with_sum_func= with_sum_func || args[1]->with_sum_func;
+  with_field= with_field || args[1]->with_field;
   used_tables_cache|= args[1]->used_tables();
-  not_null_tables_cache|= args[1]->not_null_tables();
   const_item_cache&= args[1]->const_item();
   fixed= 1;
   return FALSE;
@@ -1801,6 +1507,65 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
 
 
 /**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item *Item_in_optimizer::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_in_optimizer::expr_cache_insert_transformer");
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+    DBUG_RETURN(this); // MAX/MIN transformed => do nothing
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (args[1]->expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+
+  DBUG_RETURN(this);
+}
+
+
+
+/**
+    Collect and add to the list cache parameters for this Item.
+
+    @param parameters    The list where to add parameters
+*/
+
+void Item_in_optimizer::get_cache_parameters(List<Item> &parameters)
+{
+  /* Add left expression to the list of the parameters of the subquery */
+  if (args[0]->cols() == 1)
+    parameters.add_unique(args[0], &cmp_items);
+  else
+  {
+    for (uint i= 0; i < args[0]->cols(); i++)
+    {
+      parameters.add_unique(args[0]->element_index(i), &cmp_items);
+    }
+  }
+  args[1]->get_cache_parameters(parameters);
+}
+
+
+/*
    The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
    predicates. The implementation works as follows.
 
@@ -1870,13 +1635,22 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
      @see Item_in_subselect::val_bool()
      @see Item_is_not_null_test::val_int()
  */
+
 longlong Item_in_optimizer::val_int()
 {
   bool tmp;
   DBUG_ASSERT(fixed == 1);
   cache->store(args[0]);
   cache->cache_value();
-  
+
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+  {
+    /* MAX/MIN transformed => pass through */
+    longlong res= args[1]->val_int();
+    null_value= args[1]->null_value;
+    return (res);
+  }
+
   if (cache->null_value)
   {
     /*
@@ -1971,6 +1745,7 @@ void Item_in_optimizer::cleanup()
   Item_bool_func::cleanup();
   if (!save_cache)
     cache= 0;
+  expr_cache= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -1982,6 +1757,94 @@ bool Item_in_optimizer::is_null()
 }
 
 
+/**
+  Transform an Item_in_optimizer and its arguments with a callback function.
+
+  @param transformer the transformer callback function to be applied to the
+         nodes of the tree of the object
+  @param parameter to be passed to the transformer
+
+  @detail
+    Recursively transform the left and the right operand of this Item. The
+    Right operand is an Item_in_subselect or its subclass. To avoid the
+    creation of new Items, we use the fact the the left operand of the
+    Item_in_subselect is the same as the one of 'this', so instead of
+    transforming its operand, we just assign the left operand of the
+    Item_in_subselect to be equal to the left operand of 'this'.
+    The transformation is not applied further to the subquery operand
+    if the IN predicate.
+
+  @returns
+    @retval pointer to the transformed item
+    @retval NULL if an error occurred
+*/
+
+Item *Item_in_optimizer::transform(Item_transformer transformer, uchar *argument)
+{
+  Item *new_item;
+
+  DBUG_ASSERT(!current_thd->is_stmt_prepare());
+  DBUG_ASSERT(arg_count == 2);
+
+  /* Transform the left IN operand. */
+  new_item= (*args)->transform(transformer, argument);
+  if (!new_item)
+    return 0;
+  /*
+    THD::change_item_tree() should be called only if the tree was
+    really transformed, i.e. when a new item has been created.
+    Otherwise we'll be allocating a lot of unnecessary memory for
+    change records at each execution.
+  */
+  if ((*args) != new_item)
+    current_thd->change_item_tree(args, new_item);
+
+  if (args[1]->type() != Item::SUBSELECT_ITEM)
+  {
+    /* MAX/MIN transformed => pass through */
+    new_item= args[1]->transform(transformer, argument);
+    if (!new_item)
+      return 0;
+    if (args[1] != new_item)
+      current_thd->change_item_tree(args + 1, new_item);
+  }
+  else
+  {
+    /*
+      Transform the right IN operand which should be an Item_in_subselect or a
+      subclass of it. The left operand of the IN must be the same as the left
+      operand of this Item_in_optimizer, so in this case there is no further
+      transformation, we only make both operands the same.
+      TODO: is it the way it should be?
+    */
+    DBUG_ASSERT((args[1])->type() == Item::SUBSELECT_ITEM &&
+                (((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::IN_SUBS ||
+                 ((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::ALL_SUBS ||
+                 ((Item_subselect*)(args[1]))->substype() ==
+                 Item_subselect::ANY_SUBS));
+
+    Item_in_subselect *in_arg= (Item_in_subselect*)args[1];
+    current_thd->change_item_tree(&in_arg->left_expr, args[0]);
+  }
+  return (this->*transformer)(argument);
+}
+
+
+bool Item_in_optimizer::is_expensive_processor(uchar *arg)
+{
+  return args[0]->is_expensive_processor(arg) ||
+         args[1]->is_expensive_processor(arg);
+}
+
+
+bool Item_in_optimizer::is_expensive()
+{
+  return args[0]->is_expensive() || args[1]->is_expensive();
+}
+
+
 longlong Item_func_eq::val_int()
 {
   DBUG_ASSERT(fixed == 1);
@@ -2140,6 +2003,7 @@ void Item_func_interval::fix_length_and_dec()
   used_tables_cache|= row->used_tables();
   not_null_tables_cache= row->not_null_tables();
   with_sum_func= with_sum_func || row->with_sum_func;
+  with_field= with_field || row->with_field;
   const_item_cache&= row->const_item();
 }
 
@@ -2272,6 +2136,16 @@ bool Item_func_between::fix_fields(THD *thd, Item **ref)
 
   thd->lex->current_select->between_count++;
 
+
+  return 0;
+}
+
+
+bool Item_func_between::eval_not_null_tables(uchar *opt_arg)
+{
+  if (Item_func_opt_neg::eval_not_null_tables(NULL))
+    return 1;
+
   /* not_null_tables_cache == union(T1(e),T1(e1),T1(e2)) */
   if (pred_level && !negated)
     return 0;
@@ -2280,19 +2154,23 @@ bool Item_func_between::fix_fields(THD *thd, Item **ref)
   not_null_tables_cache= (args[0]->not_null_tables() |
                           (args[1]->not_null_tables() &
                            args[2]->not_null_tables()));
-
   return 0;
-}
+}  
+
 
+void Item_func_between::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /* This will re-calculate attributes of the arguments */
+  Item_func_opt_neg::fix_after_pullout(new_parent, ref);
+  /* Then, re-calculate not_null_tables_cache according to our special rules */
+  eval_not_null_tables(NULL);
+}
 
 void Item_func_between::fix_length_and_dec()
 {
-  max_length= 1;
-  int i;
-  bool datetime_found= FALSE;
-  int time_items_found= 0;
-  compare_as_dates= TRUE;
   THD *thd= current_thd;
+  max_length= 1;
+  compare_as_dates= 0;
 
   /*
     As some compare functions are generated after sql_yacc,
@@ -2307,81 +2185,80 @@ void Item_func_between::fix_length_and_dec()
    return;
 
   /*
-    Detect the comparison of DATE/DATETIME items.
-    At least one of items should be a DATE/DATETIME item and other items
-    should return the STRING result.
+    When comparing as date/time, we need to convert non-temporal values
+    (e.g.  strings) to MYSQL_TIME. get_datetime_value() does it
+    automatically when one of the operands is a date/time.  But here we
+    may need to compare two strings as dates (str1 BETWEEN str2 AND date).
+    For this to work, we need to know what date/time type we compare
+    strings as.
   */
-  if (cmp_type == STRING_RESULT)
-  {
-    for (i= 0; i < 3; i++)
-    {
-      if (args[i]->is_datetime())
-      {
-        datetime_found= TRUE;
-        continue;
-      }
-      if (args[i]->field_type() == MYSQL_TYPE_TIME &&
-          args[i]->result_as_longlong())
-        time_items_found++;
-    }
-  }
-  if (!datetime_found)
-    compare_as_dates= FALSE;
+  if (cmp_type ==  TIME_RESULT)
+    compare_as_dates= find_date_time_item(args, 3, 0);
 
-  if (compare_as_dates)
-  {
-    ge_cmp.set_datetime_cmp_func(this, args, args + 1);
-    le_cmp.set_datetime_cmp_func(this, args, args + 2);
-  }
-  else if (time_items_found == 3)
-  {
-    /* Compare TIME items as integers. */
-    cmp_type= INT_RESULT;
-  }
-  else if (args[0]->real_item()->type() == FIELD_ITEM &&
-           thd->lex->sql_command != SQLCOM_CREATE_VIEW &&
-           thd->lex->sql_command != SQLCOM_SHOW_CREATE)
+  /* See the comment about the similar block in Item_bool_func2 */
+  if (args[0]->real_item()->type() == FIELD_ITEM &&
+      !thd->lex->is_ps_or_view_context_analysis())
   {
     Item_field *field_item= (Item_field*) (args[0]->real_item());
-    if (field_item->field->can_be_compared_as_longlong())
+    if (field_item->field_type() ==  MYSQL_TYPE_LONGLONG ||
+        field_item->field_type() ==  MYSQL_TYPE_YEAR)
     {
       /*
-        The following can't be recoded with || as convert_constant_item
+        The following can't be recoded with || as convert_const_to_int
         changes the argument
       */
-      if (convert_constant_item(thd, field_item, &args[1]))
-        cmp_type=INT_RESULT;			// Works for all types.
-      if (convert_constant_item(thd, field_item, &args[2]))
-        cmp_type=INT_RESULT;			// Works for all types.
+      if (convert_const_to_int(thd, field_item, &args[1]))
+        cmp_type=INT_RESULT;
+      if (convert_const_to_int(thd, field_item, &args[2]))
+        cmp_type=INT_RESULT;
     }
   }
 }
 
 
 longlong Item_func_between::val_int()
-{						// ANSI BETWEEN
+{
   DBUG_ASSERT(fixed == 1);
-  if (compare_as_dates)
+
+  switch (cmp_type) {
+  case TIME_RESULT:
   {
-    int ge_res, le_res;
+    THD *thd= current_thd;
+    longlong value, a, b;
+    Item *cache, **ptr;
+    bool value_is_null, a_is_null, b_is_null;
 
-    ge_res= ge_cmp.compare();
-    if ((null_value= args[0]->null_value))
+    ptr= &args[0];
+    value= get_datetime_value(thd, &ptr, &cache, compare_as_dates,
+                              &value_is_null);
+    if (ptr != &args[0])
+      thd->change_item_tree(&args[0], *ptr);
+
+    if ((null_value= value_is_null))
       return 0;
-    le_res= le_cmp.compare();
 
-    if (!args[1]->null_value && !args[2]->null_value)
-      return (longlong) ((ge_res >= 0 && le_res <=0) != negated);
-    else if (args[1]->null_value)
-    {
-      null_value= le_res > 0;			// not null if false range.
-    }
+    ptr= &args[1];
+    a= get_datetime_value(thd, &ptr, &cache, compare_as_dates, &a_is_null);
+    if (ptr != &args[1])
+      thd->change_item_tree(&args[1], *ptr);
+
+    ptr= &args[2];
+    b= get_datetime_value(thd, &ptr, &cache, compare_as_dates, &b_is_null);
+    if (ptr != &args[2])
+      thd->change_item_tree(&args[2], *ptr);
+
+    if (!a_is_null && !b_is_null)
+      return (longlong) ((value >= a && value <= b) != negated);
+    if (a_is_null && b_is_null)
+      null_value=1;
+    else if (a_is_null)
+      null_value= value <= b;			// not null if false range.
     else
-    {
-      null_value= ge_res < 0;
-    }
+      null_value= value >= a;
+    break;
   }
-  else if (cmp_type == STRING_RESULT)
+
+  case STRING_RESULT:
   {
     String *value,*a,*b;
     value=args[0]->val_str(&value0);
@@ -2405,8 +2282,9 @@ longlong Item_func_between::val_int()
       // Set to not null if false range.
       null_value= sortcmp(value,a,cmp_collation.collation) >= 0;
     }
+    break;
   }
-  else if (cmp_type == INT_RESULT)
+  case INT_RESULT:
   {
     longlong value=args[0]->val_int(), a, b;
     if ((null_value=args[0]->null_value))
@@ -2425,8 +2303,9 @@ longlong Item_func_between::val_int()
     {
       null_value= value >= a;
     }
+    break;
   }
-  else if (cmp_type == DECIMAL_RESULT)
+  case DECIMAL_RESULT:
   {
     my_decimal dec_buf, *dec= args[0]->val_decimal(&dec_buf),
                a_buf, *a_dec, b_buf, *b_dec;
@@ -2443,8 +2322,9 @@ longlong Item_func_between::val_int()
       null_value= (my_decimal_cmp(dec, b_dec) <= 0);
     else
       null_value= (my_decimal_cmp(dec, a_dec) >= 0);
+    break;
   }
-  else
+  case REAL_RESULT:
   {
     double value= args[0]->val_real(),a,b;
     if ((null_value=args[0]->null_value))
@@ -2463,6 +2343,13 @@ longlong Item_func_between::val_int()
     {
       null_value= value >= a;
     }
+    break;
+  }
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);
+    null_value= 1;
+    return 0;
   }
   return (longlong) (!null_value && negated);
 }
@@ -2513,7 +2400,8 @@ Item_func_ifnull::fix_length_and_dec()
     decimals= 0;
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   cached_field_type= agg_field_type(args, 2);
@@ -2643,6 +2531,16 @@ Item_func_if::fix_fields(THD *thd, Item **ref)
   if (Item_func::fix_fields(thd, ref))
     return 1;
 
+  return 0;
+}
+
+
+bool
+Item_func_if::eval_not_null_tables(uchar *opt_arg)
+{
+  if (Item_func::eval_not_null_tables(NULL))
+    return 1;
+
   not_null_tables_cache= (args[1]->not_null_tables() &
                           args[2]->not_null_tables());
 
@@ -2650,6 +2548,15 @@ Item_func_if::fix_fields(THD *thd, Item **ref)
 }
 
 
+void Item_func_if::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /* This will re-calculate attributes of the arguments */
+  Item_func::fix_after_pullout(new_parent, ref);
+  /* Then, re-calculate not_null_tables_cache according to our special rules */
+  eval_not_null_tables(NULL);
+}
+
+
 void
 Item_func_if::fix_length_and_dec()
 {
@@ -2897,7 +2804,7 @@ Item *Item_func_case::find_item(String *str)
     {
       if (args[i]->real_item()->type() == NULL_ITEM)
         continue;
-      cmp_type= item_cmp_type(left_result_type, args[i]->result_type());
+      cmp_type= item_cmp_type(left_result_type, args[i]->cmp_type());
       DBUG_ASSERT(cmp_type != ROW_RESULT);
       DBUG_ASSERT(cmp_items[(uint)cmp_type]);
       if (!(value_added_map & (1<<(uint)cmp_type)))
@@ -3062,7 +2969,7 @@ void Item_func_case::fix_length_and_dec()
   {
     uint i;
     agg[0]= args[first_expr_num];
-    left_result_type= agg[0]->result_type();
+    left_result_type= agg[0]->cmp_type();
 
     for (nagg= 0; nagg < ncases/2 ; nagg++)
       agg[nagg+1]= args[nagg*2];
@@ -3080,7 +2987,8 @@ void Item_func_case::fix_length_and_dec()
       found_types |= (1 << item_cmp_type(left_result_type, STRING_RESULT));
     }
 
-    for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+    Item *date_arg= 0;
+    for (i= 0; i <= (uint)TIME_RESULT; i++)
     {
       if (found_types & (1 << i) && !cmp_items[i])
       {
@@ -3088,8 +2996,12 @@ void Item_func_case::fix_length_and_dec()
         if ((Item_result)i == STRING_RESULT &&
             agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV, 1))
           return;
+
+        if ((Item_result)i == TIME_RESULT)
+          date_arg= find_date_time_item(args, arg_count, 0);
+
         if (!(cmp_items[i]=
-            cmp_item::get_comparator((Item_result)i,
+            cmp_item::get_comparator((Item_result)i, date_arg,
                                      cmp_collation.collation)))
           return;
       }
@@ -3169,7 +3081,7 @@ void Item_func_case::cleanup()
   uint i;
   DBUG_ENTER("Item_func_case::cleanup");
   Item_func::cleanup();
-  for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+  for (i= 0; i <= (uint)TIME_RESULT; i++)
   {
     delete cmp_items[i];
     cmp_items[i]= 0;
@@ -3225,6 +3137,21 @@ double Item_func_coalesce::real_op()
 }
 
 
+bool Item_func_coalesce::get_date(MYSQL_TIME *ltime,uint fuzzydate)
+{
+  DBUG_ASSERT(fixed == 1);
+  null_value= 0;
+  for (uint i= 0; i < arg_count; i++)
+  {
+    bool res= args[i]->get_date(ltime, fuzzydate);
+    if (!args[i]->null_value)
+      return res;
+  }
+  null_value=1;
+  return 1;
+}
+
+
 my_decimal *Item_func_coalesce::decimal_op(my_decimal *decimal_value)
 {
   DBUG_ASSERT(fixed == 1);
@@ -3244,6 +3171,14 @@ void Item_func_coalesce::fix_length_and_dec()
 {
   cached_field_type= agg_field_type(args, arg_count);
   agg_result_type(&hybrid_type, args, arg_count);
+  Item_result cmp_type;
+  agg_cmp_type(&cmp_type, args, arg_count);
+  ///< @todo let result_type() return TIME_RESULT and remove this special case
+  if (cmp_type == TIME_RESULT)
+  {
+    count_real_length();
+    return;
+  }
   switch (hybrid_type) {
   case STRING_RESULT:
     count_only_length();
@@ -3261,7 +3196,8 @@ void Item_func_coalesce::fix_length_and_dec()
     decimals= 0;
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
 }
@@ -3576,7 +3512,7 @@ uchar *in_decimal::get_value(Item *item)
 }
 
 
-cmp_item* cmp_item::get_comparator(Item_result type,
+cmp_item* cmp_item::get_comparator(Item_result type, Item *warn_item,
                                    CHARSET_INFO *cs)
 {
   switch (type) {
@@ -3590,7 +3526,9 @@ cmp_item* cmp_item::get_comparator(Item_result type,
     return new cmp_item_row;
   case DECIMAL_RESULT:
     return new cmp_item_decimal;
-  default:
+  case TIME_RESULT:
+    return new cmp_item_datetime(warn_item);
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
     break;
   }
@@ -3654,10 +3592,13 @@ void cmp_item_row::store_value(Item *item)
     for (uint i=0; i < n; i++)
     {
       if (!comparators[i])
+      {
+        DBUG_ASSERT(item->element_index(i)->cmp_type() != TIME_RESULT);
         if (!(comparators[i]=
-              cmp_item::get_comparator(item->element_index(i)->result_type(),
+              cmp_item::get_comparator(item->element_index(i)->result_type(), 0,
                                        item->element_index(i)->collation.collation)))
 	  break;					// new failed
+      }
       comparators[i]->store_value(item->element_index(i));
       item->null_value|= item->element_index(i)->null_value;
     }
@@ -3831,11 +3772,22 @@ bool Item_func_in::nulls_in_row()
 bool
 Item_func_in::fix_fields(THD *thd, Item **ref)
 {
-  Item **arg, **arg_end;
 
   if (Item_func_opt_neg::fix_fields(thd, ref))
     return 1;
 
+  return 0;
+}
+
+
+bool
+Item_func_in::eval_not_null_tables(uchar *opt_arg)
+{
+  Item **arg, **arg_end;
+
+  if (Item_func_opt_neg::eval_not_null_tables(NULL))
+    return 1;
+
   /* not_null_tables_cache == union(T1(e),union(T1(ei))) */
   if (pred_level && negated)
     return 0;
@@ -3849,6 +3801,14 @@ Item_func_in::fix_fields(THD *thd, Item **ref)
 }
 
 
+void Item_func_in::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  /* This will re-calculate attributes of the arguments */
+  Item_func_opt_neg::fix_after_pullout(new_parent, ref);
+  /* Then, re-calculate not_null_tables_cache according to our special rules */
+  eval_not_null_tables(NULL);
+}
+
 static int srtcmp_in(CHARSET_INFO *cs, const String *x,const String *y)
 {
   return cs->coll->strnncollsp(cs,
@@ -3856,20 +3816,17 @@ static int srtcmp_in(CHARSET_INFO *cs, const String *x,const String *y)
                                (uchar *) y->ptr(),y->length(), 0);
 }
 
-
 void Item_func_in::fix_length_and_dec()
 {
   Item **arg, **arg_end;
   bool const_itm= 1;
   THD *thd= current_thd;
-  bool datetime_found= FALSE;
   /* TRUE <=> arguments values will be compared as DATETIMEs. */
-  bool compare_as_datetime= FALSE;
   Item *date_arg= 0;
   uint found_types= 0;
   uint type_cnt= 0, i;
   Item_result cmp_type= STRING_RESULT;
-  left_result_type= args[0]->result_type();
+  left_result_type= args[0]->cmp_type();
   if (!(found_types= collect_cmp_types(args, arg_count, true)))
     return;
   
@@ -3881,7 +3838,7 @@ void Item_func_in::fix_length_and_dec()
       break;
     }
   }
-  for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
+  for (i= 0; i <= (uint)TIME_RESULT; i++)
   {
     if (found_types & 1 << i)
     {
@@ -3896,16 +3853,12 @@ void Item_func_in::fix_length_and_dec()
         agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV, 1))
       return;
     arg_types_compatible= TRUE;
-  }
-  if (type_cnt == 1)
-  {
-    /*
-      When comparing rows create the row comparator object beforehand to ease
-      the DATETIME comparison detection procedure.
-    */
+
     if (cmp_type == ROW_RESULT)
     {
+      uint cols= args[0]->cols();
       cmp_item_row *cmp= 0;
+
       if (const_itm && !nulls_in_row())
       {
         array= new in_row(arg_count-1, 0);
@@ -3917,66 +3870,20 @@ void Item_func_in::fix_length_and_dec()
           return;
         cmp_items[ROW_RESULT]= cmp;
       }
-      cmp->n= args[0]->cols();
+      cmp->n= cols;
       cmp->alloc_comparators();
-    }
-    /* All DATE/DATETIME fields/functions has the STRING result type. */
-    if (cmp_type == STRING_RESULT || cmp_type == ROW_RESULT)
-    {
-      uint col, cols= args[0]->cols();
 
-      for (col= 0; col < cols; col++)
+      for (uint col= 0; col < cols; col++)
       {
-        bool skip_column= FALSE;
-        /*
-          Check that all items to be compared has the STRING result type and at
-          least one of them is a DATE/DATETIME item.
-        */
-        for (arg= args, arg_end= args + arg_count; arg != arg_end ; arg++)
+        date_arg= find_date_time_item(args, arg_count, col);
+        if (date_arg)
         {
-          Item *itm= ((cmp_type == STRING_RESULT) ? arg[0] :
-                      arg[0]->element_index(col));
-          if (itm->result_type() != STRING_RESULT)
-          {
-            skip_column= TRUE;
-            break;
-          }
-          else if (itm->is_datetime())
-          {
-            datetime_found= TRUE;
-            /*
-              Internally all DATE/DATETIME values are converted to the DATETIME
-              type. So try to find a DATETIME item to issue correct warnings.
-            */
-            if (!date_arg)
-              date_arg= itm;
-            else if (itm->field_type() == MYSQL_TYPE_DATETIME)
-            {
-              date_arg= itm;
-              /* All arguments are already checked to have the STRING result. */
-              if (cmp_type == STRING_RESULT)
-                break;
-            }
-          }
-        }
-        if (skip_column)
-          continue;
-        if (datetime_found)
-        {
-          if (cmp_type == ROW_RESULT)
-          {
-            cmp_item **cmp= 0;
-            if (array)
-              cmp= ((in_row*)array)->tmp.comparators + col;
-            else
-              cmp= ((cmp_item_row*)cmp_items[ROW_RESULT])->comparators + col;
-            *cmp= new cmp_item_datetime(date_arg);
-            /* Reset variables for the next column. */
-            date_arg= 0;
-            datetime_found= FALSE;
-          }
+          cmp_item **cmp= 0;
+          if (array)
+            cmp= ((in_row*)array)->tmp.comparators + col;
           else
-            compare_as_datetime= TRUE;
+            cmp= ((cmp_item_row*)cmp_items[ROW_RESULT])->comparators + col;
+          *cmp= new cmp_item_datetime(date_arg);
         }
       }
     }
@@ -3987,62 +3894,62 @@ void Item_func_in::fix_length_and_dec()
   */
   if (type_cnt == 1 && const_itm && !nulls_in_row())
   {
-    if (compare_as_datetime)
-      array= new in_datetime(date_arg, arg_count - 1);
-    else
+    /*
+      IN must compare INT columns and constants as int values (the same
+      way as equality does).
+      So we must check here if the column on the left and all the constant 
+      values on the right can be compared as integers and adjust the 
+      comparison type accordingly.
+
+      See the comment about the similar block in Item_bool_func2
+    */  
+    if (args[0]->real_item()->type() == FIELD_ITEM &&
+        !thd->lex->is_ps_or_view_context_analysis() && cmp_type != INT_RESULT)
     {
-      /*
-        IN must compare INT columns and constants as int values (the same
-        way as equality does).
-        So we must check here if the column on the left and all the constant 
-        values on the right can be compared as integers and adjust the 
-        comparison type accordingly.
-      */  
-      if (args[0]->real_item()->type() == FIELD_ITEM &&
-          thd->lex->sql_command != SQLCOM_CREATE_VIEW &&
-          thd->lex->sql_command != SQLCOM_SHOW_CREATE &&
-          cmp_type != INT_RESULT)
+      Item_field *field_item= (Item_field*) (args[0]->real_item());
+      if (field_item->field_type() ==  MYSQL_TYPE_LONGLONG ||
+          field_item->field_type() ==  MYSQL_TYPE_YEAR)
       {
-        Item_field *field_item= (Item_field*) (args[0]->real_item());
-        if (field_item->field->can_be_compared_as_longlong())
+        bool all_converted= TRUE;
+        for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++)
         {
-          bool all_converted= TRUE;
-          for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++)
-          {
-            if (!convert_constant_item (thd, field_item, &arg[0]))
-              all_converted= FALSE;
-          }
-          if (all_converted)
-            cmp_type= INT_RESULT;
+           if (!convert_const_to_int(thd, field_item, &arg[0]))
+            all_converted= FALSE;
         }
-      }
-      switch (cmp_type) {
-      case STRING_RESULT:
-        array=new in_string(arg_count-1,(qsort2_cmp) srtcmp_in, 
-                            cmp_collation.collation);
-        break;
-      case INT_RESULT:
-        array= new in_longlong(arg_count-1);
-        break;
-      case REAL_RESULT:
-        array= new in_double(arg_count-1);
-        break;
-      case ROW_RESULT:
-        /*
-          The row comparator was created at the beginning but only DATETIME
-          items comparators were initialized. Call store_value() to setup
-          others.
-        */
-        ((in_row*)array)->tmp.store_value(args[0]);
-        break;
-      case DECIMAL_RESULT:
-        array= new in_decimal(arg_count - 1);
-        break;
-      default:
-        DBUG_ASSERT(0);
-        return;
+        if (all_converted)
+          cmp_type= INT_RESULT;
       }
     }
+    switch (cmp_type) {
+    case STRING_RESULT:
+      array=new in_string(arg_count-1,(qsort2_cmp) srtcmp_in, 
+                          cmp_collation.collation);
+      break;
+    case INT_RESULT:
+      array= new in_longlong(arg_count-1);
+      break;
+    case REAL_RESULT:
+      array= new in_double(arg_count-1);
+      break;
+    case ROW_RESULT:
+      /*
+        The row comparator was created at the beginning but only DATETIME
+        items comparators were initialized. Call store_value() to setup
+        others.
+      */
+      ((in_row*)array)->tmp.store_value(args[0]);
+      break;
+    case DECIMAL_RESULT:
+      array= new in_decimal(arg_count - 1);
+      break;
+    case TIME_RESULT:
+      date_arg= find_date_time_item(args, arg_count, 0);
+      array= new in_datetime(date_arg, arg_count - 1);
+      break;
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);
+      break;
+    }
     if (array && !(thd->is_fatal_error))		// If not EOM
     {
       uint j=0;
@@ -4060,23 +3967,21 @@ void Item_func_in::fix_length_and_dec()
   }
   else
   {
-    if (compare_as_datetime)
-      cmp_items[STRING_RESULT]= new cmp_item_datetime(date_arg);
-    else
+    for (i= 0; i <= (uint) TIME_RESULT; i++)
     {
-      for (i= 0; i <= (uint) DECIMAL_RESULT; i++)
+      if (found_types & (1 << i) && !cmp_items[i])
       {
-        if (found_types & (1 << i) && !cmp_items[i])
-        {
-          if ((Item_result)i == STRING_RESULT &&
-              agg_arg_charsets(cmp_collation, args, arg_count,
-                               MY_COLL_CMP_CONV, 1))
-            return;
-          if (!cmp_items[i] && !(cmp_items[i]=
-              cmp_item::get_comparator((Item_result)i,
-                                       cmp_collation.collation)))
-            return;
-        }
+        if ((Item_result)i == STRING_RESULT &&
+            agg_arg_charsets(cmp_collation, args, arg_count,
+                             MY_COLL_CMP_CONV, 1))
+          return;
+        if ((Item_result)i == TIME_RESULT)
+          date_arg= find_date_time_item(args, arg_count, 0);
+
+        if (!cmp_items[i] && !(cmp_items[i]=
+            cmp_item::get_comparator((Item_result)i, date_arg,
+                                     cmp_collation.collation)))
+          return;
       }
     }
   }
@@ -4144,7 +4049,7 @@ longlong Item_func_in::val_int()
       have_null= TRUE;
       continue;
     }
-    Item_result cmp_type= item_cmp_type(left_result_type, args[i]->result_type());
+    Item_result cmp_type= item_cmp_type(left_result_type, args[i]->cmp_type());
     in_item= cmp_items[(uint)cmp_type];
     DBUG_ASSERT(in_item);
     if (!(value_added_map & (1 << (uint)cmp_type)))
@@ -4233,6 +4138,7 @@ Item_cond::fix_fields(THD *thd, Item **ref)
 #endif
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 1;
+
   /*
     and_table_cache is the value that Item_cond_or() returns for
     not_null_tables()
@@ -4258,7 +4164,6 @@ Item_cond::fix_fields(THD *thd, Item **ref)
   */
   while ((item=li++))
   {
-    table_map tmp_table_map;
     while (item->type() == Item::COND_ITEM &&
 	   ((Item_cond*) item)->functype() == functype() &&
            !((Item_cond*) item)->list.is_empty())
@@ -4280,12 +4185,14 @@ Item_cond::fix_fields(THD *thd, Item **ref)
       and_tables_cache= (table_map) 0;
     else
     {
-      tmp_table_map= item->not_null_tables();
+      table_map tmp_table_map= item->not_null_tables();
       not_null_tables_cache|= tmp_table_map;
       and_tables_cache&= tmp_table_map;
       const_item_cache= FALSE;
-    }  
+    } 
+  
     with_sum_func=	    with_sum_func || item->with_sum_func;
+    with_field=             with_field || item->with_field;
     with_subselect|=        item->with_subselect;
     if (item->maybe_null)
       maybe_null=1;
@@ -4296,6 +4203,62 @@ Item_cond::fix_fields(THD *thd, Item **ref)
   return FALSE;
 }
 
+
+bool
+Item_cond::eval_not_null_tables(uchar *opt_arg)
+{
+  Item *item;
+  List_iterator<Item> li(list);
+  not_null_tables_cache= (table_map) 0;
+  and_tables_cache= ~(table_map) 0;
+  while ((item=li++))
+  {
+    table_map tmp_table_map;
+    if (item->const_item())
+      and_tables_cache= (table_map) 0;
+    else
+    {
+      tmp_table_map= item->not_null_tables();
+      not_null_tables_cache|= tmp_table_map;
+      and_tables_cache&= tmp_table_map;
+    }
+  }
+  return 0;
+}
+
+
+void Item_cond::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  List_iterator<Item> li(list);
+  Item *item;
+
+  used_tables_cache=0;
+  const_item_cache=1;
+
+  and_tables_cache= ~(table_map) 0; // Here and below we do as fix_fields does
+  not_null_tables_cache= 0;
+
+  while ((item=li++))
+  {
+    table_map tmp_table_map;
+    item->fix_after_pullout(new_parent, li.ref());
+    item= *li.ref();
+    used_tables_cache|= item->used_tables();
+    const_item_cache&= item->const_item();
+
+    if (item->const_item())
+      and_tables_cache= (table_map) 0;
+    else
+    {
+      tmp_table_map= item->not_null_tables();
+      not_null_tables_cache|= tmp_table_map;
+      and_tables_cache&= tmp_table_map;
+      const_item_cache= FALSE;
+    }  
+  }
+}
+
+
 bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg)
 {
   List_iterator_fast<Item> li(list);
@@ -4505,6 +4468,17 @@ void Item_cond::neg_arguments(THD *thd)
 }
 
 
+void Item_cond_and::mark_as_condition_AND_part(TABLE_LIST *embedding)
+{
+  List_iterator<Item> li(list);
+  Item *item;
+  while ((item=li++))
+  {
+    item->mark_as_condition_AND_part(embedding);
+  }
+}
+
+
 /**
   Evaluation of AND(expr, expr, expr ...).
 
@@ -4607,12 +4581,6 @@ Item *and_expressions(Item *a, Item *b, Item **org_item)
 longlong Item_func_isnull::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  /*
-    Handle optimization if the argument can't be null
-    This has to be here because of the test in update_used_tables().
-  */
-  if (!used_tables_cache && !with_subselect)
-    return cached_value;
   return args[0]->is_null() ? 1: 0;
 }
 
@@ -4620,12 +4588,6 @@ longlong Item_is_not_null_test::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_is_not_null_test::val_int");
-  if (!used_tables_cache && !with_subselect)
-  {
-    owner->was_null|= (!cached_value);
-    DBUG_PRINT("info", ("cached: %ld", (long) cached_value));
-    DBUG_RETURN(cached_value);
-  }
   if (args[0]->is_null())
   {
     DBUG_PRINT("info", ("null"));
@@ -4642,19 +4604,9 @@ longlong Item_is_not_null_test::val_int()
 void Item_is_not_null_test::update_used_tables()
 {
   if (!args[0]->maybe_null)
-  {
     used_tables_cache= 0;			/* is always true */
-    cached_value= (longlong) 1;
-  }
   else
-  {
     args[0]->update_used_tables();
-    if (!(used_tables_cache=args[0]->used_tables()) && !with_subselect)
-    {
-      /* Remember if the value is always NULL or never NULL */
-      cached_value= (longlong) !args[0]->is_null();
-    }
-  }
 }
 
 
@@ -4907,6 +4859,7 @@ Item_func_regex::fix_fields(THD *thd, Item **ref)
        args[1]->fix_fields(thd, args + 1)) || args[1]->check_cols(1))
     return TRUE;				/* purecov: inspected */
   with_sum_func=args[0]->with_sum_func || args[1]->with_sum_func;
+  with_field= args[0]->with_field || args[1]->with_field;
   max_length= 1;
   decimals= 0;
 
@@ -5230,23 +5183,21 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
     very fast to use.
 */
 
-longlong Item_cond_xor::val_int()
+longlong Item_func_xor::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  List_iterator<Item> li(list);
-  Item *item;
-  int result=0;	
-  null_value=0;
-  while ((item=li++))
+  int result= 0;
+  null_value= false;
+  for (uint i= 0; i < arg_count; i++)
   {
-    result^= (item->val_int() != 0);
-    if (item->null_value)
+    result^= (args[i]->val_int() != 0);
+    if (args[i]->null_value)
     {
-      null_value=1;
+      null_value= true;
       return 0;
     }
   }
-  return (longlong) result;
+  return result;
 }
 
 /**
@@ -5287,6 +5238,33 @@ Item *Item_bool_rowready_func2::neg_transformer(THD *thd)
   return item;
 }
 
+/**
+  XOR can be negated by negating one of the operands:
+
+  NOT (a XOR b)  => (NOT a) XOR b
+                 => a       XOR (NOT b)
+
+  @param thd     Thread handle
+  @return        New negated item
+*/
+Item *Item_func_xor::neg_transformer(THD *thd)
+{
+  Item *neg_operand;
+  Item_func_xor *new_item;
+  if ((neg_operand= args[0]->neg_transformer(thd)))
+    // args[0] has neg_tranformer
+    new_item= new(thd->mem_root) Item_func_xor(neg_operand, args[1]);
+  else if ((neg_operand= args[1]->neg_transformer(thd)))
+    // args[1] has neg_tranformer
+    new_item= new(thd->mem_root) Item_func_xor(args[0], neg_operand);
+  else
+  {
+    neg_operand= new(thd->mem_root) Item_func_not(args[0]);
+    new_item= new(thd->mem_root) Item_func_xor(neg_operand, args[1]);
+  }
+  return new_item;
+}
+
 
 /**
   a IS NULL  ->  a IS NOT NULL.
@@ -5331,7 +5309,7 @@ Item *Item_func_nop_all::neg_transformer(THD *thd)
   /* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */
   Item_func_not_all *new_item= new Item_func_not_all(args[0]);
   Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
-  allany->func= allany->func_creator(FALSE);
+  allany->create_comp_func(FALSE);
   allany->all= !allany->all;
   allany->upper_item= new_item;
   return new_item;
@@ -5343,7 +5321,7 @@ Item *Item_func_not_all::neg_transformer(THD *thd)
   Item_func_nop_all *new_item= new Item_func_nop_all(args[0]);
   Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
   allany->all= !allany->all;
-  allany->func= allany->func_creator(TRUE);
+  allany->create_comp_func(TRUE);
   allany->upper_item= new_item;
   return new_item;
 }
@@ -5392,43 +5370,92 @@ Item *Item_bool_rowready_func2::negated_item()
   return 0;
 }
 
-Item_equal::Item_equal(Item_field *f1, Item_field *f2)
-  : Item_bool_func(), const_item(0), eval_item(0), cond_false(0),
-    compare_as_dates(FALSE)
-{
-  const_item_cache= 0;
-  fields.push_back(f1);
-  fields.push_back(f2);
-}
 
-Item_equal::Item_equal(Item *c, Item_field *f)
+/**
+  Construct a minimal multiple equality item
+
+  @param f1               the first equal item
+  @param f2               the second equal item
+  @param with_const_item  TRUE if the first item is constant
+
+  @details
+  The constructor builds a new item equal object for the equality f1=f2.
+  One of the equal items can be constant. If this is the case it is passed
+  always as the first parameter and the parameter with_const_item serves
+  as an indicator of this case.
+  Currently any non-constant parameter items must point to an item of the
+  of the type Item_field or Item_direct_view_ref(Item_field). 
+*/
+
+Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item)
   : Item_bool_func(), eval_item(0), cond_false(0)
 {
   const_item_cache= 0;
-  fields.push_back(f);
-  const_item= c;
-  compare_as_dates= f->is_datetime();
+  with_const= with_const_item;
+  equal_items.push_back(f1);
+  equal_items.push_back(f2);
+  compare_as_dates= with_const_item && f2->cmp_type() == TIME_RESULT;
 }
 
 
+/**
+  Copy constructor for a multiple equality
+  
+  @param item_equal   source item for the constructor
+
+  @details
+  The function creates a copy of an Item_equal object.
+  This constructor is used when an item belongs to a multiple equality
+  of an upper level (an upper AND/OR level or an upper level of a nested
+  outer join).
+*/
+
 Item_equal::Item_equal(Item_equal *item_equal)
   : Item_bool_func(), eval_item(0), cond_false(0)
 {
   const_item_cache= 0;
-  List_iterator_fast<Item_field> li(item_equal->fields);
-  Item_field *item;
+  List_iterator_fast<Item> li(item_equal->equal_items);
+  Item *item;
   while ((item= li++))
   {
-    fields.push_back(item);
+    equal_items.push_back(item);
   }
-  const_item= item_equal->const_item;
+  with_const= item_equal->with_const;
   compare_as_dates= item_equal->compare_as_dates;
   cond_false= item_equal->cond_false;
 }
 
 
-void Item_equal::compare_const(Item *c)
+/*
+  @brief
+  Add a constant item to the Item_equal object
+
+  @param[in]  c  the constant to add
+  @param[in]  f  item from the list equal_items the item c is equal to
+                 (this parameter is optional)
+
+  @details
+  The method adds the constant item c to the equal_items list. If the list
+  doesn't have any constant item yet the item c is just put in the front
+  the list. Otherwise the value of c is compared with the value of the
+  constant item from equal_items. If they are not equal cond_false is set
+  to TRUE. This serves as an indicator that this Item_equal is always FALSE.
+  The optional parameter f is used to adjust the flag compare_as_dates.
+*/
+
+void Item_equal::add_const(Item *c, Item *f)
 {
+  if (cond_false)
+    return;
+  if (!with_const)
+  {
+    with_const= TRUE;
+    if (f)
+      compare_as_dates= f->cmp_type() == TIME_RESULT;
+    equal_items.push_front(c);
+    return;
+  }
+  Item *const_item= get_const();
   if (compare_as_dates)
   {
     cmp.set_datetime_cmp_func(this, &c, &const_item);
@@ -5445,65 +5472,28 @@ void Item_equal::compare_const(Item *c)
     const_item_cache= 1;
 }
 
-
-void Item_equal::add(Item *c, Item_field *f)
-{
-  if (cond_false)
-    return;
-  if (!const_item)
-  {
-    DBUG_ASSERT(f);
-    const_item= c;
-    compare_as_dates= f->is_datetime();
-    return;
-  }
-  compare_const(c);
-}
-
-
-void Item_equal::add(Item *c)
-{
-  if (cond_false)
-    return;
-  if (!const_item)
-  {
-    const_item= c;
-    return;
-  }
-  compare_const(c);
-}
-
-void Item_equal::add(Item_field *f)
-{
-  fields.push_back(f);
-}
-
-uint Item_equal::members()
-{
-  return fields.elements;
-}
-
-
 /**
-  Check whether a field is referred in the multiple equality.
-
-  The function checks whether field is occurred in the Item_equal object .
+  @brief
+  Check whether a field is referred to in the multiple equality
 
   @param field   field whose occurrence is to be checked
 
+  @details
+  The function checks whether field is referred to by one of the
+  items from the equal_items list.
+
   @retval
-    1       if nultiple equality contains a reference to field
+    1       if multiple equality contains a reference to field
   @retval
     0       otherwise    
 */
 
 bool Item_equal::contains(Field *field)
 {
-  List_iterator_fast<Item_field> it(fields);
-  Item_field *item;
-  while ((item= it++))
+  Item_equal_fields_iterator it(*this);
+  while (it++)
   {
-    if (field->eq(item->field))
+    if (field->eq(it.get_curr_field()))
         return 1;
   }
   return 0;
@@ -5511,109 +5501,169 @@ bool Item_equal::contains(Field *field)
 
 
 /**
-  Join members of another Item_equal object.
+  @brief
+  Join members of another Item_equal object
   
-    The function actually merges two multiple equalities.
-    After this operation the Item_equal object additionally contains
-    the field items of another item of the type Item_equal.
-    If the optional constant items are not equal the cond_false flag is
-    set to 1.  
   @param item    multiple equality whose members are to be joined
+
+  @details
+  The function actually merges two multiple equalities. After this operation
+  the Item_equal object additionally contains the field items of another item of
+  the type Item_equal.
+  If the optional constant items are not equal the cond_false flag is set to TRUE.
+
+  @notes
+  The function is called for any equality f1=f2 such that f1 and f2 are items
+  of the type Item_field or Item_direct_view_ref(Item_field), and, f1->field is
+  referred to in the list this->equal_items, while the list item->equal_items
+  contains a reference to f2->field.  
 */
 
 void Item_equal::merge(Item_equal *item)
 {
-  fields.concat(&item->fields);
-  Item *c= item->const_item;
+  Item *c= item->get_const();
+  if (c)
+    item->equal_items.pop();
+  equal_items.concat(&item->equal_items);
   if (c)
   {
     /* 
-      The flag cond_false will be set to 1 after this, if 
+      The flag cond_false will be set to TRUE after this if 
       the multiple equality already contains a constant and its 
-      value is  not equal to the value of c.
+      value is not equal to the value of c.
     */
-    add(c);
+    add_const(c);
   }
   cond_false|= item->cond_false;
 } 
 
 
 /**
-  Order field items in multiple equality according to a sorting criteria.
+  @brief
+  Order equal items of the  multiple equality according to a sorting criteria
+
+  @param compare      function to compare items from the equal_items list
+  @param arg          context extra parameter for the cmp function
 
-  The function perform ordering of the field items in the Item_equal
-  object according to the criteria determined by the cmp callback parameter.
-  If cmp(item_field1,item_field2,arg)<0 than item_field1 must be
-  placed after item_fiel2.
+  @details
+  The function performs ordering of the items from the equal_items list
+  according to the criteria determined by the cmp callback parameter.
+  If cmp(item1,item2,arg)<0 than item1 must be placed after item2.
 
-  The function sorts field items by the exchange sort algorithm.
+  @notes
+  The function sorts equal items by the bubble sort algorithm.
   The list of field items is looked through and whenever two neighboring
   members follow in a wrong order they are swapped. This is performed
   again and again until we get all members in a right order.
-
-  @param compare      function to compare field item
-  @param arg          context extra parameter for the cmp function
 */
 
 void Item_equal::sort(Item_field_cmpfunc compare, void *arg)
 {
-  exchange_sort<Item_field>(&fields, compare, arg);
+  bubble_sort<Item>(&equal_items, compare, arg);
 }
 
 
 /**
-  Check appearance of new constant items in the multiple equality object.
+  @brief
+  Check appearance of new constant items in the multiple equality object
 
-  The function checks appearance of new constant items among
-  the members of multiple equalities. Each new constant item is
-  compared with the designated constant item if there is any in the
-  multiple equality. If there is none the first new constant item
-  becomes designated.
+  @details
+  The function checks appearance of new constant items among the members
+  of the equal_items list. Each new constant item is compared with
+  the constant item from the list if there is any. If there is none the first
+  new constant item is placed at the very beginning of the list and
+  with_const is set to TRUE. If it happens that the compared constant items
+  are unequal then the flag cond_false is set to TRUE.
+
+  @notes 
+  Currently this function is called only after substitution of constant tables.
 */
 
 void Item_equal::update_const()
 {
-  List_iterator<Item_field> it(fields);
+  List_iterator<Item> it(equal_items);
+  if (with_const)
+    it++;
   Item *item;
   while ((item= it++))
   {
-    if (item->const_item())
+    if (item->const_item() && !item->is_expensive())
     {
-      it.remove();
-      add(item);
-    }
+      if (item == equal_items.head())
+        with_const= TRUE;
+      else
+      {
+        it.remove();
+        add_const(item);
+      }
+    } 
   }
 }
 
+
+/**
+  @brief
+  Fix fields in a completely built multiple equality
+
+  @param  thd     currently not used thread handle 
+  @param  ref     not used
+
+  @details
+  This function is called once the multiple equality has been built out of 
+  the WHERE/ON condition and no new members are expected to be added to the
+  equal_items list anymore.
+  As any implementation of the virtual fix_fields method the function
+  calculates the cached values of not_null_tables_cache, used_tables_cache,
+  const_item_cache and calls fix_length_and_dec().
+  Additionally the function sets a reference to the Item_equal object in
+  the non-constant items of the equal_items list unless such a reference has
+  been already set.
+
+  @notes 
+  Currently this function is called only in the function
+  build_equal_items_for_cond.
+  
+  @retval
+  FALSE   always
+*/
+
 bool Item_equal::fix_fields(THD *thd, Item **ref)
-{
-  List_iterator_fast<Item_field> li(fields);
+{ 
+  DBUG_ASSERT(fixed == 0);
+  Item_equal_fields_iterator it(*this);
   Item *item;
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 0;
-  while ((item= li++))
+  while ((item= it++))
   {
     table_map tmp_table_map;
     used_tables_cache|= item->used_tables();
     tmp_table_map= item->not_null_tables();
     not_null_tables_cache|= tmp_table_map;
     if (item->maybe_null)
-      maybe_null=1;
+      maybe_null= 1;
+    if (!item->get_item_equal())
+      item->set_item_equal(this);
   }
   fix_length_and_dec();
   fixed= 1;
-  return 0;
+  return FALSE;
 }
 
+
+/**
+  Update the value of the used table attribute and other attributes
+ */
+
 void Item_equal::update_used_tables()
 {
-  List_iterator_fast<Item_field> li(fields);
-  Item *item;
   not_null_tables_cache= used_tables_cache= 0;
   if ((const_item_cache= cond_false))
     return;
+  Item_equal_fields_iterator it(*this);
+  Item *item;
   const_item_cache= 1;
-  while ((item=li++))
+  while ((item= it++))
   {
     item->update_used_tables();
     used_tables_cache|= item->used_tables();
@@ -5621,39 +5671,66 @@ void Item_equal::update_used_tables()
   }
 }
 
+
+
+/**
+  @brief
+  Evaluate multiple equality
+
+  @details
+  The function evaluate multiple equality to a boolean value.
+  The function ignores non-constant items from the equal_items list.
+  The function returns 1 if all constant items from the list are equal. 
+  It returns 0 if there are unequal constant items in the list or 
+  one of the constant items is evaluated to NULL. 
+  
+  @notes 
+  Currently this function can be called only at the optimization
+  stage after the constant table substitution, since all Item_equals
+  are eliminated before the execution stage.
+  
+  @retval
+     0     multiple equality is always FALSE or NULL
+     1     otherwise
+*/
+
 longlong Item_equal::val_int()
 {
-  Item_field *item_field;
   if (cond_false)
     return 0;
-  List_iterator_fast<Item_field> it(fields);
-  Item *item= const_item ? const_item : it++;
-  if ((null_value= item->is_null()))
-    return 0;
+  Item *item= get_const();
+  Item_equal_fields_iterator it(*this);
+  if (!item)
+    item= it++;
   eval_item->store_value(item);
-  while ((item_field= it++))
+  if ((null_value= item->null_value))
+    return 0;
+  while ((item= it++))
   {
+    Field *field= it.get_curr_field();
     /* Skip fields of non-const tables. They haven't been read yet */
-    if (item_field->field->table->const_table)
+    if (field->table->const_table)
     {
-      if ((null_value= item_field->is_null()) || eval_item->cmp(item_field))
+      if (eval_item->cmp(item) || (null_value= item->null_value))
         return 0;
     }
   }
   return 1;
 }
 
+
 void Item_equal::fix_length_and_dec()
 {
-  Item *item= get_first();
-  eval_item= cmp_item::get_comparator(item->result_type(),
+  Item *item= get_first(NO_PARTICULAR_TAB, NULL);
+  eval_item= cmp_item::get_comparator(item->cmp_type(), item,
                                       item->collation.collation);
 }
 
+
 bool Item_equal::walk(Item_processor processor, bool walk_subquery, uchar *arg)
 {
-  List_iterator_fast<Item_field> it(fields);
   Item *item;
+  Item_equal_fields_iterator it(*this);
   while ((item= it++))
   {
     if (item->walk(processor, walk_subquery, arg))
@@ -5662,12 +5739,13 @@ bool Item_equal::walk(Item_processor processor, bool walk_subquery, uchar *arg)
   return Item_func::walk(processor, walk_subquery, arg);
 }
 
+
 Item *Item_equal::transform(Item_transformer transformer, uchar *arg)
 {
   DBUG_ASSERT(!current_thd->is_stmt_prepare());
 
-  List_iterator<Item_field> it(fields);
   Item *item;
+  Item_equal_fields_iterator it(*this);
   while ((item= it++))
   {
     Item *new_item= item->transform(transformer, arg);
@@ -5686,19 +5764,20 @@ Item *Item_equal::transform(Item_transformer transformer, uchar *arg)
   return Item_func::transform(transformer, arg);
 }
 
+
 void Item_equal::print(String *str, enum_query_type query_type)
 {
+  if (cond_false)
+  {
+    str->append('0');
+    return;
+  }
   str->append(func_name());
   str->append('(');
-  List_iterator_fast<Item_field> it(fields);
+  List_iterator_fast<Item> it(equal_items);
   Item *item;
-  if (const_item)
-    const_item->print(str, query_type);
-  else
-  {
-    item= it++;
-    item->print(str, query_type);
-  }
+  item= it++;
+  item->print(str, query_type);
   while ((item= it++))
   {
     str->append(',');
@@ -5708,3 +5787,153 @@ void Item_equal::print(String *str, enum_query_type query_type)
   str->append(')');
 }
 
+
+CHARSET_INFO *Item_equal::compare_collation()
+{ 
+  Item_equal_fields_iterator it(*this);
+  Item *item= it++;
+  return item->collation.collation;
+}
+
+
+/*
+  @brief Get the first equal field of multiple equality.
+  @param[in] field   the field to get equal field to
+
+  @details Get the first field of multiple equality that is equal to the
+  given field. In order to make semi-join materialization strategy work
+  correctly we can't propagate equal fields from upper select to a
+  materialized semi-join.
+  Thus the fields is returned according to following rules:
+
+  1) If the given field belongs to a semi-join then the first field in
+     multiple equality which belong to the same semi-join is returned.
+     Otherwise NULL is returned.
+  2) If the given field doesn't belong to a semi-join then
+     the first field in the multiple equality that doesn't belong to any
+     semi-join is returned.
+     If all fields in the equality are belong to semi-join(s) then NULL
+     is returned.
+  3) If no field is given then the first field in the multiple equality
+     is returned without regarding whether it belongs to a semi-join or not.
+
+  @retval Found first field in the multiple equality.
+  @retval 0 if no field found.
+*/
+
+Item* Item_equal::get_first(JOIN_TAB *context, Item *field_item)
+{
+  Item_equal_fields_iterator it(*this);
+  Item *item;
+  if (!field_item)
+    return (it++);
+  Field *field= ((Item_field *) (field_item->real_item()))->field;
+
+  /*
+    Of all equal fields, return the first one we can use. Normally, this is the
+    field which belongs to the table that is the first in the join order.
+
+    There is one exception to this: When semi-join materialization strategy is
+    used, and the given field belongs to a table within the semi-join nest, we
+    must pick the first field in the semi-join nest.
+
+    Example: suppose we have a join order:
+
+       ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
+
+    and equality ot2.col = it1.col = it2.col
+    If we're looking for best substitute for 'it2.col', we should pick it1.col
+    and not ot2.col.
+    
+    eliminate_item_equal() also has code that deals with equality substitution
+    in presense of SJM nests.
+  */
+
+  TABLE_LIST *emb_nest;
+  if (context != NO_PARTICULAR_TAB)
+    emb_nest= context->emb_sj_nest;
+  else
+    emb_nest= field->table->pos_in_table_list->embedding;
+
+  if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used)
+  {
+    /*
+      It's a field from an materialized semi-join. We can substitute it for
+       - a constant item 
+       - a field from the same semi-join
+       Find the first of such items:
+    */
+    while ((item= it++))
+    {
+      if (item->const_item() || 
+          it.get_curr_field()->table->pos_in_table_list->embedding == emb_nest)
+      {
+        /*
+          If we found given field then return NULL to avoid unnecessary
+          substitution.
+        */
+        return (item != field_item) ? item : NULL;
+      }
+    }
+  }
+  else
+  {
+    /*
+      The field is not in SJ-Materialization nest. We must return the first
+      field in the join order. The field may be inside a semi-join nest, i.e 
+      a join order may look like this:
+
+          SJ-Mat(it1  it2)  ot1  ot2
+
+      where we're looking what to substitute ot2.col for. In this case we must 
+      still return it1.col, here's a proof why:
+
+      First let's note that either it1.col or it2.col participates in 
+      subquery's IN-equality. It can't be otherwise, because materialization is
+      only applicable to uncorrelated subqueries, so the only way we could
+      infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-eqality has 
+      it1.col or it2.col on its inner side. it1.col is first such item in the
+      join order, so it's not possible for SJ-Mat to be
+      SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part
+      of this strategy will unpack value of it1.col=it2.col into it1.col
+      (that's the first equal item inside the subquery), and we'll be able to
+      get it from there. qed.
+    */
+
+    return equal_items.head();
+  }
+  // Shouldn't get here.
+  DBUG_ASSERT(0);
+  return NULL;
+}
+
+
+longlong Item_func_dyncol_exists::val_int()
+{
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+  DYNAMIC_COLUMN col;
+  String *str;
+  ulonglong num;
+  enum enum_dyncol_func_result rc;
+
+  num= args[1]->val_int();
+  str= args[0]->val_str(&tmp);
+  if (args[0]->null_value || args[1]->null_value || num > UINT_MAX16)
+    goto null;
+  col.length= str->length();
+  /* We do not change the string, so could do this trick */
+  col.str= (char *)str->ptr();
+  rc= dynamic_column_exists(&col, (uint) num);
+  if (rc < 0)
+  {
+    dynamic_column_error_message(rc);
+    goto null;
+  }
+  null_value= FALSE;
+  return rc == ER_DYNCOL_YES;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 0278ad201af..ad4f889f4df 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -1,4 +1,5 @@
 /* Copyright (c) 2000, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2009-2011, Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -26,39 +27,29 @@ class Arg_comparator;
 
 typedef int (Arg_comparator::*arg_cmp_func)();
 
-typedef int (*Item_field_cmpfunc)(Item_field *f1, Item_field *f2, void *arg); 
+typedef int (*Item_field_cmpfunc)(Item *f1, Item *f2, void *arg); 
 
 class Arg_comparator: public Sql_alloc
 {
   Item **a, **b;
   arg_cmp_func func;
   Item_result_field *owner;
+  bool set_null;                   // TRUE <=> set owner->null_value
   Arg_comparator *comparators;   // used only for compare_row()
   double precision;
   /* Fields used in DATE/DATETIME comparison. */
   THD *thd;
-  enum_field_types a_type, b_type; // Types of a and b items
   Item *a_cache, *b_cache;         // Cached values of a and b items
-  bool is_nulls_eq;                // TRUE <=> compare for the EQUAL_FUNC
-  bool set_null;                   // TRUE <=> set owner->null_value
                                    //   when one of arguments is NULL.
-  enum enum_date_cmp_type { CMP_DATE_DFLT= 0, CMP_DATE_WITH_DATE,
-                            CMP_DATE_WITH_STR, CMP_STR_WITH_DATE };
-  longlong (*get_value_a_func)(THD *thd, Item ***item_arg, Item **cache_arg,
-                               Item *warn_item, bool *is_null);
-  longlong (*get_value_b_func)(THD *thd, Item ***item_arg, Item **cache_arg,
-                               Item *warn_item, bool *is_null);
-  bool try_year_cmp_func(Item_result type);
 public:
   DTCollation cmp_collation;
   /* Allow owner function to use string buffers. */
   String value1, value2;
 
-  Arg_comparator(): comparators(0), thd(0), a_cache(0), b_cache(0), set_null(TRUE),
-    get_value_a_func(0), get_value_b_func(0) {};
-  Arg_comparator(Item **a1, Item **a2): a(a1), b(a2), comparators(0), thd(0),
-    a_cache(0), b_cache(0), set_null(TRUE),
-    get_value_a_func(0), get_value_b_func(0) {};
+  Arg_comparator():  set_null(TRUE), comparators(0), thd(0),
+    a_cache(0), b_cache(0) {};
+  Arg_comparator(Item **a1, Item **a2): a(a1), b(a2),  set_null(TRUE),
+    comparators(0), thd(0), a_cache(0), b_cache(0) {};
 
   int set_compare_func(Item_result_field *owner, Item_result type);
   inline int set_compare_func(Item_result_field *owner_arg)
@@ -75,8 +66,8 @@ public:
   {
     set_null= set_null_arg;
     return set_cmp_func(owner_arg, a1, a2,
-                        item_cmp_type((*a1)->result_type(),
-                                      (*a2)->result_type()));
+                        item_cmp_type((*a1)->cmp_type(),
+                                      (*a2)->cmp_type()));
   }
   inline int compare() { return (this->*func)(); }
 
@@ -99,14 +90,12 @@ public:
   int compare_real_fixed();
   int compare_e_real_fixed();
   int compare_datetime();        // compare args[0] & args[1] as DATETIMEs
-
-  static enum enum_date_cmp_type can_compare_as_dates(Item *a, Item *b,
-                                                      ulonglong *const_val_arg);
+  int compare_e_datetime();
 
   Item** cache_converted_constant(THD *thd, Item **value, Item **cache,
                                   Item_result type);
   void set_datetime_cmp_func(Item_result_field *owner_arg, Item **a1, Item **b1);
-  static arg_cmp_func comparator_matrix [5][2];
+  static arg_cmp_func comparator_matrix [6][2];
   inline bool is_owner_equal_func()
   {
     return (owner->type() == Item::FUNC_ITEM &&
@@ -242,6 +231,7 @@ class Item_in_optimizer: public Item_bool_func
 {
 protected:
   Item_cache *cache;
+  Item *expr_cache;
   bool save_cache;
   /* 
     Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries:
@@ -252,7 +242,7 @@ protected:
   int result_for_null_param;
 public:
   Item_in_optimizer(Item *a, Item_in_subselect *b):
-    Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0),
+    Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0), expr_cache(0),
     save_cache(0), result_for_null_param(UNKNOWN)
   {}
   bool fix_fields(THD *, Item **);
@@ -263,6 +253,16 @@ public:
   const char *func_name() const { return "<in_optimizer>"; }
   Item_cache **get_cache() { return &cache; }
   void keep_top_level_cache();
+  Item *transform(Item_transformer transformer, uchar *arg);
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
+  bool is_expensive_processor(uchar *arg);
+  bool is_expensive();
+  void set_join_tab_idx(uint join_tab_idx_arg)
+  { args[1]->set_join_tab_idx(join_tab_idx_arg); }
+  virtual void get_cache_parameters(List<Item> &parameters);
+  bool is_top_level_item();
+  bool eval_not_null_tables(uchar *opt_arg);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 };
 
 class Comp_creator
@@ -383,6 +383,7 @@ public:
   CHARSET_INFO *compare_collation() { return cmp.cmp_collation.collation; }
   uint decimal_precision() const { return 1; }
   void top_level_item() { abort_on_null= TRUE; }
+  Arg_comparator *get_comparator() { return &cmp; }
   void cleanup()
   {
     Item_int_func::cleanup();
@@ -401,7 +402,30 @@ public:
   }
   Item *neg_transformer(THD *thd);
   virtual Item *negated_item();
-  bool subst_argument_checker(uchar **arg) { return TRUE; }
+  bool subst_argument_checker(uchar **arg)
+  {
+    return (*arg != NULL);     
+  }
+};
+
+/**
+  XOR inherits from Item_bool_func2 because it is not optimized yet.
+  Later, when XOR is optimized, it needs to inherit from
+  Item_cond instead. See WL#5800. 
+*/
+class Item_func_xor :public Item_bool_func2
+{
+public:
+  Item_func_xor(Item *i1, Item *i2) :Item_bool_func2(i1, i2) {}
+  enum Functype functype() const { return XOR_FUNC; }
+  const char *func_name() const { return "xor"; }
+  longlong val_int();
+  void top_level_item() {}
+  Item *neg_transformer(THD *thd);
+  bool subst_argument_checker(uchar **arg)
+  {
+    return (*arg != NULL);     
+  }
 };
 
 class Item_func_not :public Item_bool_func
@@ -473,7 +497,7 @@ public:
      show(0)
     {}
   virtual void top_level_item() { abort_on_null= 1; }
-  bool top_level() { return abort_on_null; }
+  bool is_top_level_item() { return abort_on_null; }
   longlong val_int();
   enum Functype functype() const { return NOT_ALL_FUNC; }
   const char *func_name() const { return "<not>"; }
@@ -499,13 +523,23 @@ public:
 class Item_func_eq :public Item_bool_rowready_func2
 {
 public:
-  Item_func_eq(Item *a,Item *b) :Item_bool_rowready_func2(a,b) {}
+  Item_func_eq(Item *a,Item *b) :
+    Item_bool_rowready_func2(a,b), in_equality_no(UINT_MAX)
+  {}
   longlong val_int();
   enum Functype functype() const { return EQ_FUNC; }
   enum Functype rev_functype() const { return EQ_FUNC; }
   cond_result eq_cmp_result() const { return COND_TRUE; }
   const char *func_name() const { return "="; }
   Item *negated_item();
+  /* 
+    - If this equality is created from the subquery's IN-equality:
+      number of the item it was created from, e.g. for
+       (a,b) IN (SELECT c,d ...)  a=c will have in_equality_no=0, 
+       and b=d will have in_equality_no=1.
+    - Otherwise, UINT_MAX
+  */
+  uint in_equality_no;
 };
 
 class Item_func_equal :public Item_bool_rowready_func2
@@ -627,9 +661,7 @@ public:
   Item_result cmp_type;
   String value0,value1,value2;
   /* TRUE <=> arguments will be compared as dates. */
-  bool compare_as_dates;
-  /* Comparators used for DATE/DATETIME comparison. */
-  Arg_comparator ge_cmp, le_cmp;
+  Item *compare_as_dates;
   Item_func_between(Item *a, Item *b, Item *c)
     :Item_func_opt_neg(a, b, c), compare_as_dates(FALSE) {}
   longlong val_int();
@@ -642,6 +674,8 @@ public:
   bool is_bool_func() { return 1; }
   CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
   uint decimal_precision() const { return 1; }
+  bool eval_not_null_tables(uchar *opt_arg);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 };
 
 
@@ -702,6 +736,7 @@ public:
   const char *func_name() const { return "coalesce"; }
   table_map not_null_tables() const { return 0; }
   enum_field_types field_type() const { return cached_field_type; }
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
 };
 
 
@@ -741,6 +776,8 @@ public:
   void fix_length_and_dec();
   uint decimal_precision() const;
   const char *func_name() const { return "if"; }
+  bool eval_not_null_tables(uchar *opt_arg);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 };
 
 
@@ -907,6 +944,16 @@ public:
      lval_cache(0) {};
   void set(uint pos,Item *item);
   uchar *get_value(Item *item);
+  Item* create_item()
+  { 
+    return new Item_datetime();
+  }
+  void value_to_item(uint pos, Item *item)
+  {
+    packed_longlong *val= reinterpret_cast<packed_longlong*>(base)+pos;
+    Item_datetime *dt= reinterpret_cast<Item_datetime*>(item);
+    dt->set(val->val);
+  }
   friend int cmp_longlong(void *cmp_arg, packed_longlong *a,packed_longlong *b);
 };
 
@@ -966,7 +1013,8 @@ public:
   virtual int cmp(Item *item)= 0;
   // for optimized IN with row
   virtual int compare(cmp_item *item)= 0;
-  static cmp_item* get_comparator(Item_result type, CHARSET_INFO *cs);
+  static cmp_item* get_comparator(Item_result type, Item * warn_item,
+                                  CHARSET_INFO *cs);
   virtual cmp_item *make_same()= 0;
   virtual void store_value_by_template(cmp_item *tmpl, Item *item)
   {
@@ -1162,7 +1210,7 @@ class Item_func_case :public Item_func
   Item_result cmp_type;
   DTCollation cmp_collation;
   enum_field_types cached_field_type;
-  cmp_item *cmp_items[5]; /* For all result types */
+  cmp_item *cmp_items[6]; /* For all result types */
   cmp_item *case_item;
 public:
   Item_func_case(List<Item> &list, Item *first_expr_arg, Item *else_expr_arg)
@@ -1252,7 +1300,7 @@ public:
     Item_int_func::cleanup();
     delete array;
     array= 0;
-    for (i= 0; i <= (uint)DECIMAL_RESULT + 1; i++)
+    for (i= 0; i <= (uint)TIME_RESULT; i++)
     {
       delete cmp_items[i];
       cmp_items[i]= 0;
@@ -1267,6 +1315,8 @@ public:
   bool nulls_in_row();
   bool is_bool_func() { return 1; }
   CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
+  bool eval_not_null_tables(uchar *opt_arg);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 };
 
 class cmp_item_row :public cmp_item
@@ -1302,8 +1352,6 @@ public:
 
 class Item_func_isnull :public Item_bool_func
 {
-protected:
-  longlong cached_value;
 public:
   Item_func_isnull(Item *a) :Item_bool_func(a) {}
   longlong val_int();
@@ -1321,17 +1369,12 @@ public:
     {
       used_tables_cache= 0;			/* is always false */
       const_item_cache= 1;
-      cached_value= (longlong) 0;
     }
     else
     {
       args[0]->update_used_tables();
-      if ((const_item_cache= !(used_tables_cache= args[0]->used_tables()) &&
-          !with_subselect))
-      {
-	/* Remember if the value is always NULL or never NULL */
-	cached_value= (longlong) args[0]->is_null();
-      }
+      used_tables_cache= args[0]->used_tables();
+      const_item_cache= args[0]->const_item();
     }
   }
   table_map not_null_tables() const { return 0; }
@@ -1513,6 +1556,7 @@ public:
     list.prepand(nlist);
   }
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 
   enum Type type() const { return COND_ITEM; }
   List<Item>* argument_list() { return &list; }
@@ -1532,6 +1576,7 @@ public:
   bool subst_argument_checker(uchar **arg) { return TRUE; }
   Item *compile(Item_analyzer analyzer, uchar **arg_p,
                 Item_transformer transformer, uchar *arg_t);
+  bool eval_not_null_tables(uchar *opt_arg);
 };
 
 
@@ -1612,28 +1657,64 @@ public:
 
 class Item_equal: public Item_bool_func
 {
-  List<Item_field> fields; /* list of equal field items                    */
-  Item *const_item;        /* optional constant item equal to fields items */
+  /*
+    The list of equal items. Currently the list can contain:
+     - Item_fields items for references to table columns
+     - Item_direct_view_ref items for references to view columns
+     - one const item
+
+    If the list contains a constant item this item is always first in the list.
+    The list contains at least two elements.
+    Currently all Item_fields/Item_direct_view_ref items in the list should
+    refer to table columns with equavalent type definitions. In particular
+    if these are string columns they should have the same charset/collation.
+
+    Use objects of the companion class Item_equal_fields_iterator to iterate
+    over all items from the list of the Item_field/Item_direct_view_ref classes.
+  */ 
+  List<Item> equal_items; 
+  /* 
+     TRUE <-> one of the items is a const item.
+     Such item is always first in in the equal_items list
+  */
+  bool with_const;        
+  /* 
+    The field eval_item is used when this item is evaluated
+    with the method val_int()
+  */  
   cmp_item *eval_item;
-  Arg_comparator cmp;
+  /*
+    This initially is set to FALSE. It becomes TRUE when this item is evaluated
+    as being always false. If the flag is TRUE the contents of the list 
+    the equal_items should be ignored.
+  */
   bool cond_false;
+  /* 
+    compare_as_dates=TRUE <-> constants equal to fields from equal_items
+    must be compared as datetimes and not as strings.
+    compare_as_dates can be TRUE only if with_const=TRUE 
+  */
   bool compare_as_dates;
+  /* 
+    The comparator used to compare constants equal to fields from equal_items
+    as datetimes. The comparator is used only if compare_as_dates=TRUE
+  */
+  Arg_comparator cmp;
 public:
   inline Item_equal()
-    : Item_bool_func(), const_item(0), eval_item(0), cond_false(0)
+    : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0)
   { const_item_cache=0 ;}
-  Item_equal(Item_field *f1, Item_field *f2);
-  Item_equal(Item *c, Item_field *f);
+  Item_equal(Item *f1, Item *f2, bool with_const_item);
   Item_equal(Item_equal *item_equal);
-  inline Item* get_const() { return const_item; }
-  void compare_const(Item *c);
-  void add(Item *c, Item_field *f);
-  void add(Item *c);
-  void add(Item_field *f);
-  uint members();
+  /* Currently the const item is always the first in the list of equal items */
+  inline Item* get_const() { return with_const ? equal_items.head() : NULL; }
+  void add_const(Item *c, Item *f = NULL);
+  /** Add a non-constant item to the multiple equality */
+  void add(Item *f) { equal_items.push_back(f); }
   bool contains(Field *field);
-  Item_field* get_first() { return fields.head(); }
-  uint n_fields() { return fields.elements; }
+  Item* get_first(struct st_join_table *context, Item *field);
+  /** Get number of field items / references to field items in this object */   
+  uint n_field_items() { return equal_items.elements-test(with_const); }
   void merge(Item_equal *item);
   void update_const();
   enum Functype functype() const { return MULT_EQUAL_FUNC; }
@@ -1641,15 +1722,18 @@ public:
   const char *func_name() const { return "multiple equal"; }
   optimize_type select_optimize() const { return OPTIMIZE_EQUAL; }
   void sort(Item_field_cmpfunc compare, void *arg);
-  friend class Item_equal_iterator;
   void fix_length_and_dec();
   bool fix_fields(THD *thd, Item **ref);
   void update_used_tables();
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
   Item *transform(Item_transformer transformer, uchar *arg);
   virtual void print(String *str, enum_query_type query_type);
-  CHARSET_INFO *compare_collation() 
-  { return fields.head()->collation.collation; }
+  CHARSET_INFO *compare_collation();
+  friend class Item_equal_fields_iterator;
+  friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal);
+  friend bool setup_sj_materialization_part1(struct st_join_table *tab);
+  friend bool setup_sj_materialization_part2(struct st_join_table *tab);
 }; 
 
 class COND_EQUAL: public Sql_alloc
@@ -1667,23 +1751,52 @@ public:
 };
 
 
-class Item_equal_iterator : public List_iterator_fast<Item_field>
+/* 
+  The class Item_equal_fields_iterator is used to iterate over references
+  to table/view columns from a list of equal items.
+*/ 
+
+class Item_equal_fields_iterator : public List_iterator_fast<Item>
 {
+  Item_equal *item_equal;
+  Item *curr_item;
 public:
-  inline Item_equal_iterator(Item_equal &item_equal) 
-    :List_iterator_fast<Item_field> (item_equal.fields)
-  {}
-  inline Item_field* operator++(int)
-  { 
-    Item_field *item= (*(List_iterator_fast<Item_field> *) this)++;
-    return  item;
+  Item_equal_fields_iterator(Item_equal &item_eq) 
+    :List_iterator_fast<Item> (item_eq.equal_items)
+  {
+    curr_item= NULL;
+    item_equal= &item_eq;
+    if (item_eq.with_const)
+    {
+      List_iterator_fast<Item> *list_it= this;
+      curr_item=  (*list_it)++;
+    }
   }
-  inline void rewind(void) 
+  Item* operator++(int)
   { 
-    List_iterator_fast<Item_field>::rewind();
+    List_iterator_fast<Item> *list_it= this;
+    curr_item= (*list_it)++;
+    return curr_item;
   }
+  Item ** ref()
+  {
+    return List_iterator_fast<Item>::ref();
+  }
+  void rewind(void) 
+  { 
+    List_iterator_fast<Item> *list_it= this;
+    list_it->rewind();
+    if (item_equal->with_const)
+      curr_item= (*list_it)++;
+  }  
+  Field *get_curr_field()
+  {
+    Item_field *item= (Item_field *) (curr_item->real_item());
+     return item->field;
+  }  
 };
 
+
 class Item_cond_and :public Item_cond
 {
 public:
@@ -1707,6 +1820,7 @@ public:
     return item;
   }
   Item *neg_transformer(THD *thd);
+  void mark_as_condition_AND_part(TABLE_LIST *embedding);
 };
 
 inline bool is_cond_and(Item *item)
@@ -1748,25 +1862,6 @@ inline bool is_cond_or(Item *item)
   return (cond_item->functype() == Item_func::COND_OR_FUNC);
 }
 
-/*
-  XOR is Item_cond, not an Item_int_func because we could like to
-  optimize (a XOR b) later on. It's low prio, though
-*/
-
-class Item_cond_xor :public Item_cond
-{
-public:
-  Item_cond_xor() :Item_cond() {}
-  Item_cond_xor(Item *i1,Item *i2) :Item_cond(i1,i2) {}
-  enum Functype functype() const { return COND_XOR_FUNC; }
-  /* TODO: remove the next line when implementing XOR optimization */
-  enum Type type() const { return FUNC_ITEM; }
-  longlong val_int();
-  const char *func_name() const { return "xor"; }
-  void top_level_item() {}
-};
-
-
 /* Some useful inline functions */
 
 inline Item *and_conds(Item *a, Item *b)
@@ -1780,3 +1875,11 @@ Item *and_expressions(Item *a, Item *b, Item **org_item);
 
 bool get_mysql_time_from_str(THD *thd, String *str, timestamp_type warn_type, 
                              const char *warn_name, MYSQL_TIME *l_time);
+
+class Item_func_dyncol_exists :public Item_bool_func
+{
+public:
+  Item_func_dyncol_exists(Item *str, Item *num) :Item_bool_func(str, num) {}
+  longlong val_int();
+  const char *func_name() const { return "column_exists"; }
+};
diff --git a/sql/item_create.cc b/sql/item_create.cc
index a5dc3eeb5ad..70b9b1754f3 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -30,6 +30,69 @@
 
 /*
 =============================================================================
+  HELPER FUNCTIONS
+=============================================================================
+*/
+
+static const char* item_name(Item *a, String *str)
+{
+  if (a->name)
+    return a->name;
+  str->length(0);
+  a->print(str, QT_ORDINARY);
+  return str->c_ptr_safe();
+}
+
+
+static void wrong_precision_error(uint errcode, Item *a,
+                                  ulonglong number, ulong maximum)
+{
+  char buff[1024];
+  String buf(buff, sizeof(buff), system_charset_info);
+
+  my_error(errcode, MYF(0), (uint) min(number, UINT_MAX32),
+           item_name(a, &buf), maximum);
+}
+
+
+/**
+  Get precision and scale for a declaration
+ 
+  return
+    0  ok
+    1  error
+*/
+
+bool get_length_and_scale(ulonglong length, ulonglong decimals,
+                          ulong *out_length, uint *out_decimals,
+                          uint max_precision, uint max_scale,
+                          Item *a)
+{
+  if (length > (ulonglong) max_precision)
+  {
+    wrong_precision_error(ER_TOO_BIG_PRECISION, a, length, max_precision);
+    return 1;
+  }
+  if (decimals > (ulonglong) max_scale)
+  {
+    wrong_precision_error(ER_TOO_BIG_SCALE, a, decimals, max_scale);
+    return 1;
+  }
+
+  *out_length=  (ulong) length;
+  *out_decimals=  (uint) decimals;
+  my_decimal_trim(out_length, out_decimals);
+  
+  if (*out_length < *out_decimals)
+  {
+    my_error(ER_M_BIGGER_THAN_D, MYF(0), "");
+    return 1;
+  }
+  return 0;
+}
+
+/*
+=============================================================================
   LOCAL DECLARATIONS
 =============================================================================
 */
@@ -512,6 +575,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_contains : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_contains s_singleton;
+
+  protected:
+    Create_func_mbr_contains() {}
+    virtual ~Create_func_mbr_contains() {}
+};
+
+
 class Create_func_contains : public Create_func_arg2
 {
 public:
@@ -752,6 +828,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_disjoint : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_disjoint s_singleton;
+
+  protected:
+    Create_func_mbr_disjoint() {}
+    virtual ~Create_func_mbr_disjoint() {}
+};
+
+
 class Create_func_disjoint : public Create_func_arg2
 {
 public:
@@ -763,6 +852,19 @@ protected:
   Create_func_disjoint() {}
   virtual ~Create_func_disjoint() {}
 };
+
+
+class Create_func_distance : public Create_func_arg2
+{
+  public:
+    virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_distance s_singleton;
+
+  protected:
+    Create_func_distance() {}
+    virtual ~Create_func_distance() {}
+};
 #endif
 
 
@@ -836,6 +938,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_equals : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_equals s_singleton;
+
+  protected:
+    Create_func_mbr_equals() {}
+    virtual ~Create_func_mbr_equals() {}
+};
+
+
 class Create_func_equals : public Create_func_arg2
 {
 public:
@@ -1164,6 +1279,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_intersects : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_intersects s_singleton;
+
+  protected:
+    Create_func_mbr_intersects() {}
+    virtual ~Create_func_mbr_intersects() {}
+};
+
+
 class Create_func_intersects : public Create_func_arg2
 {
 public:
@@ -1175,7 +1303,72 @@ protected:
   Create_func_intersects() {}
   virtual ~Create_func_intersects() {}
 };
-#endif
+
+
+class Create_func_intersection : public Create_func_arg2
+{
+public:
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+  static Create_func_intersection s_singleton;
+
+protected:
+  Create_func_intersection() {}
+  virtual ~Create_func_intersection() {}
+};
+
+
+class Create_func_difference : public Create_func_arg2
+{
+public:
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+  static Create_func_difference s_singleton;
+
+protected:
+  Create_func_difference() {}
+  virtual ~Create_func_difference() {}
+};
+
+
+class Create_func_union : public Create_func_arg2
+{
+public:
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+  static Create_func_union s_singleton;
+
+protected:
+  Create_func_union() {}
+  virtual ~Create_func_union() {}
+};
+
+
+class Create_func_symdifference : public Create_func_arg2
+{
+public:
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+  static Create_func_symdifference s_singleton;
+
+protected:
+  Create_func_symdifference() {}
+  virtual ~Create_func_symdifference() {}
+};
+
+
+class Create_func_buffer : public Create_func_arg2
+{
+public:
+  virtual Item* create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+  static Create_func_buffer s_singleton;
+
+protected:
+  Create_func_buffer() {}
+  virtual ~Create_func_buffer() {}
+};
+#endif /*HAVE_SPATIAL*/
 
 
 class Create_func_is_free_lock : public Create_func_arg1
@@ -1607,6 +1800,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_overlaps : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_overlaps s_singleton;
+
+  protected:
+    Create_func_mbr_overlaps() {}
+    virtual ~Create_func_mbr_overlaps() {}
+};
+
+
 class Create_func_overlaps : public Create_func_arg2
 {
 public:
@@ -2202,6 +2408,19 @@ protected:
 
 
 #ifdef HAVE_SPATIAL
+class Create_func_mbr_within : public Create_func_arg2
+{
+  public:
+    virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
+
+    static Create_func_mbr_within s_singleton;
+
+  protected:
+    Create_func_mbr_within() {}
+    virtual ~Create_func_mbr_within() {}
+};
+
+
 class Create_func_within : public Create_func_arg2
 {
 public:
@@ -2893,6 +3112,16 @@ Create_func_connection_id::create_builder(THD *thd)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_contains Create_func_mbr_contains::s_singleton;
+
+Item*
+Create_func_mbr_contains::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_CONTAINS_FUNC);
+}
+
+
 Create_func_contains Create_func_contains::s_singleton;
 
 Item*
@@ -3125,6 +3354,16 @@ Create_func_dimension::create_1_arg(THD *thd, Item *arg1)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_disjoint Create_func_mbr_disjoint::s_singleton;
+
+Item*
+Create_func_mbr_disjoint::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_DISJOINT_FUNC);
+}
+
+
 Create_func_disjoint Create_func_disjoint::s_singleton;
 
 Item*
@@ -3133,6 +3372,15 @@ Create_func_disjoint::create_2_arg(THD *thd, Item *arg1, Item *arg2)
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_DISJOINT_FUNC);
 }
+
+
+Create_func_distance Create_func_distance::s_singleton;
+
+Item*
+Create_func_distance::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_distance(arg1, arg2);
+}
 #endif
 
 
@@ -3228,6 +3476,16 @@ Create_func_envelope::create_1_arg(THD *thd, Item *arg1)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_equals Create_func_mbr_equals::s_singleton;
+
+Item*
+Create_func_mbr_equals::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_EQUALS_FUNC);
+}
+
+
 Create_func_equals Create_func_equals::s_singleton;
 
 Item*
@@ -3623,6 +3881,16 @@ Create_func_interiorringn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_intersects Create_func_mbr_intersects::s_singleton;
+
+Item*
+Create_func_mbr_intersects::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_INTERSECTS_FUNC);
+}
+
+
 Create_func_intersects Create_func_intersects::s_singleton;
 
 Item*
@@ -3631,7 +3899,56 @@ Create_func_intersects::create_2_arg(THD *thd, Item *arg1, Item *arg2)
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_INTERSECTS_FUNC);
 }
-#endif
+
+
+Create_func_intersection Create_func_intersection::s_singleton;
+
+Item*
+Create_func_intersection::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_operation(arg1, arg2,
+                               Gcalc_function::op_intersection);
+}
+
+
+Create_func_difference Create_func_difference::s_singleton;
+
+Item*
+Create_func_difference::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_operation(arg1, arg2,
+                               Gcalc_function::op_difference);
+}
+
+
+Create_func_union Create_func_union::s_singleton;
+
+Item*
+Create_func_union::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_operation(arg1, arg2,
+                               Gcalc_function::op_union);
+}
+
+
+Create_func_symdifference Create_func_symdifference::s_singleton;
+
+Item*
+Create_func_symdifference::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_operation(arg1, arg2,
+                               Gcalc_function::op_symdifference);
+}
+
+
+Create_func_buffer Create_func_buffer::s_singleton;
+
+Item*
+Create_func_buffer::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_buffer(arg1, arg2);
+}
+#endif /*HAVE_SPATAI*/
 
 
 Create_func_is_free_lock Create_func_is_free_lock::s_singleton;
@@ -4091,6 +4408,16 @@ Create_func_ord::create_1_arg(THD *thd, Item *arg1)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_overlaps Create_func_mbr_overlaps::s_singleton;
+
+Item*
+Create_func_mbr_overlaps::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_OVERLAPS_FUNC);
+}
+
+
 Create_func_overlaps Create_func_overlaps::s_singleton;
 
 Item*
@@ -4643,6 +4970,16 @@ Create_func_weekofyear::create_1_arg(THD *thd, Item *arg1)
 
 
 #ifdef HAVE_SPATIAL
+Create_func_mbr_within Create_func_mbr_within::s_singleton;
+
+Item*
+Create_func_mbr_within::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_spatial_mbr_rel(arg1, arg2,
+      Item_func::SP_WITHIN_FUNC);
+}
+
+
 Create_func_within Create_func_within::s_singleton;
 
 Item*
@@ -4776,6 +5113,7 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("BIN") }, BUILDER(Create_func_bin)},
   { { C_STRING_WITH_LEN("BIT_COUNT") }, BUILDER(Create_func_bit_count)},
   { { C_STRING_WITH_LEN("BIT_LENGTH") }, BUILDER(Create_func_bit_length)},
+  { { C_STRING_WITH_LEN("BUFFER") }, GEOM_BUILDER(Create_func_buffer)},
   { { C_STRING_WITH_LEN("CEIL") }, BUILDER(Create_func_ceiling)},
   { { C_STRING_WITH_LEN("CEILING") }, BUILDER(Create_func_ceiling)},
   { { C_STRING_WITH_LEN("CENTROID") }, GEOM_BUILDER(Create_func_centroid)},
@@ -4803,7 +5141,7 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("DES_DECRYPT") }, BUILDER(Create_func_des_decrypt)},
   { { C_STRING_WITH_LEN("DES_ENCRYPT") }, BUILDER(Create_func_des_encrypt)},
   { { C_STRING_WITH_LEN("DIMENSION") }, GEOM_BUILDER(Create_func_dimension)},
-  { { C_STRING_WITH_LEN("DISJOINT") }, GEOM_BUILDER(Create_func_disjoint)},
+  { { C_STRING_WITH_LEN("DISJOINT") }, GEOM_BUILDER(Create_func_mbr_disjoint)},
   { { C_STRING_WITH_LEN("ELT") }, BUILDER(Create_func_elt)},
   { { C_STRING_WITH_LEN("ENCODE") }, BUILDER(Create_func_encode)},
   { { C_STRING_WITH_LEN("ENCRYPT") }, BUILDER(Create_func_encrypt)},
@@ -4840,7 +5178,7 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("INET_NTOA") }, BUILDER(Create_func_inet_ntoa)},
   { { C_STRING_WITH_LEN("INSTR") }, BUILDER(Create_func_instr)},
   { { C_STRING_WITH_LEN("INTERIORRINGN") }, GEOM_BUILDER(Create_func_interiorringn)},
-  { { C_STRING_WITH_LEN("INTERSECTS") }, GEOM_BUILDER(Create_func_intersects)},
+  { { C_STRING_WITH_LEN("INTERSECTS") }, GEOM_BUILDER(Create_func_mbr_intersects)},
   { { C_STRING_WITH_LEN("ISCLOSED") }, GEOM_BUILDER(Create_func_isclosed)},
   { { C_STRING_WITH_LEN("ISEMPTY") }, GEOM_BUILDER(Create_func_isempty)},
   { { C_STRING_WITH_LEN("ISNULL") }, BUILDER(Create_func_isnull)},
@@ -4869,13 +5207,13 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("MAKETIME") }, BUILDER(Create_func_maketime)},
   { { C_STRING_WITH_LEN("MAKE_SET") }, BUILDER(Create_func_make_set)},
   { { C_STRING_WITH_LEN("MASTER_POS_WAIT") }, BUILDER(Create_func_master_pos_wait)},
-  { { C_STRING_WITH_LEN("MBRCONTAINS") }, GEOM_BUILDER(Create_func_contains)},
-  { { C_STRING_WITH_LEN("MBRDISJOINT") }, GEOM_BUILDER(Create_func_disjoint)},
-  { { C_STRING_WITH_LEN("MBREQUAL") }, GEOM_BUILDER(Create_func_equals)},
-  { { C_STRING_WITH_LEN("MBRINTERSECTS") }, GEOM_BUILDER(Create_func_intersects)},
-  { { C_STRING_WITH_LEN("MBROVERLAPS") }, GEOM_BUILDER(Create_func_overlaps)},
+  { { C_STRING_WITH_LEN("MBRCONTAINS") }, GEOM_BUILDER(Create_func_mbr_contains)},
+  { { C_STRING_WITH_LEN("MBRDISJOINT") }, GEOM_BUILDER(Create_func_mbr_disjoint)},
+  { { C_STRING_WITH_LEN("MBREQUAL") }, GEOM_BUILDER(Create_func_mbr_equals)},
+  { { C_STRING_WITH_LEN("MBRINTERSECTS") }, GEOM_BUILDER(Create_func_mbr_intersects)},
+  { { C_STRING_WITH_LEN("MBROVERLAPS") }, GEOM_BUILDER(Create_func_mbr_overlaps)},
   { { C_STRING_WITH_LEN("MBRTOUCHES") }, GEOM_BUILDER(Create_func_touches)},
-  { { C_STRING_WITH_LEN("MBRWITHIN") }, GEOM_BUILDER(Create_func_within)},
+  { { C_STRING_WITH_LEN("MBRWITHIN") }, GEOM_BUILDER(Create_func_mbr_within)},
   { { C_STRING_WITH_LEN("MD5") }, BUILDER(Create_func_md5)},
   { { C_STRING_WITH_LEN("MLINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
   { { C_STRING_WITH_LEN("MLINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
@@ -4898,7 +5236,7 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("OCT") }, BUILDER(Create_func_oct)},
   { { C_STRING_WITH_LEN("OCTET_LENGTH") }, BUILDER(Create_func_length)},
   { { C_STRING_WITH_LEN("ORD") }, BUILDER(Create_func_ord)},
-  { { C_STRING_WITH_LEN("OVERLAPS") }, GEOM_BUILDER(Create_func_overlaps)},
+  { { C_STRING_WITH_LEN("OVERLAPS") }, GEOM_BUILDER(Create_func_mbr_overlaps)},
   { { C_STRING_WITH_LEN("PERIOD_ADD") }, BUILDER(Create_func_period_add)},
   { { C_STRING_WITH_LEN("PERIOD_DIFF") }, BUILDER(Create_func_period_diff)},
   { { C_STRING_WITH_LEN("PI") }, BUILDER(Create_func_pi)},
@@ -4933,6 +5271,64 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("STARTPOINT") }, GEOM_BUILDER(Create_func_startpoint)},
   { { C_STRING_WITH_LEN("STRCMP") }, BUILDER(Create_func_strcmp)},
   { { C_STRING_WITH_LEN("STR_TO_DATE") }, BUILDER(Create_func_str_to_date)},
+  { { C_STRING_WITH_LEN("ST_AREA") }, GEOM_BUILDER(Create_func_area)},
+  { { C_STRING_WITH_LEN("ST_ASBINARY") }, GEOM_BUILDER(Create_func_as_wkb)},
+  { { C_STRING_WITH_LEN("ST_ASTEXT") }, GEOM_BUILDER(Create_func_as_wkt)},
+  { { C_STRING_WITH_LEN("ST_ASWKB") }, GEOM_BUILDER(Create_func_as_wkb)},
+  { { C_STRING_WITH_LEN("ST_ASWKT") }, GEOM_BUILDER(Create_func_as_wkt)},
+  { { C_STRING_WITH_LEN("ST_BUFFER") }, GEOM_BUILDER(Create_func_buffer)},
+  { { C_STRING_WITH_LEN("ST_CENTROID") }, GEOM_BUILDER(Create_func_centroid)},
+  { { C_STRING_WITH_LEN("ST_CONTAINS") }, GEOM_BUILDER(Create_func_contains)},
+  { { C_STRING_WITH_LEN("ST_CROSSES") }, GEOM_BUILDER(Create_func_crosses)},
+  { { C_STRING_WITH_LEN("ST_DIFFERENCE") }, GEOM_BUILDER(Create_func_difference)},
+  { { C_STRING_WITH_LEN("ST_DIMENSION") }, GEOM_BUILDER(Create_func_dimension)},
+  { { C_STRING_WITH_LEN("ST_DISJOINT") }, GEOM_BUILDER(Create_func_disjoint)},
+  { { C_STRING_WITH_LEN("ST_DISTANCE") }, GEOM_BUILDER(Create_func_distance)},
+  { { C_STRING_WITH_LEN("ST_ENDPOINT") }, GEOM_BUILDER(Create_func_endpoint)},
+  { { C_STRING_WITH_LEN("ST_ENVELOPE") }, GEOM_BUILDER(Create_func_envelope)},
+  { { C_STRING_WITH_LEN("ST_EQUALS") }, GEOM_BUILDER(Create_func_equals)},
+  { { C_STRING_WITH_LEN("ST_EXTERIORRING") }, GEOM_BUILDER(Create_func_exteriorring)},
+  { { C_STRING_WITH_LEN("ST_GEOMCOLLFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_GEOMCOLLFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYCOLLECTIONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYCOLLECTIONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYN") }, GEOM_BUILDER(Create_func_geometryn)},
+  { { C_STRING_WITH_LEN("ST_GEOMETRYTYPE") }, GEOM_BUILDER(Create_func_geometry_type)},
+  { { C_STRING_WITH_LEN("ST_GEOMFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_GEOMFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_EQUALS") }, GEOM_BUILDER(Create_func_equals)},
+  { { C_STRING_WITH_LEN("ST_INTERIORRINGN") }, GEOM_BUILDER(Create_func_interiorringn)},
+  { { C_STRING_WITH_LEN("ST_INTERSECTS") }, GEOM_BUILDER(Create_func_intersects)},
+  { { C_STRING_WITH_LEN("ST_INTERSECTION") }, GEOM_BUILDER(Create_func_intersection)},
+  { { C_STRING_WITH_LEN("ST_ISCLOSED") }, GEOM_BUILDER(Create_func_isclosed)},
+  { { C_STRING_WITH_LEN("ST_ISEMPTY") }, GEOM_BUILDER(Create_func_isempty)},
+  { { C_STRING_WITH_LEN("ST_ISSIMPLE") }, GEOM_BUILDER(Create_func_issimple)},
+  { { C_STRING_WITH_LEN("ST_LENGTH") }, GEOM_BUILDER(Create_func_glength)},
+  { { C_STRING_WITH_LEN("ST_LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_LINESTRINGFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_NUMGEOMETRIES") }, GEOM_BUILDER(Create_func_numgeometries)},
+  { { C_STRING_WITH_LEN("ST_NUMINTERIORRINGS") }, GEOM_BUILDER(Create_func_numinteriorring)},
+  { { C_STRING_WITH_LEN("ST_NUMPOINTS") }, GEOM_BUILDER(Create_func_numpoints)},
+  { { C_STRING_WITH_LEN("ST_OVERLAPS") }, GEOM_BUILDER(Create_func_overlaps)},
+  { { C_STRING_WITH_LEN("ST_POINTFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_POINTFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_POINTN") }, GEOM_BUILDER(Create_func_pointn)},
+  { { C_STRING_WITH_LEN("ST_POLYFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_POLYFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_POLYGONFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
+  { { C_STRING_WITH_LEN("ST_POLYGONFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
+  { { C_STRING_WITH_LEN("ST_SRID") }, GEOM_BUILDER(Create_func_srid)},
+  { { C_STRING_WITH_LEN("ST_STARTPOINT") }, GEOM_BUILDER(Create_func_startpoint)},
+  { { C_STRING_WITH_LEN("ST_SYMDIFFERENCE") }, GEOM_BUILDER(Create_func_symdifference)},
+  { { C_STRING_WITH_LEN("ST_TOUCHES") }, GEOM_BUILDER(Create_func_touches)},
+  { { C_STRING_WITH_LEN("ST_UNION") }, GEOM_BUILDER(Create_func_union)},
+  { { C_STRING_WITH_LEN("ST_WITHIN") }, GEOM_BUILDER(Create_func_within)},
+  { { C_STRING_WITH_LEN("ST_X") }, GEOM_BUILDER(Create_func_x)},
+  { { C_STRING_WITH_LEN("ST_Y") }, GEOM_BUILDER(Create_func_y)},
   { { C_STRING_WITH_LEN("SUBSTRING_INDEX") }, BUILDER(Create_func_substr_index)},
   { { C_STRING_WITH_LEN("SUBTIME") }, BUILDER(Create_func_subtime)},
   { { C_STRING_WITH_LEN("TAN") }, BUILDER(Create_func_tan)},
@@ -5057,6 +5453,17 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
                  CHARSET_INFO *cs)
 {
   Item *UNINIT_VAR(res);
+  ulonglong length= 0, decimals= 0;
+  int error;
+  
+  /*
+    We don't have to check for error here as sql_yacc.yy has guaranteed
+    that the values are in range of ulonglong
+  */
+  if (c_len)
+    length= (ulonglong) my_strtoll10(c_len, NULL, &error);
+  if (c_dec)
+    decimals= (ulonglong) my_strtoll10(c_dec, NULL, &error);
 
   switch (cast_type) {
   case ITEM_CAST_BINARY:
@@ -5072,62 +5479,50 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
     res= new (thd->mem_root) Item_date_typecast(a);
     break;
   case ITEM_CAST_TIME:
-    res= new (thd->mem_root) Item_time_typecast(a);
-    break;
-  case ITEM_CAST_DATETIME:
-    res= new (thd->mem_root) Item_datetime_typecast(a);
-    break;
-  case ITEM_CAST_DECIMAL:
-  {
-    ulong len= 0;
-    uint dec= 0;
-
-    if (c_len)
+    if (decimals > MAX_DATETIME_PRECISION)
     {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_len, NULL, 10);
-      if (errno != 0)
-      {
-        my_error(ER_TOO_BIG_PRECISION, MYF(0), INT_MAX, a->name,
-                 static_cast<ulong>(DECIMAL_MAX_PRECISION));
-        return NULL;
-      }
-      len= decoded_size;
-    }
-
-    if (c_dec)
-    {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_dec, NULL, 10);
-      if ((errno != 0) || (decoded_size > UINT_MAX))
-      {
-        my_error(ER_TOO_BIG_SCALE, MYF(0), INT_MAX, a->name,
-                 static_cast<ulong>(DECIMAL_MAX_SCALE));
-        return NULL;
-      }
-      dec= decoded_size;
-    }
-    my_decimal_trim(&len, &dec);
-    if (len < dec)
-    {
-      my_error(ER_M_BIGGER_THAN_D, MYF(0), "");
+      wrong_precision_error(ER_TOO_BIG_PRECISION, a, decimals,
+                            MAX_DATETIME_PRECISION);
       return 0;
     }
-    if (len > DECIMAL_MAX_PRECISION)
+    res= new (thd->mem_root) Item_time_typecast(a, (uint) decimals);
+    break;
+  case ITEM_CAST_DATETIME:
+    if (decimals > MAX_DATETIME_PRECISION)
     {
-      my_error(ER_TOO_BIG_PRECISION, MYF(0), static_cast<int>(len), a->name,
-               static_cast<ulong>(DECIMAL_MAX_PRECISION));
+      wrong_precision_error(ER_TOO_BIG_PRECISION, a, decimals,
+                            MAX_DATETIME_PRECISION);
       return 0;
     }
-    if (dec > DECIMAL_MAX_SCALE)
+    res= new (thd->mem_root) Item_datetime_typecast(a, (uint) decimals);
+    break;
+  case ITEM_CAST_DECIMAL:
+  {
+    ulong len;
+    uint dec;
+    if (get_length_and_scale(length, decimals, &len, &dec,
+                             DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE,
+                             a))
+      return NULL;
+    res= new (thd->mem_root) Item_decimal_typecast(a, len, dec);
+    break;
+  }
+  case ITEM_CAST_DOUBLE:
+  {
+    ulong len;
+    uint dec;
+
+    if (!c_len)
     {
-      my_error(ER_TOO_BIG_SCALE, MYF(0), dec, a->name,
-               static_cast<ulong>(DECIMAL_MAX_SCALE));
-      return 0;
+      length=   DBL_DIG+7;
+      decimals= NOT_FIXED_DEC;
     }
-    res= new (thd->mem_root) Item_decimal_typecast(a, len, dec);
+    else if (get_length_and_scale(length, decimals, &len, &dec,
+                                  DECIMAL_MAX_PRECISION, NOT_FIXED_DEC-1,
+                                  a))
+      return NULL;
+    res= new (thd->mem_root) Item_double_typecast(a, (uint) length,
+                                                  (uint) decimals);
     break;
   }
   case ITEM_CAST_CHAR:
@@ -5136,15 +5531,15 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
     CHARSET_INFO *real_cs= (cs ? cs : thd->variables.collation_connection);
     if (c_len)
     {
-      ulong decoded_size;
-      errno= 0;
-      decoded_size= strtoul(c_len, NULL, 10);
-      if ((errno != 0) || (decoded_size > MAX_FIELD_BLOBLENGTH))
+      if (length > MAX_FIELD_BLOBLENGTH)
       {
-        my_error(ER_TOO_BIG_DISPLAYWIDTH, MYF(0), "cast as char", MAX_FIELD_BLOBLENGTH);
+        char buff[1024];
+        String buf(buff, sizeof(buff), system_charset_info);
+        my_error(ER_TOO_BIG_DISPLAYWIDTH, MYF(0), item_name(a, &buf),
+                 MAX_FIELD_BLOBLENGTH);
         return NULL;
       }
-      len= (int) decoded_size;
+      len= (int) length;
     }
     res= new (thd->mem_root) Item_char_typecast(a, len, real_cs);
     break;
@@ -5158,3 +5553,95 @@ create_func_cast(THD *thd, Item *a, Cast_target cast_type,
   }
   return res;
 }
+
+
+static List<Item> *create_func_dyncol_prepare(THD *thd,
+                                              DYNCALL_CREATE_DEF **dfs,
+                                              List<DYNCALL_CREATE_DEF> &list)
+{
+  DYNCALL_CREATE_DEF *def;
+  List_iterator_fast<DYNCALL_CREATE_DEF> li(list);
+  List<Item> *args= new (thd->mem_root) List<Item>;
+
+  *dfs= (DYNCALL_CREATE_DEF *)alloc_root(thd->mem_root,
+                                         sizeof(DYNCALL_CREATE_DEF) *
+                                         list.elements);
+
+  if (!args || !*dfs)
+    return NULL;
+
+  for (uint i= 0; (def= li++) ;)
+  {
+    dfs[0][i++]= *def;
+    args->push_back(def->num);
+    args->push_back(def->value);
+  }
+  return args;
+}
+
+Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list)
+{
+  List<Item> *args;
+  DYNCALL_CREATE_DEF *dfs;
+  if (!(args= create_func_dyncol_prepare(thd, &dfs, list)))
+    return NULL;
+
+  return new (thd->mem_root) Item_func_dyncol_create(*args, dfs);
+}
+
+
+Item *create_func_dyncol_add(THD *thd, Item *str,
+                             List<DYNCALL_CREATE_DEF> &list)
+{
+  List<Item> *args;
+  DYNCALL_CREATE_DEF *dfs;
+
+  if (!(args= create_func_dyncol_prepare(thd, &dfs, list)))
+    return NULL;
+
+  args->push_back(str);
+
+  return new (thd->mem_root) Item_func_dyncol_add(*args, dfs);
+}
+
+
+
+Item *create_func_dyncol_delete(THD *thd, Item *str, List<Item> &nums)
+{
+  DYNCALL_CREATE_DEF *dfs;
+  Item *num;
+  List_iterator_fast<Item> it(nums);
+  List<Item> *args= new (thd->mem_root) List<Item>;
+
+  dfs= (DYNCALL_CREATE_DEF *)alloc_root(thd->mem_root,
+                                        sizeof(DYNCALL_CREATE_DEF) *
+                                        nums.elements);
+  if (!args || !dfs)
+    return NULL;
+
+  for (uint i= 0; (num= it++); i++)
+  {
+    dfs[i].num= num;
+    dfs[i].value= new Item_null();
+    dfs[i].type= DYN_COL_INT;
+    args->push_back(dfs[i].num);
+    args->push_back(dfs[i].value);
+  }
+
+  args->push_back(str);
+
+  return new (thd->mem_root) Item_func_dyncol_add(*args, dfs);
+}
+
+
+Item *create_func_dyncol_get(THD *thd,  Item *str, Item *num,
+                             Cast_target cast_type,
+                             const char *c_len, const char *c_dec,
+                             CHARSET_INFO *cs)
+{
+  Item *res;
+
+  if (!(res= new (thd->mem_root) Item_dyncol_get(str, num)))
+    return res;                                 // Return NULL
+  return create_func_cast(thd, res, cast_type, c_len, c_dec, cs);
+}
diff --git a/sql/item_create.h b/sql/item_create.h
index ebe6c4942ff..693b77bb739 100644
--- a/sql/item_create.h
+++ b/sql/item_create.h
@@ -166,5 +166,15 @@ Item *
 create_func_cast(THD *thd, Item *a, Cast_target cast_type,
                  const char *len, const char *dec,
                  CHARSET_INFO *cs);
+
+Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list);
+Item *create_func_dyncol_add(THD *thd, Item *str,
+                             List<DYNCALL_CREATE_DEF> &list);
+Item *create_func_dyncol_delete(THD *thd, Item *str, List<Item> &nums);
+Item *create_func_dyncol_get(THD *thd, Item *num, Item *str,
+                             Cast_target cast_type,
+                             const char *c_len, const char *c_dec,
+                             CHARSET_INFO *cs);
+
 #endif
 
diff --git a/sql/item_func.cc b/sql/item_func.cc
index ed771b60769..e5225de62f9 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -79,6 +79,7 @@ void Item_func::set_arguments(List<Item> &list)
     {
       *(save_args++)= item;
       with_sum_func|=item->with_sum_func;
+      with_field|= item->with_field;
     }
   }
   list.empty();					// Fields are used
@@ -129,6 +130,7 @@ Item_func::Item_func(THD *thd, Item_func *item)
     Sets as a side effect the following class variables:
       maybe_null	Set if any argument may return NULL
       with_sum_func	Set if any of the arguments contains a sum function
+      with_field        Set if any of the arguments contains or is a field
       used_tables_cache Set to union of the tables used by arguments
 
       str_value.charset If this is a string function, set this to the
@@ -196,8 +198,8 @@ Item_func::fix_fields(THD *thd, Item **ref)
 	maybe_null=1;
 
       with_sum_func= with_sum_func || item->with_sum_func;
+      with_field= with_field || item->with_field;
       used_tables_cache|=     item->used_tables();
-      not_null_tables_cache|= item->not_null_tables();
       const_item_cache&=      item->const_item();
       with_subselect|=        item->with_subselect;
     }
@@ -225,6 +227,44 @@ Item_func::quick_fix_field()
 }
 
 
+bool
+Item_func::eval_not_null_tables(uchar *opt_arg)
+{
+  Item **arg,**arg_end;
+  not_null_tables_cache= 0;
+  if (arg_count)
+  {		
+    for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++)
+    {
+      not_null_tables_cache|= (*arg)->not_null_tables();
+    }
+  }
+  return FALSE;
+}
+
+
+void Item_func::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  Item **arg,**arg_end;
+
+  used_tables_cache= not_null_tables_cache= 0;
+  const_item_cache=1;
+
+  if (arg_count)
+  {
+    for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++)
+    {
+      (*arg)->fix_after_pullout(new_parent, arg);
+      Item *item= *arg;
+
+      used_tables_cache|=     item->used_tables();
+      not_null_tables_cache|= item->not_null_tables();
+      const_item_cache&=      item->const_item();
+    }
+  }
+}
+
+
 bool Item_func::walk(Item_processor processor, bool walk_subquery,
                      uchar *argument)
 {
@@ -324,6 +364,8 @@ Item *Item_func::transform(Item_transformer transformer, uchar *argument)
     the old item is substituted for a new one.
     After this the transformer is applied to the root node
     of the Item_func object. 
+    The compile function is not called if the analyzer returns NULL
+    in the parameter arg_p. 
 
   @param analyzer      the analyzer callback function to be applied to the
                        nodes of the tree of the object
@@ -341,7 +383,7 @@ Item *Item_func::compile(Item_analyzer analyzer, uchar **arg_p,
 {
   if (!(this->*analyzer)(arg_p))
     return 0;
-  if (arg_count)
+  if (*arg_p && arg_count)
   {
     Item **arg,**arg_end;
     for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++)
@@ -349,7 +391,7 @@ Item *Item_func::compile(Item_analyzer analyzer, uchar **arg_p,
       /* 
         The same parameter value of arg_p must be passed
         to analyze any argument of the condition formula.
-      */   
+      */
       uchar *arg_v= *arg_p;
       Item *new_item= (*arg)->compile(analyzer, &arg_v, transformer, arg_t);
       if (new_item && *arg != new_item)
@@ -475,7 +517,8 @@ Field *Item_func::tmp_table_field(TABLE *table)
     field= Field_new_decimal::create_from_item(this);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     // This case should never be chosen
     DBUG_ASSERT(0);
     field= 0;
@@ -486,12 +529,12 @@ Field *Item_func::tmp_table_field(TABLE *table)
   return field;
 }
 
-
+/*
 bool Item_func::is_expensive_processor(uchar *arg)
 {
   return is_expensive();
 }
-
+*/
 
 my_decimal *Item_func::val_decimal(my_decimal *decimal_value)
 {
@@ -681,8 +724,8 @@ void Item_num_op::find_num_type(void)
   DBUG_ENTER("Item_num_op::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
   DBUG_ASSERT(arg_count == 2);
-  Item_result r0= args[0]->result_type();
-  Item_result r1= args[1]->result_type();
+  Item_result r0= args[0]->cast_to_int_type();
+  Item_result r1= args[1]->cast_to_int_type();
 
   if (r0 == REAL_RESULT || r1 == REAL_RESULT ||
       r0 == STRING_RESULT || r1 ==STRING_RESULT)
@@ -691,7 +734,8 @@ void Item_num_op::find_num_type(void)
     max_length= float_length(decimals);
     hybrid_type= REAL_RESULT;
   }
-  else if (r0 == DECIMAL_RESULT || r1 == DECIMAL_RESULT)
+  else if (r0 == DECIMAL_RESULT || r1 == DECIMAL_RESULT ||
+           r0 == TIME_RESULT || r1 == TIME_RESULT)
   {
     hybrid_type= DECIMAL_RESULT;
     result_precision();
@@ -722,7 +766,7 @@ void Item_func_num1::find_num_type()
 {
   DBUG_ENTER("Item_func_num1::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
-  switch (hybrid_type= args[0]->result_type()) {
+  switch (hybrid_type= args[0]->cast_to_int_type()) {
   case INT_RESULT:
     unsigned_flag= args[0]->unsigned_flag;
     break;
@@ -731,9 +775,12 @@ void Item_func_num1::find_num_type()
     hybrid_type= REAL_RESULT;
     max_length= float_length(decimals);
     break;
+  case TIME_RESULT:
+    hybrid_type= DECIMAL_RESULT;
   case DECIMAL_RESULT:
     break;
-  default:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s",
@@ -790,7 +837,9 @@ String *Item_func_numhybrid::val_str(String *str)
   }
   case STRING_RESULT:
     return str_op(&str_value);
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return str;
@@ -825,7 +874,9 @@ double Item_func_numhybrid::val_real()
     return (res ? my_strntod(res->charset(), (char*) res->ptr(), res->length(),
 			     &end_not_used, &err_not_used) : 0.0);
   }
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return 0.0;
@@ -860,7 +911,9 @@ longlong Item_func_numhybrid::val_int()
     CHARSET_INFO *cs= res->charset();
     return (*(cs->cset->strtoll10))(cs, res->ptr(), &end, &err_not_used);
   }
-  default:
+  case TIME_RESULT:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return 0;
@@ -898,7 +951,8 @@ my_decimal *Item_func_numhybrid::val_decimal(my_decimal *decimal_value)
     break;
   }  
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   return val;
@@ -957,21 +1011,32 @@ longlong Item_func_signed::val_int()
   longlong value;
   int error;
 
-  if (args[0]->cast_to_int_type() != STRING_RESULT ||
-      args[0]->result_as_longlong())
+  if (args[0]->cast_to_int_type() != STRING_RESULT)
   {
     value= args[0]->val_int();
     null_value= args[0]->null_value; 
     return value;
   }
+  else if (args[0]->dynamic_result())
+  {
+    /* We come here when argument has an unknown type */
+    args[0]->unsigned_flag= 0;   // Mark that we want to have a signed value
+    value= args[0]->val_int();
+    null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag && value < 0)
+      goto err;                                 // Warn about overflow
+    return value;
+  }
 
   value= val_int_from_str(&error);
   if (value < 0 && error == 0)
-  {
-    push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
-                 "Cast to signed converted positive out-of-range integer to "
-                 "it's negative complement");
-  }
+    goto err;
+  return value;
+
+err:
+  push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+               "Cast to signed converted positive out-of-range integer to "
+               "it's negative complement");
   return value;
 }
 
@@ -999,19 +1064,35 @@ longlong Item_func_unsigned::val_int()
       value= 0;
     return value;
   }
-  else if (args[0]->cast_to_int_type() != STRING_RESULT ||
-           args[0]->result_as_longlong())
+  else if (args[0]->dynamic_result())
+  {
+    /* We come here when argument has an unknown type */
+    args[0]->unsigned_flag= 1;   // Mark that we want to have an unsigned value
+    value= args[0]->val_int();
+    null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag == 0 && value < 0)
+      goto err;                                 // Warn about overflow
+    return value;
+  }
+  else if (args[0]->cast_to_int_type() != STRING_RESULT)
   {
     value= args[0]->val_int();
     null_value= args[0]->null_value; 
+    if (!null_value && args[0]->unsigned_flag == 0 && value < 0)
+      goto err;                                 // Warn about overflow
     return value;
   }
 
   value= val_int_from_str(&error);
   if (error < 0)
-    push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
-                 "Cast to unsigned converted negative integer to it's "
-                 "positive complement");
+    goto err;
+
+  return value;
+
+err:
+  push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+               "Cast to unsigned converted negative integer to it's "
+               "positive complement");
   return value;
 }
 
@@ -1109,6 +1190,51 @@ void Item_decimal_typecast::print(String *str, enum_query_type query_type)
 }
 
 
+double Item_double_typecast::val_real()
+{
+  int error;
+  double tmp= args[0]->val_real();
+  if ((null_value= args[0]->null_value))
+    return 0.0;
+
+  if ((error= truncate_double(&tmp, max_length, decimals, 0, DBL_MAX)))
+  {
+    push_warning_printf(current_thd,
+                        MYSQL_ERROR::WARN_LEVEL_WARN,
+                        ER_WARN_DATA_OUT_OF_RANGE,
+                        ER(ER_WARN_DATA_OUT_OF_RANGE),
+                        name, 1);
+    if (error < 0)
+    {
+      null_value= 1;                            // Illegal value
+      tmp= 0.0;
+    }
+  }
+  return tmp;
+}
+
+
+void Item_double_typecast::print(String *str, enum_query_type query_type)
+{
+  char len_buf[20*3 + 1];
+  char *end;
+
+  str->append(STRING_WITH_LEN("cast("));
+  args[0]->print(str, query_type);
+  str->append(STRING_WITH_LEN(" as double"));
+  if (decimals != NOT_FIXED_DEC)
+  {
+    str->append('(');
+    end= int10_to_str(max_length, len_buf,10);
+    str->append(len_buf, (uint32) (end - len_buf));
+    str->append(',');
+    end= int10_to_str(decimals, len_buf,10);
+    str->append(len_buf, (uint32) (end - len_buf));
+    str->append(')');
+  }
+  str->append(')');
+}
+
 double Item_func_plus::real_op()
 {
   double value= args[0]->val_real() + args[1]->val_real();
@@ -1341,7 +1467,7 @@ void Item_func_div::fix_length_and_dec()
   DBUG_ENTER("Item_func_div::fix_length_and_dec");
   prec_increment= current_thd->variables.div_precincrement;
   Item_num_op::fix_length_and_dec();
-  switch(hybrid_type) {
+  switch (hybrid_type) {
   case REAL_RESULT:
   {
     decimals=max(args[0]->decimals,args[1]->decimals)+prec_increment;
@@ -1364,7 +1490,10 @@ void Item_func_div::fix_length_and_dec()
   case DECIMAL_RESULT:
     result_precision();
     break;
-  default:
+  case STRING_RESULT:
+  case ROW_RESULT:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   maybe_null= 1; // devision by zero
@@ -1834,7 +1963,7 @@ void Item_func_int_val::find_num_type()
 {
   DBUG_ENTER("Item_func_int_val::find_num_type");
   DBUG_PRINT("info", ("name %s", func_name()));
-  switch(hybrid_type= args[0]->result_type())
+  switch (hybrid_type= args[0]->cast_to_int_type())
   {
   case STRING_RESULT:
   case REAL_RESULT:
@@ -1842,6 +1971,7 @@ void Item_func_int_val::find_num_type()
     max_length= float_length(decimals);
     break;
   case INT_RESULT:
+  case TIME_RESULT:
   case DECIMAL_RESULT:
     /*
       -2 because in most high position can't be used any digit for longlong
@@ -1858,7 +1988,8 @@ void Item_func_int_val::find_num_type()
       hybrid_type= INT_RESULT;
     }
     break;
-  default:
+  case ROW_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s",
@@ -1987,7 +2118,7 @@ void Item_func_round::fix_length_and_dec()
   }
 
   val1= args[1]->val_int();
-  if ((null_value= args[1]->is_null()))
+  if ((null_value= args[1]->null_value))
     return;
 
   val1_unsigned= args[1]->unsigned_flag;
@@ -2037,7 +2168,9 @@ void Item_func_round::fix_length_and_dec()
                                                              unsigned_flag);
     break;
   }
-  default:
+  case ROW_RESULT:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0); /* This result type isn't handled */
   }
 }
@@ -2245,10 +2378,10 @@ double Item_func_units::val_real()
 void Item_func_min_max::fix_length_and_dec()
 {
   int max_int_part=0;
-  bool datetime_found= FALSE;
   decimals=0;
   max_length=0;
   maybe_null=0;
+  thd= current_thd;
   cmp_type=args[0]->result_type();
 
   for (uint i=0 ; i < arg_count ; i++)
@@ -2257,85 +2390,87 @@ void Item_func_min_max::fix_length_and_dec()
     set_if_bigger(decimals, args[i]->decimals);
     set_if_bigger(max_int_part, args[i]->decimal_int_part());
     if (args[i]->maybe_null)
-      maybe_null=1;
-    cmp_type=item_cmp_type(cmp_type,args[i]->result_type());
-    if (args[i]->result_type() != ROW_RESULT && args[i]->is_datetime())
-    {
-      datetime_found= TRUE;
-      if (!datetime_item || args[i]->field_type() == MYSQL_TYPE_DATETIME)
-        datetime_item= args[i];
-    }
+      maybe_null= 1;
+    cmp_type= item_cmp_type(cmp_type,args[i]->result_type());
   }
   if (cmp_type == STRING_RESULT)
-  {
     agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV, 1);
-    if (datetime_found)
-    {
-      thd= current_thd;
-      compare_as_dates= TRUE;
-    }
-  }
   else if ((cmp_type == DECIMAL_RESULT) || (cmp_type == INT_RESULT))
     max_length= my_decimal_precision_to_length_no_truncation(max_int_part +
                                                              decimals, decimals,
                                                              unsigned_flag);
   else if (cmp_type == REAL_RESULT)
     max_length= float_length(decimals);
-  cached_field_type= agg_field_type(args, arg_count);
+
+  compare_as_dates= find_date_time_item(args, arg_count, 0);
+  if (compare_as_dates)
+  {
+    cached_field_type= compare_as_dates->field_type();
+    if (mysql_type_to_time_type(cached_field_type) == MYSQL_TIMESTAMP_DATE)
+      decimals= 0;
+    else
+      set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+  }
+  else
+    cached_field_type= agg_field_type(args, arg_count);
 }
 
 
 /*
   Compare item arguments in the DATETIME context.
 
-  SYNOPSIS
-    cmp_datetimes()
-    value [out]   found least/greatest DATE/DATETIME value
-
   DESCRIPTION
     Compare item arguments as DATETIME values and return the index of the
     least/greatest argument in the arguments array.
-    The correct integer DATE/DATETIME value of the found argument is
+    The correct DATE/DATETIME value of the found argument is
     stored to the value pointer, if latter is provided.
 
   RETURN
-   0	If one of arguments is NULL or there was a execution error
-   #	index of the least/greatest argument
+   1	If one of arguments is NULL or there was a execution error
+   0    Otherwise
 */
 
-uint Item_func_min_max::cmp_datetimes(ulonglong *value)
+bool Item_func_min_max::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   longlong UNINIT_VAR(min_max);
-  uint min_max_idx= 0;
+  DBUG_ASSERT(fixed == 1);
+
+  /*
+    just like ::val_int() method of an string item can be called,
+    for example, SELECT CONCAT("10", "12") + 1,
+    ::get_date() can be called for non-temporal values,
+    for example, SELECT MONTH(GREATEST("2011-11-21", "2010-10-09"))
+
+  */
+  if (!compare_as_dates)
+    return Item_func::get_date(ltime, fuzzy_date);
+
+  null_value= 0;
 
   for (uint i=0; i < arg_count ; i++)
   {
     Item **arg= args + i;
     bool is_null;
-    longlong res= get_datetime_value(thd, &arg, 0, datetime_item, &is_null);
+    longlong res= get_datetime_value(thd, &arg, 0, compare_as_dates, &is_null);
 
-    /* Check if we need to stop (because of error or KILL)  and stop the loop */
-    if (thd->is_error())
+    /* Check if we need to stop (because of error or KILL) and stop the loop */
+    if (thd->is_error() || args[i]->null_value)
     {
       null_value= 1;
-      return 0;
+      return 1;
     }
 
-    if ((null_value= args[i]->null_value))
-      return 0;
     if (i == 0 || (res < min_max ? cmp_sign : -cmp_sign) > 0)
-    {
       min_max= res;
-      min_max_idx= i;
-    }
   }
-  if (value)
+  unpack_time(min_max, ltime);
+  if (compare_as_dates->field_type() == MYSQL_TYPE_DATE)
   {
-    *value= min_max;
-    if (datetime_item->field_type() == MYSQL_TYPE_DATE)
-      *value/= 1000000L;
+    ltime->time_type= MYSQL_TIMESTAMP_DATE;
+    ltime->hour= ltime->minute= ltime->second= ltime->second_part= 0;
   }
-  return min_max_idx;
+
+  return 0;
 }
 
 
@@ -2343,46 +2478,14 @@ String *Item_func_min_max::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
   if (compare_as_dates)
-  {
-    String *str_res;
-    uint min_max_idx= cmp_datetimes(NULL);
-    if (null_value)
-      return 0;
-    str_res= args[min_max_idx]->val_str(str);
-    if (args[min_max_idx]->null_value)
-    {
-      // check if the call to val_str() above returns a NULL value
-      null_value= 1;
-      return NULL;
-    }
-    str_res->set_charset(collation.collation);
-    return str_res;
-  }
+    return val_string_from_date(str);
   switch (cmp_type) {
   case INT_RESULT:
-  {
-    longlong nr=val_int();
-    if (null_value)
-      return 0;
-    str->set_int(nr, unsigned_flag, &my_charset_bin);
-    return str;
-  }
+    return val_string_from_int(str);
   case DECIMAL_RESULT:
-  {
-    my_decimal dec_buf, *dec_val= val_decimal(&dec_buf);
-    if (null_value)
-      return 0;
-    my_decimal2string(E_DEC_FATAL_ERROR, dec_val, 0, 0, 0, str);
-    return str;
-  }
+    return val_string_from_decimal(str);
   case REAL_RESULT:
-  {
-    double nr= val_real();
-    if (null_value)
-      return 0; /* purecov: inspected */
-    str->set_real(nr,decimals,&my_charset_bin);
-    return str;
-  }
+    return val_string_from_real(str);
   case STRING_RESULT:
   {
     String *UNINIT_VAR(res);
@@ -2408,9 +2511,9 @@ String *Item_func_min_max::val_str(String *str)
     return res;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     return 0;
   }
   return 0;					// Keep compiler happy
@@ -2423,9 +2526,11 @@ double Item_func_min_max::val_real()
   double value=0.0;
   if (compare_as_dates)
   {
-    ulonglong result= 0;
-    (void)cmp_datetimes(&result);
-    return (double)result;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return TIME_to_double(&ltime);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -2450,9 +2555,11 @@ longlong Item_func_min_max::val_int()
   longlong value=0;
   if (compare_as_dates)
   {
-    ulonglong result= 0;
-    (void)cmp_datetimes(&result);
-    return (longlong)result;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return TIME_to_ulonglong(&ltime);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -2478,10 +2585,11 @@ my_decimal *Item_func_min_max::val_decimal(my_decimal *dec)
 
   if (compare_as_dates)
   {
-    ulonglong value= 0;
-    (void)cmp_datetimes(&value);
-    ulonglong2decimal(value, dec);
-    return dec;
+    MYSQL_TIME ltime;
+    if (get_date(&ltime, TIME_FUZZY_DATE))
+      return 0;
+
+    return date2my_decimal(&ltime, dec);
   }
   for (uint i=0; i < arg_count ; i++)
   {
@@ -2918,6 +3026,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
       if (item->maybe_null)
 	func->maybe_null=1;
       func->with_sum_func= func->with_sum_func || item->with_sum_func;
+      func->with_field= func->with_field || item->with_field;
       used_tables_cache|=item->used_tables();
       const_item_cache&=item->const_item();
       f_args.arg_type[i]=item->result_type();
@@ -2963,15 +3072,14 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
 
       if (arguments[i]->const_item())
       {
-        switch (arguments[i]->result_type()) 
-        {
+        switch (arguments[i]->result_type()) {
         case STRING_RESULT:
         case DECIMAL_RESULT:
         {
           String *res= arguments[i]->val_str(&buffers[i]);
           if (arguments[i]->null_value)
             continue;
-          f_args.args[i]= (char*) res->c_ptr();
+          f_args.args[i]= (char*) res->c_ptr_safe();
           f_args.lengths[i]= res->length();
           break;
         }
@@ -2990,9 +3098,9 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
           to+= ALIGN_SIZE(sizeof(double));
           break;
         case ROW_RESULT:
-        default:
-          // This case should never be chosen
-          DBUG_ASSERT(0);
+        case TIME_RESULT:
+        case IMPOSSIBLE_RESULT:
+          DBUG_ASSERT(0);          // This case should never be chosen
           break;
         }
       }
@@ -3065,9 +3173,9 @@ bool udf_handler::get_arguments()
       }
       break;
     case ROW_RESULT:
-    default:
-      // This case should never be chosen
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);              // This case should never be chosen
       break;
     }
   }
@@ -3687,9 +3795,9 @@ longlong Item_func_benchmark::val_int()
       (void) args[1]->val_decimal(&tmp_decimal);
       break;
     case ROW_RESULT:
-    default:
-      // This case should never be chosen
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);              // This case should never be chosen
       return 0;
     }
   }
@@ -3867,6 +3975,21 @@ bool Item_func_set_user_var::fix_fields(THD *thd, Item **ref)
     entry->collation.set(args[0]->collation.collation, DERIVATION_IMPLICIT);
   collation.set(entry->collation.collation, DERIVATION_IMPLICIT);
   cached_result_type= args[0]->result_type();
+  if (thd->lex->current_select)
+  {
+    /*
+      When this function is used in a derived table/view force the derived
+      table to be materialized to preserve possible side-effect of setting a
+      user variable.
+    */
+    SELECT_LEX_UNIT *unit= thd->lex->current_select->master_unit();
+    TABLE_LIST *derived;
+    for (derived= unit->derived;
+         derived;
+         derived= derived->select_lex->master_unit()->derived)
+      derived->set_materialized_derived();
+  }
+
   return FALSE;
 }
 
@@ -4043,6 +4166,7 @@ double user_var_entry::val_real(bool *null_value)
   case STRING_RESULT:
     return my_atof(value);                      // This is null terminated
   case ROW_RESULT:
+  case TIME_RESULT:
   case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);				// Impossible
     break;
@@ -4075,6 +4199,7 @@ longlong user_var_entry::val_int(bool *null_value) const
     return my_strtoll10(value, (char**) 0, &error);// String is null terminated
   }
   case ROW_RESULT:
+  case TIME_RESULT:
   case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);				// Impossible
     break;
@@ -4109,6 +4234,7 @@ String *user_var_entry::val_str(bool *null_value, String *str,
       str= 0;					// EOM error
     break;
   case ROW_RESULT:
+  case TIME_RESULT:
   case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);				// Impossible
     break;
@@ -4137,6 +4263,7 @@ my_decimal *user_var_entry::val_decimal(bool *null_value, my_decimal *val)
     str2my_decimal(E_DEC_FATAL_ERROR, value, length, collation.collation, val);
     break;
   case ROW_RESULT:
+  case TIME_RESULT:
   case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);				// Impossible
     break;
@@ -4176,8 +4303,9 @@ Item_func_set_user_var::check(bool use_result_field)
   {
     save_result.vint= use_result_field ? result_field->val_int() :
                        args[0]->val_int();
-    unsigned_flag= use_result_field ? ((Field_num*)result_field)->unsigned_flag:
-                    args[0]->unsigned_flag;
+    unsigned_flag= (use_result_field ?
+                    ((Field_num*)result_field)->unsigned_flag:
+                    args[0]->unsigned_flag);
     break;
   }
   case STRING_RESULT:
@@ -4194,9 +4322,9 @@ Item_func_set_user_var::check(bool use_result_field)
     break;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_RETURN(FALSE);
@@ -4229,9 +4357,9 @@ void Item_func_set_user_var::save_item_result(Item *item)
     save_result.vdec= item->val_decimal_result(&decimal_buff);
     break;
   case ROW_RESULT:
-  default:
-    // Should never happen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_VOID_RETURN;
@@ -4297,9 +4425,9 @@ Item_func_set_user_var::update()
     break;
   }
   case ROW_RESULT:
-  default:
-    // This case should never be chosen
-    DBUG_ASSERT(0);
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);                // This case should never be chosen
     break;
   }
   DBUG_RETURN(res);
@@ -4732,7 +4860,7 @@ void Item_func_get_user_var::fix_length_and_dec()
     max_length= var_entry->length;
 
     collation.set(var_entry->collation);
-    switch(m_cached_result_type) {
+    switch (m_cached_result_type) {
     case REAL_RESULT:
       max_length= DBL_DIG + 8;
       break;
@@ -4748,8 +4876,9 @@ void Item_func_get_user_var::fix_length_and_dec()
       decimals= DECIMAL_MAX_SCALE;
       break;
     case ROW_RESULT:                            // Keep compiler happy
-    default:
-      DBUG_ASSERT(0);
+    case TIME_RESULT:
+    case IMPOSSIBLE_RESULT:
+      DBUG_ASSERT(0);                // This case should never be chosen
       break;
     }
   }
@@ -4937,12 +5066,16 @@ void Item_func_get_system_var::fix_length_and_dec()
   switch (var->show_type())
   {
     case SHOW_LONG:
-    case SHOW_INT:
     case SHOW_HA_ROWS:
       unsigned_flag= TRUE;
       max_length= MY_INT64_NUM_DECIMAL_DIGITS;
       decimals=0;
       break;
+    case SHOW_INT:
+      unsigned_flag= FALSE;
+      max_length= MY_INT64_NUM_DECIMAL_DIGITS;
+      decimals=0;
+      break;
     case SHOW_LONGLONG:
       unsigned_flag= TRUE;
       max_length= MY_INT64_NUM_DECIMAL_DIGITS;
@@ -5083,7 +5216,7 @@ longlong Item_func_get_system_var::val_int()
 
   switch (var->show_type())
   {
-    case SHOW_INT:      get_sys_var_safe (uint);
+    case SHOW_INT:      get_sys_var_safe (int);
     case SHOW_LONG:     get_sys_var_safe (ulong);
     case SHOW_LONGLONG: get_sys_var_safe (ulonglong);
     case SHOW_HA_ROWS:  get_sys_var_safe (ha_rows);
diff --git a/sql/item_func.h b/sql/item_func.h
index e750e372691..abaf07c51d5 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -45,7 +45,7 @@ public:
   enum Functype { UNKNOWN_FUNC,EQ_FUNC,EQUAL_FUNC,NE_FUNC,LT_FUNC,LE_FUNC,
 		  GE_FUNC,GT_FUNC,FT_FUNC,
 		  LIKE_FUNC,ISNULL_FUNC,ISNOTNULL_FUNC,
-		  COND_AND_FUNC, COND_OR_FUNC, COND_XOR_FUNC,
+		  COND_AND_FUNC, COND_OR_FUNC, XOR_FUNC,
                   BETWEEN, IN_FUNC, MULT_EQUAL_FUNC,
 		  INTERVAL_FUNC, ISNOTNULLTEST_FUNC,
 		  SP_EQUALS_FUNC, SP_DISJOINT_FUNC,SP_INTERSECTS_FUNC,
@@ -66,6 +66,7 @@ public:
     allowed_arg_cols(1), arg_count(0)
   {
     with_sum_func= 0;
+    with_field= 0;
   }
   Item_func(Item *a):
     allowed_arg_cols(1), arg_count(1)
@@ -73,6 +74,7 @@ public:
     args= tmp_arg;
     args[0]= a;
     with_sum_func= a->with_sum_func;
+    with_field= a->with_field;
   }
   Item_func(Item *a,Item *b):
     allowed_arg_cols(1), arg_count(2)
@@ -80,6 +82,7 @@ public:
     args= tmp_arg;
     args[0]= a; args[1]= b;
     with_sum_func= a->with_sum_func || b->with_sum_func;
+    with_field= a->with_field || b->with_field;
   }
   Item_func(Item *a,Item *b,Item *c):
     allowed_arg_cols(1)
@@ -90,6 +93,7 @@ public:
       arg_count= 3;
       args[0]= a; args[1]= b; args[2]= c;
       with_sum_func= a->with_sum_func || b->with_sum_func || c->with_sum_func;
+      with_field= a->with_field || b->with_field || c->with_field;
     }
   }
   Item_func(Item *a,Item *b,Item *c,Item *d):
@@ -102,6 +106,8 @@ public:
       args[0]= a; args[1]= b; args[2]= c; args[3]= d;
       with_sum_func= a->with_sum_func || b->with_sum_func ||
 	c->with_sum_func || d->with_sum_func;
+      with_field= a->with_field || b->with_field ||
+        c->with_field || d->with_field;
     }
   }
   Item_func(Item *a,Item *b,Item *c,Item *d,Item* e):
@@ -113,12 +119,15 @@ public:
       args[0]= a; args[1]= b; args[2]= c; args[3]= d; args[4]= e;
       with_sum_func= a->with_sum_func || b->with_sum_func ||
 	c->with_sum_func || d->with_sum_func || e->with_sum_func ;
+      with_field= a->with_field || b->with_field ||
+        c->with_field || d->with_field || e->with_field;
     }
   }
   Item_func(List<Item> &list);
   // Constructor used for Item_cond_and/or (see Item comment)
   Item_func(THD *thd, Item_func *item);
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void quick_fix_field();
   table_map used_tables() const;
   table_map not_null_tables() const;
@@ -183,8 +192,9 @@ public:
                 Item_transformer transformer, uchar *arg_t);
   void traverse_cond(Cond_traverser traverser,
                      void * arg, traverse_order order);
-  bool is_expensive_processor(uchar *arg);
-  virtual bool is_expensive() { return 0; }
+  bool eval_not_null_tables(uchar *opt_arg);
+ // bool is_expensive_processor(uchar *arg);
+ // virtual bool is_expensive() { return 0; }
   inline double fix_result(double value)
   {
     if (isfinite(value))
@@ -244,6 +254,21 @@ public:
   }
 
   /*
+    By default only substitution for a field whose two different values
+    are never equal is allowed in the arguments of a function.
+    This is overruled for the direct arguments of comparison functions.
+  */ 
+  bool subst_argument_checker(uchar **arg) 
+  { 
+    if (*arg)
+    {
+      *arg= (uchar *) Item::IDENTITY_SUBST;
+      return TRUE;
+    }
+    return FALSE;
+  }
+
+  /*
     We assume the result of any function that has a TIMESTAMP argument to be
     timezone-dependent, since a TIMESTAMP value in both numeric and string
     contexts is interpreted according to the current timezone.
@@ -300,6 +325,8 @@ class Item_func_numhybrid: public Item_func
 protected:
   Item_result hybrid_type;
 public:
+  Item_func_numhybrid() :Item_func(), hybrid_type(REAL_RESULT)
+  {}
   Item_func_numhybrid(Item *a) :Item_func(a), hybrid_type(REAL_RESULT)
   {}
   Item_func_numhybrid(Item *a,Item *b)
@@ -453,7 +480,7 @@ class Item_decimal_typecast :public Item_func
 public:
   Item_decimal_typecast(Item *a, int len, int dec) :Item_func(a)
   {
-    decimals= dec;
+    decimals= (uint8) dec;
     max_length= my_decimal_precision_to_length_no_truncation(len, dec,
                                                              unsigned_flag);
   }
@@ -463,12 +490,29 @@ public:
   my_decimal *val_decimal(my_decimal*);
   enum Item_result result_type () const { return DECIMAL_RESULT; }
   enum_field_types field_type() const { return MYSQL_TYPE_NEWDECIMAL; }
-  void fix_length_and_dec() {};
+  void fix_length_and_dec() {}
   const char *func_name() const { return "decimal_typecast"; }
   virtual void print(String *str, enum_query_type query_type);
 };
 
 
+class Item_double_typecast :public Item_real_func
+{
+public:
+  Item_double_typecast(Item *a, int len, int dec) :Item_real_func(a)
+  {
+    decimals=   (uint8)  dec;
+    max_length= (uint32) len;
+  }
+  double val_real();
+  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
+  void fix_length_and_dec() { maybe_null= 1; }
+  const char *func_name() const { return "double_typecast"; }
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
+
 class Item_func_additive_op :public Item_num_op
 {
 public:
@@ -828,25 +872,21 @@ class Item_func_min_max :public Item_func
   Item_result cmp_type;
   String tmp_value;
   int cmp_sign;
-  /* TRUE <=> arguments should be compared in the DATETIME context. */
-  bool compare_as_dates;
   /* An item used for issuing warnings while string to DATETIME conversion. */
-  Item *datetime_item;
+  Item *compare_as_dates;
   THD *thd;
 protected:
   enum_field_types cached_field_type;
 public:
   Item_func_min_max(List<Item> &list,int cmp_sign_arg) :Item_func(list),
-    cmp_type(INT_RESULT), cmp_sign(cmp_sign_arg), compare_as_dates(FALSE),
-    datetime_item(0) {}
+    cmp_type(INT_RESULT), cmp_sign(cmp_sign_arg), compare_as_dates(FALSE) {}
   double val_real();
   longlong val_int();
   String *val_str(String *);
   my_decimal *val_decimal(my_decimal *);
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void fix_length_and_dec();
   enum Item_result result_type () const { return cmp_type; }
-  bool result_as_longlong() { return compare_as_dates; };
-  uint cmp_datetimes(ulonglong *value);
   enum_field_types field_type() const { return cached_field_type; }
 };
 
@@ -1127,6 +1167,7 @@ class Item_udf_func :public Item_func
 {
 protected:
   udf_handler udf;
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
 
 public:
   Item_udf_func(udf_func *udf_arg)
@@ -1635,6 +1676,7 @@ public:
     table= 0;           // required by Item_func_match::eq()
     DBUG_VOID_RETURN;
   }
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
   enum Functype functype() const { return FT_FUNC; }
   const char *func_name() const { return "match"; }
   void update_used_tables() {}
@@ -1674,14 +1716,7 @@ public:
   void fix_length_and_dec() { decimals=0; max_length=1; maybe_null=1;}
   bool check_vcol_func_processor(uchar *int_arg) 
   {
-#if 0
-    DBUG_ENTER("Item_func_is_free_lock::check_vcol_func_processor");
-    DBUG_PRINT("info",
-      ("check_vcol_func_processor returns TRUE: unsupported function"));
-    DBUG_RETURN(TRUE);
-#else
     return trace_unsupported_by_check_vcol_func_processor(func_name());
-#endif
   }
 };
 
@@ -1705,7 +1740,7 @@ enum Cast_target
 {
   ITEM_CAST_BINARY, ITEM_CAST_SIGNED_INT, ITEM_CAST_UNSIGNED_INT,
   ITEM_CAST_DATE, ITEM_CAST_TIME, ITEM_CAST_DATETIME, ITEM_CAST_CHAR,
-  ITEM_CAST_DECIMAL
+  ITEM_CAST_DECIMAL, ITEM_CAST_DOUBLE
 };
 
 
@@ -1750,6 +1785,9 @@ private:
   bool execute();
   bool execute_impl(THD *thd);
   bool init_result_field(THD *thd);
+
+protected:
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
   
 public:
 
@@ -1833,6 +1871,10 @@ public:
   {
     return trace_unsupported_by_check_vcol_func_processor(func_name());
   }
+  bool limit_index_condition_pushdown_processor(uchar *opt_arg)
+  {
+    return TRUE;
+  }
 };
 
 
@@ -1866,4 +1908,3 @@ public:
     return trace_unsupported_by_check_vcol_func_processor(func_name());
   }
 };
-
diff --git a/sql/item_geofunc.cc b/sql/item_geofunc.cc
index 24e994cfc0e..c4ac9d6adfe 100644
--- a/sql/item_geofunc.cc
+++ b/sql/item_geofunc.cc
@@ -29,9 +29,15 @@
 #endif
 
 #include "mysql_priv.h"
+/*
+  It is necessary to include set_var.h instead of item.h because there
+  are dependencies on include order for set_var.h and item.h. This
+  will be resolved later.
+*/
 #ifdef HAVE_SPATIAL
 #include <m_ctype.h>
 
+
 Field *Item_geometry_func::tmp_table_field(TABLE *t_arg)
 {
   Field *result;
@@ -45,7 +51,7 @@ void Item_geometry_func::fix_length_and_dec()
 {
   collation.set(&my_charset_bin);
   decimals=0;
-  max_length= max_field_size;
+  max_length= (uint32) 4294967295U;
   maybe_null= 1;
 }
 
@@ -137,6 +143,7 @@ String *Item_func_as_wkt::val_str(String *str)
 
 void Item_func_as_wkt::fix_length_and_dec()
 {
+  collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
   max_length=MAX_BLOB_WIDTH;
   maybe_null= 1;
 }
@@ -363,8 +370,8 @@ String *Item_func_point::val_str(String *str)
   uint32 srid= 0;
 
   if ((null_value= (args[0]->null_value ||
-		    args[1]->null_value ||
-                    str->realloc(4/*SRID*/ + 1 + 4 + SIZEOF_STORED_DOUBLE*2))))
+                    args[1]->null_value ||
+                    str->realloc(4/*SRID*/ + 1 + 4 + SIZEOF_STORED_DOUBLE * 2))))
     return 0;
 
   str->set_charset(&my_charset_bin);
@@ -511,7 +518,33 @@ err:
   Functions for spatial relations
 */
 
-longlong Item_func_spatial_rel::val_int()
+const char *Item_func_spatial_mbr_rel::func_name() const 
+{ 
+  switch (spatial_rel) {
+    case SP_CONTAINS_FUNC:
+      return "mbrcontains";
+    case SP_WITHIN_FUNC:
+      return "mbrwithin";
+    case SP_EQUALS_FUNC:
+      return "mbrequals";
+    case SP_DISJOINT_FUNC:
+      return "mbrdisjoint";
+    case SP_INTERSECTS_FUNC:
+      return "mbrintersects";
+    case SP_TOUCHES_FUNC:
+      return "mbrtouches";
+    case SP_CROSSES_FUNC:
+      return "mbrcrosses";
+    case SP_OVERLAPS_FUNC:
+      return "mbroverlaps";
+    default:
+      DBUG_ASSERT(0);  // Should never happened
+      return "mbrsp_unknown"; 
+  }
+}
+
+
+longlong Item_func_spatial_mbr_rel::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   String *res1= args[0]->val_str(&cmp.value1);
@@ -556,6 +589,738 @@ longlong Item_func_spatial_rel::val_int()
 }
 
 
+Item_func_spatial_rel::Item_func_spatial_rel(Item *a,Item *b,
+                                             enum Functype sp_rel) :
+    Item_bool_func2(a,b), collector()
+{
+  spatial_rel = sp_rel;
+}
+
+
+Item_func_spatial_rel::~Item_func_spatial_rel()
+{
+}
+
+
+const char *Item_func_spatial_rel::func_name() const 
+{ 
+  switch (spatial_rel) {
+    case SP_CONTAINS_FUNC:
+      return "st_contains";
+    case SP_WITHIN_FUNC:
+      return "st_within";
+    case SP_EQUALS_FUNC:
+      return "st_equals";
+    case SP_DISJOINT_FUNC:
+      return "st_disjoint";
+    case SP_INTERSECTS_FUNC:
+      return "st_intersects";
+    case SP_TOUCHES_FUNC:
+      return "st_touches";
+    case SP_CROSSES_FUNC:
+      return "st_crosses";
+    case SP_OVERLAPS_FUNC:
+      return "st_overlaps";
+    default:
+      DBUG_ASSERT(0);  // Should never happened
+      return "sp_unknown"; 
+  }
+}
+
+
+static double count_edge_t(const Gcalc_heap::Info *ea,
+                           const Gcalc_heap::Info *eb,
+                           const Gcalc_heap::Info *v,
+                           double &ex, double &ey, double &vx, double &vy,
+                           double &e_sqrlen)
+{
+  ex= eb->x - ea->x;
+  ey= eb->y - ea->y;
+  vx= v->x - ea->x;
+  vy= v->y - ea->y;
+  e_sqrlen= ex * ex + ey * ey;
+  return (ex * vx + ey * vy) / e_sqrlen;
+}
+
+
+static double distance_to_line(double ex, double ey, double vx, double vy,
+                               double e_sqrlen)
+{
+  return fabs(vx * ey - vy * ex) / sqrt(e_sqrlen);
+}
+
+
+static double distance_points(const Gcalc_heap::Info *a,
+                              const Gcalc_heap::Info *b)
+{
+  double x= a->x - b->x;
+  double y= a->y - b->y;
+  return sqrt(x * x + y * y);
+}
+
+
+#define GIS_ZERO 0.00000000001
+
+longlong Item_func_spatial_rel::val_int()
+{
+  DBUG_ENTER("Item_func_spatial_rel::val_int");
+  DBUG_ASSERT(fixed == 1);
+  String *res1;
+  String *res2;
+  Geometry_buffer buffer1, buffer2;
+  Geometry *g1, *g2;
+  int result= 0;
+  int mask= 0;
+  uint shape_a, shape_b;
+  MBR umbr, mbr1, mbr2;
+  const char *c_end;
+
+  res1= args[0]->val_str(&tmp_value1);
+  res2= args[1]->val_str(&tmp_value2);
+  Gcalc_operation_transporter trn(&func, &collector);
+
+  if (func.reserve_op_buffer(1))
+    DBUG_RETURN(0);
+
+  if ((null_value=
+       (args[0]->null_value || args[1]->null_value ||
+	!(g1= Geometry::construct(&buffer1, res1->ptr(), res1->length())) ||
+	!(g2= Geometry::construct(&buffer2, res2->ptr(), res2->length())))))
+    goto exit;
+
+  g1->get_mbr(&mbr1, &c_end);
+  g2->get_mbr(&mbr2, &c_end);
+
+  umbr= mbr1;
+  umbr.add_mbr(&mbr2);
+  collector.set_extent(umbr.xmin, umbr.xmax, umbr.ymin, umbr.ymax);
+
+  mbr1.buffer(1e-5);
+
+  switch (spatial_rel) {
+    case SP_CONTAINS_FUNC:
+      if (!mbr1.contains(&mbr2))
+        goto exit;
+      mask= 1;
+      func.add_operation(Gcalc_function::op_difference, 2);
+      /* Mind the g2 goes first. */
+      null_value= g2->store_shapes(&trn) || g1->store_shapes(&trn);
+      break;
+    case SP_WITHIN_FUNC:
+      mbr2.buffer(2e-5);
+      if (!mbr1.within(&mbr2))
+        goto exit;
+      mask= 1;
+      func.add_operation(Gcalc_function::op_difference, 2);
+      null_value= g1->store_shapes(&trn) || g2->store_shapes(&trn);
+      break;
+    case SP_EQUALS_FUNC:
+      if (!mbr1.contains(&mbr2))
+        goto exit;
+      mask= 1;
+      func.add_operation(Gcalc_function::op_symdifference, 2);
+      null_value= g1->store_shapes(&trn) || g2->store_shapes(&trn);
+      break;
+    case SP_DISJOINT_FUNC:
+      mask= 1;
+      func.add_operation(Gcalc_function::op_intersection, 2);
+      null_value= g1->store_shapes(&trn) || g2->store_shapes(&trn);
+      break;
+    case SP_INTERSECTS_FUNC:
+      if (!mbr1.intersects(&mbr2))
+        goto exit;
+      func.add_operation(Gcalc_function::op_intersection, 2);
+      null_value= g1->store_shapes(&trn) || g2->store_shapes(&trn);
+      break;
+    case SP_OVERLAPS_FUNC:
+    case SP_CROSSES_FUNC:
+      func.add_operation(Gcalc_function::op_intersection, 2);
+      func.add_operation(Gcalc_function::v_find_t |
+                         Gcalc_function::op_intersection, 2);
+      shape_a= func.get_next_expression_pos();
+      if ((null_value= g1->store_shapes(&trn)))
+        break;
+      shape_b= func.get_next_expression_pos();
+      if ((null_value= g2->store_shapes(&trn)))
+        break;
+      func.add_operation(Gcalc_function::v_find_t |
+                         Gcalc_function::op_intersection, 2);
+      func.add_operation(Gcalc_function::v_find_t |
+                         Gcalc_function::op_difference, 2);
+      func.repeat_expression(shape_a);
+      func.repeat_expression(shape_b);
+      func.add_operation(Gcalc_function::v_find_t |
+                         Gcalc_function::op_difference, 2);
+      func.repeat_expression(shape_b);
+      func.repeat_expression(shape_a);
+      break;
+    case SP_TOUCHES_FUNC:
+      func.add_operation(Gcalc_function::op_intersection, 2);
+      func.add_operation(Gcalc_function::v_find_f |
+                         Gcalc_function::op_not |
+                         Gcalc_function::op_intersection, 2);
+      func.add_operation(Gcalc_function::op_internals, 1);
+      shape_a= func.get_next_expression_pos();
+      if ((null_value= g1->store_shapes(&trn)))
+        break;
+      func.add_operation(Gcalc_function::op_internals, 1);
+      shape_b= func.get_next_expression_pos();
+      if ((null_value= g2->store_shapes(&trn)))
+        break;
+      func.add_operation(Gcalc_function::v_find_t |
+                         Gcalc_function::op_intersection, 2);
+      func.add_operation(Gcalc_function::op_border, 1);
+      func.repeat_expression(shape_a);
+      func.add_operation(Gcalc_function::op_border, 1);
+      func.repeat_expression(shape_b);
+      break;
+    default:
+      DBUG_ASSERT(FALSE);
+      break;
+  }
+
+  if (null_value)
+    goto exit;
+
+  collector.prepare_operation();
+  scan_it.init(&collector);
+  scan_it.killed= (int *) &(current_thd->killed);
+
+  if (func.alloc_states())
+    goto exit;
+
+  result= func.check_function(scan_it) ^ mask;
+
+exit:
+  collector.reset();
+  func.reset();
+  scan_it.reset();
+  DBUG_RETURN(result);
+}
+
+
+Item_func_spatial_operation::~Item_func_spatial_operation()
+{
+}
+
+
+String *Item_func_spatial_operation::val_str(String *str_value)
+{
+  DBUG_ENTER("Item_func_spatial_operation::val_str");
+  DBUG_ASSERT(fixed == 1);
+  String *res1= args[0]->val_str(&tmp_value1);
+  String *res2= args[1]->val_str(&tmp_value2);
+  Geometry_buffer buffer1, buffer2;
+  Geometry *g1, *g2;
+  uint32 srid= 0;
+  Gcalc_operation_transporter trn(&func, &collector);
+  MBR mbr1, mbr2;
+  const char *c_end;
+
+  if (func.reserve_op_buffer(1))
+    DBUG_RETURN(0);
+  func.add_operation(spatial_op, 2);
+
+  if ((null_value=
+       (args[0]->null_value || args[1]->null_value ||
+	!(g1= Geometry::construct(&buffer1, res1->ptr(), res1->length())) ||
+	!(g2= Geometry::construct(&buffer2, res2->ptr(), res2->length())))))
+  {
+    str_value= 0;
+    goto exit;
+  }
+
+  g1->get_mbr(&mbr1, &c_end);
+  g2->get_mbr(&mbr2, &c_end);
+  mbr1.add_mbr(&mbr2);
+  collector.set_extent(mbr1.xmin, mbr1.xmax, mbr1.ymin, mbr1.ymax);
+  
+  if ((null_value= g1->store_shapes(&trn) || g2->store_shapes(&trn)))
+  {
+    str_value= 0;
+    goto exit;
+  }
+
+  collector.prepare_operation();
+  if (func.alloc_states())
+    goto exit;
+
+  operation.init(&func);
+
+  if (operation.count_all(&collector) ||
+      operation.get_result(&res_receiver))
+    goto exit;
+
+
+  str_value->set_charset(&my_charset_bin);
+  if (str_value->reserve(SRID_SIZE, 512))
+    goto exit;
+  str_value->length(0);
+  str_value->q_append(srid);
+
+  if (!Geometry::create_from_opresult(&buffer1, str_value, res_receiver))
+    goto exit;
+
+exit:
+  collector.reset();
+  func.reset();
+  res_receiver.reset();
+  DBUG_RETURN(str_value);
+}
+
+
+const char *Item_func_spatial_operation::func_name() const
+{ 
+  switch (spatial_op) {
+    case Gcalc_function::op_intersection:
+      return "st_intersection";
+    case Gcalc_function::op_difference:
+      return "st_difference";
+    case Gcalc_function::op_union:
+      return "st_union";
+    case Gcalc_function::op_symdifference:
+      return "st_symdifference";
+    default:
+      DBUG_ASSERT(0);  // Should never happen
+      return "sp_unknown"; 
+  }
+}
+
+
+static const int SINUSES_CALCULATED= 32;
+static double n_sinus[SINUSES_CALCULATED+1]=
+{
+  0,
+  0.04906767432741802,
+  0.0980171403295606,
+  0.1467304744553618,
+  0.1950903220161283,
+  0.2429801799032639,
+  0.2902846772544623,
+  0.3368898533922201,
+  0.3826834323650898,
+  0.4275550934302821,
+  0.4713967368259976,
+  0.5141027441932217,
+  0.5555702330196022,
+  0.5956993044924334,
+  0.6343932841636455,
+  0.6715589548470183,
+  0.7071067811865475,
+  0.7409511253549591,
+  0.773010453362737,
+  0.8032075314806448,
+  0.8314696123025452,
+  0.8577286100002721,
+  0.8819212643483549,
+  0.9039892931234433,
+  0.9238795325112867,
+  0.9415440651830208,
+  0.9569403357322089,
+  0.970031253194544,
+  0.9807852804032304,
+  0.989176509964781,
+  0.9951847266721968,
+  0.9987954562051724,
+  1
+};
+
+
+static void get_n_sincos(int n, double *sinus, double *cosinus)
+{
+  DBUG_ASSERT(n > 0 && n < SINUSES_CALCULATED*2+1);
+  if (n < (SINUSES_CALCULATED + 1))
+  {
+    *sinus= n_sinus[n];
+    *cosinus= n_sinus[SINUSES_CALCULATED - n];
+  }
+  else
+  {
+    n-= SINUSES_CALCULATED;
+    *sinus= n_sinus[SINUSES_CALCULATED - n];
+    *cosinus= -n_sinus[n];
+  }
+}
+
+
+static int fill_half_circle(Gcalc_shape_transporter *trn, double x, double y,
+                            double ax, double ay)
+{
+  double n_sin, n_cos;
+  double x_n, y_n;
+  for (int n = 1; n < (SINUSES_CALCULATED * 2 - 1); n++)
+  {
+    get_n_sincos(n, &n_sin, &n_cos);
+    x_n= ax * n_cos - ay * n_sin;
+    y_n= ax * n_sin + ay * n_cos;
+    if (trn->add_point(x_n + x, y_n + y))
+      return 1;
+  }
+  return 0;
+}
+
+
+static int fill_gap(Gcalc_shape_transporter *trn,
+                    double x, double y,
+                    double ax, double ay, double bx, double by, double d,
+                    bool *empty_gap)
+{
+  double ab= ax * bx + ay * by;
+  double cosab= ab / (d * d) + GIS_ZERO;
+  double n_sin, n_cos;
+  double x_n, y_n;
+  int n=1;
+
+  *empty_gap= true;
+  for (;;)
+  {
+    get_n_sincos(n++, &n_sin, &n_cos);
+    if (n_cos <= cosab)
+      break;
+    *empty_gap= false;
+    x_n= ax * n_cos - ay * n_sin;
+    y_n= ax * n_sin + ay * n_cos;
+    if (trn->add_point(x_n + x, y_n + y))
+      return 1;
+  }
+  return 0;
+}
+
+
+/*
+  Calculates the vector (p2,p1) and
+  negatively orthogonal to it with the length of d.
+  The result is (ex,ey) - the vector, (px,py) - the orthogonal.
+*/
+
+static void calculate_perpendicular(
+    double x1, double y1, double x2, double y2, double d,
+    double *ex, double *ey,
+    double *px, double *py)
+{
+  double q;
+  *ex= x1 - x2;
+  *ey= y1 - y2;
+  q= d / sqrt((*ex) * (*ex) + (*ey) * (*ey));
+  *px= (*ey) * q;
+  *py= -(*ex) * q;
+}
+
+
+int Item_func_buffer::Transporter::single_point(double x, double y)
+{
+  return add_point_buffer(x, y);
+}
+
+
+int Item_func_buffer::Transporter::add_edge_buffer(
+  double x3, double y3, bool round_p1, bool round_p2)
+{
+  Gcalc_operation_transporter trn(m_fn, m_heap);
+  double e1_x, e1_y, e2_x, e2_y, p1_x, p1_y, p2_x, p2_y;
+  double e1e2;
+  double sin1, cos1;
+  double x_n, y_n;
+  bool empty_gap1, empty_gap2;
+
+  ++m_nshapes;
+  if (trn.start_simple_poly())
+    return 1;
+
+  calculate_perpendicular(x1, y1, x2, y2, m_d, &e1_x, &e1_y, &p1_x, &p1_y);
+  calculate_perpendicular(x3, y3, x2, y2, m_d, &e2_x, &e2_y, &p2_x, &p2_y);
+
+  e1e2= e1_x * e2_y - e2_x * e1_y;
+  sin1= n_sinus[1];
+  cos1= n_sinus[31];
+  if (e1e2 < 0)
+  {
+    empty_gap2= false;
+    x_n= x2 + p2_x * cos1 - p2_y * sin1;
+    y_n= y2 + p2_y * cos1 + p2_x * sin1;
+    if (fill_gap(&trn, x2, y2, -p1_x,-p1_y, p2_x,p2_y, m_d, &empty_gap1) ||
+        trn.add_point(x2 + p2_x, y2 + p2_y) ||
+        trn.add_point(x_n, y_n))
+      return 1;
+  }
+  else
+  {
+    x_n= x2 - p2_x * cos1 - p2_y * sin1;
+    y_n= y2 - p2_y * cos1 + p2_x * sin1;
+    if (trn.add_point(x_n, y_n) ||
+        trn.add_point(x2 - p2_x, y2 - p2_y) ||
+        fill_gap(&trn, x2, y2, -p2_x, -p2_y, p1_x, p1_y, m_d, &empty_gap2))
+      return 1;
+    empty_gap1= false;
+  }
+  if ((!empty_gap2 && trn.add_point(x2 + p1_x, y2 + p1_y)) ||
+      trn.add_point(x1 + p1_x, y1 + p1_y))
+    return 1;
+
+  if (round_p1 && fill_half_circle(&trn, x1, y1, p1_x, p1_y))
+    return 1;
+
+  if (trn.add_point(x1 - p1_x, y1 - p1_y) ||
+      (!empty_gap1 && trn.add_point(x2 - p1_x, y2 - p1_y)))
+    return 1;
+  return trn.complete_simple_poly();
+}
+
+
+int Item_func_buffer::Transporter::add_last_edge_buffer()
+{
+  Gcalc_operation_transporter trn(m_fn, m_heap);
+  double e1_x, e1_y, p1_x, p1_y;
+
+  ++m_nshapes;
+  if (trn.start_simple_poly())
+    return 1;
+
+  calculate_perpendicular(x1, y1, x2, y2, m_d, &e1_x, &e1_y, &p1_x, &p1_y);
+
+  if (trn.add_point(x1 + p1_x, y1 + p1_y) ||
+      trn.add_point(x1 - p1_x, y1 - p1_y) ||
+      trn.add_point(x2 - p1_x, y2 - p1_y) ||
+      fill_half_circle(&trn, x2, y2, -p1_x, -p1_y) ||
+      trn.add_point(x2 + p1_x, y2 + p1_y))
+    return 1;
+  return trn.complete_simple_poly();
+}
+
+
+int Item_func_buffer::Transporter::add_point_buffer(double x, double y)
+{
+  Gcalc_operation_transporter trn(m_fn, m_heap);
+
+  m_nshapes++;
+  if (trn.start_simple_poly())
+    return 1;
+  if (trn.add_point(x - m_d, y) ||
+      fill_half_circle(&trn, x, y, -m_d, 0.0) ||
+      trn.add_point(x + m_d, y) ||
+      fill_half_circle(&trn, x, y, m_d, 0.0))
+    return 1;
+  return trn.complete_simple_poly();
+}
+
+
+int Item_func_buffer::Transporter::start_line()
+{
+  if (buffer_op == Gcalc_function::op_difference)
+  {
+    skip_line= TRUE;
+    return 0;
+  }
+  
+  m_nshapes= 0;
+
+  if (m_fn->reserve_op_buffer(2))
+    return 1;
+  last_shape_pos= m_fn->get_next_expression_pos();
+  m_fn->add_operation(buffer_op, 0);
+  m_npoints= 0;
+  int_start_line();
+  return 0;
+}
+
+
+int Item_func_buffer::Transporter::start_poly()
+{
+  m_nshapes= 1;
+
+  if (m_fn->reserve_op_buffer(2))
+    return 1;
+  last_shape_pos= m_fn->get_next_expression_pos();
+  m_fn->add_operation(buffer_op, 0);
+  return Gcalc_operation_transporter::start_poly();
+}
+
+
+int Item_func_buffer::Transporter::complete_poly()
+{
+  if (Gcalc_operation_transporter::complete_poly())
+    return 1;
+  m_fn->add_operands_to_op(last_shape_pos, m_nshapes);
+  return 0;
+}
+
+
+int Item_func_buffer::Transporter::start_ring()
+{
+  m_npoints= 0;
+  return Gcalc_operation_transporter::start_ring();
+}
+
+
+int Item_func_buffer::Transporter::start_collection(int n_objects)
+{
+  if (m_fn->reserve_op_buffer(1))
+    return 1;
+  m_fn->add_operation(Gcalc_function::op_union, n_objects);
+  return 0;
+}
+
+
+int Item_func_buffer::Transporter::add_point(double x, double y)
+{
+  if (skip_line)
+    return 0;
+
+  if (m_npoints && x == x2 && y == y2)
+    return 0;
+
+  ++m_npoints;
+
+  if (m_npoints == 1)
+  {
+    x00= x;
+    y00= y;
+  }
+  else if (m_npoints == 2)
+  {
+    x01= x;
+    y01= y;
+  }
+  else if (add_edge_buffer(x, y, (m_npoints == 3) && line_started(), false))
+    return 1;
+
+  x1= x2;
+  y1= y2;
+  x2= x;
+  y2= y;
+
+  return line_started() ? 0 : Gcalc_operation_transporter::add_point(x, y);
+}
+
+
+int Item_func_buffer::Transporter::complete()
+{
+  if (m_npoints)
+  {
+    if (m_npoints == 1)
+    {
+      if (add_point_buffer(x2, y2))
+        return 1;
+    }
+    else if (m_npoints == 2)
+    {
+      if (add_edge_buffer(x1, y1, true, true))
+        return 1;
+    }
+    else if (line_started())
+    {
+      if (add_last_edge_buffer())
+        return 1;
+    }
+    else
+    {
+      if (x2 != x00 || y2 != y00)
+      {
+        if (add_edge_buffer(x00, y00, false, false))
+          return 1;
+        x1= x2;
+        y1= y2;
+        x2= x00;
+        y2= y00;
+      }
+      if (add_edge_buffer(x01, y01, false, false))
+        return 1;
+    }
+  }
+
+  return 0;
+}
+
+
+int Item_func_buffer::Transporter::complete_line()
+{
+  if (!skip_line)
+  {
+    if (complete())
+      return 1;
+    int_complete_line();
+    m_fn->add_operands_to_op(last_shape_pos, m_nshapes);
+  }
+  skip_line= FALSE;
+  return 0;
+}
+
+
+int Item_func_buffer::Transporter::complete_ring()
+{
+  return complete() ||
+         Gcalc_operation_transporter::complete_ring();
+}
+
+
+String *Item_func_buffer::val_str(String *str_value)
+{
+  DBUG_ENTER("Item_func_buffer::val_str");
+  DBUG_ASSERT(fixed == 1);
+  String *obj= args[0]->val_str(&tmp_value);
+  double dist= args[1]->val_real();
+  Geometry_buffer buffer;
+  Geometry *g;
+  uint32 srid= 0;
+  String *str_result= NULL;
+  Transporter trn(&func, &collector, dist);
+  MBR mbr;
+  const char *c_end;
+
+  null_value= 1;
+  if (args[0]->null_value || args[1]->null_value ||
+      !(g= Geometry::construct(&buffer, obj->ptr(), obj->length())) ||
+      g->get_mbr(&mbr, &c_end))
+    goto mem_error;
+
+  if (dist > 0.0)
+    mbr.buffer(dist);
+  collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax);
+  /*
+    If the distance given is 0, the Buffer function is in fact NOOP,
+    so it's natural just to return the argument1.
+    Besides, internal calculations here can't handle zero distance anyway.
+  */
+  if (fabs(dist) < GIS_ZERO)
+  {
+    null_value= 0;
+    str_result= obj;
+    goto mem_error;
+  }
+
+  if (g->store_shapes(&trn))
+    goto mem_error;
+
+  collector.prepare_operation();
+  if (func.alloc_states())
+    goto mem_error;
+  operation.init(&func);
+  operation.killed= (int *) &(current_thd->killed);
+
+  if (operation.count_all(&collector) ||
+      operation.get_result(&res_receiver))
+    goto mem_error;
+
+
+  str_value->set_charset(&my_charset_bin);
+  if (str_value->reserve(SRID_SIZE, 512))
+    goto mem_error;
+  str_value->length(0);
+  str_value->q_append(srid);
+
+  if (!Geometry::create_from_opresult(&buffer, str_value, res_receiver))
+    goto mem_error;
+
+  null_value= 0;
+  str_result= str_value;
+mem_error:
+  collector.reset();
+  func.reset();
+  res_receiver.reset();
+  DBUG_RETURN(str_result);
+}
+
+
 longlong Item_func_isempty::val_int()
 {
   DBUG_ASSERT(fixed == 1);
@@ -569,21 +1334,63 @@ longlong Item_func_isempty::val_int()
 }
 
 
-/**
-  @todo
-    Ramil or Holyfoot, add real IsSimple calculation
-*/
 longlong Item_func_issimple::val_int()
 {
-  DBUG_ASSERT(fixed == 1);
-  String tmp;
   String *swkb= args[0]->val_str(&tmp);
   Geometry_buffer buffer;
+  Gcalc_operation_transporter trn(&func, &collector);
+  Geometry *g;
+  int result= 1;
+  const Gcalc_scan_iterator::event_point *ev;
+  MBR mbr;
+  const char *c_end;
+
+  DBUG_ENTER("Item_func_issimple::val_int");
+  DBUG_ASSERT(fixed == 1);
   
-  null_value= args[0]->null_value ||
-              !(Geometry::construct(&buffer, swkb->ptr(), swkb->length()));
-  /* TODO: Ramil or Holyfoot, add real IsSimple calculation */
-  return 0;
+  if ((null_value= args[0]->null_value) ||
+      !(g= Geometry::construct(&buffer, swkb->ptr(), swkb->length())))
+    DBUG_RETURN(0);
+
+  g->get_mbr(&mbr, &c_end);
+  collector.set_extent(mbr.xmin, mbr.xmax, mbr.ymin, mbr.ymax);
+
+  if (g->get_class_info()->m_type_id == Geometry::wkb_point)
+    DBUG_RETURN(1);
+
+  if (g->store_shapes(&trn))
+    goto mem_error;
+
+  collector.prepare_operation();
+  scan_it.init(&collector);
+
+  while (scan_it.more_points())
+  {
+    if (scan_it.step())
+      goto mem_error;
+
+    ev= scan_it.get_events();
+    if (ev->simple_event())
+      continue;
+
+    if ((ev->event == scev_thread || ev->event == scev_single_point) &&
+        !ev->get_next())
+      continue;
+
+    if (ev->event == scev_two_threads && !ev->get_next()->get_next())
+      continue;
+
+    result= 0;
+    break;
+  }
+
+  collector.reset();
+  func.reset();
+  scan_it.reset();
+  DBUG_RETURN(result);
+mem_error:
+  null_value= 1;
+  DBUG_RETURN(0);
 }
 
 
@@ -731,12 +1538,13 @@ double Item_func_glength::val_real()
   String *swkb= args[0]->val_str(&value);
   Geometry_buffer buffer;
   Geometry *geom;
+  const char *end;
 
   null_value= (!swkb || 
 	       !(geom= Geometry::construct(&buffer,
                                            swkb->ptr(),
                                            swkb->length())) ||
-	       geom->geom_length(&res));
+	       geom->geom_length(&res, &end));
   return res;
 }
 
@@ -755,4 +1563,168 @@ longlong Item_func_srid::val_int()
   return (longlong) (uint4korr(swkb->ptr()));
 }
 
+
+double Item_func_distance::val_real()
+{
+  bool cur_point_edge;
+  const Gcalc_scan_iterator::point *evpos;
+  const Gcalc_heap::Info *cur_point, *dist_point;
+  const Gcalc_scan_iterator::event_point *ev;
+  double t, distance, cur_distance;
+  double x1, x2, y1, y2;
+  double ex, ey, vx, vy, e_sqrlen;
+  uint obj2_si;
+  Gcalc_operation_transporter trn(&func, &collector);
+
+  DBUG_ENTER("Item_func_distance::val_real");
+  DBUG_ASSERT(fixed == 1);
+  String *res1= args[0]->val_str(&tmp_value1);
+  String *res2= args[1]->val_str(&tmp_value2);
+  Geometry_buffer buffer1, buffer2;
+  Geometry *g1, *g2;
+  MBR mbr1, mbr2;
+  const char *c_end;
+
+
+  if ((null_value= (args[0]->null_value || args[1]->null_value ||
+          !(g1= Geometry::construct(&buffer1, res1->ptr(), res1->length())) ||
+          !(g2= Geometry::construct(&buffer2, res2->ptr(), res2->length())))))
+    goto mem_error;
+
+  g1->get_mbr(&mbr1, &c_end);
+  g2->get_mbr(&mbr2, &c_end);
+  mbr1.add_mbr(&mbr2);
+  collector.set_extent(mbr1.xmin, mbr1.xmax, mbr1.ymin, mbr1.ymax);
+
+  if ((g1->get_class_info()->m_type_id == Geometry::wkb_point) &&
+      (g2->get_class_info()->m_type_id == Geometry::wkb_point))
+  {
+    if (((Gis_point *) g1)->get_xy(&x1, &y1) ||
+        ((Gis_point *) g2)->get_xy(&x2, &y2))
+      goto mem_error;
+    ex= x2 - x1;
+    ey= y2 - y1;
+    DBUG_RETURN(sqrt(ex * ex + ey * ey));
+  }
+
+  if (func.reserve_op_buffer(1))
+    goto mem_error;
+  func.add_operation(Gcalc_function::op_intersection, 2);
+
+  if (g1->store_shapes(&trn))
+    goto mem_error;
+  obj2_si= func.get_nshapes();
+  if (g2->store_shapes(&trn) || func.alloc_states())
+    goto mem_error;
+
+  if (obj2_si == 0 || func.get_nshapes() == obj2_si)
+  {
+    distance= 0.0;
+    null_value= 1;
+    goto exit;
+  }
+
+
+  collector.prepare_operation();
+  scan_it.init(&collector);
+
+  distance= DBL_MAX;
+  while (scan_it.more_points())
+  {
+    if (scan_it.step())
+      goto mem_error;
+    evpos= scan_it.get_event_position();
+    ev= scan_it.get_events();
+
+    if (ev->simple_event())
+    {
+      cur_point= ev->pi;
+      goto count_distance;
+    }
+    /*
+       handling intersection we only need to check if it's the intersecion
+       of objects 1 and 2. In this case distance is 0
+    */
+    cur_point= NULL;
+
+    /*
+       having these events we need to check for possible intersection
+       of objects
+       scev_thread | scev_two_threads | scev_single_point
+    */
+    func.clear_i_states();
+    for (Gcalc_point_iterator pit(&scan_it); pit.point() != evpos; ++pit)
+    {
+      gcalc_shape_info si= pit.point()->get_shape();
+      if ((func.get_shape_kind(si) == Gcalc_function::shape_polygon))
+        func.invert_i_state(si);
+    }
+
+    func.clear_b_states();
+    for (; ev; ev= ev->get_next())
+    {
+      if (ev->event != scev_intersection)
+        cur_point= ev->pi;
+      func.set_b_state(ev->get_shape());
+      if (func.count())
+      {
+        /* Point of one object is inside the other - intersection found */
+        distance= 0;
+        goto exit;
+      }
+    }
+
+    if (!cur_point)
+      continue;
+
+count_distance:
+    if (cur_point->shape >= obj2_si)
+      continue;
+    cur_point_edge= !cur_point->is_bottom();
+
+    for (dist_point= collector.get_first(); dist_point; dist_point= dist_point->get_next())
+    {
+      /* We only check vertices of object 2 */
+      if (dist_point->shape < obj2_si)
+        continue;
+
+      /* if we have an edge to check */
+      if (dist_point->left)
+      {
+        t= count_edge_t(dist_point, dist_point->left, cur_point,
+                        ex, ey, vx, vy, e_sqrlen);
+        if ((t>0.0) && (t<1.0))
+        {
+          cur_distance= distance_to_line(ex, ey, vx, vy, e_sqrlen);
+          if (distance > cur_distance)
+            distance= cur_distance;
+        }
+      }
+      if (cur_point_edge)
+      {
+        t= count_edge_t(cur_point, cur_point->left, dist_point,
+                        ex, ey, vx, vy, e_sqrlen);
+        if ((t>0.0) && (t<1.0))
+        {
+          cur_distance= distance_to_line(ex, ey, vx, vy, e_sqrlen);
+          if (distance > cur_distance)
+            distance= cur_distance;
+        }
+      }
+      cur_distance= distance_points(cur_point, dist_point);
+      if (distance > cur_distance)
+        distance= cur_distance;
+    }
+  }
+exit:
+  collector.reset();
+  func.reset();
+  scan_it.reset();
+  DBUG_RETURN(distance);
+mem_error:
+  null_value= 1;
+  DBUG_RETURN(0);
+}
+
+
 #endif /*HAVE_SPATIAL*/
diff --git a/sql/item_geofunc.h b/sql/item_geofunc.h
index 0bcd933e52b..d2030ee7bb9 100644
--- a/sql/item_geofunc.h
+++ b/sql/item_geofunc.h
@@ -1,4 +1,8 @@
-/* Copyright (c) 2003, 2011, Oracle and/or its affiliates.
+#ifndef ITEM_GEOFUNC_INCLUDED
+#define ITEM_GEOFUNC_INCLUDED
+
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (C) 2011 Monty Program Ab.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -10,8 +14,8 @@
    GNU General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+   along with this program; if not, write to the Free Software Foundation,
+   51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
 
 
 /* This file defines all spatial functions */
@@ -22,6 +26,9 @@
 #pragma interface			/* gcc class implementation */
 #endif
 
+#include "gcalc_slicescan.h"
+#include "gcalc_tools.h"
+
 class Item_geometry_func: public Item_str_func
 {
 public:
@@ -41,7 +48,7 @@ class Item_func_geometry_from_text: public Item_geometry_func
 public:
   Item_func_geometry_from_text(Item *a) :Item_geometry_func(a) {}
   Item_func_geometry_from_text(Item *a, Item *srid) :Item_geometry_func(a, srid) {}
-  const char *func_name() const { return "geometryfromtext"; }
+  const char *func_name() const { return "st_geometryfromtext"; }
   String *val_str(String *);
 };
 
@@ -50,7 +57,7 @@ class Item_func_geometry_from_wkb: public Item_geometry_func
 public:
   Item_func_geometry_from_wkb(Item *a): Item_geometry_func(a) {}
   Item_func_geometry_from_wkb(Item *a, Item *srid): Item_geometry_func(a, srid) {}
-  const char *func_name() const { return "geometryfromwkb"; }
+  const char *func_name() const { return "st_geometryfromwkb"; }
   String *val_str(String *);
 };
 
@@ -58,7 +65,7 @@ class Item_func_as_wkt: public Item_str_func
 {
 public:
   Item_func_as_wkt(Item *a): Item_str_func(a) {}
-  const char *func_name() const { return "astext"; }
+  const char *func_name() const { return "st_astext"; }
   String *val_str(String *);
   void fix_length_and_dec();
 };
@@ -67,7 +74,7 @@ class Item_func_as_wkb: public Item_geometry_func
 {
 public:
   Item_func_as_wkb(Item *a): Item_geometry_func(a) {}
-  const char *func_name() const { return "aswkb"; }
+  const char *func_name() const { return "st_aswkb"; }
   String *val_str(String *);
   enum_field_types field_type() const  { return MYSQL_TYPE_BLOB; }
 };
@@ -77,11 +84,12 @@ class Item_func_geometry_type: public Item_str_func
 public:
   Item_func_geometry_type(Item *a): Item_str_func(a) {}
   String *val_str(String *);
-  const char *func_name() const { return "geometrytype"; }
+  const char *func_name() const { return "st_geometrytype"; }
   void fix_length_and_dec() 
   {
-     max_length=20; // "GeometryCollection" is the most long
-     maybe_null= 1;
+    // "GeometryCollection" is the longest
+    max_length= 20;
+    maybe_null= 1;
   };
 };
 
@@ -89,7 +97,7 @@ class Item_func_centroid: public Item_geometry_func
 {
 public:
   Item_func_centroid(Item *a): Item_geometry_func(a) {}
-  const char *func_name() const { return "centroid"; }
+  const char *func_name() const { return "st_centroid"; }
   String *val_str(String *);
   Field::geometry_type get_geometry_type() const;
 };
@@ -98,7 +106,7 @@ class Item_func_envelope: public Item_geometry_func
 {
 public:
   Item_func_envelope(Item *a): Item_geometry_func(a) {}
-  const char *func_name() const { return "envelope"; }
+  const char *func_name() const { return "st_envelope"; }
   String *val_str(String *);
   Field::geometry_type get_geometry_type() const;
 };
@@ -108,7 +116,7 @@ class Item_func_point: public Item_geometry_func
 public:
   Item_func_point(Item *a, Item *b): Item_geometry_func(a, b) {}
   Item_func_point(Item *a, Item *b, Item *srid): Item_geometry_func(a, b, srid) {}
-  const char *func_name() const { return "point"; }
+  const char *func_name() const { return "st_point"; }
   String *val_str(String *);
   Field::geometry_type get_geometry_type() const;
 };
@@ -124,11 +132,11 @@ public:
     switch (decomp_func)
     {
       case SP_STARTPOINT:
-        return "startpoint";
+        return "st_startpoint";
       case SP_ENDPOINT:
-        return "endpoint";
+        return "st_endpoint";
       case SP_EXTERIORRING:
-        return "exteriorring";
+        return "st_exteriorring";
       default:
 	DBUG_ASSERT(0);  // Should never happened
         return "spatial_decomp_unknown"; 
@@ -148,11 +156,11 @@ public:
     switch (decomp_func_n)
     {
       case SP_POINTN:
-        return "pointn";
+        return "st_pointn";
       case SP_GEOMETRYN:
-        return "geometryn";
+        return "st_geometryn";
       case SP_INTERIORRINGN:
-        return "interiorringn";
+        return "st_interiorringn";
       default:
 	DBUG_ASSERT(0);  // Should never happened
         return "spatial_decomp_n_unknown"; 
@@ -191,57 +199,53 @@ public:
     }
   }
  
-  const char *func_name() const { return "multipoint"; }
+  const char *func_name() const { return "st_multipoint"; }
 };
 
+
 /*
   Spatial relations
 */
 
-class Item_func_spatial_rel: public Item_bool_func2
+class Item_func_spatial_mbr_rel: public Item_bool_func2
 {
   enum Functype spatial_rel;
 public:
-  Item_func_spatial_rel(Item *a,Item *b, enum Functype sp_rel) :
+  Item_func_spatial_mbr_rel(Item *a,Item *b, enum Functype sp_rel) :
     Item_bool_func2(a,b) { spatial_rel = sp_rel; }
   longlong val_int();
   enum Functype functype() const 
   { 
-    switch (spatial_rel) {
-    case SP_CONTAINS_FUNC:
-      return SP_WITHIN_FUNC;
-    case SP_WITHIN_FUNC:
-      return SP_CONTAINS_FUNC;
-    default:
-      return spatial_rel;
-    }
+    return spatial_rel;
   }
   enum Functype rev_functype() const { return spatial_rel; }
-  const char *func_name() const 
-  { 
-    switch (spatial_rel) {
-    case SP_CONTAINS_FUNC:
-      return "contains";
-    case SP_WITHIN_FUNC:
-      return "within";
-    case SP_EQUALS_FUNC:
-      return "equals";
-    case SP_DISJOINT_FUNC:
-      return "disjoint";
-    case SP_INTERSECTS_FUNC:
-      return "intersects";
-    case SP_TOUCHES_FUNC:
-      return "touches";
-    case SP_CROSSES_FUNC:
-      return "crosses";
-    case SP_OVERLAPS_FUNC:
-      return "overlaps";
-    default:
-      DBUG_ASSERT(0);  // Should never happened
-      return "sp_unknown"; 
-    }
+  const char *func_name() const;
+  virtual inline void print(String *str, enum_query_type query_type)
+  {
+    Item_func::print(str, query_type);
   }
+  void fix_length_and_dec() { maybe_null= 1; }
+  bool is_null() { (void) val_int(); return null_value; }
+};
+
 
+class Item_func_spatial_rel: public Item_bool_func2
+{
+  enum Functype spatial_rel;
+  Gcalc_heap collector;
+  Gcalc_scan_iterator scan_it;
+  Gcalc_function func;
+  String tmp_value1,tmp_value2;
+public:
+  Item_func_spatial_rel(Item *a,Item *b, enum Functype sp_rel);
+  virtual ~Item_func_spatial_rel();
+  longlong val_int();
+  enum Functype functype() const 
+  { 
+    return spatial_rel;
+  }
+  enum Functype rev_functype() const { return spatial_rel; }
+  const char *func_name() const;
   virtual inline void print(String *str, enum_query_type query_type)
   {
     Item_func::print(str, query_type);
@@ -251,23 +255,107 @@ public:
   bool is_null() { (void) val_int(); return null_value; }
 };
 
+
+/*
+  Spatial operations
+*/
+
+class Item_func_spatial_operation: public Item_geometry_func
+{
+public:
+  Gcalc_function::op_type spatial_op;
+  Gcalc_heap collector;
+  Gcalc_function func;
+
+  Gcalc_result_receiver res_receiver;
+  Gcalc_operation_reducer operation;
+  String tmp_value1,tmp_value2;
+public:
+  Item_func_spatial_operation(Item *a,Item *b, Gcalc_function::op_type sp_op) :
+    Item_geometry_func(a, b), spatial_op(sp_op)
+  {}
+  virtual ~Item_func_spatial_operation();
+  String *val_str(String *);
+  const char *func_name() const;
+  virtual inline void print(String *str, enum_query_type query_type)
+  {
+    Item_func::print(str, query_type);
+  }
+};
+
+
+class Item_func_buffer: public Item_geometry_func
+{
+protected:
+  class Transporter : public Gcalc_operation_transporter
+  {
+    int m_npoints;
+    double m_d;
+    double x1,y1,x2,y2;
+    double x00,y00,x01,y01;
+    int add_edge_buffer(double x3, double y3, bool round_p1, bool round_p2);
+    int add_last_edge_buffer();
+    int add_point_buffer(double x, double y);
+    int complete();
+    int m_nshapes;
+    Gcalc_function::op_type buffer_op;
+    int last_shape_pos;
+    bool skip_line;
+
+  public:
+    Transporter(Gcalc_function *fn, Gcalc_heap *heap, double d) :
+      Gcalc_operation_transporter(fn, heap), m_npoints(0), m_d(d),
+      m_nshapes(0), buffer_op((d > 0.0) ? Gcalc_function::op_union :
+                                          Gcalc_function::op_difference),
+      skip_line(FALSE)
+    {}
+    int single_point(double x, double y);
+    int start_line();
+    int complete_line();
+    int start_poly();
+    int complete_poly();
+    int start_ring();
+    int complete_ring();
+    int add_point(double x, double y);
+
+    int start_collection(int n_objects);
+  };
+  Gcalc_heap collector;
+  Gcalc_function func;
+
+  Gcalc_result_receiver res_receiver;
+  Gcalc_operation_reducer operation;
+  String tmp_value;
+
+public:
+  Item_func_buffer(Item *obj, Item *distance):
+    Item_geometry_func(obj, distance) {}
+  const char *func_name() const { return "st_buffer"; }
+  String *val_str(String *);
+};
+
+
 class Item_func_isempty: public Item_bool_func
 {
 public:
   Item_func_isempty(Item *a): Item_bool_func(a) {}
   longlong val_int();
   optimize_type select_optimize() const { return OPTIMIZE_NONE; }
-  const char *func_name() const { return "isempty"; }
+  const char *func_name() const { return "st_isempty"; }
   void fix_length_and_dec() { maybe_null= 1; }
 };
 
 class Item_func_issimple: public Item_bool_func
 {
+  Gcalc_heap collector;
+  Gcalc_function func;
+  Gcalc_scan_iterator scan_it;
+  String tmp;
 public:
   Item_func_issimple(Item *a): Item_bool_func(a) {}
   longlong val_int();
   optimize_type select_optimize() const { return OPTIMIZE_NONE; }
-  const char *func_name() const { return "issimple"; }
+  const char *func_name() const { return "st_issimple"; }
   void fix_length_and_dec() { maybe_null= 1; }
 };
 
@@ -277,7 +365,7 @@ public:
   Item_func_isclosed(Item *a): Item_bool_func(a) {}
   longlong val_int();
   optimize_type select_optimize() const { return OPTIMIZE_NONE; }
-  const char *func_name() const { return "isclosed"; }
+  const char *func_name() const { return "st_isclosed"; }
   void fix_length_and_dec() { maybe_null= 1; }
 };
 
@@ -287,7 +375,7 @@ class Item_func_dimension: public Item_int_func
 public:
   Item_func_dimension(Item *a): Item_int_func(a) {}
   longlong val_int();
-  const char *func_name() const { return "dimension"; }
+  const char *func_name() const { return "st_dimension"; }
   void fix_length_and_dec() { max_length= 10; maybe_null= 1; }
 };
 
@@ -297,7 +385,7 @@ class Item_func_x: public Item_real_func
 public:
   Item_func_x(Item *a): Item_real_func(a) {}
   double val_real();
-  const char *func_name() const { return "x"; }
+  const char *func_name() const { return "st_x"; }
   void fix_length_and_dec() 
   { 
     Item_real_func::fix_length_and_dec();
@@ -312,7 +400,7 @@ class Item_func_y: public Item_real_func
 public:
   Item_func_y(Item *a): Item_real_func(a) {}
   double val_real();
-  const char *func_name() const { return "y"; }
+  const char *func_name() const { return "st_y"; }
   void fix_length_and_dec() 
   { 
     Item_real_func::fix_length_and_dec();
@@ -327,7 +415,7 @@ class Item_func_numgeometries: public Item_int_func
 public:
   Item_func_numgeometries(Item *a): Item_int_func(a) {}
   longlong val_int();
-  const char *func_name() const { return "numgeometries"; }
+  const char *func_name() const { return "st_numgeometries"; }
   void fix_length_and_dec() { max_length= 10; maybe_null= 1; }
 };
 
@@ -338,7 +426,7 @@ class Item_func_numinteriorring: public Item_int_func
 public:
   Item_func_numinteriorring(Item *a): Item_int_func(a) {}
   longlong val_int();
-  const char *func_name() const { return "numinteriorrings"; }
+  const char *func_name() const { return "st_numinteriorrings"; }
   void fix_length_and_dec() { max_length= 10; maybe_null= 1; }
 };
 
@@ -349,7 +437,7 @@ class Item_func_numpoints: public Item_int_func
 public:
   Item_func_numpoints(Item *a): Item_int_func(a) {}
   longlong val_int();
-  const char *func_name() const { return "numpoints"; }
+  const char *func_name() const { return "st_numpoints"; }
   void fix_length_and_dec() { max_length= 10; maybe_null= 1; }
 };
 
@@ -360,7 +448,7 @@ class Item_func_area: public Item_real_func
 public:
   Item_func_area(Item *a): Item_real_func(a) {}
   double val_real();
-  const char *func_name() const { return "area"; }
+  const char *func_name() const { return "st_area"; }
   void fix_length_and_dec() 
   { 
     Item_real_func::fix_length_and_dec();
@@ -375,7 +463,7 @@ class Item_func_glength: public Item_real_func
 public:
   Item_func_glength(Item *a): Item_real_func(a) {}
   double val_real();
-  const char *func_name() const { return "glength"; }
+  const char *func_name() const { return "st_length"; }
   void fix_length_and_dec() 
   { 
     Item_real_func::fix_length_and_dec();
@@ -394,11 +482,26 @@ public:
   void fix_length_and_dec() { max_length= 10; maybe_null= 1; }
 };
 
+
+class Item_func_distance: public Item_real_func
+{
+  String tmp_value1;
+  String tmp_value2;
+  Gcalc_heap collector;
+  Gcalc_function func;
+  Gcalc_scan_iterator scan_it;
+public:
+  Item_func_distance(Item *a, Item *b): Item_real_func(a, b) {}
+  double val_real();
+  const char *func_name() const { return "st_distance"; }
+};
+
 #define GEOM_NEW(thd, obj_constructor) new (thd->mem_root) obj_constructor
 
 #else /*HAVE_SPATIAL*/
 
 #define GEOM_NEW(thd, obj_constructor) NULL
 
-#endif
+#endif /*HAVE_SPATIAL*/
+#endif /*ITEM_GEOFUNC_INCLUDED*/
 
diff --git a/sql/item_row.cc b/sql/item_row.cc
index 4040dbff7c6..e2f62bfc868 100644
--- a/sql/item_row.cc
+++ b/sql/item_row.cc
@@ -88,12 +88,29 @@ bool Item_row::fix_fields(THD *thd, Item **ref)
     }
     maybe_null|= item->maybe_null;
     with_sum_func= with_sum_func || item->with_sum_func;
+    with_field= with_field || item->with_field;
   }
   fixed= 1;
   return FALSE;
 }
 
 
+bool
+Item_row::eval_not_null_tables(uchar *opt_arg)
+{
+  Item **arg,**arg_end;
+  not_null_tables_cache= 0;
+  if (arg_count)
+  {		
+    for (arg= items, arg_end= items+arg_count; arg != arg_end ; arg++)
+    {
+      not_null_tables_cache|= (*arg)->not_null_tables();
+    }
+  }
+  return FALSE;
+}
+
+
 void Item_row::cleanup()
 {
   DBUG_ENTER("Item_row::cleanup");
@@ -129,6 +146,22 @@ void Item_row::update_used_tables()
   }
 }
 
+
+void Item_row::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  used_tables_cache= 0;
+  const_item_cache= 1;
+  not_null_tables_cache= 0;
+  for (uint i= 0; i < arg_count; i++)
+  {
+    items[i]->fix_after_pullout(new_parent, &items[i]);
+    used_tables_cache|= items[i]->used_tables();
+    const_item_cache&= items[i]->const_item();
+    not_null_tables_cache|= items[i]->not_null_tables();
+  }
+}
+
+
 bool Item_row::check_cols(uint c)
 {
   if (c != arg_count)
diff --git a/sql/item_row.h b/sql/item_row.h
index 15dadb3d01c..6366ccba102 100644
--- a/sql/item_row.h
+++ b/sql/item_row.h
@@ -62,17 +62,20 @@ public:
     return 0;
   };
   bool fix_fields(THD *thd, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void cleanup();
   void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields);
   table_map used_tables() const { return used_tables_cache; };
   bool const_item() const { return const_item_cache; };
   enum Item_result result_type() const { return ROW_RESULT; }
+  Item_result cmp_type() const { return ROW_RESULT; }
   void update_used_tables();
   table_map not_null_tables() const { return not_null_tables_cache; }
   virtual void print(String *str, enum_query_type query_type);
 
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
   Item *transform(Item_transformer transformer, uchar *arg);
+  bool eval_not_null_tables(uchar *opt_arg);
 
   uint cols() { return arg_count; }
   Item* element_index(uint i) { return items[i]; }
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 1488fe25fd9..23abc8a07cf 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -2253,6 +2253,7 @@ void Item_func_make_set::fix_length_and_dec()
   not_null_tables_cache&= item->not_null_tables();
   const_item_cache&=	  item->const_item();
   with_sum_func= with_sum_func || item->with_sum_func;
+  with_field= with_field || item->with_field;
 }
 
 
@@ -3026,16 +3027,16 @@ String *Item_load_file::val_str(String *str)
 			func_name(), current_thd->variables.max_allowed_packet);
     goto err;
   }
-  if (tmp_value.alloc(stat_info.st_size))
+  if (tmp_value.alloc((size_t)stat_info.st_size))
     goto err;
   if ((file = my_open(file_name->ptr(), O_RDONLY, MYF(0))) < 0)
     goto err;
-  if (my_read(file, (uchar*) tmp_value.ptr(), stat_info.st_size, MYF(MY_NABP)))
+  if (my_read(file, (uchar*) tmp_value.ptr(), (size_t)stat_info.st_size, MYF(MY_NABP)))
   {
     my_close(file, MYF(0));
     goto err;
   }
-  tmp_value.length(stat_info.st_size);
+  tmp_value.length((uint32)stat_info.st_size);
   my_close(file, MYF(0));
   null_value = 0;
   DBUG_RETURN(&tmp_value);
@@ -3506,3 +3507,768 @@ String *Item_func_uuid::val_str(String *str)
 
   return str;
 }
+
+
+Item_func_dyncol_create::Item_func_dyncol_create(List<Item> &args,
+                                                 DYNCALL_CREATE_DEF *dfs)
+  : Item_str_func(args), defs(dfs), vals(0), nums(0)
+{
+  DBUG_ASSERT((args.elements & 0x1) == 0); // even number of arguments
+}
+
+
+bool Item_func_dyncol_create::fix_fields(THD *thd, Item **ref)
+{
+  bool res= Item_func::fix_fields(thd, ref); // no need Item_str_func here
+  vals= (DYNAMIC_COLUMN_VALUE *) alloc_root(thd->mem_root,
+                                            sizeof(DYNAMIC_COLUMN_VALUE) *
+                                            (arg_count / 2));
+  nums= (uint *) alloc_root(thd->mem_root,
+                            sizeof(uint) * (arg_count / 2));
+  return res || vals == 0 || nums == 0;
+}
+
+
+void Item_func_dyncol_create::fix_length_and_dec()
+{
+  maybe_null= TRUE;
+  collation.set(&my_charset_bin);
+  decimals= 0;
+}
+
+void Item_func_dyncol_create::prepare_arguments()
+{
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String *res, tmp(buff, sizeof(buff), &my_charset_bin);
+  uint column_count= (arg_count / 2);
+  uint i;
+  my_decimal dtmp, *dres;
+
+  /* get values */
+  for (i= 0; i < column_count; i++)
+  {
+    uint valpos= i * 2 + 1;
+    DYNAMIC_COLUMN_TYPE type= defs[i].type;
+    if (type == DYN_COL_NULL) // auto detect
+    {
+      /*
+        We don't have a default here to ensure we get a warning if
+        one adds a new not handled MYSQL_TYPE_...
+      */
+      switch (args[valpos]->field_type()) {
+      case MYSQL_TYPE_DECIMAL:
+      case MYSQL_TYPE_NEWDECIMAL:
+        type= DYN_COL_DECIMAL;
+        break;
+      case MYSQL_TYPE_TINY:
+      case MYSQL_TYPE_SHORT:
+      case MYSQL_TYPE_LONG:
+      case MYSQL_TYPE_LONGLONG:
+      case MYSQL_TYPE_INT24:
+      case MYSQL_TYPE_YEAR:
+      case MYSQL_TYPE_BIT:
+        type= args[valpos]->unsigned_flag ? DYN_COL_UINT : DYN_COL_INT;
+        break;
+      case MYSQL_TYPE_FLOAT:
+      case MYSQL_TYPE_DOUBLE:
+        type= DYN_COL_DOUBLE;
+        break;
+      case MYSQL_TYPE_NULL:
+        type= DYN_COL_NULL;
+        break;
+      case MYSQL_TYPE_TIMESTAMP:
+      case MYSQL_TYPE_DATETIME:
+        type= DYN_COL_DATETIME;
+	break;
+      case MYSQL_TYPE_DATE:
+      case MYSQL_TYPE_NEWDATE:
+        type= DYN_COL_DATE;
+        break;
+      case MYSQL_TYPE_TIME:
+        type= DYN_COL_TIME;
+        break;
+      case MYSQL_TYPE_VARCHAR:
+      case MYSQL_TYPE_ENUM:
+      case MYSQL_TYPE_SET:
+      case MYSQL_TYPE_TINY_BLOB:
+      case MYSQL_TYPE_MEDIUM_BLOB:
+      case MYSQL_TYPE_LONG_BLOB:
+      case MYSQL_TYPE_BLOB:
+      case MYSQL_TYPE_VAR_STRING:
+      case MYSQL_TYPE_STRING:
+      case MYSQL_TYPE_GEOMETRY:
+        type= DYN_COL_STRING;
+        break;
+      }
+    }
+    nums[i]= (uint) args[i * 2]->val_int();
+    vals[i].type= type;
+    switch (type) {
+    case DYN_COL_NULL:
+      DBUG_ASSERT(args[valpos]->field_type() == MYSQL_TYPE_NULL);
+      break;
+    case DYN_COL_INT:
+      vals[i].x.long_value= args[valpos]->val_int();
+      break;
+    case DYN_COL_UINT:
+      vals[i].x.ulong_value= args[valpos]->val_int();
+      break;
+    case DYN_COL_DOUBLE:
+      vals[i].x.double_value= args[valpos]->val_real();
+      break;
+    case DYN_COL_STRING:
+      res= args[valpos]->val_str(&tmp);
+      if (res &&
+          (vals[i].x.string.value.str= my_strndup(res->ptr(), res->length(),
+                                                MYF(MY_WME))))
+      {
+	vals[i].x.string.value.length= res->length();
+	vals[i].x.string.charset= res->charset();
+      }
+      else
+      {
+        args[valpos]->null_value= 1;            // In case of out of memory
+        vals[i].x.string.value.str= NULL;
+        vals[i].x.string.value.length= 0;         // just to be safe
+      }
+      break;
+    case DYN_COL_DECIMAL:
+      if ((dres= args[valpos]->val_decimal(&dtmp)))
+      {
+	dynamic_column_prepare_decimal(&vals[i]);
+        DBUG_ASSERT(vals[i].x.decimal.value.len == dres->len);
+        vals[i].x.decimal.value.intg= dres->intg;
+        vals[i].x.decimal.value.frac= dres->frac;
+        vals[i].x.decimal.value.sign= dres->sign();
+        memcpy(vals[i].x.decimal.buffer, dres->buf,
+               sizeof(vals[i].x.decimal.buffer));
+      }
+      else
+      {
+	dynamic_column_prepare_decimal(&vals[i]); // just to be safe
+        DBUG_ASSERT(args[valpos]->null_value);
+      }
+      break;
+    case DYN_COL_DATETIME:
+      args[valpos]->get_date(&vals[i].x.time_value, TIME_FUZZY_DATE);
+      break;
+    case DYN_COL_DATE:
+      args[valpos]->get_date(&vals[i].x.time_value, TIME_FUZZY_DATE);
+      break;
+    case DYN_COL_TIME:
+      args[valpos]->get_time(&vals[i].x.time_value);
+      break;
+    default:
+      DBUG_ASSERT(0);
+      vals[i].type= DYN_COL_NULL;
+    }
+    if (vals[i].type != DYN_COL_NULL && args[valpos]->null_value)
+    {
+      if (vals[i].type == DYN_COL_STRING)
+        my_free(vals[i].x.string.value.str, MYF(MY_ALLOW_ZERO_PTR));
+      vals[i].type= DYN_COL_NULL;
+    }
+  }
+}
+
+void Item_func_dyncol_create::cleanup_arguments()
+{
+  uint column_count= (arg_count / 2);
+  uint i;
+
+  for (i= 0; i < column_count; i++)
+  {
+    if (vals[i].type == DYN_COL_STRING)
+      my_free(vals[i].x.string.value.str, MYF(MY_ALLOW_ZERO_PTR));
+  }
+}
+
+String *Item_func_dyncol_create::val_str(String *str)
+{
+  DYNAMIC_COLUMN col;
+  String *res;
+  uint column_count= (arg_count / 2);
+  enum enum_dyncol_func_result rc;
+  DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments
+
+  prepare_arguments();
+
+  if ((rc= dynamic_column_create_many(&col, column_count, nums, vals)))
+  {
+    dynamic_column_error_message(rc);
+    dynamic_column_column_free(&col);
+    res= NULL;
+    null_value= TRUE;
+  }
+  else
+  {
+    /* Move result from DYNAMIC_COLUMN to str_value */
+    char *ptr;
+    size_t length, alloc_length;
+    dynamic_column_reassociate(&col, &ptr, &length, &alloc_length);
+    str_value.reassociate(ptr, (uint32) length, (uint32) alloc_length,
+                          &my_charset_bin);
+    res= &str_value;
+    null_value= FALSE;
+  }
+
+  /* cleanup */
+  cleanup_arguments();
+
+  return res;
+}
+
+void Item_func_dyncol_create::print_arguments(String *str,
+                                              enum_query_type query_type)
+{
+  uint i;
+  uint column_count= (arg_count / 2);
+  for (i= 0; i < column_count; i++)
+  {
+    args[i*2]->print(str, query_type);
+    str->append(',');
+    args[i*2 + 1]->print(str, query_type);
+    switch (defs[i].type) {
+    case DYN_COL_NULL: // automatic type => write nothing
+      break;
+    case DYN_COL_INT:
+      str->append(STRING_WITH_LEN(" AS int"));
+      break;
+    case DYN_COL_UINT:
+      str->append(STRING_WITH_LEN(" AS unsigned int"));
+      break;
+    case DYN_COL_DOUBLE:
+      str->append(STRING_WITH_LEN(" AS double"));
+      break;
+    case DYN_COL_STRING:
+      str->append(STRING_WITH_LEN(" AS char"));
+      if (defs[i].cs)
+      {
+        str->append(STRING_WITH_LEN(" charset "));
+        str->append(defs[i].cs->csname);
+        str->append(' ');
+      }
+      break;
+    case DYN_COL_DECIMAL:
+      str->append(STRING_WITH_LEN(" AS decimal"));
+      break;
+    case DYN_COL_DATETIME:
+      str->append(STRING_WITH_LEN(" AS datetime"));
+      break;
+    case DYN_COL_DATE:
+      str->append(STRING_WITH_LEN(" AS date"));
+      break;
+    case DYN_COL_TIME:
+      str->append(STRING_WITH_LEN(" AS time"));
+      break;
+    }
+    if (i < column_count - 1)
+      str->append(',');
+  }
+}
+
+
+void Item_func_dyncol_create::print(String *str,
+                                    enum_query_type query_type)
+{
+  DBUG_ASSERT((arg_count & 0x1) == 0); // even number of arguments
+  str->append(STRING_WITH_LEN("column_create("));
+  print_arguments(str, query_type);
+  str->append(')');
+}
+
+
+String *Item_func_dyncol_add::val_str(String *str)
+{
+  DYNAMIC_COLUMN col;
+  String *res;
+  uint column_count=  (arg_count / 2);
+  enum enum_dyncol_func_result rc;
+  DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments
+
+  /* We store the packed data last */
+  res= args[arg_count - 1]->val_str(str);
+  if (args[arg_count - 1]->null_value)
+    goto null;
+  init_dynamic_string(&col, NULL, res->length() + STRING_BUFFER_USUAL_SIZE,
+                      STRING_BUFFER_USUAL_SIZE);
+
+  col.length= res->length();
+  memcpy(col.str, res->ptr(), col.length);
+
+  prepare_arguments();
+
+  if ((rc= dynamic_column_update_many(&col, column_count, nums, vals)))
+  {
+    dynamic_column_error_message(rc);
+    dynamic_column_column_free(&col);
+    cleanup_arguments();
+    goto null;
+  }
+
+  {
+    /* Move result from DYNAMIC_COLUMN to str */
+    char *ptr;
+    size_t length, alloc_length;
+    dynamic_column_reassociate(&col, &ptr, &length, &alloc_length);
+    str->reassociate(ptr, (uint32) length, (uint32) alloc_length,
+                     &my_charset_bin);
+    null_value= FALSE;
+  }
+
+  /* cleanup */
+  dynamic_column_column_free(&col);
+  cleanup_arguments();
+
+  return str;
+
+null:
+  null_value= TRUE;
+  return NULL;
+}
+
+
+void Item_func_dyncol_add::print(String *str,
+                                 enum_query_type query_type)
+{
+  DBUG_ASSERT((arg_count & 0x1) == 1); // odd number of arguments
+  str->append(STRING_WITH_LEN("column_create("));
+  args[arg_count - 1]->print(str, query_type);
+  str->append(',');
+  print_arguments(str, query_type);
+  str->append(')');
+}
+
+
+/**
+  Get value for a column stored in a dynamic column
+
+  @notes
+  This function ensures that null_value is set correctly
+*/
+
+bool Item_dyncol_get::get_dyn_value(DYNAMIC_COLUMN_VALUE *val, String *tmp)
+{
+  DYNAMIC_COLUMN dyn_str;
+  String *res;
+  longlong num;
+  enum enum_dyncol_func_result rc;
+
+  num= args[1]->val_int();
+  if (args[1]->null_value || num < 0 || num > INT_MAX)
+  {
+    null_value= 1;
+    return 1;
+  }
+
+  res= args[0]->val_str(tmp);
+  if (args[0]->null_value)
+  {
+    null_value= 1;
+    return 1;
+  }
+
+  dyn_str.str=   (char*) res->ptr();
+  dyn_str.length= res->length();
+  if ((rc= dynamic_column_get(&dyn_str, (uint) num, val)))
+  {
+    dynamic_column_error_message(rc);
+    null_value= 1;
+    return 1;
+  }
+
+  null_value= 0;
+  return 0;                                     // ok
+}
+
+
+String *Item_dyncol_get::val_str(String *str_result)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return NULL;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_INT:
+  case DYN_COL_UINT:
+    str_result->set_int(val.x.long_value, test(val.type == DYN_COL_UINT),
+                       &my_charset_latin1);
+    break;
+  case DYN_COL_DOUBLE:
+    str_result->set_real(val.x.double_value, NOT_FIXED_DEC, &my_charset_latin1);
+    break;
+  case DYN_COL_STRING:
+    if ((char*) tmp.ptr() <= val.x.string.value.str &&
+        (char*) tmp.ptr() + tmp.length() >= val.x.string.value.str)
+    {
+      /* value is allocated in tmp buffer; We have to make a copy */
+      str_result->copy(val.x.string.value.str, val.x.string.value.length,
+                      val.x.string.charset);
+    }
+    else
+    {
+      /*
+        It's safe to use the current value because it's either pointing
+        into a field or in a buffer for another item and this buffer
+        is not going to be deleted during expression evaluation
+      */
+      str_result->set(val.x.string.value.str, val.x.string.value.length,
+                      val.x.string.charset);
+    }
+    break;
+  case DYN_COL_DECIMAL:
+  {
+    int res;
+    int length=
+      my_decimal_string_length((const my_decimal*)&val.x.decimal.value);
+    if (str_result->alloc(length))
+      goto null;
+    if ((res= decimal2string(&val.x.decimal.value, (char*) str_result->ptr(),
+                             &length, 0, 0, ' ')) != E_DEC_OK)
+    {
+      char buff[40];
+      int len= sizeof(buff);
+      DBUG_ASSERT(length < (int)sizeof(buff));
+      decimal2string(&val.x.decimal.value, buff, &len, 0, 0, ' ');
+      decimal_operation_results(res, buff, "CHAR");
+    }
+    str_result->set_charset(&my_charset_latin1);
+    str_result->length(length);
+    break;
+  }
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+  {
+    int length;
+    /*
+      We use AUTO_SEC_PART_DIGITS here to ensure that we do not loose
+      any microseconds from the data. This is safe to do as we are
+      asked to return the time argument as a string.
+    */
+    if (str_result->alloc(MAX_DATE_STRING_REP_LENGTH) ||
+        !(length= my_TIME_to_str(&val.x.time_value, (char*) str_result->ptr(),
+                                 AUTO_SEC_PART_DIGITS)))
+      goto null;
+    str_result->set_charset(&my_charset_latin1);
+    str_result->length(length);
+    break;
+  }
+  }
+  return str_result;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
+
+
+longlong Item_dyncol_get::val_int()
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return 0;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    unsigned_flag= 1;            // Make it possible for caller to detect sign
+    return val.x.long_value;
+  case DYN_COL_INT:
+    unsigned_flag= 0;            // Make it possible for caller to detect sign
+    return val.x.long_value;
+  case DYN_COL_DOUBLE:
+  {
+    bool error;
+    longlong num;
+
+    num= double_to_longlong(val.x.double_value, unsigned_flag, &error);
+    if (error)
+    {
+      char buff[30];
+      sprintf(buff, "%lg", val.x.double_value);
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_DATA_OVERFLOW,
+                          ER(ER_DATA_OVERFLOW),
+                          buff,
+                          unsigned_flag ? "UNSIGNED INT" : "INT");
+    }
+    return num;
+  }
+  case DYN_COL_STRING:
+  {
+    int error;
+    longlong num;
+    char *end= val.x.string.value.str + val.x.string.value.length, *org_end= end;
+
+    num= my_strtoll10(val.x.string.value.str, &end, &error);
+    if (end != org_end || error > 0)
+    {
+      char buff[80];
+      strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+                                              val.x.string.value.length));
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA,
+                          ER(ER_BAD_DATA),
+                          buff,
+                          unsigned_flag ? "UNSIGNED INT" : "INT");
+    }
+    unsigned_flag= error >= 0;
+    return num;
+  }
+  case DYN_COL_DECIMAL:
+  {
+    longlong num;
+    my_decimal2int(E_DEC_FATAL_ERROR, &val.x.decimal.value, unsigned_flag,
+                   &num);
+    return num;
+  }
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    unsigned_flag= !val.x.time_value.neg;
+    if (unsigned_flag)
+      return TIME_to_ulonglong(&val.x.time_value);
+    else
+      return -(longlong)TIME_to_ulonglong(&val.x.time_value);
+  }
+
+null:
+  null_value= TRUE;
+  return 0;
+}
+
+
+double Item_dyncol_get::val_real()
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return 0.0;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    return ulonglong2double(val.x.ulong_value);
+  case DYN_COL_INT:
+    return (double) val.x.long_value;
+  case DYN_COL_DOUBLE:
+    return (double) val.x.double_value;
+  case DYN_COL_STRING:
+  {
+    int error;
+    char *end;
+    double res= my_strntod(val.x.string.charset, (char*) val.x.string.value.str,
+                           val.x.string.value.length, &end, &error);
+
+    if (end != (char*) val.x.string.value.str + val.x.string.value.length ||
+        error)
+    {
+      char buff[80];
+      strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+                                              val.x.string.value.length));
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA,
+                          ER(ER_BAD_DATA),
+                          buff, "DOUBLE");
+    }
+    return res;
+  }
+  case DYN_COL_DECIMAL:
+  {
+    double res;
+    /* This will always succeed */
+    decimal2double(&val.x.decimal.value, &res);
+    return res;
+  }
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    return TIME_to_double(&val.x.time_value);
+  }
+
+null:
+  null_value= TRUE;
+  return 0.0;
+}
+
+
+my_decimal *Item_dyncol_get::val_decimal(my_decimal *decimal_value)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+
+  if (get_dyn_value(&val, &tmp))
+    return NULL;
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_UINT:
+    int2my_decimal(E_DEC_FATAL_ERROR, val.x.long_value, TRUE, decimal_value);
+    break;
+  case DYN_COL_INT:
+    int2my_decimal(E_DEC_FATAL_ERROR, val.x.long_value, FALSE, decimal_value);
+    break;
+  case DYN_COL_DOUBLE:
+    double2my_decimal(E_DEC_FATAL_ERROR, val.x.double_value, decimal_value);
+    break;
+  case DYN_COL_STRING:
+  {
+    int rc;
+    rc= str2my_decimal(0, val.x.string.value.str, val.x.string.value.length,
+                       val.x.string.charset, decimal_value);
+    char buff[80];
+    strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+                                            val.x.string.value.length));
+    if (rc != E_DEC_OK)
+    {
+      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_BAD_DATA,
+                          ER(ER_BAD_DATA),
+                          buff, "DECIMAL");
+    }
+    break;
+  }
+  case DYN_COL_DECIMAL:
+    decimal2my_decimal(&val.x.decimal.value, decimal_value);
+    break;
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    decimal_value= seconds2my_decimal(val.x.time_value.neg,
+                                      TIME_to_ulonglong(&val.x.time_value),
+                                      val.x.time_value.second_part,
+                                      decimal_value);
+    break;
+  }
+  return decimal_value;
+
+null:
+  null_value= TRUE;
+  return 0;
+}
+
+
+bool Item_dyncol_get::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
+{
+  DYNAMIC_COLUMN_VALUE val;
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String tmp(buff, sizeof(buff), &my_charset_bin);
+  bool signed_value= 0;
+
+  if (get_dyn_value(&val, &tmp))
+    return 1;                                   // Error
+
+  switch (val.type) {
+  case DYN_COL_NULL:
+    goto null;
+  case DYN_COL_INT:
+    signed_value= 1;                                  // For error message
+    /* fall_trough */
+  case DYN_COL_UINT:
+    if (signed_value || val.x.ulong_value <= LONGLONG_MAX)
+    {
+      if (int_to_datetime_with_warn(val.x.ulong_value, ltime, fuzzy_date,
+                                   0 /* TODO */))
+        goto null;
+      return 0;
+    }
+    /* let double_to_datetime_with_warn() issue the warning message */
+    val.x.double_value= static_cast<double>(ULONGLONG_MAX);
+    /* fall_trough */
+  case DYN_COL_DOUBLE:
+    if (double_to_datetime_with_warn(val.x.double_value, ltime, fuzzy_date,
+                                     0 /* TODO */))
+      goto null;
+    return 0;
+  case DYN_COL_DECIMAL:
+    if (decimal_to_datetime_with_warn((my_decimal*)&val.x.decimal.value, ltime,
+                                      fuzzy_date, 0 /* TODO */))
+      goto null;
+    return 0;
+  case DYN_COL_STRING:
+    if (str_to_datetime_with_warn(val.x.string.value.str,
+                                  val.x.string.value.length,
+                                  ltime, fuzzy_date) <= MYSQL_TIMESTAMP_ERROR)
+      goto null;
+    return 0;
+  case DYN_COL_DATETIME:
+  case DYN_COL_DATE:
+  case DYN_COL_TIME:
+    *ltime= val.x.time_value;
+    return 0;
+  }
+
+null:
+  null_value= TRUE;
+  return 1;
+}
+
+
+void Item_dyncol_get::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN("column_get("));
+  args[0]->print(str, query_type);
+  str->append(',');
+  args[1]->print(str, query_type);
+  str->append(')');
+}
+
+
+String *Item_func_dyncol_list::val_str(String *str)
+{
+  uint i;
+  enum enum_dyncol_func_result rc;
+  DYNAMIC_ARRAY arr;
+  DYNAMIC_COLUMN col;
+  String *res= args[0]->val_str(str);
+
+  if (args[0]->null_value)
+    goto null;
+  col.length= res->length();
+  /* We do not change the string, so could do this trick */
+  col.str= (char *)res->ptr();
+  if ((rc= dynamic_column_list(&col, &arr)))
+  {
+    dynamic_column_error_message(rc);
+    delete_dynamic(&arr);
+    goto null;
+  }
+
+  /*
+    We support elements from 0 - 65536, so max size for one element is
+    6 (including ,).
+  */
+  if (str->alloc(arr.elements * 6))
+    goto null;
+
+  str->length(0);
+  for (i= 0; i < arr.elements; i++)
+  {
+    str->qs_append(*dynamic_element(&arr, i, uint*));
+    if (i < arr.elements - 1)
+      str->qs_append(',');
+  }
+
+  null_value= FALSE;
+  delete_dynamic(&arr);
+  return str;
+
+null:
+  null_value= TRUE;
+  return NULL;
+}
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index b731c13a871..fc4e888e6ea 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -747,7 +747,7 @@ public:
   {
     DBUG_ASSERT(args[0]->fixed);
     conv_charset= cs;
-    if (cache_if_const && args[0]->const_item())
+    if (cache_if_const && args[0]->const_item() && !args[0]->with_subselect)
     {
       uint errors= 0;
       String tmp, *str= args[0]->val_str(&tmp);
@@ -892,3 +892,71 @@ public:
   }
 };
 
+
+class Item_func_dyncol_create: public Item_str_func
+{
+protected:
+  DYNCALL_CREATE_DEF *defs;
+  DYNAMIC_COLUMN_VALUE *vals;
+  uint *nums;
+  void prepare_arguments();
+  void cleanup_arguments();
+  void print_arguments(String *str, enum_query_type query_type);
+public:
+  Item_func_dyncol_create(List<Item> &args, DYNCALL_CREATE_DEF *dfs);
+  bool fix_fields(THD *thd, Item **ref);
+  void fix_length_and_dec();
+  const char *func_name() const{ return "column_create"; }
+  String *val_str(String *);
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
+class Item_func_dyncol_add: public Item_func_dyncol_create
+{
+public:
+  Item_func_dyncol_add(List<Item> &args, DYNCALL_CREATE_DEF *dfs)
+    :Item_func_dyncol_create(args, dfs)
+  {}
+  const char *func_name() const{ return "column_add"; }
+  String *val_str(String *);
+  virtual void print(String *str, enum_query_type query_type);
+};
+
+
+/*
+  The following functions is always called from an Item_cast function
+*/
+
+class Item_dyncol_get: public Item_str_func
+{
+public:
+  Item_dyncol_get(Item *str, Item *num)
+    :Item_str_func(str, num)
+  {
+    max_length= MAX_DYNAMIC_COLUMN_LENGTH;
+  }
+  void fix_length_and_dec()
+  { maybe_null= 1; }
+  /* Mark that collation can change between calls */
+  bool dynamic_result() { return 1; }
+
+  const char *func_name() const { return "column_get"; }
+  String *val_str(String *);
+  longlong val_int();
+  double val_real();
+  my_decimal *val_decimal(my_decimal *);
+  bool get_dyn_value(DYNAMIC_COLUMN_VALUE *val, String *tmp);
+  bool get_date(MYSQL_TIME *ltime,uint fuzzydate);
+  void print(String *str, enum_query_type query_type);
+};
+
+
+class Item_func_dyncol_list: public Item_str_func
+{
+public:
+  Item_func_dyncol_list(Item *str) :Item_str_func(str) {};
+  void fix_length_and_dec() { maybe_null= 1; max_length= MAX_BLOB_WIDTH; };
+  const char *func_name() const{ return "column_list"; }
+  String *val_str(String *);
+};
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 3aa8dcd56f0..a97393fa94c 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -31,29 +31,34 @@
 #include "mysql_priv.h"
 #include "sql_select.h"
 
-inline Item * and_items(Item* cond, Item *item)
-{
-  return (cond? (new Item_cond_and(cond, item)) : item);
-}
+double get_post_group_estimate(JOIN* join, double join_op_rows);
+
 
 Item_subselect::Item_subselect():
-  Item_result_field(), value_assigned(0), thd(0), substitution(0),
-  engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
-  const_item_cache(1), in_fix_fields(0), eliminated(FALSE), 
+  Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), 
+  used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
+  inside_first_fix_fields(0), done_first_fix_fields(FALSE), 
+  expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
   engine_changed(0), changed(0), is_correlated(FALSE)
 {
+  DBUG_ENTER("Item_subselect::Item_subselect");
+  DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
   with_subselect= 1;
   reset();
   /*
-    item value is NULL if select_subselect not changed this value
+    Item value is NULL if select_result_interceptor didn't change this value
     (i.e. some rows will be found returned)
   */
   null_value= TRUE;
+  DBUG_VOID_RETURN;
 }
 
 
 void Item_subselect::init(st_select_lex *select_lex,
-			  select_subselect *result)
+			  select_result_interceptor *result)
 {
   /*
     Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(),
@@ -61,8 +66,10 @@ void Item_subselect::init(st_select_lex *select_lex,
   */
 
   DBUG_ENTER("Item_subselect::init");
-  DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
+  DBUG_PRINT("enter", ("select_lex: 0x%lx  this: 0x%lx",
+                       (ulong) select_lex, (ulong) this));
   unit= select_lex->master_unit();
+  thd= unit->thd;
 
   if (unit->item)
   {
@@ -71,10 +78,10 @@ void Item_subselect::init(st_select_lex *select_lex,
       => we do not copy old_engine here
     */
     engine= unit->item->engine;
+    own_engine= FALSE;
     parsing_place= unit->item->parsing_place;
-    unit->item->engine= 0;
-    unit->item= this;
-    engine->change_result(this, result);
+    thd->change_item_tree((Item**)&unit->item, this);
+    engine->change_result(this, result, TRUE);
   }
   else
   {
@@ -87,15 +94,18 @@ void Item_subselect::init(st_select_lex *select_lex,
                     NO_MATTER :
                     outer_select->parsing_place);
     if (unit->is_union())
-      engine= new subselect_union_engine(unit, result, this);
+      engine= new subselect_union_engine(thd, unit, result, this);
     else
-      engine= new subselect_single_select_engine(select_lex, result, this);
+      engine= new subselect_single_select_engine(thd, select_lex, result, this);
   }
   {
     SELECT_LEX *upper= unit->outer_select();
     if (upper->parsing_place == IN_HAVING)
       upper->subquery_in_having= 1;
+    /* The subquery is an expression cache candidate */
+    upper->expr_cache_may_be_used[upper->parsing_place]= TRUE;
   }
+  DBUG_PRINT("info", ("engine: 0x%lx", (ulong)engine));
   DBUG_VOID_RETURN;
 }
 
@@ -120,6 +130,12 @@ void Item_subselect::cleanup()
     engine->cleanup();
   reset();
   value_assigned= 0;
+  expr_cache= 0;
+  forced_const= FALSE;
+  DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
   DBUG_VOID_RETURN;
 }
 
@@ -132,16 +148,59 @@ void Item_singlerow_subselect::cleanup()
   DBUG_VOID_RETURN;
 }
 
+
+void Item_in_subselect::cleanup()
+{
+  DBUG_ENTER("Item_in_subselect::cleanup");
+  if (left_expr_cache)
+  {
+    left_expr_cache->delete_elements();
+    delete left_expr_cache;
+    left_expr_cache= NULL;
+  }
+  /*
+    TODO: This breaks the commented assert in add_strategy().
+    in_strategy&= ~SUBS_STRATEGY_CHOSEN;
+  */
+  first_execution= TRUE;
+  pushed_cond_guards= NULL;
+  Item_subselect::cleanup();
+  DBUG_VOID_RETURN;
+}
+
+
+void Item_allany_subselect::cleanup()
+{
+  /*
+    The MAX/MIN transformation through injection is reverted through the
+    change_item_tree() mechanism. Revert the select_lex object of the
+    query to its initial state.
+  */
+  for (SELECT_LEX *sl= unit->first_select();
+       sl; sl= sl->next_select())
+    if (test_set_strategy(SUBS_MAXMIN_INJECTED))
+      sl->with_sum_func= false;
+  Item_in_subselect::cleanup();
+}
+
+
 Item_subselect::~Item_subselect()
 {
-  delete engine;
+  DBUG_ENTER("Item_subselect::~Item_subselect");
+  DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+  if (own_engine)
+    delete engine;
+  else
+    engine->cleanup();
+  engine= NULL;
+  DBUG_VOID_RETURN;
 }
 
-Item_subselect::trans_res
+bool
 Item_subselect::select_transformer(JOIN *join)
 {
   DBUG_ENTER("Item_subselect::select_transformer");
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
@@ -153,27 +212,46 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
 
   DBUG_ASSERT(fixed == 0);
   engine->set_thd((thd= thd_param));
-  if (!in_fix_fields)
-    refers_to.empty();
+  if (!done_first_fix_fields)
+  {
+    done_first_fix_fields= TRUE;
+    inside_first_fix_fields= TRUE;
+    upper_refs.empty();
+    /*
+      psergey-todo: remove _first_fix_fields calls, we need changes on every
+      execution
+    */
+  }
+
   eliminated= FALSE;
+  parent_select= thd_param->lex->current_select;
 
   if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res))
     return TRUE;
   
-  in_fix_fields++;
-
+  
   if (!(res= engine->prepare()))
   {
     // all transformation is done (used by prepared statements)
     changed= 1;
+    inside_first_fix_fields= FALSE;
 
+    /*
+      Substitute the current item with an Item_in_optimizer that was
+      created by Item_in_subselect::select_in_like_transformer and
+      call fix_fields for the substituted item which in turn calls
+      engine->prepare for the subquery predicate.
+    */
     if (substitution)
     {
-      // did we changed top item of WHERE condition
+      /*
+        If the top item of the WHERE/HAVING condition changed,
+        set correct WHERE/HAVING for PS.
+      */
       if (unit->outer_select()->where == (*ref))
-	unit->outer_select()->where= substitution; // correct WHERE for PS
+        unit->outer_select()->where= substitution;
       else if (unit->outer_select()->having == (*ref))
-	unit->outer_select()->having= substitution; // correct HAVING for PS
+        unit->outer_select()->having= substitution;
 
       (*ref)= substitution;
       substitution->name= name;
@@ -184,12 +262,13 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
       if (!(*ref)->fixed)
 	res= (*ref)->fix_fields(thd, ref);
       goto end;
+
     }
     // Is it one field subselect?
     if (engine->cols() > max_columns)
     {
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      res= 1;
+
       goto end;
     }
     fix_length_and_dec();
@@ -206,7 +285,8 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
   fixed= 1;
 
 end:
-  in_fix_fields--;
+  done_first_fix_fields= FALSE;
+  inside_first_fix_fields= FALSE;
   thd->where= save_where;
   return res;
 }
@@ -214,11 +294,12 @@ end:
 
 bool Item_subselect::enumerate_field_refs_processor(uchar *arg)
 {
-  List_iterator<Item> it(refers_to);
-  Item *item;
-  while ((item= it++))
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  while ((upper= it++))
   {
-    if (item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
+    if (upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
       return TRUE;
   }
   return FALSE;
@@ -230,9 +311,224 @@ bool Item_subselect::mark_as_eliminated_processor(uchar *arg)
   return FALSE;
 }
 
+
+/**
+  Remove a subselect item from its unit so that the unit no longer
+  represents a subquery.
+
+  @param arg  unused parameter
+
+  @return
+    FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::eliminate_subselect_processor(uchar *arg)
+{
+  unit->item= NULL;
+  unit->exclude_from_tree();
+  eliminated= TRUE;
+  return FALSE;
+}
+
+
+/**
+  Adjust the master select of the subquery to be the fake_select which
+  represents the whole UNION right above the subquery, instead of the
+  last query of the UNION.
+
+  @param arg  pointer to the fake select
+
+  @return
+    FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::set_fake_select_as_master_processor(uchar *arg)
+{
+  SELECT_LEX *fake_select= (SELECT_LEX*) arg;
+  /*
+    Move the st_select_lex_unit of a subquery from a global ORDER BY clause to
+    become a direct child of the fake_select of a UNION. In this way the
+    ORDER BY that is applied to the temporary table that contains the result of
+    the whole UNION, and all columns in the subquery are resolved against this
+    table. The transformation is applied only for immediate child subqueries of
+    a UNION query.
+  */
+  if (unit->outer_select()->master_unit()->fake_select_lex == fake_select)
+  {
+    /*
+      Set the master of the subquery to be the fake select (i.e. the whole
+      UNION), instead of the last query in the UNION.
+    */
+    fake_select->add_slave(unit);
+    DBUG_ASSERT(unit->outer_select() == fake_select);
+    /* Adjust the name resolution context hierarchy accordingly. */
+    for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+      sl->context.outer_context= &(fake_select->context);
+    /*
+      Undo Item_subselect::eliminate_subselect_processor because at that phase
+      we don't know yet that the ORDER clause will be moved to the fake select.
+    */
+    unit->item= this;
+    eliminated= FALSE;
+  }
+  return FALSE;
+}
+
+
+bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, 
+                                       Item *item)
+{
+  if (inside_first_fix_fields)
+  {
+    is_correlated= TRUE;
+    Ref_to_outside *upper;
+    if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside()))
+      return TRUE;
+    upper->select= select;
+    upper->item= item;
+    if (upper_refs.push_back(upper, thd->stmt_arena->mem_root))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Adjust attributes after our parent select has been merged into grandparent
+
+  DESCRIPTION
+    Subquery is a composite object which may be correlated, that is, it may
+    have
+    1. references to tables of the parent select (i.e. one that has the clause
+      with the subquery predicate)
+    2. references to tables of the grandparent select
+    3. references to tables of further ancestors.
+    
+    Before the pullout, this item indicates:
+    - #1 with table bits in used_tables()
+    - #2 and #3 with OUTER_REF_TABLE_BIT.
+
+    After parent has been merged with grandparent:
+    - references to parent and grandparent tables should be indicated with 
+      table bits.
+    - references to greatgrandparent and further ancestors - with
+      OUTER_REF_TABLE_BIT.
+*/
+
+void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  recalc_used_tables(new_parent, TRUE);
+  parent_select= new_parent;
+}
+
+
+class Field_fixer: public Field_enumerator
+{
+public:
+  table_map used_tables; /* Collect used_tables here */
+  st_select_lex *new_parent; /* Select we're in */
+  virtual void visit_field(Item_field *item)
+  {
+    //for (TABLE_LIST *tbl= new_parent->leaf_tables; tbl; tbl= tbl->next_local)
+    //{
+    //  if (tbl->table == field->table)
+    //  {
+        used_tables|= item->field->table->map;
+    //    return;
+    //  }
+    //}
+    //used_tables |= OUTER_REF_TABLE_BIT;
+  }
+};
+
+
+/*
+  Recalculate used_tables_cache 
+*/
+
+void Item_subselect::recalc_used_tables(st_select_lex *new_parent, 
+                                        bool after_pullout)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  used_tables_cache= 0;
+  while ((upper= it++))
+  {
+    bool found= FALSE;
+    /*
+      Check if
+        1. the upper reference refers to the new immediate parent select, or
+        2. one of the further ancestors.
+
+      We rely on the fact that the tree of selects is modified by some kind of
+      'flattening', i.e. a process where child selects are merged into their
+      parents.
+      The merged selects are removed from the select tree but keep pointers to
+      their parents.
+    */
+    for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select())
+    {
+      /* 
+        If we've reached the new parent select by walking upwards from
+        reference's original select, this means that the reference is now 
+        referring to the direct parent:
+      */
+      if (sel == new_parent)
+      {
+        found= TRUE;
+        /* 
+          upper->item may be NULL when we've referred to a grouping function,
+          in which case we don't care about what it's table_map really is,
+          because item->with_sum_func==1 will ensure correct placement of the
+          item.
+        */
+        if (upper->item)
+        {
+          // Now, iterate over fields and collect used_tables() attribute:
+          Field_fixer fixer;
+          fixer.used_tables= 0;
+          fixer.new_parent= new_parent;
+          upper->item->walk(&Item::enumerate_field_refs_processor, FALSE,
+                            (uchar*)&fixer);
+          used_tables_cache |= fixer.used_tables;
+          upper->item->walk(&Item::update_table_bitmaps_processor, FALSE, NULL);
+/*
+          if (after_pullout)
+            upper->item->fix_after_pullout(new_parent, &(upper->item));
+          upper->item->update_used_tables();
+*/          
+        }
+      }
+    }
+    if (!found)
+      used_tables_cache|= OUTER_REF_TABLE_BIT;
+  }
+  /* 
+    Don't update const_tables_cache yet as we don't yet know which of the
+    parent's tables are constant. Parent will call update_used_tables() after
+    he has done const table detection, and that will be our chance to update
+    const_tables_cache.
+  */
+}
+
 bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
                           uchar *argument)
 {
+  if (!(unit->uncacheable & ~UNCACHEABLE_DEPENDENT) && engine->is_executed() &&
+      !unit->describe)
+  {
+    /*
+      The subquery has already been executed (for real, it wasn't EXPLAIN's
+      fake execution) so it should not matter what it has inside.
+      
+      The actual reason for not walking inside is that parts of the subquery
+      (e.g. JTBM join nests and their IN-equality conditions may have been 
+       invalidated by irreversible cleanups (those happen after an uncorrelated 
+       subquery has been executed).
+    */
+    return (this->*processor)(argument);
+  }
 
   if (walk_subquery)
   {
@@ -289,7 +585,9 @@ bool Item_subselect::exec()
   DBUG_EXECUTE_IF("subselect_exec_fail", return 1;);
 
   res= engine->exec();
-
+#ifndef DBUG_OFF
+  ++exec_counter;
+#endif
   if (engine_changed)
   {
     engine_changed= 0;
@@ -298,6 +596,153 @@ bool Item_subselect::exec()
   return (res);
 }
 
+
+void Item_subselect::get_cache_parameters(List<Item> &parameters)
+{
+  Collect_deps_prm prm= {&parameters,
+    unit->first_select()->nest_level_base,
+    unit->first_select()->nest_level};
+  walk(&Item::collect_outer_ref_processor, TRUE, (uchar*)&prm);
+}
+
+int Item_in_subselect::optimize(double *out_rows, double *cost)
+{
+  int res;
+  DBUG_ENTER("Item_in_subselect::optimize");
+  SELECT_LEX *save_select= thd->lex->current_select;
+  JOIN *join= unit->first_select()->join;
+
+  thd->lex->current_select= join->select_lex;
+  if ((res= join->optimize()))
+    DBUG_RETURN(res);
+
+  /* Calculate #rows and cost of join execution */
+  join->get_partial_cost_and_fanout(join->table_count - join->const_tables, 
+                                    table_map(-1),
+                                    cost, out_rows);
+
+  /*
+    Adjust join output cardinality. There can be these cases:
+    - Have no GROUP BY and no aggregate funcs: we won't get into this 
+      function because such join will be processed as a merged semi-join 
+      (TODO: does it really mean we don't need to handle such cases here at 
+       all? put ASSERT)
+    - Have no GROUP BY but have aggregate funcs: output is 1 record.
+    - Have GROUP BY and have (or not) aggregate funcs:  need to adjust output 
+      cardinality.
+  */
+  thd->lex->current_select= save_select;
+  if (!join->group_list && !join->group_optimized_away &&
+      join->tmp_table_param.sum_func_count)
+  {
+    DBUG_PRINT("info",("Materialized join will have only 1 row (it has "
+                       "aggregates but no GROUP BY"));
+    *out_rows= 1;
+  }
+  
+  /* Now with grouping */
+  if (join->group_list)
+  {
+    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it"));
+    double output_rows= get_post_group_estimate(join, *out_rows);
+    DBUG_PRINT("info",("Got value of %g", output_rows));
+    *out_rows= output_rows;
+  }
+
+  DBUG_RETURN(res);
+
+}
+
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_subselect::expr_cache_is_needed(THD *thd)
+{
+  return ((engine->uncacheable() & UNCACHEABLE_DEPENDENT) &&
+          engine->cols() == 1 &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @note
+  This method allows many columns in the subquery because it is supported by
+  Item_in optimizer and result of the IN subquery will be scalar in this
+  case.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_in_subselect::expr_cache_is_needed(THD *thd)
+{
+  return (optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/*
+  Compute the IN predicate if the left operand's cache changed.
+*/
+
+bool Item_in_subselect::exec()
+{
+  DBUG_ENTER("Item_in_subselect::exec");
+  /*
+    Initialize the cache of the left predicate operand. This has to be done as
+    late as now, because Cached_item directly contains a resolved field (not
+    an item, and in some cases (when temp tables are created), these fields
+    end up pointing to the wrong field. One solution is to change Cached_item
+    to not resolve its field upon creation, but to resolve it dynamically
+    from a given Item_ref object.
+    TODO: the cache should be applied conditionally based on:
+    - rules - e.g. only if the left operand is known to be ordered, and/or
+    - on a cost-based basis, that takes into account the cost of a cache
+      lookup, the cache hit rate, and the savings per cache hit.
+  */
+  if (!left_expr_cache && (test_strategy(SUBS_MATERIALIZATION)))
+    init_left_expr_cache();
+
+  /*
+    If the new left operand is already in the cache, reuse the old result.
+    Use the cached result only if this is not the first execution of IN
+    because the cache is not valid for the first execution.
+  */
+  if (!first_execution && left_expr_cache &&
+      test_if_item_cache_changed(*left_expr_cache) < 0)
+    DBUG_RETURN(FALSE);
+
+  /*
+    The exec() method below updates item::value, and item::null_value, thus if
+    we don't call it, the next call to item::val_int() will return whatever
+    result was computed by its previous call.
+  */
+  DBUG_RETURN(Item_subselect::exec());
+}
+
+
 Item::Type Item_subselect::type() const
 {
   return SUBSELECT_ITEM;
@@ -330,11 +775,15 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg)
 
 void Item_subselect::update_used_tables()
 {
-  if (!engine->uncacheable())
+  if (!forced_const)
   {
-    // did all used tables become static?
-    if (!(used_tables_cache & ~engine->upper_select_const_tables()))
-      const_item_cache= 1;
+    recalc_used_tables(parent_select, FALSE);
+    if (!engine->uncacheable())
+    {
+      // did all used tables become static?
+      if (!(used_tables_cache & ~engine->upper_select_const_tables()))
+        const_item_cache= 1;
+    }
   }
 }
 
@@ -403,7 +852,7 @@ Item_maxmin_subselect::Item_maxmin_subselect(THD *thd_param,
     of Items belonged to subquery, which will be not repeated
   */
   used_tables_cache= parent->get_used_tables_cache();
-  const_item_cache= parent->get_const_item_cache();
+  const_item_cache= parent->const_item();
 
   /*
     this subquery always creates during preparation, so we can assign
@@ -441,10 +890,12 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type)
 
 void Item_singlerow_subselect::reset()
 {
-  eliminated= FALSE;
-  null_value= TRUE;
+  Item_subselect::reset();
   if (value)
-    value->null_value= TRUE;
+  {
+    for(uint i= 0; i < engine->cols(); i++)
+      row[i]->set_null();
+  }
 }
 
 
@@ -457,12 +908,17 @@ void Item_singlerow_subselect::reset()
   - switch off this optimization for prepare statement,
   because we do not rollback this changes.
   Make rollback for it, or special name resolving mode in 5.0.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @retval false  The subquery was transformed
 */
-Item_subselect::trans_res
+bool
 Item_singlerow_subselect::select_transformer(JOIN *join)
 {
+  DBUG_ENTER("Item_singlerow_subselect::select_transformer");
   if (changed)
-    return RES_OK;
+    DBUG_RETURN(false);
 
   SELECT_LEX *select_lex= join->select_lex;
   Query_arena *arena= thd->stmt_arena;
@@ -489,7 +945,6 @@ Item_singlerow_subselect::select_transformer(JOIN *join)
       !arena->is_stmt_prepare_or_first_sp_execute()
       )
   {
-
     have_to_be_excluded= 1;
     if (thd->lex->describe)
     {
@@ -505,9 +960,8 @@ Item_singlerow_subselect::select_transformer(JOIN *join)
     */
     substitution->walk(&Item::remove_dependence_processor, 0,
 		       (uchar *) select_lex->outer_select());
-    return RES_REDUCE;
   }
-  return RES_OK;
+  DBUG_RETURN(false);
 }
 
 
@@ -552,8 +1006,47 @@ void Item_singlerow_subselect::fix_length_and_dec()
   */
   if (engine->no_tables())
     maybe_null= engine->may_be_null();
+  else
+  {
+    for (uint i= 0; i < max_columns; i++)
+      row[i]->maybe_null= TRUE;
+  }
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_singlerow_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_singlerow_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 uint Item_singlerow_subselect::cols()
 {
   return engine->cols();
@@ -700,11 +1193,17 @@ bool Item_in_subselect::test_limit(st_select_lex_unit *unit_arg)
 
 Item_in_subselect::Item_in_subselect(Item * left_exp,
 				     st_select_lex *select_lex):
-  Item_exists_subselect(), optimizer(0), transformed(0),
-  pushed_cond_guards(NULL), upper_item(0)
+  Item_exists_subselect(), 
+  left_expr_cache(0), first_execution(TRUE), in_strategy(SUBS_NOT_TRANSFORMED),
+  optimizer(0), pushed_cond_guards(NULL), emb_on_expr_nest(NULL),
+  is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), 
+  is_flattenable_semijoin(FALSE),
+  is_registered_semijoin(FALSE), 
+  upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
+  func= &eq_creator;
   init(select_lex, new select_exists_subselect(this));
   max_columns= UINT_MAX;
   maybe_null= 1;
@@ -715,13 +1214,18 @@ Item_in_subselect::Item_in_subselect(Item * left_exp,
   DBUG_VOID_RETURN;
 }
 
+int Item_in_subselect::get_identifier()
+{
+  return engine->get_identifier();
+}
+
 Item_allany_subselect::Item_allany_subselect(Item * left_exp,
                                              chooser_compare_func_creator fc,
 					     st_select_lex *select_lex,
 					     bool all_arg)
   :Item_in_subselect(), func_creator(fc), all(all_arg)
 {
-  DBUG_ENTER("Item_in_subselect::Item_in_subselect");
+  DBUG_ENTER("Item_allany_subselect::Item_allany_subselect");
   left_expr= left_exp;
   func= func_creator(all_arg);
   init(select_lex, new select_exists_subselect(this));
@@ -734,15 +1238,79 @@ Item_allany_subselect::Item_allany_subselect(Item * left_exp,
 }
 
 
+/**
+  Initialize length and decimals for EXISTS  and inherited (IN/ALL/ANY)
+  subqueries
+*/
+
+void Item_exists_subselect::init_length_and_dec()
+{
+  decimals= 0;
+  max_length= 1;
+  max_columns= engine->cols();
+}
+
+
 void Item_exists_subselect::fix_length_and_dec()
 {
-   decimals= 0;
-   max_length= 1;
-   max_columns= engine->cols();
-  /* We need only 1 row to determine existence */
-  unit->global_parameters->select_limit= new Item_int((int32) 1);
+  DBUG_ENTER("Item_exists_subselect::fix_length_and_dec");
+  init_length_and_dec();
+  /*
+    We need only 1 row to determine existence (i.e. any EXISTS that is not
+    an IN always requires LIMIT 1)
+  */
+  thd->change_item_tree(&unit->global_parameters->select_limit,
+                        new Item_int((int32) 1));
+  DBUG_PRINT("info", ("Set limit to 1"));
+  DBUG_VOID_RETURN;
+}
+
+
+void Item_in_subselect::fix_length_and_dec()
+{
+  DBUG_ENTER("Item_in_subselect::fix_length_and_dec");
+  init_length_and_dec();
+  /*
+    Unlike Item_exists_subselect, LIMIT 1 is set later for
+    Item_in_subselect, depending on the chosen strategy.
+  */
+  DBUG_VOID_RETURN;
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_exists_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (substype() == EXISTS_SUBS && expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 double Item_exists_subselect::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -891,6 +1459,8 @@ String *Item_in_subselect::val_str(String *str)
 bool Item_in_subselect::val_bool()
 {
   DBUG_ASSERT(fixed == 1);
+  if (forced_const)
+    return value;
   null_value= was_null= FALSE;
   if (exec())
   {
@@ -923,59 +1493,27 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
 }
 
 
-/* 
-  Rewrite a single-column IN/ALL/ANY subselect
-
-  SYNOPSIS
-    Item_in_subselect::single_value_transformer()
-      join  Join object of the subquery (i.e. 'child' join).
-      func  Subquery comparison creator
-
-  DESCRIPTION
-    Rewrite a single-column subquery using rule-based approach. The subquery
-    
-       oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
-    
-    First, try to convert the subquery to scalar-result subquery in one of
-    the forms:
-    
-       - oe $cmp$ (SELECT MAX(...) )  // handled by Item_singlerow_subselect
-       - oe $cmp$ <max>(SELECT ...)   // handled by Item_maxmin_subselect
-   
-    If that fails, the subquery will be handled with class Item_in_optimizer, 
-    Inject the predicates into subquery, i.e. convert it to:
-
-    - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+/**
+  Prepare a single-column IN/ALL/ANY subselect for rewriting.
 
-       SELECT ie FROM ...  HAVING subq_having AND 
-                                   trigcond(oe $cmp$ ref_or_null_helper<ie>)
-                                   
-      the addition is wrapped into trigger only when we want to distinguish
-      between NULL and FALSE results.
+  @param join  Join object of the subquery (i.e. 'child' join).
 
-    - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
-      following:
+  @details
 
-      = If we don't need to distinguish between NULL and FALSE subquery:
-        
-        SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
+  Prepare a single-column subquery to be rewritten. Given the subquery.
 
-      = If we need to distinguish between those:
+  If the subquery has no tables it will be turned to an expression between
+  left part and SELECT list.
 
-        SELECT 1 FROM ...
-          WHERE  subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
-          HAVING trigcond(<is_not_null_test>(ie))
+  In other cases the subquery will be wrapped with  Item_in_optimizer which
+  allow later to turn it to EXISTS or MAX/MIN.
 
-  RETURN
-    RES_OK     - OK, either subquery was transformed, or appopriate
-                 predicates where injected into it.
-    RES_REDUCE - The subquery was reduced to non-subquery
-    RES_ERROR  - Error
+  @retval false  The subquery was transformed
+  @retval true   Error
 */
 
-Item_subselect::trans_res
-Item_in_subselect::single_value_transformer(JOIN *join,
-					    Comp_creator *func)
+bool
+Item_in_subselect::single_value_transformer(JOIN *join)
 {
   SELECT_LEX *select_lex= join->select_lex;
   DBUG_ENTER("Item_in_subselect::single_value_transformer");
@@ -984,119 +1522,46 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     Check that the right part of the subselect contains no more than one
     column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
   */
+  // psergey: duplicated_subselect_card_check
   if (select_lex->item_list.elements > 1)
   {
     my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-    DBUG_RETURN(RES_ERROR);
+    DBUG_RETURN(true);
   }
 
-  /*
-    If this is an ALL/ANY single-value subselect, try to rewrite it with
-    a MIN/MAX subselect. We can do that if a possible NULL result of the
-    subselect can be ignored.
-    E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten
-    with SELECT * FROM t1 WHERE b > (SELECT MAX(a) FROM t2).
-    We can't check that this optimization is safe if it's not a top-level
-    item of the WHERE clause (e.g. because the WHERE clause can contain IS
-    NULL/IS NOT NULL functions). If so, we rewrite ALL/ANY with NOT EXISTS
-    later in this method.
-  */
-  if ((abort_on_null || (upper_item && upper_item->top_level())) &&
-      !select_lex->master_unit()->uncacheable && !func->eqne_op())
+  Item* join_having= join->having ? join->having : join->tmp_having;
+  if (!(join_having || select_lex->with_sum_func ||
+        select_lex->group_list.elements) &&
+      select_lex->table_list.elements == 0 &&
+      !select_lex->master_unit()->is_union())
   {
-    if (substitution)
-    {
-      // It is second (third, ...) SELECT of UNION => All is done
-      DBUG_RETURN(RES_OK);
-    }
-
-    Item *subs;
+    Item *where_item= (Item*) select_lex->item_list.head();
     /*
-      Check if optimization with aggregate min/max possible
-      1 There is no aggregate in the subquery
-      2 It is not UNION
-      3 There is tables
-      4 It is not ALL subquery with possible NULLs in the SELECT list
+      it is single select without tables => possible optimization
+      remove the dependence mark since the item is moved to upper
+      select and is not outer anymore.
     */
-    if (!select_lex->group_list.elements &&                /*1*/
-        !select_lex->having &&                             /*1*/
-	!select_lex->with_sum_func &&                      /*1*/
-	!(select_lex->next_select()) &&                    /*2*/
-        select_lex->table_list.elements &&                 /*3*/
-        (!select_lex->ref_pointer_array[0]->maybe_null ||  /*4*/
-         substype() != Item_subselect::ALL_SUBS))          /*4*/
-    {
-      Item_sum_hybrid *item;
-      nesting_map save_allow_sum_func;
-      if (func->l_op())
-      {
-	/*
-	  (ALL && (> || =>)) || (ANY && (< || =<))
-	  for ALL condition is inverted
-	*/
-	item= new Item_sum_max(*select_lex->ref_pointer_array);
-      }
-      else
-      {
-	/*
-	  (ALL && (< || =<)) || (ANY && (> || =>))
-	  for ALL condition is inverted
-	*/
-	item= new Item_sum_min(*select_lex->ref_pointer_array);
-      }
-      if (upper_item)
-        upper_item->set_sum_test(item);
-      *select_lex->ref_pointer_array= item;
-      {
-	List_iterator<Item> it(select_lex->item_list);
-	it++;
-	it.replace(item);
-      }
-
-      DBUG_EXECUTE("where",
-                   print_where(item, "rewrite with MIN/MAX", QT_ORDINARY););
-      if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
-      {
-        DBUG_ASSERT(item->get_arg(0)->real_item()->type() != Item::FIELD_ITEM ||
-                    select_lex->non_agg_field_used());
-        select_lex->set_non_agg_field_used(false);
-      }
-
-      save_allow_sum_func= thd->lex->allow_sum_func;
-      thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
-      /*
-	Item_sum_(max|min) can't substitute other item => we can use 0 as
-        reference, also Item_sum_(max|min) can't be fixed after creation, so
-        we do not check item->fixed
-      */
-      if (item->fix_fields(thd, 0))
-	DBUG_RETURN(RES_ERROR);
-      thd->lex->allow_sum_func= save_allow_sum_func; 
-      /* we added aggregate function => we have to change statistic */
-      count_field_types(select_lex, &join->tmp_table_param, join->all_fields, 
-                        0);
-
-      subs= new Item_singlerow_subselect(select_lex);
-    }
-    else
+    where_item->walk(&Item::remove_dependence_processor, 0,
+                     (uchar *) select_lex->outer_select());
+    substitution= func->create(left_expr, where_item);
+    have_to_be_excluded= 1;
+    if (thd->lex->describe)
     {
-      Item_maxmin_subselect *item;
-      subs= item= new Item_maxmin_subselect(thd, this, select_lex, func->l_op());
-      if (upper_item)
-        upper_item->set_sub_test(item);
+      char warn_buff[MYSQL_ERRMSG_SIZE];
+      sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
+      push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                   ER_SELECT_REDUCED, warn_buff);
     }
-    /*
-      The swap is needed for expressions of type 'f1 < ALL ( SELECT ....)'
-      where we want to evaluate the sub query even if f1 would be null.
-    */
-    substitution= func->create_swap(left_expr, subs);
-    DBUG_RETURN(RES_OK);
+    DBUG_RETURN(false);
   }
 
+  /*
+    Wrap the current IN predicate in an Item_in_optimizer. The actual
+    substitution in the Item tree takes place in Item_subselect::fix_fields.
+  */
   if (!substitution)
   {
     /* We're invoked for the 1st (or the only) SELECT in the subquery UNION */
-    SELECT_LEX_UNIT *master_unit= select_lex->master_unit();
     substitution= optimizer;
 
     SELECT_LEX *current= thd->lex->current_select;
@@ -1106,10 +1571,13 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
       thd->lex->current_select= current;
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
     thd->lex->current_select= current;
 
+    /* We will refer to upper level cache array => we have to save it for SP */
+    optimizer->keep_top_level_cache();
+
     /*
       As far as  Item_ref_in_optimizer do not substitute itself on fix_fields
       we can use same item for all selects.
@@ -1118,22 +1586,224 @@ Item_in_subselect::single_value_transformer(JOIN *join,
                               (Item**)optimizer->get_cache(),
 			      (char *)"<no matter>",
 			      (char *)in_left_expr_name);
-    if (!left_expr->const_item())
-      master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
   }
-  if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
+
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Apply transformation max/min  transwormation to ALL/ANY subquery if it is
+  possible.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @details
+
+  If this is an ALL/ANY single-value subselect, try to rewrite it with
+  a MIN/MAX subselect. We can do that if a possible NULL result of the
+  subselect can be ignored.
+  E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten
+  with SELECT * FROM t1 WHERE b > (SELECT MAX(a) FROM t2).
+  We can't check that this optimization is safe if it's not a top-level
+  item of the WHERE clause (e.g. because the WHERE clause can contain IS
+  NULL/IS NOT NULL functions). If so, we rewrite ALL/ANY with NOT EXISTS
+  later in this method.
+
+  @retval false  The subquery was transformed
+  @retval true   Error
+*/
+
+bool Item_allany_subselect::transform_into_max_min(JOIN *join)
+{
+  DBUG_ENTER("Item_allany_subselect::transform_into_max_min");
+  if (!test_strategy(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE))
+    DBUG_RETURN(false);
+  Item **place= optimizer->arguments() + 1;
+  THD *thd= join->thd;
+  SELECT_LEX *select_lex= join->select_lex;
+  Item *subs;
+
+  /*
+  */
+  DBUG_ASSERT(!substitution);
+
+  /*
+    Check if optimization with aggregate min/max possible
+    1 There is no aggregate in the subquery
+    2 It is not UNION
+    3 There is tables
+    4 It is not ALL subquery with possible NULLs in the SELECT list
+  */
+  if (!select_lex->group_list.elements &&                /*1*/
+      !select_lex->having &&                             /*1*/
+      !select_lex->with_sum_func &&                      /*1*/
+      !(select_lex->next_select()) &&                    /*2*/
+      select_lex->table_list.elements &&                 /*3*/
+      (!select_lex->ref_pointer_array[0]->maybe_null ||  /*4*/
+       substype() != Item_subselect::ALL_SUBS))          /*4*/
   {
-    if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool))))
-      DBUG_RETURN(RES_ERROR);
-    pushed_cond_guards[0]= TRUE;
+    Item_sum_hybrid *item;
+    nesting_map save_allow_sum_func;
+    if (func->l_op())
+    {
+      /*
+        (ALL && (> || =>)) || (ANY && (< || =<))
+        for ALL condition is inverted
+      */
+      item= new Item_sum_max(*select_lex->ref_pointer_array);
+    }
+    else
+    {
+      /*
+        (ALL && (< || =<)) || (ANY && (> || =>))
+        for ALL condition is inverted
+      */
+      item= new Item_sum_min(*select_lex->ref_pointer_array);
+    }
+    if (upper_item)
+      upper_item->set_sum_test(item);
+    thd->change_item_tree(select_lex->ref_pointer_array, item);
+    {
+      List_iterator<Item> it(select_lex->item_list);
+      it++;
+      thd->change_item_tree(it.ref(), item);
+    }
+
+    DBUG_EXECUTE("where",
+                 print_where(item, "rewrite with MIN/MAX", QT_ORDINARY););
+
+    save_allow_sum_func= thd->lex->allow_sum_func;
+    thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
+    /*
+      Item_sum_(max|min) can't substitute other item => we can use 0 as
+      reference, also Item_sum_(max|min) can't be fixed after creation, so
+      we do not check item->fixed
+    */
+    if (item->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    thd->lex->allow_sum_func= save_allow_sum_func; 
+    /* we added aggregate function => we have to change statistic */
+    count_field_types(select_lex, &join->tmp_table_param, join->all_fields, 
+                      0);
+    if (join->prepare_stage2())
+      DBUG_RETURN(true);
+    subs= new Item_singlerow_subselect(select_lex);
+
+    /*
+      Remove other strategies if any (we already changed the query and
+      can't apply other strategy).
+    */
+    set_strategy(SUBS_MAXMIN_INJECTED);
+  }
+  else
+  {
+    Item_maxmin_subselect *item;
+    subs= item= new Item_maxmin_subselect(thd, this, select_lex, func->l_op());
+    if (upper_item)
+      upper_item->set_sub_test(item);
+    /*
+      Remove other strategies if any (we already changed the query and
+      can't apply other strategy).
+    */
+    set_strategy(SUBS_MAXMIN_ENGINE);
   }
+  /*
+    The swap is needed for expressions of type 'f1 < ALL ( SELECT ....)'
+    where we want to evaluate the sub query even if f1 would be null.
+  */
+  subs= func->create_swap(*(optimizer->get_cache()), subs);
+  thd->change_item_tree(place, subs);
+  if (subs->fix_fields(thd, &subs))
+    DBUG_RETURN(true);
+  DBUG_ASSERT(subs == (*place)); // There was no substitutions
 
-  if (!left_expr->const_item())
-    select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
-  if (join->having || select_lex->with_sum_func ||
+  select_lex->master_unit()->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+  select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+
+  DBUG_RETURN(false);
+}
+
+
+bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
+{
+  bool fix_res= 0;
+  if (!having->fixed)
+  {
+    select_lex->having_fix_field= 1;
+    fix_res= having->fix_fields(thd, 0);
+    select_lex->having_fix_field= 0;
+  }
+  return fix_res;
+}
+
+bool Item_allany_subselect::is_maxmin_applicable(JOIN *join)
+{
+  /*
+    Check if max/min optimization applicable: It is top item of
+    WHERE condition.
+  */
+  return (abort_on_null || (upper_item && upper_item->is_top_level_item())) &&
+      !join->select_lex->master_unit()->uncacheable && !func->eqne_op();
+}
+
+
+/**
+  Create the predicates needed to transform a single-column IN/ALL/ANY
+  subselect into a correlated EXISTS via predicate injection.
+
+  @param join[in]  Join object of the subquery (i.e. 'child' join).
+  @param where_item[out]   the in-to-exists addition to the where clause
+  @param having_item[out]  the in-to-exists addition to the having clause
+
+  @details
+  The correlated predicates are created as follows:
+
+  - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+    SELECT ie FROM ...  HAVING subq_having AND 
+                               trigcond(oe $cmp$ ref_or_null_helper<ie>)
+                                   
+    the addition is wrapped into trigger only when we want to distinguish
+    between NULL and FALSE results.
+
+  - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
+    following:
+
+    = If we don't need to distinguish between NULL and FALSE subquery:
+        
+      SELECT ie FROM ... WHERE subq_where AND (oe $cmp$ ie)
+
+    = If we need to distinguish between those:
+
+      SELECT ie FROM ...
+        WHERE  subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
+        HAVING trigcond(<is_not_null_test>(ie))
+
+  @retval false If the new conditions were created successfully
+  @retval true  Error
+*/
+
+bool
+Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
+                                                   Item **where_item,
+                                                   Item **having_item)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  /*
+    The non-transformed HAVING clause of 'join' may be stored in two ways
+    during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+  */
+  Item* join_having= join->having ? join->having : join->tmp_having;
+
+  DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
+
+  *where_item= NULL;
+  *having_item= NULL;
+
+  if (join_having || select_lex->with_sum_func ||
       select_lex->group_list.elements)
   {
-    bool tmp;
     Item *item= func->create(expr,
                              new Item_ref_null_helper(&select_lex->context,
                                                       this,
@@ -1149,24 +1819,12 @@ Item_in_subselect::single_value_transformer(JOIN *join,
       */
       item= new Item_func_trig_cond(item, get_cond_guard(0));
     }
-    
-    /*
-      AND and comparison functions can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->having= join->having= and_items(join->having, item);
-    if (join->having == item)
-      item->name= (char*)in_having_cond;
-    select_lex->having_fix_field= 1;
-    /*
-      we do not check join->having->fixed, because Item_and (from and_items)
-      or comparison function (from func->create) can't be fixed after creation
-    */
-    tmp= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (tmp)
-      DBUG_RETURN(RES_ERROR);
+
+    if (!join_having)
+      item->name= (char*) in_having_cond;
+    if (fix_having(item, select_lex))
+      DBUG_RETURN(true);
+    *having_item= item;
   }
   else
   {
@@ -1174,13 +1832,8 @@ Item_in_subselect::single_value_transformer(JOIN *join,
 
     if (select_lex->table_list.elements)
     {
-      bool tmp;
-      Item *having= item, *orig_item= item;
-      select_lex->item_list.empty();
-      select_lex->item_list.push_back(new Item_int("Not_used",
-                                                   (longlong) 1,
-                                                   MY_INT64_NUM_DECIMAL_DIGITS));
-      select_lex->ref_pointer_array[0]= select_lex->item_list.head();
+      Item *having= item;
+      Item *orig_item= item;
        
       item= func->create(expr, item);
       if (!abort_on_null && orig_item->maybe_null)
@@ -1190,25 +1843,13 @@ Item_in_subselect::single_value_transformer(JOIN *join,
         {
           if (!(having= new Item_func_trig_cond(having,
                                                 get_cond_guard(0))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-	/*
-	  Item_is_not_null_test can't be changed during fix_fields()
-	  we can assign select_lex->having here, and pass 0 as last
-	  argument (reference) to fix_fields()
-	*/
-        having->name= (char*)in_having_cond;
-	select_lex->having= join->having= having;
-	select_lex->having_fix_field= 1;
-        /*
-          we do not check join->having->fixed, because Item_and (from
-          and_items) or comparison function (from func->create) can't be
-          fixed after creation
-        */
-	tmp= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (tmp)
-	  DBUG_RETURN(RES_ERROR);
+        having->name= (char*) in_having_cond;
+        if (fix_having(having, select_lex))
+          DBUG_RETURN(true);
+        *having_item= having;
+
 	item= new Item_cond_or(item,
 			       new Item_func_isnull(orig_item));
       }
@@ -1219,39 +1860,23 @@ Item_in_subselect::single_value_transformer(JOIN *join,
       if (!abort_on_null && left_expr->maybe_null)
       {
         if (!(item= new Item_func_trig_cond(item, get_cond_guard(0))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
+
       /*
         TODO: figure out why the following is done here in 
         single_value_transformer but there is no corresponding action in
         row_value_transformer?
       */
-      item->name= (char *)in_additional_cond;
-
-      /*
-	AND can't be changed during fix_fields()
-	we can assign select_lex->having here, and pass 0 as last
-	argument (reference) to fix_fields()
-      */
-      select_lex->where= join->conds= and_items(join->conds, item);
-      select_lex->where->top_level_item();
-      /*
-        we do not check join->conds->fixed, because Item_and can't be fixed
-        after creation
-      */
-      if (join->conds->fix_fields(thd, 0))
-	DBUG_RETURN(RES_ERROR);
+      item->name= (char *) in_additional_cond;
+      if (!item->fixed && item->fix_fields(thd, 0))
+        DBUG_RETURN(true);
+      *where_item= item;
     }
     else
     {
-      bool tmp;
       if (select_lex->master_unit()->is_union())
       {
-	/*
-	  comparison functions can't be changed during fix_fields()
-	  we can assign select_lex->having here, and pass 0 as last
-	  argument (reference) to fix_fields()
-	*/
         Item *new_having=
           func->create(expr,
                        new Item_ref_null_helper(&select_lex->context, this,
@@ -1262,65 +1887,58 @@ Item_in_subselect::single_value_transformer(JOIN *join,
         {
           if (!(new_having= new Item_func_trig_cond(new_having,
                                                     get_cond_guard(0))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-        new_having->name= (char*)in_having_cond;
-	select_lex->having= join->having= new_having;
-	select_lex->having_fix_field= 1;
-        
-        /*
-          we do not check join->having->fixed, because comparison function
-          (from func->create) can't be fixed after creation
-        */
-	tmp= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (tmp)
-	  DBUG_RETURN(RES_ERROR);
+
+        new_having->name= (char*) in_having_cond;
+        if (fix_having(new_having, select_lex))
+          DBUG_RETURN(true);
+        *having_item= new_having;
       }
       else
-      {
-	// it is single select without tables => possible optimization
-        // remove the dependence mark since the item is moved to upper
-        // select and is not outer anymore.
-        item->walk(&Item::remove_dependence_processor, 0,
-                           (uchar *) select_lex->outer_select());
-	item= func->create(left_expr, item);
-	// fix_field of item will be done in time of substituting
-	substitution= item;
-	have_to_be_excluded= 1;
-	if (thd->lex->describe)
-	{
-	  char warn_buff[MYSQL_ERRMSG_SIZE];
-	  sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
-	  push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
-		       ER_SELECT_REDUCED, warn_buff);
-	}
-	DBUG_RETURN(RES_REDUCE);
-      }
+        DBUG_ASSERT(false);
     }
   }
 
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
-Item_subselect::trans_res
+/**
+  Wrap a multi-column IN/ALL/ANY subselect into an Item_in_optimizer.
+
+  @param join  Join object of the subquery (i.e. 'child' join).
+
+  @details
+  The subquery predicate is wrapped into an Item_in_optimizer. Later the query
+  optimization phase chooses whether the subquery under the Item_in_optimizer
+  will be further transformed into an equivalent correlated EXISTS by injecting
+  additional predicates, or will be executed via subquery materialization in its
+  unmodified form.
+
+  @retval false  The subquery was transformed
+  @retval true   Error
+*/
+
+bool
 Item_in_subselect::row_value_transformer(JOIN *join)
 {
   SELECT_LEX *select_lex= join->select_lex;
-  Item *having_item= 0;
   uint cols_num= left_expr->cols();
-  bool is_having_used= (join->having || select_lex->with_sum_func ||
-                        select_lex->group_list.first ||
-                        !select_lex->table_list.elements);
+
   DBUG_ENTER("Item_in_subselect::row_value_transformer");
 
-  if (select_lex->item_list.elements != left_expr->cols())
+  // psergey: duplicated_subselect_card_check
+  if (select_lex->item_list.elements != cols_num)
   {
-    my_error(ER_OPERAND_COLUMNS, MYF(0), left_expr->cols());
-    DBUG_RETURN(RES_ERROR);
+    my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num);
+    DBUG_RETURN(true);
   }
 
+  /*
+    Wrap the current IN predicate in an Item_in_optimizer. The actual
+    substitution in the Item tree takes place in Item_subselect::fix_fields.
+  */
   if (!substitution)
   {
     //first call for this unit
@@ -1333,54 +1951,113 @@ Item_in_subselect::row_value_transformer(JOIN *join)
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
       thd->lex->current_select= current;
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
 
     // we will refer to upper level cache array => we have to save it in PS
     optimizer->keep_top_level_cache();
 
     thd->lex->current_select= current;
-    if (!left_expr->const_item())
-      master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
-
-    if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
-    {
-      if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool) *
-                                                        left_expr->cols())))
-        DBUG_RETURN(RES_ERROR);
-      for (uint i= 0; i < cols_num; i++)
-        pushed_cond_guards[i]= TRUE;
-    }
+    /*
+      The uncacheable property controls a number of actions, e.g. whether to
+      save/restore (via init_save_join_tab/restore_tmp) the original JOIN for
+      plans with a temp table where the original JOIN was overriden by
+      make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus
+      non-correlated subqueries will not appear as such to EXPLAIN.
+    */
+    master_unit->uncacheable|= UNCACHEABLE_EXPLAIN;
+    select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
   }
 
-  if (!left_expr->const_item())
-    select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Create the predicates needed to transform a multi-column IN/ALL/ANY
+  subselect into a correlated EXISTS via predicate injection.
+
+  @details
+  The correlated predicates are created as follows:
+
+  - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+    (l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having)
+    =>
+    EXISTS (SELECT ... HAVING having and
+                              (l1 = v1 or is null v1) and
+                              (l2 = v2 or is null v2) and
+                              (l3 = v3 or is null v3) and
+                              is_not_null_test(v1) and
+                              is_not_null_test(v2) and
+                              is_not_null_test(v3))
+
+    where is_not_null_test used to register nulls in case if we have
+    not found matching to return correct NULL value.
+
+  - Otherwise (no aggregates/GROUP BY/HAVING) convert the subquery as follows:
+
+    (l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where)
+    =>
+    EXISTS (SELECT ... WHERE where and
+                             (l1 = v1 or is null v1) and
+                             (l2 = v2 or is null v2) and
+                             (l3 = v3 or is null v3)
+                       HAVING is_not_null_test(v1) and
+                              is_not_null_test(v2) and
+                              is_not_null_test(v3))
+    where is_not_null_test registers NULLs values but reject rows.
+
+    in case when we do not need correct NULL, we have simplier construction:
+    EXISTS (SELECT ... WHERE where and
+                             (l1 = v1) and
+                             (l2 = v2) and
+                             (l3 = v3)
+
+  @param join[in]  Join object of the subquery (i.e. 'child' join).
+  @param where_item[out]   the in-to-exists addition to the where clause
+  @param having_item[out]  the in-to-exists addition to the having clause
+
+  @retval false  If the new conditions were created successfully
+  @retval true   Error
+*/
+
+bool
+Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
+                                                Item **where_item,
+                                                Item **having_item)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  uint cols_num= left_expr->cols();
+  /*
+    The non-transformed HAVING clause of 'join' may be stored in two ways
+    during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+  */
+  Item* join_having= join->having ? join->having : join->tmp_having;
+  bool is_having_used= (join_having || select_lex->with_sum_func ||
+                        select_lex->group_list.first ||
+                        !select_lex->table_list.elements);
+
+  DBUG_ENTER("Item_in_subselect::create_row_in_to_exists_cond");
+
+  *where_item= NULL;
+  *having_item= NULL;
+
   if (is_having_used)
   {
-    /*
-      (l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having) =>
-      EXISTS (SELECT ... HAVING having and
-                                (l1 = v1 or is null v1) and
-                                (l2 = v2 or is null v2) and
-                                (l3 = v3 or is null v3) and
-                                is_not_null_test(v1) and
-                                is_not_null_test(v2) and
-                                is_not_null_test(v3))
-      where is_not_null_test used to register nulls in case if we have
-      not found matching to return correct NULL value
-      TODO: say here explicitly if the order of AND parts matters or not.
-    */
+    /* TODO: say here explicitly if the order of AND parts matters or not. */
     Item *item_having_part2= 0;
     for (uint i= 0; i < cols_num; i++)
     {
       DBUG_ASSERT((left_expr->fixed &&
-                   select_lex->ref_pointer_array[i]->fixed) ||
+
+                  select_lex->ref_pointer_array[i]->fixed) ||
                   (select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
                    ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
                     Item_ref::OUTER_REF));
       if (select_lex->ref_pointer_array[i]->
           check_cols(left_expr->element_index(i)->cols()))
-        DBUG_RETURN(RES_ERROR);
+        DBUG_RETURN(true);
       Item *item_eq=
         new Item_func_eq(new
                          Item_ref(&select_lex->context,
@@ -1392,23 +2069,21 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                          Item_ref(&select_lex->context,
                                   select_lex->ref_pointer_array + i,
                                   (char *)"<no matter>",
-                                  (char *)"<list ref>")
-                        );
+                                  (char *)"<list ref>"));
       Item *item_isnull=
         new Item_func_isnull(new
                              Item_ref(&select_lex->context,
                                       select_lex->ref_pointer_array+i,
                                       (char *)"<no matter>",
-                                      (char *)"<list ref>")
-                            );
+                                      (char *)"<list ref>"));
       Item *col_item= new Item_cond_or(item_eq, item_isnull);
       if (!abort_on_null && left_expr->element_index(i)->maybe_null)
       {
         if (!(col_item= new Item_func_trig_cond(col_item, get_cond_guard(i))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
-      having_item= and_items(having_item, col_item);
-      
+      *having_item= and_items(*having_item, col_item);
+
       Item *item_nnull_test= 
          new Item_is_not_null_test(this,
                                    new Item_ref(&select_lex->context,
@@ -1420,45 +2095,26 @@ Item_in_subselect::row_value_transformer(JOIN *join)
       {
         if (!(item_nnull_test= 
               new Item_func_trig_cond(item_nnull_test, get_cond_guard(i))))
-          DBUG_RETURN(RES_ERROR);
+          DBUG_RETURN(true);
       }
       item_having_part2= and_items(item_having_part2, item_nnull_test);
       item_having_part2->top_level_item();
     }
-    having_item= and_items(having_item, item_having_part2);
-    having_item->top_level_item();
+    *having_item= and_items(*having_item, item_having_part2);
   }
   else
   {
-    /*
-      (l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where) =>
-      EXISTS (SELECT ... WHERE where and
-                               (l1 = v1 or is null v1) and
-                               (l2 = v2 or is null v2) and
-                               (l3 = v3 or is null v3)
-                         HAVING is_not_null_test(v1) and
-                                is_not_null_test(v2) and
-                                is_not_null_test(v3))
-      where is_not_null_test register NULLs values but reject rows
-
-      in case when we do not need correct NULL, we have simplier construction:
-      EXISTS (SELECT ... WHERE where and
-                               (l1 = v1) and
-                               (l2 = v2) and
-                               (l3 = v3)
-    */
-    Item *where_item= 0;
     for (uint i= 0; i < cols_num; i++)
     {
       Item *item, *item_isnull;
       DBUG_ASSERT((left_expr->fixed &&
-                   select_lex->ref_pointer_array[i]->fixed) ||
+                  select_lex->ref_pointer_array[i]->fixed) ||
                   (select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
                    ((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
                     Item_ref::OUTER_REF));
       if (select_lex->ref_pointer_array[i]->
           check_cols(left_expr->element_index(i)->cols()))
-        DBUG_RETURN(RES_ERROR);
+        DBUG_RETURN(true);
       item=
         new Item_func_eq(new
                          Item_direct_ref(&select_lex->context,
@@ -1471,8 +2127,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                                          select_lex->
                                          ref_pointer_array+i,
                                          (char *)"<no matter>",
-                                         (char *)"<list ref>")
-                        );
+                                         (char *)"<list ref>"));
       if (!abort_on_null)
       {
         Item *having_col_item=
@@ -1490,8 +2145,7 @@ Item_in_subselect::row_value_transformer(JOIN *join)
                                            select_lex->
                                            ref_pointer_array+i,
                                            (char *)"<no matter>",
-                                           (char *)"<list ref>")
-                          );
+                                           (char *)"<list ref>"));
         item= new Item_cond_or(item, item_isnull);
         /* 
           TODO: why we create the above for cases where the right part
@@ -1500,106 +2154,221 @@ Item_in_subselect::row_value_transformer(JOIN *join)
         if (left_expr->element_index(i)->maybe_null)
         {
           if (!(item= new Item_func_trig_cond(item, get_cond_guard(i))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
           if (!(having_col_item= 
                   new Item_func_trig_cond(having_col_item, get_cond_guard(i))))
-            DBUG_RETURN(RES_ERROR);
+            DBUG_RETURN(true);
         }
-        having_item= and_items(having_item, having_col_item);
+        *having_item= and_items(*having_item, having_col_item);
       }
-      where_item= and_items(where_item, item);
+      *where_item= and_items(*where_item, item);
     }
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->where here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->where= join->conds= and_items(join->conds, where_item);
-    select_lex->where->top_level_item();
-    if (join->conds->fix_fields(thd, 0))
-      DBUG_RETURN(RES_ERROR);
   }
-  if (having_item)
+
+  if (*where_item)
   {
-    bool res;
-    select_lex->having= join->having= and_items(join->having, having_item);
-    if (having_item == select_lex->having)
-      having_item->name= (char*)in_having_cond;
-    select_lex->having->top_level_item();
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->having_fix_field= 1;
-    res= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (res)
-    {
-      DBUG_RETURN(RES_ERROR);
-    }
+    if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    (*where_item)->top_level_item();
+  }
+
+  if (*having_item)
+  {
+    if (!join_having)
+      (*having_item)->name= (char*) in_having_cond;
+    if (fix_having(*having_item, select_lex))
+      DBUG_RETURN(true);
+    (*having_item)->top_level_item();
   }
 
-  DBUG_RETURN(RES_OK);
+  DBUG_RETURN(false);
 }
 
 
-Item_subselect::trans_res
+bool
 Item_in_subselect::select_transformer(JOIN *join)
 {
-  return select_in_like_transformer(join, &eq_creator);
+  return select_in_like_transformer(join);
 }
 
 
 /**
-  Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate
-  transformation function.
+  Create the predicates needed to transform an IN/ALL/ANY subselect into a
+  correlated EXISTS via predicate injection.
+
+  @param join_arg  Join object of the subquery.
+
+  @retval FALSE  ok
+  @retval TRUE   error
+*/
+
+bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
+{
+  bool res;
+
+  DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE ||
+              engine->engine_type() == subselect_engine::UNION_ENGINE);
+  /*
+    TODO: the call to init_cond_guards allocates and initializes an
+    array of booleans that may not be used later because we may choose
+    materialization.
+    The two calls below to create_XYZ_cond depend on this boolean array.
+    If the dependency is removed, the call can be moved to a later phase.
+  */
+  init_cond_guards();
+  if (left_expr->cols() == 1)
+    res= create_single_in_to_exists_cond(join_arg,
+                                         &(join_arg->in_to_exists_where),
+                                         &(join_arg->in_to_exists_having));
+  else
+    res= create_row_in_to_exists_cond(join_arg,
+                                      &(join_arg->in_to_exists_where),
+                                      &(join_arg->in_to_exists_having));
+
+  /*
+    The IN=>EXISTS transformation makes non-correlated subqueries correlated.
+  */
+  if (!left_expr->const_item() || left_expr->is_expensive())
+  {
+    join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+    join_arg->select_lex->master_unit()->uncacheable|= 
+                                         UNCACHEABLE_DEPENDENT_INJECTED;
+  }
+  /*
+    The uncacheable property controls a number of actions, e.g. whether to
+    save/restore (via init_save_join_tab/restore_tmp) the original JOIN for
+    plans with a temp table where the original JOIN was overriden by
+    make_simple_join. The UNCACHEABLE_EXPLAIN is ignored by EXPLAIN, thus
+    non-correlated subqueries will not appear as such to EXPLAIN.
+  */
+  join_arg->select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN;
+  join_arg->select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
+  return (res);
+}
+
+
+/**
+  Transform an IN/ALL/ANY subselect into a correlated EXISTS via injecting
+  correlated in-to-exists predicates.
+
+  @param join_arg  Join object of the subquery.
+
+  @retval FALSE  ok
+  @retval TRUE   error
+*/
+
+bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
+{
+  SELECT_LEX *select_lex= join_arg->select_lex;
+  Item *where_item= join_arg->in_to_exists_where;
+  Item *having_item= join_arg->in_to_exists_having;
+
+  DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
 
-    To decide which transformation procedure (scalar or row) applicable here
-    we have to call fix_fields() for left expression to be able to call
-    cols() method on it. Also this method make arena management for
-    underlying transformation methods.
+  if (where_item)
+  {
+    List<Item> *and_args= NULL;
+    /*
+      If the top-level Item of the WHERE clause is an AND, detach the multiple
+      equality list that was attached to the end of the AND argument list by
+      build_equal_items_for_cond(). The multiple equalities must be detached
+      because fix_fields merges lower level AND arguments into the upper AND.
+      As a result, the arguments from lower-level ANDs are concatenated after
+      the multiple equalities. When the multiple equality list is treated as
+      such, it turns out that it contains non-Item_equal object which is wrong.
+    */
+    if (join_arg->conds && join_arg->conds->type() == Item::COND_ITEM &&
+        ((Item_cond*) join_arg->conds)->functype() == Item_func::COND_AND_FUNC)
+    {
+      and_args= ((Item_cond*) join_arg->conds)->argument_list();
+      if (join_arg->cond_equal)
+        and_args->disjoin((List<Item> *) &join_arg->cond_equal->current_level);
+    }
+
+    where_item= and_items(join_arg->conds, where_item);
+    if (!where_item->fixed && where_item->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    // TIMOUR TODO: call optimize_cond() for the new where clause
+    thd->change_item_tree(&select_lex->where, where_item);
+    select_lex->where->top_level_item();
+    join_arg->conds= select_lex->where;
+
+    /* Attach back the list of multiple equalities to the new top-level AND. */
+    if (and_args && join_arg->cond_equal)
+    {
+      /* The argument list of the top-level AND may change after fix fields. */
+      and_args= ((Item_cond*) join_arg->conds)->argument_list();
+      List_iterator<Item_equal> li(join_arg->cond_equal->current_level);
+      Item_equal *elem;
+      while ((elem= li++))
+      {
+        and_args->push_back(elem);
+      }
+    }
+  }
+
+  if (having_item)
+  {
+    Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
+    having_item= and_items(join_having, having_item);
+    if (fix_having(having_item, select_lex))
+      DBUG_RETURN(true);
+    // TIMOUR TODO: call optimize_cond() for the new having clause
+    thd->change_item_tree(&select_lex->having, having_item);
+    select_lex->having->top_level_item();
+    join_arg->having= select_lex->having;
+  }
+  join_arg->thd->change_item_tree(&unit->global_parameters->select_limit,
+                                  new Item_int((int32) 1));
+  unit->select_limit_cnt= 1;
+
+  DBUG_RETURN(false);
+}
+
+
+/**
+  Prepare IN/ALL/ANY/SOME subquery transformation and call the appropriate
+  transformation function.
 
   @param join    JOIN object of transforming subquery
-  @param func    creator of condition function of subquery
 
-  @retval
-    RES_OK      OK
-  @retval
-    RES_REDUCE  OK, and current subquery was reduced during
-    transformation
-  @retval
-    RES_ERROR   Error
+  @notes
+  To decide which transformation procedure (scalar or row) applicable here
+  we have to call fix_fields() for the left expression to be able to call
+  cols() method on it. Also this method makes arena management for
+  underlying transformation methods.
+
+  @retval  false  OK
+  @retval  true   Error
 */
 
-Item_subselect::trans_res
-Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
+bool
+Item_in_subselect::select_in_like_transformer(JOIN *join)
 {
   Query_arena *arena, backup;
   SELECT_LEX *current= thd->lex->current_select;
   const char *save_where= thd->where;
-  Item_subselect::trans_res res= RES_ERROR;
+  bool trans_res= true;
   bool result;
 
   DBUG_ENTER("Item_in_subselect::select_in_like_transformer");
 
+  /*
+    IN/SOME/ALL/ANY subqueries aren't support LIMIT clause. Without it
+    ORDER BY clause becomes meaningless thus we drop it here.
+  */
+  for (SELECT_LEX *sl= current->master_unit()->first_select();
+       sl; sl= sl->next_select())
   {
-    /*
-      IN/SOME/ALL/ANY subqueries aren't support LIMIT clause. Without it
-      ORDER BY clause becomes meaningless thus we drop it here.
-    */
-    SELECT_LEX *sl= current->master_unit()->first_select();
-    for (; sl; sl= sl->next_select())
+    if (sl->join)
     {
-      if (sl->join)
-        sl->join->order= 0;
+      sl->join->order= 0;
+      sl->join->skip_sort_order= 1;
     }
   }
 
   if (changed)
-  {
-    DBUG_RETURN(RES_OK);
-  }
+    DBUG_RETURN(false);
 
   thd->where= "IN/ALL/ANY subquery";
 
@@ -1607,6 +2376,9 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
     In some optimisation cases we will not need this Item_in_optimizer
     object, but we can't know it here, but here we need address correct
     reference on left expresion.
+
+    note: we won't need Item_in_optimizer when handling degenerate cases
+    like "... IN (SELECT 1)"
   */
   if (!optimizer)
   {
@@ -1628,9 +2400,6 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
   if (result)
     goto err;
 
-  transformed= 1;
-  arena= thd->activate_stmt_arena_if_needed(&backup);
-
   /*
     Both transformers call fix_fields() only for Items created inside them,
     and all that items do not make permanent changes in current item arena
@@ -1638,8 +2407,9 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
     of Item, we have to call fix_fields() for it only with original arena to
     avoid memory leack)
   */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
   if (left_expr->cols() == 1)
-    res= single_value_transformer(join, func);
+    trans_res= single_value_transformer(join);
   else
   {
     /* we do not support row operation for ALL/ANY/SOME */
@@ -1648,21 +2418,21 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
       if (arena)
         thd->restore_active_arena(arena, &backup);
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      DBUG_RETURN(RES_ERROR);
+      DBUG_RETURN(true);
     }
-    res= row_value_transformer(join);
+    trans_res= row_value_transformer(join);
   }
   if (arena)
     thd->restore_active_arena(arena, &backup);
 err:
   thd->where= save_where;
-  DBUG_RETURN(res);
+  DBUG_RETURN(trans_res);
 }
 
 
 void Item_in_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (test_strategy(SUBS_IN_TO_EXISTS))
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1675,29 +2445,196 @@ void Item_in_subselect::print(String *str, enum_query_type query_type)
 
 bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
 {
-  bool result = 0;
-  
+  uint outer_cols_num;
+  List<Item> *inner_cols;
+
+  if (test_strategy(SUBS_SEMI_JOIN))
+    return !( (*ref)= new Item_int(1));
+
+  /*
+    Check if the outer and inner IN operands match in those cases when we
+    will not perform IN=>EXISTS transformation. Currently this is when we
+    use subquery materialization.
+
+    The condition below is true when this method was called recursively from
+    inside JOIN::prepare for the JOIN object created by the call chain
+    Item_subselect::fix_fields -> subselect_single_select_engine::prepare,
+    which creates a JOIN object for the subquery and calls JOIN::prepare for
+    the JOIN of the subquery.
+    Notice that in some cases, this doesn't happen, and the check_cols()
+    test for each Item happens later in
+    Item_in_subselect::row_value_in_to_exists_transformer.
+    The reason for this mess is that our JOIN::prepare phase works top-down
+    instead of bottom-up, so we first do name resoluton and semantic checks
+    for the outer selects, then for the inner.
+  */
+  if (engine &&
+      engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE &&
+      ((subselect_single_select_engine*)engine)->join)
+  {
+    outer_cols_num= left_expr->cols();
+
+    if (unit->is_union())
+      inner_cols= &(unit->types);
+    else
+      inner_cols= &(unit->first_select()->item_list);
+    if (outer_cols_num != inner_cols->elements)
+    {
+      my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num);
+      return TRUE;
+    }
+    if (outer_cols_num > 1)
+    {
+      List_iterator<Item> inner_col_it(*inner_cols);
+      Item *inner_col;
+      for (uint i= 0; i < outer_cols_num; i++)
+      {
+        inner_col= inner_col_it++;
+        if (inner_col->check_cols(left_expr->element_index(i)->cols()))
+          return TRUE;
+      }
+    }
+  }
+
   if ((thd_arg->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
-      left_expr && !left_expr->fixed)
-    result = left_expr->fix_fields(thd_arg, &left_expr);
+      left_expr && !left_expr->fixed &&
+      left_expr->fix_fields(thd_arg, &left_expr))
+    return TRUE;
+  else
+  if (Item_subselect::fix_fields(thd_arg, ref))
+    return TRUE;
+  fixed= TRUE;
+  return FALSE;
+}
+
+
+void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  left_expr->fix_after_pullout(new_parent, &left_expr);
+  Item_subselect::fix_after_pullout(new_parent, ref);
+  used_tables_cache |= left_expr->used_tables();
+}
+
+void Item_in_subselect::update_used_tables()
+{
+  Item_subselect::update_used_tables();
+  left_expr->update_used_tables();
+  used_tables_cache |= left_expr->used_tables();
+}
+
+
+/**
+  Try to create and initialize an engine to compute a subselect via
+  materialization.
+
+  @details
+  The method creates a new engine for materialized execution, and initializes
+  the engine. The initialization may fail
+  - either because it wasn't possible to create the needed temporary table
+    and its index,
+  - or because of a memory allocation error,
+
+  @returns
+    @retval TRUE  memory allocation error occurred
+    @retval FALSE an execution method was chosen successfully
+*/
 
-  return result || Item_subselect::fix_fields(thd_arg, ref);
+bool Item_in_subselect::setup_mat_engine()
+{
+  subselect_hash_sj_engine       *mat_engine= NULL;
+  subselect_single_select_engine *select_engine;
+
+  DBUG_ENTER("Item_in_subselect::setup_mat_engine");
+
+  /*
+    The select_engine (that executes transformed IN=>EXISTS subselects) is
+    pre-created at parse time, and is stored in statment memory (preserved
+    across PS executions).
+  */
+  DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE);
+  select_engine= (subselect_single_select_engine*) engine;
+
+  /* Create/initialize execution objects. */
+  if (!(mat_engine= new subselect_hash_sj_engine(thd, this, select_engine)))
+    DBUG_RETURN(TRUE);
+
+  if (mat_engine->init(&select_engine->join->fields_list,
+                       engine->get_identifier()))
+    DBUG_RETURN(TRUE);
+
+  engine= mat_engine;
+  DBUG_RETURN(FALSE);
 }
 
 
-Item_subselect::trans_res
+/**
+  Initialize the cache of the left operand of the IN predicate.
+
+  @note This method has the same purpose as alloc_group_fields(),
+  but it takes a different kind of collection of items, and the
+  list we push to is dynamically allocated.
+
+  @retval TRUE  if a memory allocation error occurred or the cache is
+                not applicable to the current query
+  @retval FALSE if success
+*/
+
+bool Item_in_subselect::init_left_expr_cache()
+{
+  JOIN *outer_join;
+
+  outer_join= unit->outer_select()->join;
+  /*
+    An IN predicate might be evaluated in a query for which all tables have
+    been optimzied away.
+  */ 
+  if (!outer_join || !outer_join->table_count || !outer_join->tables_list)
+    return TRUE;
+
+  if (!(left_expr_cache= new List<Cached_item>))
+    return TRUE;
+
+  for (uint i= 0; i < left_expr->cols(); i++)
+  {
+    Cached_item *cur_item_cache= new_Cached_item(thd,
+                                                 left_expr->element_index(i),
+                                                 FALSE);
+    if (!cur_item_cache || left_expr_cache->push_front(cur_item_cache))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+bool Item_in_subselect::init_cond_guards()
+{
+  uint cols_num= left_expr->cols();
+  if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
+  {
+    if (!(pushed_cond_guards= (bool*)thd->alloc(sizeof(bool) * cols_num)))
+        return TRUE;
+    for (uint i= 0; i < cols_num; i++)
+      pushed_cond_guards[i]= TRUE;
+  }
+  return FALSE;
+}
+
+
+bool
 Item_allany_subselect::select_transformer(JOIN *join)
 {
-  transformed= 1;
+  DBUG_ENTER("Item_allany_subselect::select_transformer");
+  DBUG_ASSERT((in_strategy & ~(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE |
+                               SUBS_IN_TO_EXISTS | SUBS_STRATEGY_CHOSEN)) == 0);
   if (upper_item)
     upper_item->show= 1;
-  return select_in_like_transformer(join, func);
+  DBUG_RETURN(select_in_like_transformer(join));
 }
 
 
 void Item_allany_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (test_strategy(SUBS_IN_TO_EXISTS))
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1719,23 +2656,28 @@ void subselect_engine::set_thd(THD *thd_arg)
 
 
 subselect_single_select_engine::
-subselect_single_select_engine(st_select_lex *select,
-			       select_subselect *result_arg,
+subselect_single_select_engine(THD *thd_arg, st_select_lex *select,
+			       select_result_interceptor *result_arg,
 			       Item_subselect *item_arg)
-  :subselect_engine(item_arg, result_arg),
-   prepared(0), optimized(0), executed(0), optimize_error(0),
+  :subselect_engine(thd_arg, item_arg, result_arg),
+   prepared(0), executed(0), optimize_error(0),
    select_lex(select), join(0)
 {
   select_lex->master_unit()->item= item_arg;
 }
 
+int subselect_single_select_engine::get_identifier()
+{
+  return select_lex->select_number; 
+}
 
 void subselect_single_select_engine::cleanup()
 {
   DBUG_ENTER("subselect_single_select_engine::cleanup");
-  prepared= optimized= executed= optimize_error= 0;
+  prepared= executed= optimize_error= 0;
   join= 0;
   result->cleanup();
+  select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
   DBUG_VOID_RETURN;
 }
 
@@ -1745,6 +2687,9 @@ void subselect_union_engine::cleanup()
   DBUG_ENTER("subselect_union_engine::cleanup");
   unit->reinit_exec_mechanism();
   result->cleanup();
+  unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+  for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+    sl->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
   DBUG_VOID_RETURN;
 }
 
@@ -1776,21 +2721,21 @@ bool subselect_union_engine::no_rows()
   return test(!unit->fake_select_lex->join->send_records);
 }
 
+
 void subselect_uniquesubquery_engine::cleanup()
 {
   DBUG_ENTER("subselect_uniquesubquery_engine::cleanup");
-  /*
-    subselect_uniquesubquery_engine have not 'result' assigbed, so we do not
-    cleanup() it
-  */
+  /* Tell handler we don't need the index anymore */
+  if (tab->table->file->inited)
+    tab->table->file->ha_index_end();
   DBUG_VOID_RETURN;
 }
 
 
-subselect_union_engine::subselect_union_engine(st_select_lex_unit *u,
-					       select_subselect *result_arg,
+subselect_union_engine::subselect_union_engine(THD *thd_arg, st_select_lex_unit *u,
+					       select_result_interceptor *result_arg,
 					       Item_subselect *item_arg)
-  :subselect_engine(item_arg, result_arg)
+  :subselect_engine(thd_arg, item_arg, result_arg)
 {
   unit= u;
   if (!result_arg)				//out of memory
@@ -1799,6 +2744,32 @@ subselect_union_engine::subselect_union_engine(st_select_lex_unit *u,
 }
 
 
+/**
+  Create and prepare the JOIN object that represents the query execution
+  plan for the subquery.
+
+  @details
+  This method is called from Item_subselect::fix_fields. For prepared
+  statements it is called both during the PREPARE and EXECUTE phases in the
+  following ways:
+  - During PREPARE the optimizer needs some properties
+    (join->fields_list.elements) of the JOIN to proceed with preparation of
+    the remaining query (namely to complete ::fix_fields for the subselect
+    related classes. In the end of PREPARE the JOIN is deleted.
+  - When we EXECUTE the query, Item_subselect::fix_fields is called again, and
+    the JOIN object is re-created again, prepared and executed. In the end of
+    execution it is deleted.
+  In all cases the JOIN is created in runtime memory (not in the permanent
+  memory root).
+
+  @todo
+  Re-check what properties of 'join' are needed during prepare, and see if
+  we can avoid creating a JOIN during JOIN::prepare of the outer join.
+
+  @retval 0  if success
+  @retval 1  if error
+*/
+
 int subselect_single_select_engine::prepare()
 {
   if (prepared)
@@ -1840,8 +2811,8 @@ int subselect_union_engine::prepare()
 
 int subselect_uniquesubquery_engine::prepare()
 {
-  //this never should be called
-  DBUG_ASSERT(0);
+  /* Should never be called. */
+  DBUG_ASSERT(FALSE);
   return 1;
 }
 
@@ -1888,7 +2859,7 @@ void subselect_engine::set_row(List<Item> &item_list, Item_cache **row)
     if (!(row[i]= Item_cache::get_cache(sel_item)))
       return;
     row[i]->setup(sel_item);
-    row[i]->store(sel_item);
+ //psergey-backport-timours:   row[i]->store(sel_item);
   }
   if (item_list.elements > 1)
     res_type= ROW_RESULT;
@@ -1940,19 +2911,16 @@ int subselect_single_select_engine::exec()
   char const *save_where= thd->where;
   SELECT_LEX *save_select= thd->lex->current_select;
   thd->lex->current_select= select_lex;
-  if (!optimized)
+
+  if (!join->optimized)
   {
     SELECT_LEX_UNIT *unit= select_lex->master_unit();
 
-    DBUG_EXECUTE_IF("bug11747970_simulate_error",
-                    DBUG_SET("+d,bug11747970_raise_error"););
-
-    optimized= 1;
     unit->set_limit(unit->global_parameters);
     if (join->optimize())
     {
       thd->where= save_where;
-      optimize_error= 1;
+      executed= optimize_error= 1;
       thd->lex->current_select= save_select;
       DBUG_RETURN(join->error ? join->error : 1);
     }
@@ -2011,9 +2979,9 @@ int subselect_single_select_engine::exec()
         pushed down into the subquery. Those optimizations are ref[_or_null]
         acceses. Change them to be full table scans.
       */
-      for (uint i=join->const_tables ; i < join->tables ; i++)
+      for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab;
+           tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
       {
-        JOIN_TAB *tab=join->join_tab+i;
         if (tab && tab->keyuse)
         {
           for (uint i= 0; i < tab->ref.key_parts; i++)
@@ -2051,7 +3019,7 @@ int subselect_single_select_engine::exec()
     executed= 1;
     thd->where= save_where;
     thd->lex->current_select= save_select;
-    DBUG_RETURN(join->error||thd->is_fatal_error);
+    DBUG_RETURN(join->error || thd->is_fatal_error || thd->is_error());
   }
   thd->where= save_where;
   thd->lex->current_select= save_select;
@@ -2102,14 +3070,23 @@ int subselect_uniquesubquery_engine::scan_table()
   for (;;)
   {
     error=table->file->ha_rnd_next(table->record[0]);
-    if (error && error != HA_ERR_END_OF_FILE)
-    {
-      error= report_error(table, error);
-      break;
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(table, error);
+        break;
+      }
     }
-    /* No more rows */
-    if (table->status)
-      break;
 
     if (!cond || cond->val_int())
     {
@@ -2222,6 +3199,56 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
 
 
 /*
+  @retval  1  A NULL was found in the outer reference, index lookup is
+              not applicable, the outer ref is unsusable as a lookup key,
+              use some other method to find a match.
+  @retval  0  The outer ref was copied into an index lookup key.
+  @retval -1  The outer ref cannot possibly match any row, IN is FALSE.
+*/
+/* TIMOUR: this method is a variant of copy_ref_key(), needs refactoring. */
+
+int subselect_uniquesubquery_engine::copy_ref_key_simple()
+{
+  for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
+  {
+    enum store_key::store_key_result store_res;
+    store_res= (*copy)->copy();
+    tab->ref.key_err= store_res;
+
+    /*
+      When there is a NULL part in the key we don't need to make index
+      lookup for such key thus we don't need to copy whole key.
+      If we later should do a sequential scan return OK. Fail otherwise.
+
+      See also the comment for the subselect_uniquesubquery_engine::exec()
+      function.
+    */
+    null_keypart= (*copy)->null_key;
+    if (null_keypart)
+      return 1;
+
+    /*
+      Check if the error is equal to STORE_KEY_FATAL. This is not expressed 
+      using the store_key::store_key_result enum because ref.key_err is a 
+      boolean and we want to detect both TRUE and STORE_KEY_FATAL from the 
+      space of the union of the values of [TRUE, FALSE] and 
+      store_key::store_key_result.  
+      TODO: fix the variable an return types.
+    */
+    if (store_res == store_key::STORE_KEY_FATAL)
+    {
+      /*
+       Error converting the left IN operand to the column type of the right
+       IN operand. 
+      */
+      return -1;
+    }
+  }
+  return 0;
+}
+
+
+/*
   Execute subselect
 
   SYNOPSIS
@@ -2261,7 +3288,13 @@ int subselect_uniquesubquery_engine::exec()
  
   /* TODO: change to use of 'full_scan' here? */
   if (copy_ref_key())
+  {
+    /*
+      TIMOUR: copy_ref_key() == 1 means NULL result, not error, why return 1?
+      Check who reiles on this result.
+    */
     DBUG_RETURN(1);
+  }
   if (table->status)
   {
     /* 
@@ -2272,6 +3305,9 @@ int subselect_uniquesubquery_engine::exec()
     DBUG_RETURN(0);
   }
 
+  if (!tab->preread_init_done && tab->preread_init())
+    DBUG_RETURN(1);
+
   if (null_keypart)
     DBUG_RETURN(scan_table());
  
@@ -2302,10 +3338,51 @@ int subselect_uniquesubquery_engine::exec()
 }
 
 
+/*
+  TIMOUR: write comment
+*/
+
+int subselect_uniquesubquery_engine::index_lookup()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
+  int error;
+  TABLE *table= tab->table;
+ 
+  if (!table->file->inited)
+    table->file->ha_index_init(tab->ref.key, 0);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
+  DBUG_PRINT("info", ("lookup result: %i", error));
+
+  if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+  {
+    /*
+      TIMOUR: I don't understand at all when do we need to call report_error.
+      In most places where we access an index, we don't do this. Why here?
+    */
+    error= report_error(table, error);
+    DBUG_RETURN(error);
+  }
+
+  table->null_row= 0;
+  if (!error && (!cond || cond->val_int()))
+    ((Item_in_subselect *) item)->value= 1;
+  else
+    ((Item_in_subselect *) item)->value= 0;
+
+  DBUG_RETURN(0);
+}
+
+
+
 subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
 {
   /* Tell handler we don't need the index anymore */
-  tab->table->file->ha_index_end();
+  //psergey-merge-todo: the following was gone in 6.0:
+ //psergey-merge: don't need this after all: tab->table->file->ha_index_end();
 }
 
 
@@ -2313,7 +3390,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
   Index-lookup subselect 'engine' - run the subquery
 
   SYNOPSIS
-    subselect_uniquesubquery_engine:exec()
+    subselect_indexsubquery_engine:exec()
       full_scan 
 
   DESCRIPTION
@@ -2363,7 +3440,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
 
 int subselect_indexsubquery_engine::exec()
 {
-  DBUG_ENTER("subselect_indexsubquery_engine::exec");
+  DBUG_ENTER("subselect_indexsubquery_engine");
   int error;
   bool null_finding= 0;
   TABLE *table= tab->table;
@@ -2394,6 +3471,9 @@ int subselect_indexsubquery_engine::exec()
     DBUG_RETURN(0);
   }
 
+  if (!tab->preread_init_done && tab->preread_init())
+    DBUG_RETURN(1);
+
   if (null_keypart)
     DBUG_RETURN(scan_table());
 
@@ -2451,8 +3531,10 @@ int subselect_indexsubquery_engine::exec()
 
 uint subselect_single_select_engine::cols()
 {
-  DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
-  return select_lex->join->fields_list.elements;
+  //psergey-sj-backport: the following assert was gone in 6.0:
+  //DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
+  //return select_lex->join->fields_list.elements;
+  return select_lex->item_list.elements;
 }
 
 
@@ -2493,10 +3575,13 @@ void subselect_uniquesubquery_engine::exclude()
 }
 
 
-table_map subselect_engine::calc_const_tables(TABLE_LIST *table)
+table_map subselect_engine::calc_const_tables(List<TABLE_LIST> &list)
 {
   table_map map= 0;
-  for (; table; table= table->next_leaf)
+  List_iterator<TABLE_LIST> ti(list);
+  TABLE_LIST *table;
+  //for (; table; table= table->next_leaf)
+  while ((table= ti++))
   {
     TABLE *tbl= table->table;
     if (tbl && tbl->const_table)
@@ -2534,10 +3619,20 @@ void subselect_union_engine::print(String *str, enum_query_type query_type)
 void subselect_uniquesubquery_engine::print(String *str,
                                             enum_query_type query_type)
 {
+  char *table_name= tab->table->s->table_name.str;
   str->append(STRING_WITH_LEN("<primary_index_lookup>("));
   tab->ref.items[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" in "));
-  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  if (tab->table->s->table_category == TABLE_CATEGORY_TEMPORARY)
+  {
+    /*
+      Temporary tables' names change across runs, so they can't be used for
+      EXPLAIN EXTENDED.
+    */
+    str->append(STRING_WITH_LEN("<temporary table>"));
+  }
+  else
+    str->append(table_name, tab->table->s->table_name.length);
   KEY *key_info= tab->table->key_info+ tab->ref.key;
   str->append(STRING_WITH_LEN(" on "));
   str->append(key_info->name);
@@ -2549,6 +3644,29 @@ void subselect_uniquesubquery_engine::print(String *str,
   str->append(')');
 }
 
+/*
+TODO:
+The above ::print method should be changed as below. Do it after
+all other tests pass.
+
+void subselect_uniquesubquery_engine::print(String *str)
+{
+  KEY *key_info= tab->table->key_info + tab->ref.key;
+  str->append(STRING_WITH_LEN("<primary_index_lookup>("));
+  for (uint i= 0; i < key_info->key_parts; i++)
+    tab->ref.items[i]->print(str);
+  str->append(STRING_WITH_LEN(" in "));
+  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  str->append(STRING_WITH_LEN(" on "));
+  str->append(key_info->name);
+  if (cond)
+  {
+    str->append(STRING_WITH_LEN(" where "));
+    cond->print(str);
+  }
+  str->append(')');
+}
+*/
 
 void subselect_indexsubquery_engine::print(String *str,
                                            enum_query_type query_type)
@@ -2580,6 +3698,7 @@ void subselect_indexsubquery_engine::print(String *str,
 
   @param si		new subselect Item
   @param res		new select_result object
+  @param temp           temporary assignment
 
   @retval
     FALSE OK
@@ -2587,12 +3706,32 @@ void subselect_indexsubquery_engine::print(String *str,
     TRUE  error
 */
 
-bool subselect_single_select_engine::change_result(Item_subselect *si,
-                                                 select_subselect *res)
+bool
+subselect_single_select_engine::change_result(Item_subselect *si,
+                                              select_result_interceptor *res,
+                                              bool temp)
 {
   item= si;
-  result= res;
-  return select_lex->join->change_result(result);
+  if (temp)
+  {
+    /*
+      Here we reuse change_item_tree to roll back assignment.  It has
+      nothing special about Item* pointer so it is safe conversion. We do
+      not change the interface to be compatible with MySQL.
+    */
+    thd->change_item_tree((Item**) &result, (Item*)res);
+  }
+  else
+    result= res;
+
+  /*
+    We can't use 'result' below as gcc 4.2.4's alias optimization
+    assumes that result was not changed by thd->change_item_tree().
+    I tried to find a solution to make gcc happy, but could not find anything
+    that would not require a lot of extra code that would be harder to manage
+    than the current code.
+  */
+  return select_lex->join->change_result(res);
 }
 
 
@@ -2609,11 +3748,15 @@ bool subselect_single_select_engine::change_result(Item_subselect *si,
 */
 
 bool subselect_union_engine::change_result(Item_subselect *si,
-                                         select_subselect *res)
+                                           select_result_interceptor *res,
+                                           bool temp)
 {
   item= si;
   int rc= unit->change_result(res, result);
-  result= res;
+  if (temp)
+    thd->change_item_tree((Item**) &result, (Item*)res);
+  else
+    result= res;
   return rc;
 }
 
@@ -2630,8 +3773,11 @@ bool subselect_union_engine::change_result(Item_subselect *si,
     TRUE  error
 */
 
-bool subselect_uniquesubquery_engine::change_result(Item_subselect *si,
-                                                  select_subselect *res)
+bool
+subselect_uniquesubquery_engine::change_result(Item_subselect *si,
+                                               select_result_interceptor *res,
+                                               bool temp
+                                               __attribute__((unused)))
 {
   DBUG_ASSERT(0);
   return TRUE;
@@ -2699,5 +3845,2080 @@ bool subselect_union_engine::no_tables()
 bool subselect_uniquesubquery_engine::no_tables()
 {
   /* returning value is correct, but this method should never be called */
+  DBUG_ASSERT(FALSE);
   return 0;
 }
+
+
+/******************************************************************************
+  WL#1110 - Implementation of class subselect_hash_sj_engine
+******************************************************************************/
+
+
+/**
+  Check if an IN predicate should be executed via partial matching using
+  only schema information.
+
+  @details
+  This test essentially has three results:
+  - partial matching is applicable, but cannot be executed due to a
+    limitation in the total number of indexes, as a result we can't
+    use subquery materialization at all.
+  - partial matching is either applicable or not, and this can be
+    determined by looking at 'this->max_keys'.
+  If max_keys > 1, then we need partial matching because there are
+  more indexes than just the one we use during materialization to
+  remove duplicates.
+
+  @note
+  TIMOUR: The schema-based analysis for partial matching can be done once for
+  prepared statement and remembered. It is done here to remove the need to
+  save/restore all related variables between each re-execution, thus making
+  the code simpler.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_schema()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  if (item_in->is_top_level_item())
+    return COMPLETE_MATCH;
+  else
+  {
+    List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
+    Item *outer_col, *inner_col;
+
+    for (uint i= 0; i < item_in->left_expr->cols(); i++)
+    {
+      outer_col= item_in->left_expr->element_index(i);
+      inner_col= inner_col_it++;
+
+      if (!inner_col->maybe_null && !outer_col->maybe_null)
+        bitmap_set_bit(&non_null_key_parts, i);
+      else
+      {
+        bitmap_set_bit(&partial_match_key_parts, i);
+        ++count_partial_match_columns;
+      }
+    }
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (count_partial_match_columns)
+    return PARTIAL_MATCH;
+  return COMPLETE_MATCH;
+}
+
+
+/**
+  Test whether an IN predicate must be computed via partial matching
+  based on the NULL statistics for each column of a materialized subquery.
+
+  @details The procedure analyzes column NULL statistics, updates the
+  matching type of columns that cannot be NULL or that contain only NULLs.
+  Based on this, the procedure determines the final execution strategy for
+  the [NOT] IN predicate.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_data()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  Item *outer_col;
+
+  /*
+    If we already determined that a complete match is enough based on schema
+    information, nothing can be better.
+  */
+  if (strategy == COMPLETE_MATCH)
+    return COMPLETE_MATCH;
+
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    if (!bitmap_is_set(&partial_match_key_parts, i))
+      continue;
+    outer_col= item_in->left_expr->element_index(i);
+    /*
+      If column 'i' doesn't contain NULLs, and the corresponding outer reference
+      cannot have a NULL value, then 'i' is a non-nullable column.
+    */
+    if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null)
+    {
+      bitmap_clear_bit(&partial_match_key_parts, i);
+      bitmap_set_bit(&non_null_key_parts, i);
+      --count_partial_match_columns;
+    }
+    if (result_sink->get_null_count_of_col(i) == tmp_table->file->stats.records)
+      ++count_null_only_columns;
+    if (result_sink->get_null_count_of_col(i))
+      ++count_columns_with_nulls;
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (!count_partial_match_columns)
+    return COMPLETE_MATCH;
+  return PARTIAL_MATCH;
+}
+
+
+void
+subselect_hash_sj_engine::choose_partial_match_strategy(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  ulonglong pm_buff_size;
+
+  DBUG_ASSERT(strategy == PARTIAL_MATCH);
+  /*
+    Choose according to global optimizer switch. If only one of the switches is
+    'ON', then the remaining strategy is the only possible one. The only cases
+    when this will be overriden is when the total size of all buffers for the
+    merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
+    or if there isn't enough physical memory to allocate the buffers.
+  */
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+       optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_SCAN;
+  else if
+     ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+      !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_MERGE;
+
+  /*
+    If both switches are ON, or both are OFF, we interpret that as "let the
+    optimizer decide". Perform a cost based choice between the two partial
+    matching strategies.
+  */
+  /*
+    TIMOUR: the above interpretation of the switch values could be changed to:
+    - if both are ON - let the optimizer decide,
+    - if both are OFF - do not use partial matching, therefore do not use
+      materialization in non-top-level predicates.
+    The problem with this is that we know for sure if we need partial matching
+    only after the subquery is materialized, and this is too late to revert to
+    the IN=>EXISTS strategy.
+  */
+  if (strategy == PARTIAL_MATCH)
+  {
+    /*
+      TIMOUR: Currently we use a super simplistic measure. This will be
+      addressed in a separate task.
+    */
+    if (tmp_table->file->stats.records < 100)
+      strategy= PARTIAL_MATCH_SCAN;
+    else
+      strategy= PARTIAL_MATCH_MERGE;
+  }
+
+  /* Check if there is enough memory for the rowid merge strategy. */
+  if (strategy == PARTIAL_MATCH_MERGE)
+  {
+    pm_buff_size= rowid_merge_buff_size(has_non_null_key,
+                                        has_covering_null_row,
+                                        partial_match_key_parts);
+    if (pm_buff_size > thd->variables.rowid_merge_buff_size)
+      strategy= PARTIAL_MATCH_SCAN;
+  }
+}
+
+
+/*
+  Compute the memory size of all buffers proportional to the number of rows
+  in tmp_table.
+
+  @details
+  If the result is bigger than thd->variables.rowid_merge_buff_size, partial
+  matching via merging is not applicable.
+*/
+
+ulonglong subselect_hash_sj_engine::rowid_merge_buff_size(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  /* Total size of all buffers used by partial matching. */
+  ulonglong buff_size;
+  ha_rows row_count= tmp_table->file->stats.records;
+  uint rowid_length= tmp_table->file->ref_length;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  ha_rows max_null_row;
+
+  /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
+  buff_size= row_count * rowid_length * sizeof(uchar);
+
+  if (has_non_null_key)
+  {
+    /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
+    buff_size+= row_count * sizeof(rownum_t);
+  }
+
+  if (!has_covering_null_row)
+  {
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue; /* In these cases we wouldn't construct Ordered keys. */
+
+      /* Add the size of Ordered_key::key_buff */
+      buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
+                         sizeof(rownum_t);
+      /* Add the size of Ordered_key::null_key */
+      max_null_row= result_sink->get_max_null_of_col(i);
+      if (max_null_row >= UINT_MAX)
+      {
+        /*
+          There can be at most UINT_MAX bits in a MY_BITMAP that is used to
+          store NULLs in an Ordered_key. Return a number of bytes bigger than
+          the maximum allowed memory buffer for partial matching to disable
+          the rowid merge strategy.
+        */
+        return ULONGLONG_MAX;
+      }
+      buff_size+= bitmap_buffer_size(max_null_row);
+    }
+  }
+
+  return buff_size;
+}
+
+
+/*
+  Initialize a MY_BITMAP with a buffer allocated on the current
+  memory root.
+  TIMOUR: move to bitmap C file?
+*/
+
+static my_bool
+bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root)
+{
+  my_bitmap_map *bitmap_buf;
+
+  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root,
+                                                bitmap_buffer_size(n_bits))) ||
+      bitmap_init(map, bitmap_buf, n_bits, FALSE))
+    return TRUE;
+  bitmap_clear_all(map);
+  return FALSE;
+}
+
+
+/**
+  Create all structures needed for IN execution that can live between PS
+  reexecution.
+
+  @param tmp_columns the items that produce the data for the temp table
+  @param subquery_id subquery's identifier (to make "<subquery%d>" name for
+                                            EXPLAIN)
+
+  @details
+  - Create a temporary table to store the result of the IN subquery. The
+    temporary table has one hash index on all its columns.
+  - Create a new result sink that sends the result stream of the subquery to
+    the temporary table,
+
+  @notice:
+    Currently Item_subselect::init() already chooses and creates at parse
+    time an engine with a corresponding JOIN to execute the subquery.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+bool subselect_hash_sj_engine::init(List<Item> *tmp_columns, uint subquery_id)
+{
+  select_union *result_sink;
+  /* Options to create_tmp_table. */
+  ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
+                             /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
+
+  DBUG_ENTER("subselect_hash_sj_engine::init");
+
+  if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
+                            thd->mem_root) ||
+      bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements,
+                            thd->mem_root))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Create and initialize a select result interceptor that stores the
+    result stream in a temporary table. The temporary table itself is
+    managed (created/filled/etc) internally by the interceptor.
+  */
+/*
+  TIMOUR:
+  Select a more efficient result sink when we know there is no need to collect
+  data statistics.
+
+  if (strategy == COMPLETE_MATCH)
+  {
+    if (!(result= new select_union))
+      DBUG_RETURN(TRUE);
+  }
+  else if (strategy == PARTIAL_MATCH)
+  {
+  if (!(result= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+  }
+*/
+  if (!(result_sink= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+    
+  char buf[32];
+  uint len= my_snprintf(buf, sizeof(buf), "<subquery%d>", subquery_id);
+  char *name;
+  if (!(name= (char*)thd->alloc(len + 1)))
+    DBUG_RETURN(TRUE);
+  memcpy(name, buf, len+1);
+
+  result_sink->get_tmp_table_param()->materialized_subquery= true;
+  if (item->substype() == Item_subselect::IN_SUBS && 
+      ((Item_in_subselect*)item)->is_jtbm_merged)
+  {
+    result_sink->get_tmp_table_param()->force_not_null_cols= true;
+  }
+  if (result_sink->create_result_table(thd, tmp_columns, TRUE,
+                                       tmp_create_options,
+				       name, TRUE, TRUE))
+    DBUG_RETURN(TRUE);
+
+  tmp_table= result_sink->table;
+  result= result_sink;
+
+  /*
+    If the subquery has blobs, or the total key lenght is bigger than
+    some length, or the total number of key parts is more than the
+    allowed maximum (currently MAX_REF_PARTS == 16), then the created
+    index cannot be used for lookups and we can't use hash semi
+    join. If this is the case, delete the temporary table since it
+    will not be used, and tell the caller we failed to initialize the
+    engine.
+  */
+  if (tmp_table->s->keys == 0)
+  {
+    //fprintf(stderr, "Q: %s\n", current_thd->query());
+    DBUG_ASSERT(0);
+    DBUG_ASSERT(
+      tmp_table->s->uniques ||
+      tmp_table->key_info->key_length >= tmp_table->file->max_key_length() ||
+      tmp_table->key_info->key_parts > tmp_table->file->max_key_parts());
+    free_tmp_table(thd, tmp_table);
+    tmp_table= NULL;
+    delete result;
+    result= NULL;
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Make sure there is only one index on the temp table, and it doesn't have
+    the extra key part created when s->uniques > 0.
+  */
+  DBUG_ASSERT(tmp_table->s->keys == 1 &&
+              ((Item_in_subselect *) item)->left_expr->cols() ==
+              tmp_table->key_info->key_parts);
+
+  if (make_semi_join_conds() ||
+      /* A unique_engine is used both for complete and partial matching. */
+      !(lookup_engine= make_unique_engine()))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Repeat name resolution for 'cond' since cond is not part of any
+    clause of the query, and it is not 'fixed' during JOIN::prepare.
+  */
+  if (semi_join_conds && !semi_join_conds->fixed &&
+      semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    DBUG_RETURN(TRUE);
+  /* Let our engine reuse this query plan for materialization. */
+  materialize_join= materialize_engine->join;
+  materialize_join->change_result(result);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Create an artificial condition to post-filter those rows matched by index
+  lookups that cannot be distinguished by the index lookup procedure.
+
+  @notes
+  The need for post-filtering may occur e.g. because of
+  truncation. Prepared statements execution requires that fix_fields is
+  called for every execution. In order to call fix_fields we need to
+  create a Name_resolution_context and a corresponding TABLE_LIST for
+  the temporary table for the subquery, so that all column references
+  to the materialized subquery table can be resolved correctly.
+
+  @returns
+    @retval TRUE  memory allocation error occurred
+    @retval FALSE the conditions were created and resolved (fixed)
+*/
+
+bool subselect_hash_sj_engine::make_semi_join_conds()
+{
+  /*
+    Table reference for tmp_table that is used to resolve column references
+    (Item_fields) to columns in tmp_table.
+  */
+  TABLE_LIST *tmp_table_ref;
+  /* Name resolution context for all tmp_table columns created below. */
+  Name_resolution_context *context;
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds");
+  DBUG_ASSERT(semi_join_conds == NULL);
+
+  if (!(semi_join_conds= new Item_cond_and))
+    DBUG_RETURN(TRUE);
+
+  if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST))))
+    DBUG_RETURN(TRUE);
+
+  tmp_table_ref->init_one_table("", tmp_table->alias.c_ptr(), TL_READ);
+  tmp_table_ref->table= tmp_table;
+
+  context= new Name_resolution_context;
+  context->init();
+  context->first_name_resolution_table=
+    context->last_name_resolution_table= tmp_table_ref;
+  semi_join_conds_context= context;
+  
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    Item_func_eq *eq_cond; /* New equi-join condition for the current column. */
+    /* Item for the corresponding field from the materialized temp table. */
+    Item_field *right_col_item;
+
+    if (!(right_col_item= new Item_field(thd, context, tmp_table->field[i])) ||
+        !(eq_cond= new Item_func_eq(item_in->left_expr->element_index(i),
+                                    right_col_item)) ||
+        (((Item_cond_and*)semi_join_conds)->add(eq_cond)))
+    {
+      delete semi_join_conds;
+      semi_join_conds= NULL;
+      DBUG_RETURN(TRUE);
+    }
+  }
+  if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    DBUG_RETURN(TRUE);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Create a new uniquesubquery engine for the execution of an IN predicate.
+
+  @details
+  Create and initialize a new JOIN_TAB, and Table_ref objects to perform
+  lookups into the indexed temporary table.
+
+  @retval A new subselect_hash_sj_engine object
+  @retval NULL if a memory allocation error occurs
+*/
+
+subselect_uniquesubquery_engine*
+subselect_hash_sj_engine::make_unique_engine()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  Item_iterator_row it(item_in->left_expr);
+  /* The only index on the temporary table. */
+  KEY *tmp_key= tmp_table->key_info;
+  JOIN_TAB *tab;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");
+
+  /*
+    Create and initialize the JOIN_TAB that represents an index lookup
+    plan operator into the materialized subquery result. Notice that:
+    - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and
+    - here we initialize only those members that are used by
+      subselect_uniquesubquery_engine, so these objects are incomplete.
+  */
+  if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
+    DBUG_RETURN(NULL);
+
+  tab->table= tmp_table;
+  tab->preread_init_done= FALSE;
+  tab->ref.tmp_table_index_lookup_init(thd, tmp_key, it, FALSE);
+
+  DBUG_RETURN(new subselect_uniquesubquery_engine(thd, tab, item,
+                                                  semi_join_conds));
+}
+
+
+subselect_hash_sj_engine::~subselect_hash_sj_engine()
+{
+  delete lookup_engine;
+  delete result;
+  if (tmp_table)
+    free_tmp_table(thd, tmp_table);
+}
+
+
+int subselect_hash_sj_engine::prepare()
+{
+  /*
+    Create and optimize the JOIN that will be used to materialize
+    the subquery if not yet created.
+  */
+  return materialize_engine->prepare();
+}
+
+
+/**
+  Cleanup performed after each PS execution.
+
+  @details
+  Called in the end of JOIN::prepare for PS from Item_subselect::cleanup.
+*/
+
+void subselect_hash_sj_engine::cleanup()
+{
+  enum_engine_type lookup_engine_type= lookup_engine->engine_type();
+  is_materialized= FALSE;
+  bitmap_clear_all(&non_null_key_parts);
+  bitmap_clear_all(&partial_match_key_parts);
+  count_partial_match_columns= 0;
+  count_null_only_columns= 0;
+  strategy= UNDEFINED;
+  materialize_engine->cleanup();
+  /*
+    Restore the original Item_in_subselect engine. This engine is created once
+    at parse time and stored across executions, while all other materialization
+    related engines are created and chosen for each execution.
+  */
+  ((Item_in_subselect *) item)->engine= materialize_engine;
+  if (lookup_engine_type == TABLE_SCAN_ENGINE ||
+      lookup_engine_type == ROWID_MERGE_ENGINE)
+  {
+    subselect_engine *inner_lookup_engine;
+    inner_lookup_engine=
+      ((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
+    /*
+      Partial match engines are recreated for each PS execution inside
+      subselect_hash_sj_engine::exec().
+    */
+    delete lookup_engine;
+    lookup_engine= inner_lookup_engine;
+  }
+  DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
+  lookup_engine->cleanup();
+  result->cleanup(); /* Resets the temp table as well. */
+  DBUG_ASSERT(tmp_table);
+  free_tmp_table(thd, tmp_table);
+  tmp_table= NULL;
+}
+
+
+/*
+  Get fanout produced by tables specified in the table_map
+*/
+
+double get_fanout_with_deps(JOIN *join, table_map tset)
+{
+  /* Handle the case of "Impossible WHERE" */
+  if (join->table_count == 0)
+    return 0.0;
+
+  /* First, recursively get all tables we depend on */
+  table_map deps_to_check= tset;
+  table_map checked_deps= 0;
+  table_map further_deps;
+  do
+  {
+    further_deps= 0;
+    Table_map_iterator tm_it(deps_to_check);
+    int tableno;
+    while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+    {
+      /* get tableno's dependency tables that are not in needed_set */
+      further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps;
+    }
+
+    checked_deps |= deps_to_check;
+    deps_to_check= further_deps;
+  } while (further_deps != 0);
+
+  
+  /* Now, walk the join order and calculate the fanout */
+  double fanout= 1;
+  for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
+       tab= next_top_level_tab(join, tab))
+  {
+    /* 
+      Ignore SJM nests. They have tab->table==NULL. There is no point to walk
+      inside them, because GROUP BY clause cannot refer to tables from within
+      subquery.
+    */
+    if (!tab->is_sjm_nest() && (tab->table->map & checked_deps) && 
+        !tab->emb_sj_nest && 
+        tab->records_read != 0)
+    {
+      fanout *= rows2double(tab->records_read);
+    }
+  } 
+  return fanout;
+}
+
+
+#if 0
+void check_out_index_stats(JOIN *join)
+{
+  ORDER *order;
+  uint n_order_items;
+
+  /*
+    First, collect the keys that we can use in each table.
+    We can use a key if 
+    - all tables refer to it.
+  */
+  key_map key_start_use[MAX_TABLES];
+  key_map key_infix_use[MAX_TABLES];
+  table_map key_used=0;
+  table_map non_key_used= 0;
+  
+  bzero(&key_start_use, sizeof(key_start_use)); //psergey-todo: safe initialization!
+  bzero(&key_infix_use, sizeof(key_infix_use));
+  
+  for (order= join->group_list; order; order= order->next)
+  {
+    Item *item= order->item[0];
+
+    if (item->real_type() == Item::FIELD_ITEM)
+    {
+      if (item->used_tables() & OUTER_REF_TABLE_BIT)
+        continue; /* outside references are like constants for us */
+
+      Field *field= ((Item_field*)item->real_item())->field;
+      uint table_no= field->table->tablenr;
+      if (!(non_key_used && table_map(1) << table_no) && 
+          !field->part_of_key.is_clear_all())
+      {
+        key_map infix_map= field->part_of_key;
+        infix_map.subtract(field->key_start);
+        key_start_use[table_no].merge(field->key_start);
+        key_infix_use[table_no].merge(infix_map);
+        key_used |= table_no;
+      }
+      continue;
+    }
+    /* 
+      Note: the below will cause clauses like GROUP BY YEAR(date) not to be
+      handled. 
+    */
+    non_key_used |= item->used_tables();
+  }
+  
+  Table_map_iterator tm_it(key_used & ~non_key_used);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    key_map::iterator key_it(key_start_use);
+    int keyno;
+    while ((keyno = tm_it.next_bit()) != key_map::iterator::BITMAP_END)
+    {
+      for (order= join->group_list; order; order= order->next)
+      {
+        Item *item= order->item[0];
+        if (item->used_tables() & (table_map(1) << tableno))
+        {
+          DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM);
+        }
+      }
+      /*
+      if (continuation)
+      {
+        walk through list and find which key parts are occupied;
+        // note that the above can't be made any faster.
+      }
+      else
+        use rec_per_key[0];
+      
+      find out the cardinality.
+      check if cardinality decreases if we use it;
+      */
+    }
+  }
+}
+#endif
+
+
+/*
+  Get an estimate of how many records will be produced after the GROUP BY
+  operation.
+
+  @param join           Join we're operating on 
+  @param join_op_rows   How many records will be produced by the join
+                        operations (this is what join optimizer produces)
+  
+  @seealso
+     See also optimize_semijoin_nests(), grep for "Adjust output cardinality 
+     estimates".  Very similar code there that is not joined with this one
+     because we operate on different data structs and too much effort is
+     needed to abstract them out.
+
+  @return
+     Number of records we expect to get after the GROUP BY operation
+*/
+
+double get_post_group_estimate(JOIN* join, double join_op_rows)
+{
+  table_map tables_in_group_list= table_map(0);
+
+  /* Find out which tables are used in GROUP BY list */
+  for (ORDER *order= join->group_list; order; order= order->next)
+  {
+    Item *item= order->item[0];
+    if (item->used_tables() & RAND_TABLE_BIT)
+    {
+      /* Each join output record will be in its own group */
+      return join_op_rows;
+    }
+    tables_in_group_list|= item->used_tables();
+  }
+  tables_in_group_list &= ~PSEUDO_TABLE_BITS;
+
+  /*
+    Use join fanouts to calculate the max. number of records in the group-list
+  */
+  double fanout_rows[MAX_KEY];
+  bzero(&fanout_rows, sizeof(fanout_rows));
+  double out_rows;
+  
+  out_rows= get_fanout_with_deps(join, tables_in_group_list);
+
+#if 0
+  /* The following will be needed when making use of index stats: */
+  /* 
+    Also generate max. number of records for each of the tables mentioned 
+    in the group-list. We'll use that a baseline number that we'll try to 
+    reduce by using
+     - #table-records 
+     - index statistics.
+  */
+  Table_map_iterator tm_it(tables_in_group_list);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno);
+  }
+  
+  /*
+    Try to bring down estimates using index statistics.
+  */
+  //check_out_index_stats(join);
+#endif
+
+  return out_rows;
+}
+
+
+/**
+  Execute a subquery IN predicate via materialization.
+
+  @details
+  If needed materialize the subquery into a temporary table, then
+  copmpute the predicate via a lookup into this table.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+int subselect_hash_sj_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  SELECT_LEX *save_select= thd->lex->current_select;
+  subselect_partial_match_engine *pm_engine= NULL;
+  int res= 0;
+
+  DBUG_ENTER("subselect_hash_sj_engine::exec");
+
+  /*
+    Optimize and materialize the subquery during the first execution of
+    the subquery predicate.
+  */
+  thd->lex->current_select= materialize_engine->select_lex;
+  /* The subquery should be optimized, and materialized only once. */
+  DBUG_ASSERT(materialize_join->optimized && !is_materialized);
+  materialize_join->exec();
+  if ((res= test(materialize_join->error || thd->is_fatal_error ||
+                 thd->is_error())))
+    goto err;
+
+  /*
+    TODO:
+    - Unlock all subquery tables as we don't need them. To implement this
+      we need to add new functionality to JOIN::join_free that can unlock
+      all tables in a subquery (and all its subqueries).
+    - The temp table used for grouping in the subquery can be freed
+      immediately after materialization (yet it's done together with
+      unlocking).
+  */
+  is_materialized= TRUE;
+  /*
+    If the subquery returned no rows, the temporary table is empty, so we know
+    directly that the result of IN is FALSE. We first update the table
+    statistics, then we test if the temporary table for the query result is
+    empty.
+  */
+  tmp_table->file->info(HA_STATUS_VARIABLE);
+  if (!tmp_table->file->stats.records)
+  {
+    /* The value of IN will not change during this execution. */
+    item_in->reset();
+    item_in->make_const();
+    item_in->set_first_execution();
+    DBUG_RETURN(FALSE);
+  }
+
+  /*
+    TIMOUR: The schema-based analysis for partial matching can be done once for
+    prepared statement and remembered. It is done here to remove the need to
+    save/restore all related variables between each re-execution, thus making
+    the code simpler.
+  */
+  strategy= get_strategy_using_schema();
+  /* This call may discover that we don't need partial matching at all. */
+  strategy= get_strategy_using_data();
+  if (strategy == PARTIAL_MATCH)
+  {
+    uint count_pm_keys; /* Total number of keys needed for partial matching. */
+    MY_BITMAP *nn_key_parts= NULL; /* Key parts of the only non-NULL index. */
+    uint count_non_null_columns= 0; /* Number of columns in nn_key_parts. */
+    bool has_covering_null_row;
+    bool has_covering_null_columns;
+    select_materialize_with_stats *result_sink=
+      (select_materialize_with_stats *) result;
+    uint field_count= tmp_table->s->fields;
+
+    if (count_partial_match_columns < field_count)
+    {
+      nn_key_parts= &non_null_key_parts;
+      count_non_null_columns= bitmap_bits_set(nn_key_parts);
+    }
+    has_covering_null_row= (result_sink->get_max_nulls_in_row() == field_count);
+    has_covering_null_columns= (count_non_null_columns +
+                                count_null_only_columns == field_count);
+
+    if (has_covering_null_row && has_covering_null_columns)
+    {
+      /*
+        The whole table consist of only NULL values. The result of IN is
+        a constant UNKNOWN.
+      */
+      DBUG_ASSERT(tmp_table->file->stats.records == 1);
+      item_in->value= 0;
+      item_in->null_value= 1;
+      item_in->make_const();
+      item_in->set_first_execution();
+      DBUG_RETURN(FALSE);
+    }
+
+    if (has_covering_null_row)
+    {
+      DBUG_ASSERT(count_partial_match_columns = field_count);
+      count_pm_keys= 0;
+    }
+    else if (has_covering_null_columns)
+      count_pm_keys= 1;
+    else
+      count_pm_keys= count_partial_match_columns - count_null_only_columns +
+                     (nn_key_parts ? 1 : 0);
+
+    choose_partial_match_strategy(test(nn_key_parts),
+                                  has_covering_null_row,
+                                  &partial_match_key_parts);
+    DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
+                strategy == PARTIAL_MATCH_SCAN);
+    if (strategy == PARTIAL_MATCH_MERGE)
+    {
+      pm_engine=
+        new subselect_rowid_merge_engine(thd, (subselect_uniquesubquery_engine*)
+                                         lookup_engine, tmp_table,
+                                         count_pm_keys,
+                                         has_covering_null_row,
+                                         has_covering_null_columns,
+                                         count_columns_with_nulls,
+                                         item, result,
+                                         semi_join_conds->argument_list());
+      if (!pm_engine ||
+          ((subselect_rowid_merge_engine*) pm_engine)->
+            init(nn_key_parts, &partial_match_key_parts))
+      {
+        /*
+          The call to init() would fail if there was not enough memory to allocate
+          all buffers for the rowid merge strategy. In this case revert to table
+          scanning which doesn't need any big buffers.
+        */
+        delete pm_engine;
+        pm_engine= NULL;
+        strategy= PARTIAL_MATCH_SCAN;
+      }
+    }
+
+    if (strategy == PARTIAL_MATCH_SCAN)
+    {
+      if (!(pm_engine=
+            new subselect_table_scan_engine(thd, (subselect_uniquesubquery_engine*)
+                                            lookup_engine, tmp_table,
+                                            item, result,
+                                            semi_join_conds->argument_list(),
+                                            has_covering_null_row,
+                                            has_covering_null_columns,
+                                            count_columns_with_nulls)))
+      {
+        /* This is an irrecoverable error. */
+        res= 1;
+        goto err;
+      }
+    }
+  }
+
+  if (pm_engine)
+    lookup_engine= pm_engine;
+  item_in->change_engine(lookup_engine);
+
+err:
+  thd->lex->current_select= save_select;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Print the state of this engine into a string for debugging and views.
+*/
+
+void subselect_hash_sj_engine::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN(" <materialize> ("));
+  materialize_engine->print(str, query_type);
+  str->append(STRING_WITH_LEN(" ), "));
+
+  if (lookup_engine)
+    lookup_engine->print(str, query_type);
+  else
+    str->append(STRING_WITH_LEN(
+           "<engine selected at execution time>"
+         ));
+}
+
+void subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row)
+{
+  DBUG_ASSERT(FALSE);
+}
+
+void subselect_hash_sj_engine::exclude()
+{
+  DBUG_ASSERT(FALSE);
+}
+
+bool subselect_hash_sj_engine::no_tables()
+{
+  DBUG_ASSERT(FALSE);
+  return FALSE;
+}
+
+bool subselect_hash_sj_engine::change_result(Item_subselect *si,
+                                             select_result_interceptor *res,
+                                             bool temp __attribute__((unused)))
+{
+  DBUG_ASSERT(FALSE);
+  return TRUE;
+}
+
+
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+                         ha_rows null_count_arg, ha_rows min_null_row_arg,
+                         ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+  : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
+    row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
+{
+  DBUG_ASSERT(tbl->file->stats.records > null_count);
+  key_buff_elements= tbl->file->stats.records - null_count;
+  cur_key_idx= HA_POS_ERROR;
+
+  DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) ||
+              (!null_count && !min_null_row_arg && !max_null_row_arg));
+  if (null_count)
+  {
+    /* The counters are 1-based, for key access we need 0-based indexes. */
+    min_null_row= min_null_row_arg - 1;
+    max_null_row= max_null_row_arg - 1;
+  }
+  else
+    min_null_row= max_null_row= 0;
+}
+
+
+Ordered_key::~Ordered_key()
+{
+  my_free((char*) key_buff, MYF(0));
+  bitmap_free(&null_key);
+}
+
+
+/*
+  Cleanup that needs to be done for each PS (re)execution.
+*/
+
+void Ordered_key::cleanup()
+{
+  /*
+    Currently these keys are recreated for each PS re-execution, thus
+    there is nothing to cleanup, the whole object goes away after execution
+    is over. All handler related initialization/deinitialization is done by
+    the parent subselect_rowid_merge_engine object.
+  */
+}
+
+
+/*
+  Initialize a multi-column index.
+*/
+
+bool Ordered_key::init(MY_BITMAP *columns_to_index)
+{
+  THD *thd= tbl->in_use;
+  uint cur_key_col= 0;
+  Item_field *cur_tmp_field;
+  Item_func_lt *fn_less_than;
+
+  key_column_count= bitmap_bits_set(columns_to_index);
+  key_columns= (Item_field**) thd->alloc(key_column_count *
+                                         sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(key_column_count *
+                                            sizeof(Item_func_lt*));
+
+  if (!key_columns || !compare_pred)
+    return TRUE; /* Revert to table scan partial match. */
+
+  for (uint i= 0; i < columns_to_index->n_bits; i++)
+  {
+    if (!bitmap_is_set(columns_to_index, i))
+      continue;
+    cur_tmp_field= new Item_field(tbl->field[i]);
+    /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+    fn_less_than= new Item_func_lt(cur_tmp_field,
+                                   search_key->element_index(i));
+    fn_less_than->fix_fields(thd, (Item**) &fn_less_than);
+    key_columns[cur_key_col]= cur_tmp_field;
+    compare_pred[cur_key_col]= fn_less_than;
+    ++cur_key_col;
+  }
+
+  if (alloc_keys_buffers())
+  {
+    /* TIMOUR revert to partial match via table scan. */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Initialize a single-column index.
+*/
+
+bool Ordered_key::init(int col_idx)
+{
+  THD *thd= tbl->in_use;
+
+  key_column_count= 1;
+
+  // TIMOUR: check for mem allocation err, revert to scan
+
+  key_columns= (Item_field**) thd->alloc(sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*));
+
+  key_columns[0]= new Item_field(tbl->field[col_idx]);
+  /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+  compare_pred[0]= new Item_func_lt(key_columns[0],
+                                    search_key->element_index(col_idx));
+  compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]);
+
+  if (alloc_keys_buffers())
+  {
+    /* TIMOUR revert to partial match via table scan. */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Allocate the buffers for both the row number, and the NULL-bitmap indexes.
+*/
+
+bool Ordered_key::alloc_keys_buffers()
+{
+  DBUG_ASSERT(key_buff_elements > 0);
+
+  if (!(key_buff= (rownum_t*) my_malloc((size_t)(key_buff_elements * 
+    sizeof(rownum_t)), MYF(MY_WME))))
+    return TRUE;
+
+  /*
+    TIMOUR: it is enough to create bitmaps with size
+    (max_null_row - min_null_row), and then use min_null_row as
+    lookup offset.
+  */
+  /* Notice that max_null_row is max array index, we need count, so +1. */
+  if (bitmap_init(&null_key, NULL, (uint)(max_null_row + 1), FALSE))
+    return TRUE;
+
+  cur_key_idx= HA_POS_ERROR;
+
+  return FALSE;
+}
+
+
+/*
+  Quick sort comparison function that compares two rows of the same table
+  indentfied with their row numbers.
+
+  @retval -1
+  @retval  0
+  @retval +1
+*/
+
+int
+Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b)
+{
+  uchar *rowid_a, *rowid_b;
+  int error, cmp_res;
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tbl->file->ref_length;
+
+  if (a == b)
+    return 0;
+  /* Get the corresponding rowids. */
+  rowid_a= row_num_to_rowid + a * rowid_length;
+  rowid_b= row_num_to_rowid + b * rowid_length;
+  /* Fetch the rows for comparison. */
+  error= tbl->file->ha_rnd_pos(tbl->record[0], rowid_a);
+  DBUG_ASSERT(!error);
+  error= tbl->file->ha_rnd_pos(tbl->record[1], rowid_b);
+  DBUG_ASSERT(!error);
+  /*
+    Compare the two rows by the corresponding values of the indexed
+    columns.
+  */
+  for (uint i= 0; i < key_column_count; i++)
+  {
+    Field *cur_field= key_columns[i]->field;
+    if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length)))
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+int
+Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b)
+{
+  /* The result of comparing the two keys according to their row data. */
+  int cmp_row_res= key->cmp_keys_by_row_data(*a, *b);
+  if (cmp_row_res)
+    return cmp_row_res;
+  return (*a < *b) ? -1 : (*a > *b) ? 1 : 0;
+}
+
+
+void Ordered_key::sort_keys()
+{
+  my_qsort2(key_buff, (size_t) key_buff_elements, sizeof(rownum_t),
+            (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this);
+  /* Invalidate the current row position. */
+  cur_key_idx= HA_POS_ERROR;
+}
+
+
+/*
+  The fraction of rows that do not contain NULL in the columns indexed by
+  this key.
+
+  @retval  1  if there are no NULLs
+  @retval  0  if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+  /* We should not be processing empty tables. */
+  DBUG_ASSERT(tbl->file->stats.records);
+  return (1 - (double) null_count / (double) tbl->file->stats.records);
+}
+
+
+/*
+  Compare the value(s) of the current key in 'search_key' with the
+  data of the current table record.
+
+  @notes The comparison result follows from the way compare_pred
+  is created in Ordered_key::init. Currently compare_pred compares
+  a field in of the current row with the corresponding Item that
+  contains the search key.
+
+  @param row_num  Number of the row (not index in the key_buff array)
+
+  @retval -1  if (current row  < search_key)
+  @retval  0  if (current row == search_key)
+  @retval +1  if (current row  > search_key)
+*/
+
+int Ordered_key::cmp_key_with_search_key(rownum_t row_num)
+{
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tbl->file->ref_length;
+  uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length;
+  int error, cmp_res;
+
+  error= tbl->file->ha_rnd_pos(tbl->record[0], cur_rowid);
+  DBUG_ASSERT(!error);
+
+  for (uint i= 0; i < key_column_count; i++)
+  {
+    cmp_res= compare_pred[i]->get_comparator()->compare();
+    /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */
+    DBUG_ASSERT(!compare_pred[i]->null_value);
+    if (cmp_res)
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+/*
+  Find a key in a sorted array of keys via binary search.
+
+  see create_subq_in_equalities()
+*/
+
+bool Ordered_key::lookup()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  ha_rows lo= 0;
+  ha_rows hi= key_buff_elements - 1;
+  ha_rows mid;
+  int cmp_res;
+
+  while (lo <= hi)
+  {
+    mid= lo + (hi - lo) / 2;
+    cmp_res= cmp_key_with_search_key(key_buff[mid]);
+    /*
+      In order to find the minimum match, check if the pevious element is
+      equal or smaller than the found one. If equal, we need to search further
+      to the left.
+    */
+    if (!cmp_res && mid > 0)
+      cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0;
+
+    if (cmp_res == -1)
+    {
+      /* row[mid] < search_key */
+      lo= mid + 1;
+    }
+    else if (cmp_res == 1)
+    {
+      /* row[mid] > search_key */
+      if (!mid)
+        goto not_found;
+      hi= mid - 1;
+    }
+    else
+    {
+      /* row[mid] == search_key */
+      cur_key_idx= mid;
+      return TRUE;
+    }
+  }
+not_found:
+  cur_key_idx= HA_POS_ERROR;
+  return FALSE;
+}
+
+
+/*
+  Move the current index pointer to the next key with the same column
+  values as the current key. Since the index is sorted, all such keys
+  are contiguous.
+*/
+
+bool Ordered_key::next_same()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  if (cur_key_idx < key_buff_elements - 1)
+  {
+    /*
+      TIMOUR:
+      The below is quite inefficient, since as a result we will fetch every
+      row (except the last one) twice. There must be a more efficient way,
+      e.g. swapping record[0] and record[1], and reading only the new record.
+    */
+    if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1]))
+    {
+      ++cur_key_idx;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+void Ordered_key::print(String *str)
+{
+  uint i;
+  str->append("{idx=");
+  str->qs_append(keyid);
+  str->append(", (");
+  for (i= 0; i < key_column_count - 1; i++)
+  {
+    str->append(key_columns[i]->field->field_name);
+    str->append(", ");
+  }
+  str->append(key_columns[i]->field->field_name);
+  str->append("), ");
+
+  str->append("null_bitmap: (bits=");
+  str->qs_append(null_key.n_bits);
+  str->append(", nulls= ");
+  str->qs_append((double)null_count);
+  str->append(", min_null= ");
+  str->qs_append((double)min_null_row);
+  str->append(", max_null= ");
+  str->qs_append((double)max_null_row);
+  str->append("), ");
+
+  str->append('}');
+}
+
+
+subselect_partial_match_engine::subselect_partial_match_engine(
+  THD *thd_arg, subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg, Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  bool has_covering_null_row_arg,
+  bool has_covering_null_columns_arg,
+  uint count_columns_with_nulls_arg)
+  :subselect_engine(thd_arg, item_arg, result_arg),
+   tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+   equi_join_conds(equi_join_conds_arg),
+   has_covering_null_row(has_covering_null_row_arg),
+   has_covering_null_columns(has_covering_null_columns_arg),
+   count_columns_with_nulls(count_columns_with_nulls_arg)
+{}
+
+
+int subselect_partial_match_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  int copy_res, lookup_res;
+
+  /* Try to find a matching row by index lookup. */
+  copy_res= lookup_engine->copy_ref_key_simple();
+  if (copy_res == -1)
+  {
+    /* The result is FALSE based on the outer reference. */
+    item_in->value= 0;
+    item_in->null_value= 0;
+    return 0;
+  }
+  else if (copy_res == 0)
+  {
+    /* Search for a complete match. */
+    if ((lookup_res= lookup_engine->index_lookup()))
+    {
+      /* An error occured during lookup(). */
+      item_in->value= 0;
+      item_in->null_value= 0;
+      return lookup_res;
+    }
+    else if (item_in->value || !count_columns_with_nulls)
+    {
+      /*
+        A complete match was found, the result of IN is TRUE.
+        If no match was found, and there are no NULLs in the materialized
+        subquery, then the result is guaranteed to be false because this
+        branch is executed when the outer reference has no NULLs as well.
+        Notice: (this->item == lookup_engine->item)
+      */
+      return 0;
+    }
+  }
+
+  if (has_covering_null_row)
+  {
+    /*
+      If there is a NULL-only row that coveres all columns the result of IN
+      is UNKNOWN. 
+    */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+    return 0;
+  }
+
+  /*
+    There is no complete match. Look for a partial match (UNKNOWN result), or
+    no match (FALSE).
+  */
+  if (tmp_table->file->inited)
+    tmp_table->file->ha_index_end();
+
+  if (partial_match())
+  {
+    /* The result of IN is UNKNOWN. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+  }
+  else
+  {
+    /* The result of IN is FALSE. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 0;
+    item_in->null_value= 0;
+  }
+
+  return 0;
+}
+
+
+void subselect_partial_match_engine::print(String *str,
+                                           enum_query_type query_type)
+{
+  /*
+    Should never be called as the actual engine cannot be known at query
+    optimization time.
+    DBUG_ASSERT(FALSE);
+  */
+}
+
+
+/*
+  @param non_null_key_parts  
+  @param partial_match_key_parts  A union of all single-column NULL key parts.
+
+  @retval FALSE  the engine was initialized successfully
+  @retval TRUE   there was some (memory allocation) error during initialization,
+                 such errors should be interpreted as revert to other strategy
+*/
+
+bool
+subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
+                                   MY_BITMAP *partial_match_key_parts)
+{
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tmp_table->file->ref_length;
+  ha_rows row_count= tmp_table->file->stats.records;
+  rownum_t cur_rownum= 0;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  uint cur_keyid= 0;
+  Item_in_subselect *item_in= (Item_in_subselect*) item;
+  int error;
+
+  if (merge_keys_count == 0)
+  {
+    DBUG_ASSERT(bitmap_bits_set(partial_match_key_parts) == 0 ||
+                has_covering_null_row);
+    /* There is nothing to initialize, we will only do regular lookups. */
+    return FALSE;
+  }
+
+  /*
+    If all nullable columns contain only NULLs, there must be one index
+    over all non-null columns.
+  */
+  DBUG_ASSERT(!has_covering_null_columns ||
+              (has_covering_null_columns &&
+               merge_keys_count == 1 && non_null_key_parts));
+  /*
+    Allocate buffers to hold the merged keys and the mapping between rowids and
+    row numbers. All small buffers are allocated in the runtime memroot. Big
+    buffers are allocated from the OS via malloc.
+  */
+  if (!(merge_keys= (Ordered_key**) thd->alloc(merge_keys_count *
+                                               sizeof(Ordered_key*))) ||
+      !(null_bitmaps= (MY_BITMAP**) thd->alloc(merge_keys_count *
+                                               sizeof(MY_BITMAP*))) ||
+      !(row_num_to_rowid= (uchar*) my_malloc((size_t)(row_count * rowid_length),
+        MYF(MY_WME))))
+    return TRUE;
+
+  /* Create the only non-NULL key if there is any. */
+  if (non_null_key_parts)
+  {
+    non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
+                                  0, 0, 0, row_num_to_rowid);
+    if (non_null_key->init(non_null_key_parts))
+      return TRUE;
+    merge_keys[cur_keyid]= non_null_key;
+    merge_keys[cur_keyid]->first();
+    ++cur_keyid;
+  }
+
+  /*
+    If all nullable columns contain NULLs, the only key that is needed is the
+    only non-NULL key that is already created above.
+  */
+  if (!has_covering_null_columns)
+  {
+    if (bitmap_init_memroot(&matching_keys, merge_keys_count, thd->mem_root) ||
+        bitmap_init_memroot(&matching_outer_cols, merge_keys_count, thd->mem_root))
+      return TRUE;
+
+    /*
+      Create one single-column NULL-key for each column in
+      partial_match_key_parts.
+    */
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      /* Skip columns that have no NULLs, or contain only NULLs. */
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue;
+
+      merge_keys[cur_keyid]= new Ordered_key(
+                                     cur_keyid, tmp_table,
+                                     item_in->left_expr->element_index(i),
+                                     result_sink->get_null_count_of_col(i),
+                                     result_sink->get_min_null_of_col(i),
+                                     result_sink->get_max_null_of_col(i),
+                                     row_num_to_rowid);
+      if (merge_keys[cur_keyid]->init(i))
+        return TRUE;
+      merge_keys[cur_keyid]->first();
+      ++cur_keyid;
+    }
+  }
+  DBUG_ASSERT(cur_keyid == merge_keys_count);
+
+  /* Populate the indexes with data from the temporary table. */
+  if (tmp_table->file->ha_rnd_init_with_error(1))
+    return TRUE;
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
+  tmp_table->null_row= 0;
+  while (TRUE)
+  {
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error == HA_ERR_RECORD_DELETED)
+    {
+      /* We get this for duplicate records that should not be in tmp_table. */
+      continue;
+    }
+    /*
+      This is a temp table that we fully own, there should be no other
+      cause to stop the iteration than EOF.
+    */
+    DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE);
+    if (error == HA_ERR_END_OF_FILE)
+    {
+      DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records);
+      break;
+    }
+
+    /*
+      Save the position of this record in the row_num -> rowid mapping.
+    */
+    tmp_table->file->position(tmp_table->record[0]);
+    memcpy(row_num_to_rowid + cur_rownum * rowid_length,
+           tmp_table->file->ref, rowid_length);
+
+    /* Add the current row number to the corresponding keys. */
+    if (non_null_key)
+    {
+      /* By definition there are no NULLs in the non-NULL key. */
+      non_null_key->add_key(cur_rownum);
+    }
+
+    for (uint i= (non_null_key ? 1 : 0); i < merge_keys_count; i++)
+    {
+      /*
+        Check if the first and only indexed column contains NULL in the curent
+        row, and add the row number to the corresponding key.
+      */
+      if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null())
+        merge_keys[i]->set_null(cur_rownum);
+      else
+        merge_keys[i]->add_key(cur_rownum);
+    }
+    ++cur_rownum;
+  }
+
+  tmp_table->file->ha_rnd_end();
+
+  /* Sort all the keys by their NULL selectivity. */
+  my_qsort(merge_keys, merge_keys_count, sizeof(Ordered_key*),
+           (qsort_cmp) cmp_keys_by_null_selectivity);
+
+  /* Sort the keys in each of the indexes. */
+  for (uint i= 0; i < merge_keys_count; i++)
+    merge_keys[i]->sort_keys();
+
+  if (init_queue(&pq, merge_keys_count, 0, FALSE,
+                 subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL,
+                 0, 0))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
+{
+  /* None of the resources below is allocated if there are no ordered keys. */
+  if (merge_keys_count)
+  {
+    my_free((char*) row_num_to_rowid, MYF(0));
+    for (uint i= 0; i < merge_keys_count; i++)
+      delete merge_keys[i];
+    delete_queue(&pq);
+    if (tmp_table->file->inited == handler::RND)
+      tmp_table->file->ha_rnd_end();
+  }
+}
+
+
+void subselect_rowid_merge_engine::cleanup()
+{
+}
+
+
+/*
+  Quick sort comparison function to compare keys in order of decreasing bitmap
+  selectivity, so that the most selective keys come first.
+
+  @param  k1 first key to compare
+  @param  k2 second key to compare
+
+  @retval  1  if k1 is less selective than k2
+  @retval  0  if k1 is equally selective as k2
+  @retval -1  if k1 is more selective than k2
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+                                                           Ordered_key **k2)
+{
+  double k1_sel= (*k1)->null_selectivity();
+  double k2_sel= (*k2)->null_selectivity();
+  if (k1_sel < k2_sel)
+    return 1;
+  if (k1_sel > k2_sel)
+    return -1;
+  return 0;
+}
+
+
+/*
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg,
+                                                     uchar *k1, uchar *k2)
+{
+  rownum_t r1= ((Ordered_key*) k1)->current();
+  rownum_t r2= ((Ordered_key*) k2)->current();
+
+  return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0;
+}
+
+
+/*
+  Check if certain table row contains a NULL in all columns for which there is
+  no match in the corresponding value index.
+
+  @note
+  There is no need to check the columns that contain only NULLs, because
+  those are guaranteed to match.
+
+  @retval TRUE if a NULL row exists
+  @retval FALSE otherwise
+*/
+
+bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
+{
+  Ordered_key *cur_key;
+  for (uint i = 0; i < merge_keys_count; i++)
+  {
+    cur_key= merge_keys[i];
+    if (bitmap_is_set(&matching_keys, cur_key->get_keyid()))
+    {
+      /*
+        The key 'i' (with id 'cur_keyid') already matches a value in row
+        'row_num', thus we skip it as it can't possibly match a NULL.
+      */
+      continue;
+    }
+    if (!cur_key->is_null(row_num))
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/**
+  Test if a subset of NULL-able columns contains a row of NULLs.
+  @retval TRUE  if such a row exists
+  @retval FALSE no complementing null row
+*/
+
+bool subselect_rowid_merge_engine::
+exists_complementing_null_row(MY_BITMAP *keys_to_complement)
+{
+  rownum_t highest_min_row= 0;
+  rownum_t lowest_max_row= UINT_MAX;
+  uint count_null_keys, i;
+  Ordered_key *cur_key;
+
+  if (!count_columns_with_nulls)
+  {
+    /*
+      If there are both NULLs and non-NUll values in the outer reference, and
+      the subquery contains no NULLs, a complementing NULL row cannot exist.
+    */
+    return FALSE;
+  }
+
+  for (i= (non_null_key ? 1 : 0), count_null_keys= 0; i < merge_keys_count; i++)
+  {
+    cur_key= merge_keys[i];
+    if (bitmap_is_set(keys_to_complement, cur_key->get_keyid()))
+      continue;
+    if (!cur_key->get_null_count())
+    {
+      /* If there is column without NULLs, there cannot be a partial match. */
+      return FALSE;
+    }
+    if (cur_key->get_min_null_row() > highest_min_row)
+      highest_min_row= cur_key->get_min_null_row();
+    if (cur_key->get_max_null_row() < lowest_max_row)
+      lowest_max_row= cur_key->get_max_null_row();
+    null_bitmaps[count_null_keys++]= cur_key->get_null_key();
+  }
+
+  if (lowest_max_row < highest_min_row)
+  {
+    /* The intersection of NULL rows is empty. */
+    return FALSE;
+  }
+
+  return bitmap_exists_intersection((const MY_BITMAP**) null_bitmaps,
+                                    count_null_keys,
+                                    (uint)highest_min_row, (uint)lowest_max_row);
+}
+
+
+/*
+  @retval TRUE  there is a partial match (UNKNOWN)
+  @retval FALSE  there is no match at all (FALSE)
+*/
+
+bool subselect_rowid_merge_engine::partial_match()
+{
+  Ordered_key *min_key; /* Key that contains the current minimum position. */
+  rownum_t min_row_num; /* Current row number of min_key. */
+  Ordered_key *cur_key;
+  rownum_t cur_row_num;
+  uint count_nulls_in_search_key= 0;
+  uint max_null_in_any_row=
+    ((select_materialize_with_stats *) result)->get_max_nulls_in_row();
+  bool res= FALSE;
+
+  /* If there is a non-NULL key, it must be the first key in the keys array. */
+  DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
+  /* The prioryty queue for keys must be empty. */
+  DBUG_ASSERT(!pq.elements);
+
+  /* All data accesses during execution are via handler::ha_rnd_pos() */
+  if (tmp_table->file->ha_rnd_init_with_error(0))
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /* Check if there is a match for the columns of the only non-NULL key. */
+  if (non_null_key && !non_null_key->lookup())
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /*
+    If all nullable columns contain only NULLs, then there is a guranteed
+    partial match, and we don't need to search for a matching row.
+  */
+  if (has_covering_null_columns)
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  if (non_null_key)
+    queue_insert(&pq, (uchar *) non_null_key);
+  /*
+    Do not add the non_null_key, since it was already processed above.
+  */
+  bitmap_clear_all(&matching_outer_cols);
+  for (uint i= test(non_null_key); i < merge_keys_count; i++)
+  {
+    DBUG_ASSERT(merge_keys[i]->get_column_count() == 1);
+    if (merge_keys[i]->get_search_key(0)->null_value)
+    {
+      ++count_nulls_in_search_key;
+      bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
+    }
+    else if (merge_keys[i]->lookup())
+      queue_insert(&pq, (uchar *) merge_keys[i]);
+  }
+
+  /*
+    If the outer reference consists of only NULLs, or if it has NULLs in all
+    nullable columns (above we guarantee there is a match for the non-null
+    coumns), the result is UNKNOWN.
+  */
+  if (count_nulls_in_search_key == merge_keys_count - test(non_null_key))
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  /*
+    If the outer row has NULLs in some columns, and
+    there is no match for any of the remaining columns, and
+    there is a subquery row with NULLs in all unmatched columns,
+    then there is a partial match, otherwise the result is FALSE.
+  */
+  if (count_nulls_in_search_key && !pq.elements)
+  {
+    DBUG_ASSERT(!non_null_key);
+    /*
+      Check if the intersection of all NULL bitmaps of all keys that
+      are not in matching_outer_cols is non-empty.
+    */
+    res= exists_complementing_null_row(&matching_outer_cols);
+    goto end;
+  }
+
+  /*
+    If there is no NULL (sub)row that covers all NULL columns, and there is no
+    match for any of the NULL columns, the result is FALSE. Notice that if there
+    is a non-null key, and there is only one matching key, the non-null key is
+    the matching key. This is so, because this method returns FALSE if the
+    non-null key doesn't have a match.
+  */
+  if (!count_nulls_in_search_key &&
+      (!pq.elements ||
+       (pq.elements == 1 && non_null_key &&
+        max_null_in_any_row < merge_keys_count-1)))
+  {
+    if (!pq.elements)
+    {
+      DBUG_ASSERT(!non_null_key);
+      /*
+        The case of a covering null row is handled by
+        subselect_partial_match_engine::exec()
+      */
+      DBUG_ASSERT(max_null_in_any_row != tmp_table->s->fields);
+    }
+    res= FALSE;
+    goto end;
+  }
+
+  DBUG_ASSERT(pq.elements);
+
+  min_key= (Ordered_key*) queue_remove_top(&pq);
+  min_row_num= min_key->current();
+  bitmap_set_bit(&matching_keys, min_key->get_keyid());
+  bitmap_union(&matching_keys, &matching_outer_cols);
+  if (min_key->next_same())
+    queue_insert(&pq, (uchar *) min_key);
+
+  if (pq.elements == 0)
+  {
+    /*
+      Check the only matching row of the only key min_key for NULL matches
+      in the other columns.
+    */
+    res= test_null_row(min_row_num);
+    goto end;
+  }
+
+  while (TRUE)
+  {
+    cur_key= (Ordered_key*) queue_remove_top(&pq);
+    cur_row_num= cur_key->current();
+
+    if (cur_row_num == min_row_num)
+      bitmap_set_bit(&matching_keys, cur_key->get_keyid());
+    else
+    {
+      /* Follows from the correct use of priority queue. */
+      DBUG_ASSERT(cur_row_num > min_row_num);
+      if (test_null_row(min_row_num))
+      {
+        res= TRUE;
+        goto end;
+      }
+      else
+      {
+        min_key= cur_key;
+        min_row_num= cur_row_num;
+        bitmap_clear_all(&matching_keys);
+        bitmap_set_bit(&matching_keys, min_key->get_keyid());
+        bitmap_union(&matching_keys, &matching_outer_cols);
+      }
+    }
+
+    if (cur_key->next_same())
+      queue_insert(&pq, (uchar *) cur_key);
+
+    if (pq.elements == 0)
+    {
+      /* Check the last row of the last column in PQ for NULL matches. */
+      res= test_null_row(min_row_num);
+      goto end;
+    }
+  }
+
+  /* We should never get here - all branches must be handled explicitly above. */
+  DBUG_ASSERT(FALSE);
+
+end:
+  if (!has_covering_null_columns)
+    bitmap_clear_all(&matching_keys);
+  queue_remove_all(&pq);
+  tmp_table->file->ha_rnd_end();
+  return res;
+}
+
+
+subselect_table_scan_engine::subselect_table_scan_engine(
+  THD *thd_arg, subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg,
+  Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  bool has_covering_null_row_arg,
+  bool has_covering_null_columns_arg,
+  uint count_columns_with_nulls_arg)
+  :subselect_partial_match_engine(thd_arg, engine_arg, tmp_table_arg, item_arg,
+                                  result_arg, equi_join_conds_arg,
+                                  has_covering_null_row_arg,
+                                  has_covering_null_columns_arg,
+                                  count_columns_with_nulls_arg)
+{}
+
+
+/*
+  TIMOUR:
+  This method is based on subselect_uniquesubquery_engine::scan_table().
+  Consider refactoring somehow, 80% of the code is the same.
+
+  for each row_i in tmp_table
+  {
+    count_matches= 0;
+    for each row element row_i[j]
+    {
+      if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
+        ++count_matches;
+    }
+    if (count_matches == outer_ref.elements)
+      return TRUE
+  }
+  return FALSE
+*/
+
+bool subselect_table_scan_engine::partial_match()
+{
+  List_iterator_fast<Item> equality_it(*equi_join_conds);
+  Item *cur_eq;
+  uint count_matches;
+  int error;
+  bool res;
+
+  if (tmp_table->file->ha_rnd_init_with_error(1))
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
+  for (;;)
+  {
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(tmp_table, error);
+        break;
+      }
+    }
+
+    equality_it.rewind();
+    count_matches= 0;
+    while ((cur_eq= equality_it++))
+    {
+      DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
+                  ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
+      if (!cur_eq->val_int() && !cur_eq->null_value)
+        break;
+      ++count_matches;
+    }
+    if (count_matches == tmp_table->s->fields)
+    {
+      res= TRUE; /* Found a matching row. */
+      goto end;
+    }
+  }
+
+  res= FALSE;
+end:
+  tmp_table->file->ha_rnd_end();
+  return res;
+}
+
+
+void subselect_table_scan_engine::cleanup()
+{
+}
+
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 9842641788b..79044ccf3f7 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -24,24 +24,21 @@
 class st_select_lex;
 class st_select_lex_unit;
 class JOIN;
-class select_subselect;
+class select_result_interceptor;
 class subselect_engine;
+class subselect_hash_sj_engine;
 class Item_bool_func2;
+class Cached_item;
 
 /* base class for subselects */
 
 class Item_subselect :public Item_result_field
 {
-  bool value_assigned; 		/* value already assigned to subselect */
+  bool value_assigned;   /* value already assigned to subselect */
+  bool own_engine;  /* the engine was not taken from other Item_subselect */
 protected:
   /* thread handler, will be assigned in fix_fields only */
   THD *thd;
-  /* substitution instead of subselect in case of optimization */
-  Item *substitution;
-  /* unit of subquery */
-  st_select_lex_unit *unit;
-  /* engine that perform execution of subselect (single select or union) */
-  subselect_engine *engine;
   /* old engine if engine was changed */
   subselect_engine *old_engine;
   /* cache of used external tables */
@@ -55,13 +52,62 @@ protected:
   /* cache of constant state */
   bool const_item_cache;
   
+  bool inside_first_fix_fields;
+  bool done_first_fix_fields;
+  Item *expr_cache;
+  /*
+    Set to TRUE if at optimization or execution time we determine that this
+    item's value is a constant. We need this member because it is not possible
+    to substitute 'this' with a constant item.
+  */
+  bool forced_const;
+#ifndef DBUG_OFF
+  /* Count the number of times this subquery predicate has been executed. */
+  uint exec_counter;
+#endif
 public:
   /* 
-    References from inside the subquery to the select that this predicate is
-    in.  References to parent selects not included.
+    Used inside Item_subselect::fix_fields() according to this scenario:
+      > Item_subselect::fix_fields
+        > engine->prepare
+          > child_join->prepare
+            (Here we realize we need to do the rewrite and set
+             substitution= some new Item, eg. Item_in_optimizer )
+          < child_join->prepare
+        < engine->prepare
+        *ref= substitution;
+        substitution= NULL;
+      < Item_subselect::fix_fields
+  */
+  /* TODO make this protected member again. */
+  Item *substitution;
+  /* engine that perform execution of subselect (single select or union) */
+  /* TODO make this protected member again. */
+  subselect_engine *engine;
+  /* unit of subquery */
+  st_select_lex_unit *unit;
+  /* A reference from inside subquery predicate to somewhere outside of it */
+  class Ref_to_outside : public Sql_alloc
+  {
+  public:
+    st_select_lex *select; /* Select where the reference is pointing to */
+    /* 
+      What is being referred. This may be NULL when we're referring to an
+      aggregate function.
+    */ 
+    Item *item; 
+  };
+  /*
+    References from within this subquery to somewhere outside of it (i.e. to
+    parent select, grandparent select, etc)
+  */
+  List<Ref_to_outside> upper_refs;
+  st_select_lex *parent_select;
+
+  /*
+   TRUE<=>Table Elimination has made it redundant to evaluate this select
+          (and so it is not part of QEP, etc)
   */
-  List<Item> refers_to;
-  int in_fix_fields;
   bool eliminated;
   
   /* changed engine indicator */
@@ -72,21 +118,26 @@ public:
   /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
   bool is_correlated; 
 
-  enum trans_res {RES_OK, RES_REDUCE, RES_ERROR};
   enum subs_type {UNKNOWN_SUBS, SINGLEROW_SUBS,
 		  EXISTS_SUBS, IN_SUBS, ALL_SUBS, ANY_SUBS};
 
   Item_subselect();
 
   virtual subs_type substype() { return UNKNOWN_SUBS; }
+  bool is_in_predicate()
+  {
+    return (substype() == Item_subselect::IN_SUBS ||
+            substype() == Item_subselect::ALL_SUBS ||
+            substype() == Item_subselect::ANY_SUBS);
+  }
 
   /*
-     We need this method, because some compilers do not allow 'this'
-     pointer in constructor initialization list, but we need pass pointer
-     to subselect Item class to select_subselect classes constructor.
+    We need this method, because some compilers do not allow 'this'
+    pointer in constructor initialization list, but we need to pass a pointer
+    to subselect Item class to select_result_interceptor's constructor.
   */
   virtual void init (st_select_lex *select_lex,
-		     select_subselect *result);
+		     select_result_interceptor *result);
 
   ~Item_subselect();
   void cleanup();
@@ -95,7 +146,7 @@ public:
     eliminated= FALSE;
     null_value= 1;
   }
-  virtual trans_res select_transformer(JOIN *join);
+  virtual bool select_transformer(JOIN *join);
   bool assigned() { return value_assigned; }
   void assigned(bool a) { value_assigned= a; }
   enum Type type() const;
@@ -105,13 +156,25 @@ public:
     return null_value;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
   virtual bool exec();
+  /*
+    If subquery optimization or execution determines that the subquery has
+    an empty result, mark the subquery predicate as a constant value.
+  */
+  void make_const()
+  { 
+    used_tables_cache= 0;
+    const_item_cache= 0;
+    forced_const= TRUE; 
+  }
   virtual void fix_length_and_dec();
   table_map used_tables() const;
   table_map not_null_tables() const { return 0; }
   bool const_item() const;
   inline table_map get_used_tables_cache() { return used_tables_cache; }
-  inline bool get_const_item_cache() { return const_item_cache; }
   Item *get_tmp_table_item(THD *thd);
   void update_used_tables();
   virtual void print(String *str, enum_query_type query_type);
@@ -129,6 +192,7 @@ public:
   */
   bool is_evaluated() const;
   bool is_uncacheable() const;
+  bool is_expensive() { return TRUE; }
 
   /*
     Used by max/min subquery to initialize value presence registration
@@ -138,11 +202,23 @@ public:
   enum_parsing_place place() { return parsing_place; }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
   bool mark_as_eliminated_processor(uchar *arg);
+  bool eliminate_subselect_processor(uchar *arg);
+  bool set_fake_select_as_master_processor(uchar *arg);
   bool enumerate_field_refs_processor(uchar *arg);
   bool check_vcol_func_processor(uchar *int_arg) 
   {
     return trace_unsupported_by_check_vcol_func_processor("subselect");
   }
+  /**
+    Callback to test if an IN predicate is expensive.
+
+    @notes
+    The return value affects the behavior of make_cond_for_table().
+
+    @retval TRUE  if the predicate is expensive
+    @retval FALSE otherwise
+  */
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
 
   /**
     Get the SELECT_LEX structure associated with this Item.
@@ -150,8 +226,15 @@ public:
   */
   st_select_lex* get_select_lex();
   const char *func_name() const { DBUG_ASSERT(0); return "subselect"; }
+  virtual bool expr_cache_is_needed(THD *);
+  virtual void get_cache_parameters(List<Item> &parameters);
+  virtual bool is_subquery_processor (uchar *opt_arg) { return 1; }
+  bool limit_index_condition_pushdown_processor(uchar *opt_arg) 
+  {
+    return TRUE;
+   }
 
-  friend class select_subselect;
+  friend class select_result_interceptor;
   friend class Item_in_optimizer;
   friend bool Item_field::fix_fields(THD *, Item **);
   friend int  Item_field::fix_outer_field(THD *, Field **, Item **);
@@ -159,6 +242,8 @@ public:
   friend void mark_select_range_as_dependent(THD*,
                                              st_select_lex*, st_select_lex*,
                                              Field*, Item*, Item_ident*);
+  friend bool convert_join_subqueries_to_semijoins(JOIN *join);
+
 };
 
 /* single value subselect */
@@ -170,13 +255,14 @@ protected:
   Item_cache *value, **row;
 public:
   Item_singlerow_subselect(st_select_lex *select_lex);
-  Item_singlerow_subselect() :Item_subselect(), value(0), row (0) {}
+  Item_singlerow_subselect() :Item_subselect(), value(0), row (0)
+  {}
 
   void cleanup();
   subs_type substype() { return SINGLEROW_SUBS; }
 
   void reset();
-  trans_res select_transformer(JOIN *join);
+  bool select_transformer(JOIN *join);
   void store(uint i, Item* item);
   double val_real();
   longlong val_int ();
@@ -208,6 +294,8 @@ public:
   */
   st_select_lex* invalidate_and_restore_select_lex();
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_singlerow_subselect;
 };
 
@@ -235,6 +323,8 @@ class Item_exists_subselect :public Item_subselect
 protected:
   bool value; /* value of this item (boolean: exists/not-exists) */
 
+  void init_length_and_dec();
+
 public:
   Item_exists_subselect(st_select_lex *select_lex);
   Item_exists_subselect(): Item_subselect() {}
@@ -255,20 +345,45 @@ public:
   void fix_length_and_dec();
   virtual void print(String *str, enum_query_type query_type);
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_exists_subselect;
   friend class subselect_uniquesubquery_engine;
   friend class subselect_indexsubquery_engine;
 };
 
 
-/*
-  IN subselect: this represents "left_exr IN (SELECT ...)"
+TABLE_LIST * const NO_JOIN_NEST=(TABLE_LIST*)0x1;
 
+/*
+  Possible methods to execute an IN predicate. These are set by the optimizer
+  based on user-set optimizer switches, semantic analysis and cost comparison.
+*/
+#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
+/* The Final decision about the strategy is made. */
+#define SUBS_STRATEGY_CHOSEN 1
+#define SUBS_SEMI_JOIN 2       /* IN was converted to semi-join. */
+#define SUBS_IN_TO_EXISTS 4    /* IN was converted to correlated EXISTS. */
+#define SUBS_MATERIALIZATION 8 /* Execute IN via subquery materialization. */
+/* Partial matching substrategies of MATERIALIZATION. */
+#define SUBS_PARTIAL_MATCH_ROWID_MERGE 16
+#define SUBS_PARTIAL_MATCH_TABLE_SCAN 32
+/* ALL/ANY will be transformed with max/min optimization */
+/*   The subquery has not aggregates, transform it into a MAX/MIN query. */
+#define SUBS_MAXMIN_INJECTED 64
+/*   The subquery has aggregates, use a special max/min subselect engine. */
+#define SUBS_MAXMIN_ENGINE 128
+
+
+/**
+  Representation of IN subquery predicates of the form
+  "left_expr IN (SELECT ...)".
+
+  @details
   This class has: 
-   - (as a descendant of Item_subselect) a "subquery execution engine" which 
-      allows it to evaluate subqueries. (and this class participates in
-      execution by having was_null variable where part of execution result
-      is stored.
+   - A "subquery execution engine" (as a subclass of Item_subselect) that allows
+     it to evaluate subqueries. (and this class participates in execution by
+     having was_null variable where part of execution result is stored.
    - Transformation methods (todo: more on this).
 
   This class is not used directly, it is "wrapped" into Item_in_optimizer
@@ -278,20 +393,125 @@ public:
 class Item_in_subselect :public Item_exists_subselect
 {
 protected:
-  Item *left_expr;
+  /*
+    Cache of the left operand of the subquery predicate. Allocated in the
+    runtime memory root, for each execution, thus need not be freed.
+  */
+  List<Cached_item> *left_expr_cache;
+  bool first_execution;
+
   /*
     expr & optimizer used in subselect rewriting to store Item for
     all JOIN in UNION
   */
   Item *expr;
-  Item_in_optimizer *optimizer;
   bool was_null;
   bool abort_on_null;
-  bool transformed;
+  /* A bitmap of possible execution strategies for an IN predicate. */
+  uchar in_strategy;
 public:
+  Item_in_optimizer *optimizer;
+protected:
   /* Used to trigger on/off conditions that were pushed down to subselect */
   bool *pushed_cond_guards;
+  Comp_creator *func;
+
+protected:
+  bool init_cond_guards();
+  bool select_in_like_transformer(JOIN *join);
+  bool single_value_transformer(JOIN *join);
+  bool row_value_transformer(JOIN * join);
+  bool fix_having(Item *having, st_select_lex *select_lex);
+  bool create_single_in_to_exists_cond(JOIN * join,
+                                       Item **where_item,
+                                       Item **having_item);
+  bool create_row_in_to_exists_cond(JOIN * join,
+                                    Item **where_item,
+                                    Item **having_item);
+public:
+  Item *left_expr;
+  /* Priority of this predicate in the convert-to-semi-join-nest process. */
+  int sj_convert_priority;
+  /*
+    Used by subquery optimizations to keep track about in which clause this
+    subquery predicate is located: 
+      NO_JOIN_NEST      - the predicate is an AND-part of the WHERE
+      join nest pointer - the predicate is an AND-part of ON expression
+                          of a join nest   
+      NULL              - for all other locations
+  */
+  TABLE_LIST *emb_on_expr_nest;
+  /*
+    Types of left_expr and subquery's select list allow to perform subquery
+    materialization. Currently, we set this to FALSE when it as well could
+    be TRUE. This is to be properly addressed with fix for BUG#36752.
+  */
+  bool types_allow_materialization;
+
+  /* 
+    Same as above, but they also allow to scan the materialized table. 
+  */
+  bool sjm_scan_allowed;
+
+  /* 
+    JoinTaB Materialization (JTBM) members
+  */
+  
+  /* 
+    TRUE <=> This subselect has been converted into non-mergeable semi-join
+    table.
+  */
+  bool is_jtbm_merged;
+  
+  /* (Applicable if is_jtbm_merged==TRUE) Time required to run the materialized join */
+  double jtbm_read_time;
+
+  /* (Applicable if is_jtbm_merged==TRUE) Number of output rows in materialized join */
+  double jtbm_record_count;   
+  
+  /*
+    (Applicable if is_jtbm_merged==TRUE) TRUE <=> The materialized subselect is
+    a degenerate subselect which produces 0 or 1 rows, which we know at
+    optimization phase.
+    Examples:
+    1. subquery has "Impossible WHERE": 
 
+      SELECT * FROM ot WHERE ot.column IN (SELECT it.col FROM it WHERE 2 > 3)
+    
+    2. Subquery produces one row which opt_sum.cc is able to get with one lookup:
+
+      SELECT * FROM ot WHERE ot.column IN (SELECT MAX(it.key_col) FROM it)
+  */
+  bool is_jtbm_const_tab;
+  
+  /* 
+    (Applicable if is_jtbm_const_tab==TRUE) Whether the subquery has produced 
+     the row (or not)
+  */
+  bool jtbm_const_row_found;
+  
+  /*
+    TRUE<=>this is a flattenable semi-join, false overwise.
+  */
+  bool is_flattenable_semijoin;
+
+  /*
+    TRUE<=>registered in the list of semijoins in outer select
+  */
+  bool is_registered_semijoin;
+  
+  /*
+    Used to determine how this subselect item is represented in the item tree,
+    in case there is a need to locate it there and replace with something else.
+    Two options are possible:
+      1. This item is there 'as-is'.
+      1. This item is wrapped within Item_in_optimizer.
+  */
+  Item *original_item()
+  {
+    return is_flattenable_semijoin ? (Item*)this : (Item*)optimizer;
+  }
+  
   bool *get_cond_guard(int i)
   {
     return pushed_cond_guards ? pushed_cond_guards + i : NULL;
@@ -307,10 +527,13 @@ public:
 
   Item_in_subselect(Item * left_expr, st_select_lex *select_lex);
   Item_in_subselect()
-    :Item_exists_subselect(), optimizer(0), abort_on_null(0), transformed(0),
-     pushed_cond_guards(NULL), upper_item(0)
-  {}
-
+    :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
+     abort_on_null(0), in_strategy(SUBS_NOT_TRANSFORMED), optimizer(0),
+    pushed_cond_guards(NULL), func(NULL), emb_on_expr_nest(NULL), 
+    is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE),
+    upper_item(0)
+    {}
+  void cleanup();
   subs_type substype() { return IN_SUBS; }
   void reset() 
   {
@@ -319,10 +542,11 @@ public:
     null_value= 0;
     was_null= 0;
   }
-  trans_res select_transformer(JOIN *join);
-  trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
-  trans_res single_value_transformer(JOIN *join, Comp_creator *func);
-  trans_res row_value_transformer(JOIN * join);
+  bool select_transformer(JOIN *join);
+  bool create_in_to_exists_cond(JOIN *join_arg);
+  bool inject_in_to_exists_cond(JOIN *join_arg);
+
+  virtual bool exec();
   longlong val_int();
   double val_real();
   String *val_str(String*);
@@ -334,10 +558,91 @@ public:
   bool test_limit(st_select_lex_unit *unit);
   virtual void print(String *str, enum_query_type query_type);
   bool fix_fields(THD *thd, Item **ref);
+  void fix_length_and_dec();
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void update_used_tables();
+  bool setup_mat_engine();
+  bool init_left_expr_cache();
+  /* Inform 'this' that it was computed, and contains a valid result. */
+  void set_first_execution() { if (first_execution) first_execution= FALSE; }
+  bool expr_cache_is_needed(THD *thd);
+  
+  int optimize(double *out_rows, double *cost);
+  /* 
+    Return the identifier that we could use to identify the subquery for the
+    user.
+  */
+  int get_identifier();
+
+  void mark_as_condition_AND_part(TABLE_LIST *embedding)
+  {
+    emb_on_expr_nest= embedding;
+  }
+
+  bool test_strategy(uchar strategy)
+  { return test(in_strategy & strategy); }
+
+  /**
+    Test that the IN strategy was chosen for execution. This is so
+    when the CHOSEN flag is ON, and there is no other strategy.
+  */
+  bool test_set_strategy(uchar strategy)
+  {
+    DBUG_ASSERT(strategy == SUBS_SEMI_JOIN ||
+                strategy == SUBS_IN_TO_EXISTS ||
+                strategy == SUBS_MATERIALIZATION ||
+                strategy == SUBS_PARTIAL_MATCH_ROWID_MERGE ||
+                strategy == SUBS_PARTIAL_MATCH_TABLE_SCAN ||
+                strategy == SUBS_MAXMIN_INJECTED ||
+                strategy == SUBS_MAXMIN_ENGINE);
+    return ((in_strategy & SUBS_STRATEGY_CHOSEN) &&
+            (in_strategy & ~SUBS_STRATEGY_CHOSEN) == strategy);
+  }
+
+  bool is_set_strategy()
+  { return test(in_strategy & SUBS_STRATEGY_CHOSEN); }
+
+  bool has_strategy()
+  { return in_strategy != SUBS_NOT_TRANSFORMED; }
+
+  void add_strategy (uchar strategy)
+  {
+    DBUG_ASSERT(strategy != SUBS_NOT_TRANSFORMED);
+    DBUG_ASSERT(!(strategy & SUBS_STRATEGY_CHOSEN));
+    /*
+      TODO: PS re-execution breaks this condition, because
+      check_and_do_in_subquery_rewrites() is called for each reexecution
+      and re-adds the same strategies.
+      DBUG_ASSERT(!(in_strategy & SUBS_STRATEGY_CHOSEN));
+    */
+    in_strategy|= strategy;
+  }
+
+  void reset_strategy(uchar strategy)
+  {
+    DBUG_ASSERT(strategy != SUBS_NOT_TRANSFORMED);
+    in_strategy= strategy;
+  }
+
+  void set_strategy(uchar strategy)
+  {
+    /* Check that only one strategy is set for execution. */
+    DBUG_ASSERT(strategy == SUBS_SEMI_JOIN ||
+                strategy == SUBS_IN_TO_EXISTS ||
+                strategy == SUBS_MATERIALIZATION ||
+                strategy == SUBS_PARTIAL_MATCH_ROWID_MERGE ||
+                strategy == SUBS_PARTIAL_MATCH_TABLE_SCAN ||
+                strategy == SUBS_MAXMIN_INJECTED ||
+                strategy == SUBS_MAXMIN_ENGINE);
+    in_strategy= (SUBS_STRATEGY_CHOSEN | strategy);
+  }
 
   friend class Item_ref_null_helper;
   friend class Item_is_not_null_test;
+  friend class Item_in_optimizer;
   friend class subselect_indexsubquery_engine;
+  friend class subselect_hash_sj_engine;
+  friend class subselect_partial_match_engine;
 };
 
 
@@ -346,23 +651,26 @@ class Item_allany_subselect :public Item_in_subselect
 {
 public:
   chooser_compare_func_creator func_creator;
-  Comp_creator *func;
   bool all;
 
   Item_allany_subselect(Item * left_expr, chooser_compare_func_creator fc,
                         st_select_lex *select_lex, bool all);
 
+  void cleanup();
   // only ALL subquery has upper not
   subs_type substype() { return all?ALL_SUBS:ANY_SUBS; }
-  trans_res select_transformer(JOIN *join);
+  bool select_transformer(JOIN *join);
+  void create_comp_func(bool invert) { func= func_creator(invert); }
   virtual void print(String *str, enum_query_type query_type);
+  bool is_maxmin_applicable(JOIN *join);
+  bool transform_into_max_min(JOIN *join);
 };
 
 
 class subselect_engine: public Sql_alloc
 {
 protected:
-  select_subselect *result; /* results storage class */
+  select_result_interceptor *result; /* results storage class */
   THD *thd; /* pointer to current THD */
   Item_subselect *item; /* item, that use this engine */
   enum Item_result res_type; /* type of results */
@@ -370,14 +678,20 @@ protected:
   bool maybe_null; /* may be null (first item in select) */
 public:
 
-  subselect_engine(Item_subselect *si, select_subselect *res)
-    :thd(0)
+  enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
+                         UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
+                         INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
+                         ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
+
+  subselect_engine(THD *thd_arg, Item_subselect *si,
+                   select_result_interceptor *res)
   {
     result= res;
     item= si;
     res_type= STRING_RESULT;
     res_field_type= MYSQL_TYPE_VAR_STRING;
     maybe_null= 0;
+    set_thd(thd_arg);
   }
   virtual ~subselect_engine() {}; // to satisfy compiler
   virtual void cleanup()= 0;
@@ -419,13 +733,17 @@ public:
   virtual bool may_be_null() { return maybe_null; };
   virtual table_map upper_select_const_tables()= 0;
   static table_map calc_const_tables(TABLE_LIST *);
+  static table_map calc_const_tables(List<TABLE_LIST> &list);
   virtual void print(String *str, enum_query_type query_type)= 0;
-  virtual bool change_result(Item_subselect *si, select_subselect *result)= 0;
+  virtual bool change_result(Item_subselect *si,
+                             select_result_interceptor *result,
+                             bool temp= FALSE)= 0;
   virtual bool no_tables()= 0;
   virtual bool is_executed() const { return FALSE; }
   /* Check if subquery produced any rows during last query execution */
   virtual bool no_rows() = 0;
-
+  virtual enum_engine_type engine_type() { return ABSTRACT_ENGINE; }
+  virtual int get_identifier() { DBUG_ASSERT(0); return 0; }
 protected:
   void set_row(List<Item> &item_list, Item_cache **row);
 };
@@ -434,14 +752,13 @@ protected:
 class subselect_single_select_engine: public subselect_engine
 {
   bool prepared;       /* simple subselect is prepared */
-  bool optimized;      /* simple subselect is optimized */
   bool executed;       /* simple subselect is executed */
   bool optimize_error; /* simple subselect optimization failed */
   st_select_lex *select_lex; /* corresponding select_lex */
   JOIN * join; /* corresponding JOIN structure */
 public:
-  subselect_single_select_engine(st_select_lex *select,
-				 select_subselect *result,
+  subselect_single_select_engine(THD *thd_arg, st_select_lex *select,
+				 select_result_interceptor *result,
 				 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -452,11 +769,21 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp);
   bool no_tables();
   bool may_be_null();
   bool is_executed() const { return executed; }
   bool no_rows();
+  virtual enum_engine_type engine_type() { return SINGLE_SELECT_ENGINE; }
+  int get_identifier();
+
+  friend class subselect_hash_sj_engine;
+  friend class Item_in_subselect;
+  friend bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,
+                                    Item **join_where);
+
 };
 
 
@@ -464,8 +791,8 @@ class subselect_union_engine: public subselect_engine
 {
   st_select_lex_unit *unit;  /* corresponding unit structure */
 public:
-  subselect_union_engine(st_select_lex_unit *u,
-			 select_subselect *result,
+  subselect_union_engine(THD *thd_arg, st_select_lex_unit *u,
+			 select_result_interceptor *result,
 			 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -476,10 +803,13 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
   bool no_tables();
   bool is_executed() const;
   bool no_rows();
+  virtual enum_engine_type engine_type() { return UNION_ENGINE; }
 };
 
 
@@ -519,25 +849,28 @@ public:
   // constructor can assign THD because it will be called after JOIN::prepare
   subselect_uniquesubquery_engine(THD *thd_arg, st_join_table *tab_arg,
 				  Item_subselect *subs, Item *where)
-    :subselect_engine(subs, 0), tab(tab_arg), cond(where)
-  {
-    set_thd(thd_arg);
-  }
+    :subselect_engine(thd_arg, subs, 0), tab(tab_arg), cond(where)
+  {}
   ~subselect_uniquesubquery_engine();
   void cleanup();
   int prepare();
   void fix_length_and_dec(Item_cache** row);
   int exec();
   uint cols() { return 1; }
-  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT_INJECTED; }
   void exclude();
   table_map upper_select_const_tables() { return 0; }
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
   bool no_tables();
+  int index_lookup(); /* TIMOUR: this method needs refactoring. */
   int scan_table();
   bool copy_ref_key();
+  int copy_ref_key_simple();  /* TIMOUR: this method needs refactoring. */
   bool no_rows() { return empty_result_set; }
+  virtual enum_engine_type engine_type() { return UNIQUESUBQUERY_ENGINE; }
 };
 
 
@@ -587,6 +920,7 @@ public:
   {}
   int exec();
   virtual void print (String *str, enum_query_type query_type);
+  virtual enum_engine_type engine_type() { return INDEXSUBQUERY_ENGINE; }
 };
 
 
@@ -595,9 +929,471 @@ inline bool Item_subselect::is_evaluated() const
   return engine->is_executed();
 }
 
+
 inline bool Item_subselect::is_uncacheable() const
 {
   return engine->uncacheable();
 }
 
 
+/**
+  Compute an IN predicate via a hash semi-join. This class is responsible for
+  the materialization of the subquery, and the selection of the correct and
+  optimal execution method (e.g. direct index lookup, or partial matching) for
+  the IN predicate.
+*/
+
+class subselect_hash_sj_engine : public subselect_engine
+{
+public:
+  /* The table into which the subquery is materialized. */
+  TABLE *tmp_table;
+  /* TRUE if the subquery was materialized into a temp table. */
+  bool is_materialized;
+  /*
+    The old engine already chosen at parse time and stored in permanent memory.
+    Through this member we can re-create and re-prepare materialize_join for
+    each execution of a prepared statement. We also reuse the functionality
+    of subselect_single_select_engine::[prepare | cols].
+  */
+  subselect_single_select_engine *materialize_engine;
+  /*
+    QEP to execute the subquery and materialize its result into a
+    temporary table. Created during the first call to exec().
+  */
+  JOIN *materialize_join;
+  /*
+    A conjunction of all the equality condtions between all pairs of expressions
+    that are arguments of an IN predicate. We need these to post-filter some
+    IN results because index lookups sometimes match values that are actually
+    not equal to the search key in SQL terms.
+  */
+  Item_cond_and *semi_join_conds;
+  Name_resolution_context *semi_join_conds_context;
+
+
+  subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
+                           subselect_single_select_engine *old_engine)
+    : subselect_engine(thd, in_predicate, NULL), 
+      tmp_table(NULL), is_materialized(FALSE), materialize_engine(old_engine),
+      materialize_join(NULL),  semi_join_conds(NULL), lookup_engine(NULL),
+      count_partial_match_columns(0), count_null_only_columns(0),
+      count_columns_with_nulls(0), strategy(UNDEFINED)
+  {}
+  ~subselect_hash_sj_engine();
+
+  bool init(List<Item> *tmp_columns, uint subquery_id);
+  void cleanup();
+  int prepare();
+  int exec();
+  virtual void print(String *str, enum_query_type query_type);
+  uint cols()
+  {
+    return materialize_engine->cols();
+  }
+  uint8 uncacheable() { return materialize_engine->uncacheable(); }
+  table_map upper_select_const_tables() { return 0; }
+  bool no_rows() { return !tmp_table->file->stats.records; }
+  virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
+  /*
+    TODO: factor out all these methods in a base subselect_index_engine class
+    because all of them have dummy implementations and should never be called.
+  */
+  void fix_length_and_dec(Item_cache** row);//=>base class
+  void exclude(); //=>base class
+  //=>base class
+  bool change_result(Item_subselect *si,
+                     select_result_interceptor *result,
+                     bool temp= FALSE);
+  bool no_tables();//=>base class
+
+protected:
+  /* The engine used to compute the IN predicate. */
+  subselect_engine *lookup_engine;
+  /* Keyparts of the only non-NULL composite index in a rowid merge. */
+  MY_BITMAP non_null_key_parts;
+  /* Keyparts of the single column indexes with NULL, one keypart per index. */
+  MY_BITMAP partial_match_key_parts;
+  uint count_partial_match_columns;
+  uint count_null_only_columns;
+  uint count_columns_with_nulls;
+  /* Possible execution strategies that can be used to compute hash semi-join.*/
+  enum exec_strategy {
+    UNDEFINED,
+    COMPLETE_MATCH, /* Use regular index lookups. */
+    PARTIAL_MATCH,  /* Use some partial matching strategy. */
+    PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
+    PARTIAL_MATCH_SCAN,  /* Use partial matching through table scan. */
+    IMPOSSIBLE      /* Subquery materialization is not applicable. */
+  };
+  /* The chosen execution strategy. Computed after materialization. */
+  exec_strategy strategy;
+  exec_strategy get_strategy_using_schema();
+  exec_strategy get_strategy_using_data();
+  ulonglong rowid_merge_buff_size(bool has_non_null_key,
+                                  bool has_covering_null_row,
+                                  MY_BITMAP *partial_match_key_parts);
+  void choose_partial_match_strategy(bool has_non_null_key,
+                                     bool has_covering_null_row,
+                                     MY_BITMAP *partial_match_key_parts);
+  bool make_semi_join_conds();
+  subselect_uniquesubquery_engine* make_unique_engine();
+
+};
+
+
+/*
+  Distinguish the type of (0-based) row numbers from the type of the index into
+  an array of row numbers.
+*/
+typedef ha_rows rownum_t;
+
+
+/*
+  An Ordered_key is an in-memory table index that allows O(log(N)) time
+  lookups of a multi-part key.
+
+  If the index is over a single column, then this column may contain NULLs, and
+  the NULLs are stored and tested separately for NULL in O(1) via is_null().
+  Multi-part indexes assume that the indexed columns do not contain NULLs.
+
+  TODO:
+  = Due to the unnatural assymetry between single and multi-part indexes, it
+    makes sense to somehow refactor or extend the class.
+
+  = This class can be refactored into a base abstract interface, and two
+    subclasses:
+    - one to represent single-column indexes, and
+    - another to represent multi-column indexes.
+    Such separation would allow slightly more efficient implementation of
+    the single-column indexes.
+  = The current design requires such indexes to be fully recreated for each
+    PS (re)execution, however most of the comprising objects can be reused.
+*/
+
+class Ordered_key : public Sql_alloc
+{
+protected:
+  /*
+    Index of the key in an array of keys. This index allows to
+    construct (sub)sets of keys represented by bitmaps.
+  */
+  uint keyid;
+  /* The table being indexed. */
+  TABLE *tbl;
+  /* The columns being indexed. */
+  Item_field **key_columns;
+  /* Number of elements in 'key_columns' (number of key parts). */
+  uint key_column_count;
+  /*
+    An expression, or sequence of expressions that forms the search key.
+    The search key is a sequence when it is Item_row. Each element of the
+    sequence is accessible via Item::element_index(int i).
+  */
+  Item *search_key;
+
+/* Value index related members. */
+  /*
+    The actual value index, consists of a sorted sequence of row numbers.
+  */
+  rownum_t *key_buff;
+  /* Number of elements in key_buff. */
+  ha_rows key_buff_elements;
+  /* Current element in 'key_buff'. */
+  ha_rows cur_key_idx;
+  /*
+    Mapping from row numbers to row ids. The element row_num_to_rowid[i]
+    contains a buffer with the rowid for the row numbered 'i'.
+    The memory for this member is not maintanined by this class because
+    all Ordered_key indexes of the same table share the same mapping.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A sequence of predicates to compare the search key with the corresponding
+    columns of a table row from the index.
+  */
+  Item_func_lt **compare_pred;
+
+/* Null index related members. */
+  MY_BITMAP null_key;
+  /* Count of NULLs per column. */
+  ha_rows null_count;
+  /* The row number that contains the first NULL in a column. */
+  rownum_t min_null_row;
+  /* The row number that contains the last NULL in a column. */
+  rownum_t max_null_row;
+
+protected:
+  bool alloc_keys_buffers();
+  /*
+    Quick sort comparison function that compares two rows of the same table
+    indentfied with their row numbers.
+  */
+  int cmp_keys_by_row_data(rownum_t a, rownum_t b);
+  static int cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b);
+
+  int cmp_key_with_search_key(rownum_t row_num);
+
+public:
+  Ordered_key(uint keyid_arg, TABLE *tbl_arg,
+              Item *search_key_arg, ha_rows null_count_arg,
+              ha_rows min_null_row_arg, ha_rows max_null_row_arg,
+              uchar *row_num_to_rowid_arg);
+  ~Ordered_key();
+  void cleanup();
+  /* Initialize a multi-column index. */
+  bool init(MY_BITMAP *columns_to_index);
+  /* Initialize a single-column index. */
+  bool init(int col_idx);
+
+  uint get_column_count() { return key_column_count; }
+  uint get_keyid() { return keyid; }
+  uint get_field_idx(uint i)
+  {
+    DBUG_ASSERT(i < key_column_count);
+    return key_columns[i]->field->field_index;
+  }
+  rownum_t get_min_null_row() { return min_null_row; }
+  rownum_t get_max_null_row() { return max_null_row; }
+  MY_BITMAP * get_null_key() { return &null_key; }
+  ha_rows get_null_count() { return null_count; }
+  /*
+    Get the search key element that corresponds to the i-th key part of this
+    index.
+  */
+  Item *get_search_key(uint i)
+  {
+    return search_key->element_index(key_columns[i]->field->field_index);
+  }
+  void add_key(rownum_t row_num)
+  {
+    /* The caller must know how many elements to add. */
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    key_buff[cur_key_idx]= row_num;
+    ++cur_key_idx;
+  }
+
+  void sort_keys();
+  double null_selectivity();
+
+  /*
+    Position the current element at the first row that matches the key.
+    The key itself is propagated by evaluating the current value(s) of
+    this->search_key.
+  */
+  bool lookup();
+  /* Move the current index cursor to the first key. */
+  void first()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    cur_key_idx= 0;
+  }
+  /* TODO */
+  bool next_same();
+  /* Move the current index cursor to the next key. */
+  bool next()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    if (cur_key_idx < key_buff_elements - 1)
+    {
+      ++cur_key_idx;
+      return TRUE;
+    }
+    return FALSE;
+  };
+  /* Return the current index element. */
+  rownum_t current()
+  {
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    return key_buff[cur_key_idx];
+  }
+
+  void set_null(rownum_t row_num)
+  {
+    bitmap_set_bit(&null_key, (uint)row_num);
+  }
+  bool is_null(rownum_t row_num)
+  {
+    /*
+      Indexes consisting of only NULLs do not have a bitmap buffer at all.
+      Their only initialized member is 'n_bits', which is equal to the number
+      of temp table rows.
+    */
+    if (null_count == tbl->file->stats.records)
+    {
+      DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits);
+      return TRUE;
+    }
+    if (row_num > max_null_row || row_num < min_null_row)
+      return FALSE;
+    return bitmap_is_set(&null_key, (uint)row_num);
+  }
+  void print(String *str);
+};
+
+
+class subselect_partial_match_engine : public subselect_engine
+{
+protected:
+  /* The temporary table that contains a materialized subquery. */
+  TABLE *tmp_table;
+  /*
+    The engine used to check whether an IN predicate is TRUE or not. If not
+    TRUE, then subselect_rowid_merge_engine further distinguishes between
+    FALSE and UNKNOWN.
+  */
+  subselect_uniquesubquery_engine *lookup_engine;
+  /* A list of equalities between each pair of IN operands. */
+  List<Item> *equi_join_conds;
+  /*
+    True if there is an all NULL row in tmp_table. If so, then if there is
+    no complete match, there is a guaranteed partial match.
+  */
+  bool has_covering_null_row;
+
+  /*
+    True if all nullable columns of tmp_table consist of only NULL values.
+    If so, then if there is a match in the non-null columns, there is a
+    guaranteed partial match.
+  */
+  bool has_covering_null_columns;
+  uint count_columns_with_nulls;
+
+protected:
+  virtual bool partial_match()= 0;
+public:
+  subselect_partial_match_engine(THD *thd_arg,
+                                 subselect_uniquesubquery_engine *engine_arg,
+                                 TABLE *tmp_table_arg, Item_subselect *item_arg,
+                                 select_result_interceptor *result_arg,
+                                 List<Item> *equi_join_conds_arg,
+                                 bool has_covering_null_row_arg,
+                                 bool has_covering_null_columns_arg,
+                                 uint count_columns_with_nulls_arg);
+  int prepare() { return 0; }
+  int exec();
+  void fix_length_and_dec(Item_cache**) {}
+  uint cols() { /* TODO: what is the correct value? */ return 1; }
+  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+  void exclude() {}
+  table_map upper_select_const_tables() { return 0; }
+  bool change_result(Item_subselect*,
+                     select_result_interceptor*,
+                     bool temp= FALSE)
+  { DBUG_ASSERT(FALSE); return false; }
+  bool no_tables() { return false; }
+  bool no_rows()
+  {
+    /*
+      TODO: It is completely unclear what is the semantics of this
+      method. The current result is computed so that the call to no_rows()
+      from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
+      correctly.
+    */
+    return !(((Item_in_subselect *) item)->null_value);
+  }
+  void print(String*, enum_query_type);
+
+  friend void subselect_hash_sj_engine::cleanup();
+};
+
+
+class subselect_rowid_merge_engine: public subselect_partial_match_engine
+{
+protected:
+  /*
+    Mapping from row numbers to row ids. The rowids are stored sequentially
+    in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A subset of all the keys for which there is a match for the same row.
+    Used during execution. Computed for each outer reference
+  */
+  MY_BITMAP matching_keys;
+  /*
+    The columns of the outer reference that are NULL. Computed for each
+    outer reference.
+  */
+  MY_BITMAP matching_outer_cols;
+  /*
+    Indexes of row numbers, sorted by <column_value, row_number>. If an
+    index may contain NULLs, the NULLs are stored efficiently in a bitmap.
+
+    The indexes are sorted by the selectivity of their NULL sub-indexes, the
+    one with the fewer NULLs is first. Thus, if there is any index on
+    non-NULL columns, it is contained in keys[0].
+  */
+  Ordered_key **merge_keys;
+  /* The number of elements in merge_keys. */
+  uint merge_keys_count;
+  /* The NULL bitmaps of merge keys.*/
+  MY_BITMAP   **null_bitmaps;
+  /*
+    An index on all non-NULL columns of 'tmp_table'. The index has the
+    logical form: <[v_i1 | ... | v_ik], rownum>. It allows to find the row
+    number where the columns c_i1,...,c1_k contain the values v_i1,...,v_ik.
+    If such an index exists, it is always the first element of 'merge_keys'.
+  */
+  Ordered_key *non_null_key;
+  /*
+    Priority queue of Ordered_key indexes, one per NULLable column.
+    This queue is used by the partial match algorithm in method exec().
+  */
+  QUEUE pq;
+protected:
+  /*
+    Comparison function to compare keys in order of decreasing bitmap
+    selectivity.
+  */
+  static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
+  /*
+    Comparison function used by the priority queue pq, the 'smaller' key
+    is the one with the smaller current row number.
+  */
+  static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2);
+
+  bool test_null_row(rownum_t row_num);
+  bool exists_complementing_null_row(MY_BITMAP *keys_to_complement);
+  bool partial_match();
+public:
+  subselect_rowid_merge_engine(THD *thd_arg,
+                               subselect_uniquesubquery_engine *engine_arg,
+                               TABLE *tmp_table_arg, uint merge_keys_count_arg,
+                               bool has_covering_null_row_arg,
+                               bool has_covering_null_columns_arg,
+                               uint count_columns_with_nulls_arg,
+                               Item_subselect *item_arg,
+                               select_result_interceptor *result_arg,
+                               List<Item> *equi_join_conds_arg)
+    :subselect_partial_match_engine(thd_arg, engine_arg, tmp_table_arg,
+                                    item_arg, result_arg, equi_join_conds_arg,
+                                    has_covering_null_row_arg,
+                                    has_covering_null_columns_arg,
+                                    count_columns_with_nulls_arg),
+    merge_keys_count(merge_keys_count_arg), non_null_key(NULL)
+  {}
+  ~subselect_rowid_merge_engine();
+  bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; }
+};
+
+
+class subselect_table_scan_engine: public subselect_partial_match_engine
+{
+protected:
+  bool partial_match();
+public:
+  subselect_table_scan_engine(THD *thd_arg,
+                              subselect_uniquesubquery_engine *engine_arg,
+                              TABLE *tmp_table_arg, Item_subselect *item_arg,
+                              select_result_interceptor *result_arg,
+                              List<Item> *equi_join_conds_arg,
+                              bool has_covering_null_row_arg,
+                              bool has_covering_null_columns_arg,
+                              uint count_columns_with_nulls_arg);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; }
+};
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index ac6ddc0fd54..60ec27863bd 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -351,7 +351,19 @@ bool Item_sum::register_sum_func(THD *thd, Item **ref)
          sl= sl->master_unit()->outer_select() )
       sl->master_unit()->item->with_sum_func= 1;
   }
-  thd->lex->current_select->mark_as_dependent(aggr_sel, NULL);
+  thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL);
+  return FALSE;
+}
+
+
+bool Item_sum::collect_outer_ref_processor(uchar *param)
+{
+  Collect_deps_prm *prm= (Collect_deps_prm *)param;
+  SELECT_LEX *ds;
+  if ((ds= depended_from()) &&
+      ds->nest_level_base == prm->nest_level_base &&
+      ds->nest_level < prm->nest_level)
+    prm->parameters->add_unique(this, &cmp_items);
   return FALSE;
 }
 
@@ -415,6 +427,7 @@ void Item_sum::mark_as_sum_func()
   cur_select->n_sum_items++;
   cur_select->with_sum_func= 1;
   with_sum_func= 1;
+  with_field= 0;
 }
 
 
@@ -481,7 +494,7 @@ bool Item_sum::walk (Item_processor processor, bool walk_subquery,
 Field *Item_sum::create_tmp_field(bool group, TABLE *table,
                                   uint convert_blob_length)
 {
-  Field *field;
+  Field *UNINIT_VAR(field);
   switch (result_type()) {
   case REAL_RESULT:
     field= new Field_double(max_length, maybe_null, name, decimals, TRUE);
@@ -501,7 +514,8 @@ Field *Item_sum::create_tmp_field(bool group, TABLE *table,
     field= Field_new_decimal::create_from_item(this);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     // This case should never be choosen
     DBUG_ASSERT(0);
     return 0;
@@ -615,7 +629,8 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
     max_length= float_length(decimals);
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   };
   setup_hybrid(args[0], NULL);
@@ -650,17 +665,31 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
     Setup cache/comparator of MIN/MAX functions. When called by the
     copy_or_same function value_arg parameter contains calculated value
     of the original MIN/MAX object and it is saved in this object's cache.
+
+    We mark the value and arg_cache with 'RAND_TABLE_BIT' to ensure
+    that Arg_comparator::compare_datetime() doesn't allocate new
+    item inside of Arg_comparator.  This would cause compare_datetime()
+    and Item_sum_min::add() to use different values!
 */
 
 void Item_sum_hybrid::setup_hybrid(Item *item, Item *value_arg)
 {
-  value= Item_cache::get_cache(item);
+  if (!(value= Item_cache::get_cache(item)))
+    return;
   value->setup(item);
   value->store(value_arg);
-  arg_cache= Item_cache::get_cache(item);
+  /* Don't cache value, as it will change */
+  if (!item->const_item())
+    value->set_used_tables(RAND_TABLE_BIT);
+  if (!(arg_cache= Item_cache::get_cache(item, item->cmp_type())))
+    return;
   arg_cache->setup(item);
+  /* Don't cache value, as it will change */
+  if (!item->const_item())
+    arg_cache->set_used_tables(RAND_TABLE_BIT);
   cmp= new Arg_comparator();
-  cmp->set_cmp_func(this, (Item**)&arg_cache, (Item**)&value, FALSE);
+  if (cmp)
+    cmp->set_cmp_func(this, (Item**)&arg_cache, (Item**)&value, FALSE);
   collation.set(item->collation);
 }
 
@@ -685,14 +714,17 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table,
   */
   switch (args[0]->field_type()) {
   case MYSQL_TYPE_DATE:
-    field= new Field_newdate(maybe_null, name, collation.collation);
+    field= new Field_newdate(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                             name, collation.collation);
     break;
   case MYSQL_TYPE_TIME:
-    field= new Field_time(maybe_null, name, collation.collation);
+    field= new_Field_time(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                          name, decimals, collation.collation);
     break;
   case MYSQL_TYPE_TIMESTAMP:
   case MYSQL_TYPE_DATETIME:
-    field= new Field_datetime(maybe_null, name, collation.collation);
+    field= new_Field_datetime(0, maybe_null ? (uchar*)"" : 0, 0, Field::NONE,
+                              name, decimals, collation.collation);
     break;
   default:
     return Item_sum::create_tmp_field(group, table, convert_blob_length);
@@ -751,13 +783,14 @@ void Item_sum_sum::fix_length_and_dec()
   DBUG_ENTER("Item_sum_sum::fix_length_and_dec");
   maybe_null=null_value=1;
   decimals= args[0]->decimals;
-  switch (args[0]->result_type()) {
+  switch (args[0]->cast_to_int_type()) {
   case REAL_RESULT:
   case STRING_RESULT:
     hybrid_type= REAL_RESULT;
     sum= 0.0;
     break;
   case INT_RESULT:
+  case TIME_RESULT:
   case DECIMAL_RESULT:
   {
     /* SUM result can't be longer than length(arg) + length(MAX_ROWS) */
@@ -771,7 +804,7 @@ void Item_sum_sum::fix_length_and_dec()
     break;
   }
   case ROW_RESULT:
-  default:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: %s (%d, %d)",
@@ -960,7 +993,8 @@ void Item_sum_distinct::fix_length_and_dec()
       table_field_type= MYSQL_TYPE_NEWDECIMAL;
     break;
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   val.traits->fix_length_and_dec(this, args[0]);
@@ -1047,9 +1081,10 @@ bool Item_sum_distinct::unique_walk_function(void *element)
 void Item_sum_distinct::clear()
 {
   DBUG_ENTER("Item_sum_distinct::clear");
-  DBUG_ASSERT(tree != 0);                        /* we always have a tree */
+  /* During EXPLAIN there is no tree because it is created during execution. */
+  if (tree != 0)
+    tree->reset();
   null_value= 1;
-  tree->reset();
   is_evaluated= FALSE;
   DBUG_VOID_RETURN;
 }
@@ -1411,7 +1446,8 @@ void Item_sum_variance::fix_length_and_dec()
     break;
   }
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
   DBUG_PRINT("info", ("Type: REAL_RESULT (%d, %d)", max_length, (int)decimals));
@@ -1836,7 +1872,8 @@ void Item_sum_hybrid::reset_field()
     break;
   }
   case ROW_RESULT:
-  default:
+  case TIME_RESULT:
+  case IMPOSSIBLE_RESULT:
     DBUG_ASSERT(0);
   }
 }
@@ -2561,7 +2598,7 @@ bool Item_sum_count_distinct::add()
     */
     return tree->unique_add(table->record[0] + table->s->null_bytes);
   }
-  if ((error= table->file->ha_write_row(table->record[0])) &&
+  if ((error= table->file->ha_write_tmp_row(table->record[0])) &&
       table->file->is_fatal_error(error, HA_CHECK_DUP))
     return TRUE;
   return FALSE;
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 3d4b00e3b52..c970dec5082 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -376,6 +376,7 @@ public:
   virtual Field *create_tmp_field(bool group, TABLE *table,
                                   uint convert_blob_length);
   bool walk(Item_processor processor, bool walk_subquery, uchar *argument);
+  virtual bool collect_outer_ref_processor(uchar *param);
   bool init_sum_func_check(THD *thd);
   bool check_sum_func(THD *thd, Item **ref);
   bool register_sum_func(THD *thd, Item **ref);
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 0b5392cb0e5..1002cf9fea8 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -37,193 +37,6 @@
 /** Day number for Dec 31st, 9999. */
 #define MAX_DAY_NUMBER 3652424L
 
-/**
-  @todo
-  OPTIMIZATION
-  - Replace the switch with a function that should be called for each
-  date type.
-  - Remove sprintf and opencode the conversion, like we do in
-  Field_datetime.
-
-  The reason for this functions existence is that as we don't have a
-  way to know if a datetime/time value has microseconds in them
-  we are now only adding microseconds to the output if the
-  value has microseconds.
-
-  We can't use a standard make_date_time() for this as we don't know
-  if someone will use %f in the format specifier in which case we would get
-  the microseconds twice.
-*/
-
-static bool make_datetime(date_time_format_types format, MYSQL_TIME *ltime,
-			  String *str)
-{
-  char *buff;
-  CHARSET_INFO *cs= &my_charset_bin;
-  uint length= MAX_DATE_STRING_REP_LENGTH;
-
-  if (str->alloc(length))
-    return 1;
-  buff= (char*) str->ptr();
-
-  switch (format) {
-  case TIME_ONLY:
-    length= cs->cset->snprintf(cs, buff, length, "%s%02d:%02d:%02d",
-			       ltime->neg ? "-" : "",
-			       ltime->hour, ltime->minute, ltime->second);
-    break;
-  case TIME_MICROSECOND:
-    length= cs->cset->snprintf(cs, buff, length, "%s%02d:%02d:%02d.%06ld",
-			       ltime->neg ? "-" : "",
-			       ltime->hour, ltime->minute, ltime->second,
-			       ltime->second_part);
-    break;
-  case DATE_ONLY:
-    length= cs->cset->snprintf(cs, buff, length, "%04d-%02d-%02d",
-			       ltime->year, ltime->month, ltime->day);
-    break;
-  case DATE_TIME:
-    length= cs->cset->snprintf(cs, buff, length,
-			       "%04d-%02d-%02d %02d:%02d:%02d",
-			       ltime->year, ltime->month, ltime->day,
-			       ltime->hour, ltime->minute, ltime->second);
-    break;
-  case DATE_TIME_MICROSECOND:
-    length= cs->cset->snprintf(cs, buff, length,
-			       "%04d-%02d-%02d %02d:%02d:%02d.%06ld",
-			       ltime->year, ltime->month, ltime->day,
-			       ltime->hour, ltime->minute, ltime->second,
-			       ltime->second_part);
-    break;
-  }
-
-  str->length(length);
-  str->set_charset(cs);
-  return 0;
-}
-
-
-/*
-  Wrapper over make_datetime() with validation of the input MYSQL_TIME value
-
-  NOTE
-    see make_datetime() for more information
-
-  RETURN
-    1    if there was an error during converion
-    0    otherwise
-*/
-
-static bool make_datetime_with_warn(date_time_format_types format, MYSQL_TIME *ltime,
-                                    String *str)
-{
-  int warning= 0;
-
-  if (make_datetime(format, ltime, str))
-    return 1;
-  if (check_time_range(ltime, &warning))
-    return 1;
-  if (!warning)
-    return 0;
-
-  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                               str->ptr(), str->length(),
-                               MYSQL_TIMESTAMP_TIME, NullS);
-  return make_datetime(format, ltime, str);
-}
-
-
-/*
-  Wrapper over make_time() with validation of the input MYSQL_TIME value
-
-  NOTE
-    see make_time() for more info
-
-  RETURN
-    1    if there was an error during conversion
-    0    otherwise
-*/
-
-static bool make_time_with_warn(const DATE_TIME_FORMAT *format,
-                                MYSQL_TIME *l_time, String *str)
-{
-  int warning= 0;
-  make_time(format, l_time, str);
-  if (check_time_range(l_time, &warning))
-    return 1;
-  if (warning)
-  {
-    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str->ptr(), str->length(),
-                                 MYSQL_TIMESTAMP_TIME, NullS);
-    make_time(format, l_time, str);
-  }
-
-  return 0;
-}
-
-
-/*
-  Convert seconds to MYSQL_TIME value with overflow checking
-
-  SYNOPSIS:
-    sec_to_time()
-    seconds          number of seconds
-    unsigned_flag    1, if 'seconds' is unsigned, 0, otherwise
-    ltime            output MYSQL_TIME value
-
-  DESCRIPTION
-    If the 'seconds' argument is inside MYSQL_TIME data range, convert it to a
-    corresponding value.
-    Otherwise, truncate the resulting value to the nearest endpoint, and
-    produce a warning message.
-
-  RETURN
-    1                if the value was truncated during conversion
-    0                otherwise
-*/
-  
-static bool sec_to_time(longlong seconds, bool unsigned_flag, MYSQL_TIME *ltime)
-{
-  uint sec;
-
-  bzero((char *)ltime, sizeof(*ltime));
-  
-  if (seconds < 0)
-  {
-    if (unsigned_flag)
-      goto overflow;
-    ltime->neg= 1;
-    if (seconds < -3020399)
-      goto overflow;
-    seconds= -seconds;
-  }
-  else if (seconds > 3020399)
-    goto overflow;
-  
-  sec= (uint) ((ulonglong) seconds % 3600);
-  ltime->hour= (uint) (seconds/3600);
-  ltime->minute= sec/60;
-  ltime->second= sec % 60;
-
-  return 0;
-
-overflow:
-  ltime->hour= TIME_MAX_HOUR;
-  ltime->minute= TIME_MAX_MINUTE;
-  ltime->second= TIME_MAX_SECOND;
-
-  char buf[22];
-  int len= (int)(longlong10_to_str(seconds, buf, unsigned_flag ? 10 : -10)
-                 - buf);
-  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                               buf, len, MYSQL_TIMESTAMP_TIME,
-                               NullS);
-  
-  return 1;
-}
-
-
 /*
   Date formats corresponding to compound %r and %T conversion specifiers
 
@@ -272,7 +85,8 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
 			      const char *val, uint length, MYSQL_TIME *l_time,
                               timestamp_type cached_timestamp_type,
                               const char **sub_pattern_end,
-                              const char *date_time_type)
+                              const char *date_time_type,
+                              uint fuzzy_date)
 {
   int weekday= 0, yearday= 0, daypart= 0;
   int week_number= -1;
@@ -293,6 +107,8 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
   if (!sub_pattern_end)
     bzero((char*) l_time, sizeof(*l_time));
 
+  l_time->time_type= cached_timestamp_type;
+
   for (; ptr != end && val != val_end; ptr++)
   {
     /* Skip pre-space between each argument */
@@ -466,7 +282,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
         */
         if (extract_date_time(&time_ampm_format, val,
                               (uint)(val_end - val), l_time,
-                              cached_timestamp_type, &val, "time"))
+                              cached_timestamp_type, &val, "time", fuzzy_date))
           DBUG_RETURN(1);
         break;
 
@@ -474,7 +290,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       case 'T':
         if (extract_date_time(&time_24hrs_format, val,
                               (uint)(val_end - val), l_time,
-                              cached_timestamp_type, &val, "time"))
+                              cached_timestamp_type, &val, "time", fuzzy_date))
           DBUG_RETURN(1);
         break;
 
@@ -581,6 +397,10 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       l_time->minute > 59 || l_time->second > 59)
     goto err;
 
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) &&
+       (l_time->year == 0 || l_time->month == 0 || l_time->day == 0))
+    goto err;
+
   if (val != val_end)
   {
     do
@@ -1027,7 +847,7 @@ longlong Item_func_dayofyear::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (get_arg0_date(&ltime,TIME_NO_ZERO_DATE))
+  if (get_arg0_date(&ltime, TIME_NO_ZERO_IN_DATE | TIME_NO_ZERO_DATE))
     return 0;
   return (longlong) calc_daynr(ltime.year,ltime.month,ltime.day) -
     calc_daynr(ltime.year,1,1) + 1;
@@ -1296,47 +1116,83 @@ longlong Item_func_year::val_int_endpoint(bool left_endp, bool *incl_endp)
 }
 
 
-longlong Item_func_unix_timestamp::val_int()
+bool Item_func_unix_timestamp::get_timestamp_value(my_time_t *seconds,
+                                                   ulong *second_part)
 {
-  MYSQL_TIME ltime;
-  my_bool not_used;
-  
   DBUG_ASSERT(fixed == 1);
-  if (arg_count == 0)
-    return (longlong) current_thd->query_start();
   if (args[0]->type() == FIELD_ITEM)
   {						// Optimize timestamp field
     Field *field=((Item_field*) args[0])->field;
     if (field->type() == MYSQL_TYPE_TIMESTAMP)
-      return ((Field_timestamp*) field)->get_timestamp(&null_value);
+    {
+      if ((null_value= field->is_null()))
+        return 1;
+      *seconds= ((Field_timestamp*)field)->get_timestamp(second_part);
+      return 0;
+    }
   }
-  
+
+  MYSQL_TIME ltime;
   if (get_arg0_date(&ltime, 0))
-  {
-    /*
-      We have to set null_value again because get_arg0_date will also set it
-      to true if we have wrong datetime parameter (and we should return 0 in 
-      this case).
-    */
-    null_value= args[0]->null_value;
-    return 0;
-  }
+    return 1;
+
+  uint error_code;
+  *seconds= TIME_to_timestamp(current_thd, &ltime, &error_code);
+  *second_part= ltime.second_part;
+  return (null_value= (error_code == ER_WARN_DATA_OUT_OF_RANGE));
+}
+
+
+longlong Item_func_unix_timestamp::int_op()
+{
+  if (arg_count == 0)
+    return (longlong) current_thd->query_start();
   
-  return (longlong) TIME_to_timestamp(current_thd, &ltime, &not_used);
+  ulong second_part;
+  my_time_t seconds;
+  if (get_timestamp_value(&seconds, &second_part))
+    return 0;
+
+  return seconds;
 }
 
 
-longlong Item_func_time_to_sec::val_int()
+my_decimal *Item_func_unix_timestamp::decimal_op(my_decimal* buf)
+{
+  ulong second_part;
+  my_time_t seconds;
+  if (get_timestamp_value(&seconds, &second_part))
+    return 0;
+
+  return seconds2my_decimal(seconds < 0, seconds < 0 ? -seconds : seconds,
+                            second_part, buf);
+}
+
+
+longlong Item_func_time_to_sec::int_op()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  longlong seconds;
-  (void) get_arg0_time(&ltime);
-  seconds=ltime.hour*3600L+ltime.minute*60+ltime.second;
+  if (get_arg0_time(&ltime))
+    return 0;
+
+  longlong seconds=ltime.hour*3600L+ltime.minute*60+ltime.second;
   return ltime.neg ? -seconds : seconds;
 }
 
 
+my_decimal *Item_func_time_to_sec::decimal_op(my_decimal* buf)
+{
+  DBUG_ASSERT(fixed == 1);
+  MYSQL_TIME ltime;
+  if (get_arg0_time(&ltime))
+    return 0;
+
+  longlong seconds= ltime.hour*3600L+ltime.minute*60+ltime.second;
+  return seconds2my_decimal(ltime.neg, seconds, ltime.second_part, buf);
+}
+
+
 /**
   Convert a string to a interval value.
 
@@ -1495,29 +1351,31 @@ bool get_interval_value(Item *args,interval_type int_type,
 }
 
 
-String *Item_date::val_str(String *str)
+String *Item_temporal_func::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  return val_string_from_date(str);
+}
+
+
+longlong Item_temporal_func::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
   if (get_date(&ltime, TIME_FUZZY_DATE))
-    return (String *) 0;
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String *) 0;
-  }
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
+    return 0;
+  longlong v= TIME_to_ulonglong(&ltime);
+  return ltime.neg ? -v : v;
 }
 
 
-longlong Item_date::val_int()
+double Item_temporal_func::val_real()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
   if (get_date(&ltime, TIME_FUZZY_DATE))
     return 0;
-  return (longlong) (ltime.year*10000L+ltime.month*100+ltime.day);
+  return TIME_to_double(&ltime);
 }
 
 
@@ -1529,8 +1387,7 @@ bool Item_func_from_days::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
   bzero(ltime, sizeof(MYSQL_TIME));
   get_date_from_daynr((long) value, &ltime->year, &ltime->month, &ltime->day);
 
-  if ((null_value= (fuzzy_date & TIME_NO_ZERO_DATE) &&
-       (ltime->year == 0 || ltime->month == 0 || ltime->day == 0)))
+  if ((null_value= ((fuzzy_date & TIME_NO_ZERO_DATE) && ltime->year == 0)))
     return TRUE;
 
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
@@ -1540,28 +1397,13 @@ bool Item_func_from_days::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 
 void Item_func_curdate::fix_length_and_dec()
 {
-  collation.set(&my_charset_bin);
-  decimals=0; 
-  max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-
   store_now_in_TIME(&ltime);
   
   /* We don't need to set second_part and neg because they already 0 */
   ltime.hour= ltime.minute= ltime.second= 0;
   ltime.time_type= MYSQL_TIMESTAMP_DATE;
-  value= (longlong) TIME_to_ulonglong_date(&ltime);
-}
-
-String *Item_func_curdate::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String *) 0;
-  }
-  make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
+  Item_datefunc::fix_length_and_dec();
+  maybe_null= false;
 }
 
 /**
@@ -1571,8 +1413,7 @@ String *Item_func_curdate::val_str(String *str)
 void Item_func_curdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
   thd->time_zone_used= 1;
 }
 
@@ -1583,8 +1424,8 @@ void Item_func_curdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_curdate_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
@@ -1600,26 +1441,35 @@ bool Item_func_curdate::get_date(MYSQL_TIME *res,
 }
 
 
-String *Item_func_curtime::val_str(String *str)
+bool Item_func_curtime::fix_fields(THD *thd, Item **items)
 {
-  DBUG_ASSERT(fixed == 1);
-  str_value.set(buff, buff_length, &my_charset_bin);
-  return &str_value;
+  if (decimals > TIME_SECOND_PART_DIGITS)
+  {
+    my_error(ER_TOO_BIG_PRECISION, MYF(0), decimals, func_name(),
+             TIME_SECOND_PART_DIGITS);
+    return 1;
+  }
+  return Item_timefunc::fix_fields(thd, items);
 }
 
-
-void Item_func_curtime::fix_length_and_dec()
+bool Item_func_curtime::get_date(MYSQL_TIME *res,
+                                 uint fuzzy_date __attribute__((unused)))
 {
-  MYSQL_TIME ltime;
-
-  decimals= DATETIME_DEC;
-  collation.set(&my_charset_bin);
-  store_now_in_TIME(&ltime);
-  value= TIME_to_ulonglong_time(&ltime);
-  buff_length= (uint) my_time_to_str(&ltime, buff);
-  max_length= buff_length;
+  *res= ltime;
+  return 0;
 }
 
+static void set_sec_part(ulong sec_part, MYSQL_TIME *ltime, Item *item)
+{
+  DBUG_ASSERT(item->decimals == AUTO_SEC_PART_DIGITS ||
+              item->decimals <= TIME_SECOND_PART_DIGITS);
+  if (item->decimals)
+  {
+    ltime->second_part= sec_part;
+    if (item->decimals < TIME_SECOND_PART_DIGITS)
+      ltime->second_part= sec_part_truncate(ltime->second_part, item->decimals);
+  }
+}
 
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
@@ -1628,8 +1478,10 @@ void Item_func_curtime::fix_length_and_dec()
 void Item_func_curtime_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
+  now_time->year= now_time->month= now_time->day= 0;
+  now_time->time_type= MYSQL_TIMESTAMP_TIME;
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   thd->time_zone_used= 1;
 }
 
@@ -1640,36 +1492,28 @@ void Item_func_curtime_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_curtime_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
+  now_time->year= now_time->month= now_time->day= 0;
+  now_time->time_type= MYSQL_TIMESTAMP_TIME;
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
   */
 }
 
-
-String *Item_func_now::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  str_value.set(buff,buff_length, &my_charset_bin);
-  return &str_value;
-}
-
-
-void Item_func_now::fix_length_and_dec()
+bool Item_func_now::fix_fields(THD *thd, Item **items)
 {
-  decimals= DATETIME_DEC;
-  collation.set(&my_charset_bin);
-
-  store_now_in_TIME(&ltime);
-  value= (longlong) TIME_to_ulonglong_datetime(&ltime);
-
-  buff_length= (uint) my_datetime_to_str(&ltime, buff);
-  max_length= buff_length;
+  if (decimals > TIME_SECOND_PART_DIGITS)
+  {
+    my_error(ER_TOO_BIG_PRECISION, MYF(0), decimals, func_name(),
+             TIME_SECOND_PART_DIGITS);
+    return 1;
+  }
+  return Item_temporal_func::fix_fields(thd, items);
 }
 
-
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
     time zone. Defines time zone (local) used for whole NOW function.
@@ -1677,8 +1521,8 @@ void Item_func_now::fix_length_and_dec()
 void Item_func_now_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, 
-                                             (my_time_t)thd->query_start());
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, thd->query_start());
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   thd->time_zone_used= 1;
 }
 
@@ -1689,8 +1533,9 @@ void Item_func_now_local::store_now_in_TIME(MYSQL_TIME *now_time)
 */
 void Item_func_now_utc::store_now_in_TIME(MYSQL_TIME *now_time)
 {
-  my_tz_UTC->gmt_sec_to_TIME(now_time, 
-                             (my_time_t)(current_thd->query_start()));
+  THD *thd= current_thd;
+  my_tz_UTC->gmt_sec_to_TIME(now_time, thd->query_start());
+  set_sec_part(thd->query_start_sec_part(), now_time, this);
   /* 
     We are not flagging this query as using time zone, since it uses fixed
     UTC-SYSTEM time-zone.
@@ -1706,13 +1551,6 @@ bool Item_func_now::get_date(MYSQL_TIME *res,
 }
 
 
-int Item_func_now::save_in_field(Field *to, bool no_conversions)
-{
-  to->set_notnull();
-  return to->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
-}
-
-
 /**
     Converts current time in my_time_t to MYSQL_TIME represenatation for local
     time zone. Defines time zone (local) used for whole SYSDATE function.
@@ -1720,98 +1558,61 @@ int Item_func_now::save_in_field(Field *to, bool no_conversions)
 void Item_func_sysdate_local::store_now_in_TIME(MYSQL_TIME *now_time)
 {
   THD *thd= current_thd;
-  thd->variables.time_zone->gmt_sec_to_TIME(now_time, (my_time_t) my_time(0));
+  my_hrtime_t now= my_hrtime();
+  thd->variables.time_zone->gmt_sec_to_TIME(now_time, hrtime_to_my_time(now));
+  set_sec_part(hrtime_sec_part(now), now_time, this);
   thd->time_zone_used= 1;
 }
 
 
-String *Item_func_sysdate_local::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  buff_length= (uint) my_datetime_to_str(&ltime, buff);
-  str_value.set(buff, buff_length, &my_charset_bin);
-  return &str_value;
-}
-
-
-longlong Item_func_sysdate_local::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  return (longlong) TIME_to_ulonglong_datetime(&ltime);
-}
-
-
-double Item_func_sysdate_local::val_real()
-{
-  DBUG_ASSERT(fixed == 1);
-  store_now_in_TIME(&ltime);
-  return ulonglong2double(TIME_to_ulonglong_datetime(&ltime));
-}
-
-
-void Item_func_sysdate_local::fix_length_and_dec()
-{
-  decimals= 0;
-  collation.set(&my_charset_bin);
-  max_length= MAX_DATETIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-}
-
-
 bool Item_func_sysdate_local::get_date(MYSQL_TIME *res,
                                        uint fuzzy_date __attribute__((unused)))
 {
-  store_now_in_TIME(&ltime);
-  *res= ltime;
+  store_now_in_TIME(res);
   return 0;
 }
 
-
-int Item_func_sysdate_local::save_in_field(Field *to, bool no_conversions)
-{
-  store_now_in_TIME(&ltime);
-  to->set_notnull();
-  to->store_time(&ltime, MYSQL_TIMESTAMP_DATETIME);
-  return 0;
-}
-
-
-String *Item_func_sec_to_time::val_str(String *str)
+bool Item_func_sec_to_time::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong arg_val= args[0]->val_int(); 
+  bool sign;
+  ulonglong sec;
+  ulong sec_part;
 
-  if ((null_value=args[0]->null_value) ||
-      str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return (String*) 0;
-  }
+  bzero((char *)ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
-  sec_to_time(arg_val, args[0]->unsigned_flag, &ltime);
-  
-  make_time((DATE_TIME_FORMAT *) 0, &ltime, str);
-  return str;
-}
+  sign= args[0]->get_seconds(&sec, &sec_part);
 
+  if ((null_value= args[0]->null_value))
+    return 1;
 
-longlong Item_func_sec_to_time::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong arg_val= args[0]->val_int(); 
+  ltime->neg= sign;
+  if (sec > TIME_MAX_VALUE_SECONDS)
+    goto overflow;
+
+  DBUG_ASSERT(sec_part <= TIME_MAX_SECOND_PART);
   
-  if ((null_value=args[0]->null_value))
-    return 0;
+  ltime->hour=   (uint) (sec/3600);
+  ltime->minute= (uint) (sec % 3600) /60;
+  ltime->second= (uint) sec % 60;
+  ltime->second_part= sec_part;
 
-  sec_to_time(arg_val, args[0]->unsigned_flag, &ltime);
+  return 0;
 
-  return (ltime.neg ? -1 : 1) *
-    ((ltime.hour)*10000 + ltime.minute*100 + ltime.second);
-}
+overflow:
+  /* use check_time_range() to set ltime to the max value depending on dec */
+  int unused;
+  char buf[100];
+  String tmp(buf, sizeof(buf), &my_charset_bin), *err= args[0]->val_str(&tmp);
 
+  ltime->hour= TIME_MAX_HOUR+1;
+  check_time_range(ltime, decimals, &unused);
+  make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                               err->ptr(), err->length(),
+                               MYSQL_TIMESTAMP_TIME, NullS);
+  return 0;
+}
 
 void Item_func_date_format::fix_length_and_dec()
 {
@@ -1948,24 +1749,12 @@ String *Item_func_date_format::val_str(String *str)
   String *format;
   MYSQL_TIME l_time;
   uint size;
+  int is_time_flag = is_time_format ? TIME_TIME_ONLY : 0;
   DBUG_ASSERT(fixed == 1);
-
-  if (!is_time_format)
-  {
-    if (get_arg0_date(&l_time, TIME_FUZZY_DATE))
-      return 0;
-  }
-  else
-  {
-    String *res;
-    if (!(res=args[0]->val_str(str)) ||
-	(str_to_time_with_warn(res->ptr(), res->length(), &l_time)))
-      goto null_date;
-
-    l_time.year=l_time.month=l_time.day=0;
-    null_value=0;
-  }
-
+  
+  if (get_arg0_date(&l_time, TIME_FUZZY_DATE | is_time_flag))
+    return 0;
+  
   if (!(format = args[1]->val_str(str)) || !format->length())
     goto null_date;
 
@@ -2003,100 +1792,39 @@ null_date:
 void Item_func_from_unixtime::fix_length_and_dec()
 { 
   thd= current_thd;
-  collation.set(&my_charset_bin);
-  decimals= DATETIME_DEC;
-  max_length=MAX_DATETIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  maybe_null= 1;
   thd->time_zone_used= 1;
+  decimals= args[0]->decimals;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
-String *Item_func_from_unixtime::val_str(String *str)
-{
-  MYSQL_TIME time_tmp;
-
-  DBUG_ASSERT(fixed == 1);
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return 0;
-  }
-
-  make_datetime((DATE_TIME_FORMAT *) 0, &time_tmp, str);
-
-  return str;
-}
-
-
-longlong Item_func_from_unixtime::val_int()
-{
-  MYSQL_TIME time_tmp;
-
-  DBUG_ASSERT(fixed == 1);
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-
-  return (longlong) TIME_to_ulonglong_datetime(&time_tmp);
-}
-
 bool Item_func_from_unixtime::get_date(MYSQL_TIME *ltime,
 				       uint fuzzy_date __attribute__((unused)))
 {
-  ulonglong tmp= (ulonglong)(args[0]->val_int());
-  /*
-    "tmp > TIMESTAMP_MAX_VALUE" check also covers case of negative
-    from_unixtime() argument since tmp is unsigned.
-  */
-  if ((null_value= (args[0]->null_value || tmp > TIMESTAMP_MAX_VALUE)))
-    return 1;
-
-  thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)tmp);
-
-  return 0;
-}
-
-
-void Item_func_convert_tz::fix_length_and_dec()
-{
-  collation.set(&my_charset_bin);
-  decimals= 0;
-  max_length= MAX_DATETIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  maybe_null= 1;
-}
+  bool sign;
+  ulonglong sec;
+  ulong sec_part;
 
+  bzero((char *)ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
-String *Item_func_convert_tz::val_str(String *str)
-{
-  MYSQL_TIME time_tmp;
+  sign= args[0]->get_seconds(&sec, &sec_part);
 
-  if (get_date(&time_tmp, 0))
-    return 0;
+  if (args[0]->null_value || sign || sec > TIMESTAMP_MAX_VALUE)
+    return (null_value= 1);
 
-  if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-  {
-    null_value= 1;
-    return 0;
-  }
+  thd->variables.time_zone->gmt_sec_to_TIME(ltime, (my_time_t)sec);
 
-  make_datetime((DATE_TIME_FORMAT *) 0, &time_tmp, str);
+  ltime->second_part= sec_part;
 
-  return str;
+  return (null_value= 0);
 }
 
 
-longlong Item_func_convert_tz::val_int()
+void Item_func_convert_tz::fix_length_and_dec()
 {
-  MYSQL_TIME time_tmp;
-
-  if (get_date(&time_tmp, 0))
-    return 0;
-  
-  return (longlong)TIME_to_ulonglong_datetime(&time_tmp);
+  decimals= args[0]->decimals;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
@@ -2119,18 +1847,22 @@ bool Item_func_convert_tz::get_date(MYSQL_TIME *ltime,
     to_tz_cached= args[2]->const_item();
   }
 
-  if (from_tz==0 || to_tz==0 || get_arg0_date(ltime, TIME_NO_ZERO_DATE))
+  if (from_tz==0 || to_tz==0 ||
+      get_arg0_date(ltime, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE))
   {
     null_value= 1;
     return 1;
   }
 
   {
-    my_bool not_used;
+    uint not_used;
     my_time_tmp= from_tz->TIME_to_gmt_sec(ltime, &not_used);
+    ulong sec_part= ltime->second_part;
     /* my_time_tmp is guranteed to be in the allowed range */
     if (my_time_tmp)
       to_tz->gmt_sec_to_TIME(ltime, my_time_tmp);
+    /* we rely on the fact that no timezone conversion can change sec_part */
+    ltime->second_part= sec_part;
   }
 
   null_value= 0;
@@ -2141,7 +1873,7 @@ bool Item_func_convert_tz::get_date(MYSQL_TIME *ltime,
 void Item_func_convert_tz::cleanup()
 {
   from_tz_cached= to_tz_cached= 0;
-  Item_date_func::cleanup();
+  Item_temporal_func::cleanup();
 }
 
 
@@ -2149,21 +1881,20 @@ void Item_date_add_interval::fix_length_and_dec()
 {
   enum_field_types arg0_field_type;
 
-  collation.set(&my_charset_bin);
-  maybe_null=1;
-  max_length=MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  value.alloc(max_length);
-
   /*
     The field type for the result of an Item_date function is defined as
     follows:
 
     - If first arg is a MYSQL_TYPE_DATETIME result is MYSQL_TYPE_DATETIME
     - If first arg is a MYSQL_TYPE_DATE and the interval type uses hours,
-      minutes or seconds then type is MYSQL_TYPE_DATETIME.
+      minutes or seconds then type is MYSQL_TYPE_DATETIME
+      otherwise it's MYSQL_TYPE_DATE
+    - if first arg is a MYSQL_TYPE_TIME and the interval type isn't using
+      anything larger than days, then the result is MYSQL_TYPE_TIME,
+      otherwise - MYSQL_TYPE_DATETIME.
     - Otherwise the result is MYSQL_TYPE_STRING
-      (This is because you can't know if the string contains a DATE, MYSQL_TIME or
-      DATETIME argument)
+      (This is because you can't know if the string contains a DATE,
+      MYSQL_TIME or DATETIME argument)
   */
   cached_field_type= MYSQL_TYPE_STRING;
   arg0_field_type= args[0]->field_type();
@@ -2177,6 +1908,20 @@ void Item_date_add_interval::fix_length_and_dec()
     else
       cached_field_type= MYSQL_TYPE_DATETIME;
   }
+  else if (arg0_field_type == MYSQL_TYPE_TIME)
+  {
+    if (int_type >= INTERVAL_DAY && int_type != INTERVAL_YEAR_MONTH)
+      cached_field_type= arg0_field_type;
+    else
+      cached_field_type= MYSQL_TYPE_DATETIME;
+  }
+  if (int_type == INTERVAL_MICROSECOND || int_type >= INTERVAL_DAY_MICROSECOND)
+    decimals= 6;
+  else
+    decimals= args[0]->decimals;
+
+  Item_temporal_func::fix_length_and_dec();
+  value.alloc(max_length);
 }
 
 
@@ -2186,7 +1931,7 @@ bool Item_date_add_interval::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   INTERVAL interval;
 
-  if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE) ||
+  if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE | TIME_FUZZY_DATE) ||
       get_interval_value(args[1], int_type, &value, &interval))
     return (null_value=1);
 
@@ -2199,44 +1944,6 @@ bool Item_date_add_interval::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 }
 
 
-String *Item_date_add_interval::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  enum date_time_format_types format;
-
-  if (Item_date_add_interval::get_date(&ltime, TIME_NO_ZERO_DATE))
-    return 0;
-
-  if (ltime.time_type == MYSQL_TIMESTAMP_DATE)
-    format= DATE_ONLY;
-  else if (ltime.second_part)
-    format= DATE_TIME_MICROSECOND;
-  else
-    format= DATE_TIME;
-
-  if (!make_datetime(format, &ltime, str))
-    return str;
-
-  null_value=1;
-  return 0;
-}
-
-
-longlong Item_date_add_interval::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  longlong date;
-  if (Item_date_add_interval::get_date(&ltime, TIME_NO_ZERO_DATE))
-    return (longlong) 0;
-  date = (ltime.year*100L + ltime.month)*100L + ltime.day;
-  return ltime.time_type == MYSQL_TIMESTAMP_DATE ? date :
-    ((date*100L + ltime.hour)*100L+ ltime.minute)*100L + ltime.second;
-}
-
-
-
 bool Item_date_add_interval::eq(const Item *item, bool binary_cmp) const
 {
   Item_date_add_interval *other= (Item_date_add_interval*) item;
@@ -2318,25 +2025,12 @@ longlong Item_extract::val_int()
   uint year;
   ulong week_format;
   long neg;
-  if (date_value)
-  {
-    if (get_arg0_date(&ltime, TIME_FUZZY_DATE))
-      return 0;
-    neg=1;
-  }
-  else
-  {
-    char buf[40];
-    String value(buf, sizeof(buf), &my_charset_bin);;
-    String *res= args[0]->val_str(&value);
-    if (!res || str_to_time_with_warn(res->ptr(), res->length(), &ltime))
-    {
-      null_value=1;
-      return 0;
-    }
-    neg= ltime.neg ? -1 : 1;
-    null_value=0;
-  }
+  int is_time_flag = date_value ? 0 : TIME_TIME_ONLY;
+
+  if (get_arg0_date(&ltime, TIME_FUZZY_DATE | is_time_flag))
+    return 0;
+  neg= ltime.neg ? -1 : 1;
+
   switch (int_type) {
   case INTERVAL_YEAR:		return ltime.year;
   case INTERVAL_YEAR_MONTH:	return ltime.year*100L+ltime.month;
@@ -2419,12 +2113,19 @@ bool Item_char_typecast::eq(const Item *item, bool binary_cmp) const
   return 1;
 }
 
-void Item_typecast::print(String *str, enum_query_type query_type)
+void Item_temporal_typecast::print(String *str, enum_query_type query_type)
 {
+  char buf[32];
   str->append(STRING_WITH_LEN("cast("));
   args[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" as "));
   str->append(cast_type());
+  if (decimals)
+  {
+    str->append('(');
+    str->append(llstr(decimals, buf));
+    str->append(')');
+  }
   str->append(')');
 }
 
@@ -2434,13 +2135,13 @@ void Item_char_typecast::print(String *str, enum_query_type query_type)
   str->append(STRING_WITH_LEN("cast("));
   args[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" as char"));
-  if (cast_length >= 0)
+  if (cast_length != ~0U)
   {
     str->append('(');
     char buffer[20];
     // my_charset_bin is good enough for numbers
     String st(buffer, sizeof(buffer), &my_charset_bin);
-    st.set((ulonglong)cast_length, &my_charset_bin);
+    st.set(static_cast<ulonglong>(cast_length), &my_charset_bin);
     str->append(st);
     str->append(')');
   }
@@ -2458,19 +2159,6 @@ String *Item_char_typecast::val_str(String *str)
   String *res;
   uint32 length;
 
-  if (cast_length >= 0 &&
-      ((unsigned) cast_length) > current_thd->variables.max_allowed_packet)
-  {
-    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-			ER_WARN_ALLOWED_PACKET_OVERFLOWED,
-			ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED),
-			cast_cs == &my_charset_bin ?
-                        "cast_as_binary" : func_name(),
-                        current_thd->variables.max_allowed_packet);
-    null_value= 1;
-    return 0;
-  }
-
   if (!charset_conversion)
   {
     if (!(res= args[0]->val_str(str)))
@@ -2481,10 +2169,15 @@ String *Item_char_typecast::val_str(String *str)
   }
   else
   {
-    // Convert character set if differ
+    /*
+      Convert character set if differ
+      from_cs is 0 in the case where the result set may vary between calls,
+      for example with dynamic columns.
+    */
     uint dummy_errors;
     if (!(res= args[0]->val_str(str)) ||
-        tmp_value.copy(res->ptr(), res->length(), from_cs,
+        tmp_value.copy(res->ptr(), res->length(),
+                       from_cs ? from_cs  : res->charset(),
                        cast_cs, &dummy_errors))
     {
       null_value= 1;
@@ -2500,7 +2193,7 @@ String *Item_char_typecast::val_str(String *str)
     and the result is longer than cast length, e.g.
     CAST('string' AS CHAR(1))
   */
-  if (cast_length >= 0)
+  if (cast_length != ~0U)
   {
     if (res->length() > (length= (uint32) res->charpos(cast_length)))
     {                                           // Safe even if const arg
@@ -2520,20 +2213,31 @@ String *Item_char_typecast::val_str(String *str)
                           res->c_ptr_safe());
       res->length((uint) length);
     }
-    else if (cast_cs == &my_charset_bin && res->length() < (uint) cast_length)
+    else if (cast_cs == &my_charset_bin && res->length() < cast_length)
     {
-      if (res->alloced_length() < (uint) cast_length)
+      if (res->alloced_length() < cast_length)
       {
         str_value.alloc(cast_length);
         str_value.copy(*res);
         res= &str_value;
       }
-      bzero((char*) res->ptr() + res->length(),
-            (uint) cast_length - res->length());
+      bzero((char*) res->ptr() + res->length(), cast_length - res->length());
       res->length(cast_length);
     }
   }
   null_value= 0;
+
+  if (res->length() > current_thd->variables.max_allowed_packet)
+  {
+    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+			ER_WARN_ALLOWED_PACKET_OVERFLOWED,
+			ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED),
+			cast_cs == &my_charset_bin ?
+                        "cast_as_binary" : func_name(),
+                        current_thd->variables.max_allowed_packet);
+    null_value= 1;
+    return 0;
+  }
   return res;
 }
 
@@ -2562,193 +2266,116 @@ void Item_char_typecast::fix_length_and_dec()
        and thus avoid unnecessary character set conversion.
      - If the argument is not a number, then from_cs is set to
        the argument's charset.
+     - If argument has a dynamic collation (can change from call to call)
+       we set from_cs to 0 as a marker that we have to take the collation
+       from the result string.
 
        Note (TODO): we could use repertoire technique here.
   */
-  from_cs= (args[0]->result_type() == INT_RESULT || 
-            args[0]->result_type() == DECIMAL_RESULT ||
-            args[0]->result_type() == REAL_RESULT) ?
-           (cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) :
-           args[0]->collation.collation;
-  charset_conversion= (cast_cs->mbmaxlen > 1) ||
+  from_cs= ((args[0]->result_type() == INT_RESULT || 
+             args[0]->result_type() == DECIMAL_RESULT ||
+             args[0]->result_type() == REAL_RESULT) ?
+            (cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) :
+            args[0]->dynamic_result() ? 0 :
+            args[0]->collation.collation);
+  charset_conversion= !from_cs || (cast_cs->mbmaxlen > 1) ||
                       (!my_charset_same(from_cs, cast_cs) &&
                        from_cs != &my_charset_bin &&
                        cast_cs != &my_charset_bin);
   collation.set(cast_cs, DERIVATION_IMPLICIT);
-  char_length= (cast_length >= 0) ? cast_length :
+  char_length= ((cast_length != ~0U) ? cast_length :
                 args[0]->max_length /
-                (cast_cs == &my_charset_bin ? 1 : args[0]->collation.collation->mbmaxlen);
+                (cast_cs == &my_charset_bin ? 1 :
+                 args[0]->collation.collation->mbmaxlen));
   max_length= char_length * cast_cs->mbmaxlen;
 }
 
 
-String *Item_datetime_typecast::val_str(String *str)
+bool Item_time_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_date(&ltime, TIME_FUZZY_DATE) &&
-      !make_datetime(ltime.second_part ? DATE_TIME_MICROSECOND : DATE_TIME, 
-		     &ltime, str))
-    return str;
-
-  null_value=1;
-  return 0;
-}
-
-
-longlong Item_datetime_typecast::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  if (get_arg0_date(&ltime,1))
-  {
-    null_value= 1;
-    return 0;
-  }
-
-  return TIME_to_ulonglong_datetime(&ltime);
-}
-
-
-bool Item_time_typecast::get_time(MYSQL_TIME *ltime)
-{
-  bool res= get_arg0_time(ltime);
+  if (get_arg0_time(ltime))
+    return 1;
+  if (decimals < TIME_SECOND_PART_DIGITS)
+    ltime->second_part= sec_part_truncate(ltime->second_part, decimals);
   /*
-    For MYSQL_TIMESTAMP_TIME value we can have non-zero day part,
+    MYSQL_TIMESTAMP_TIME value can have non-zero day part,
     which we should not lose.
   */
-  if (ltime->time_type == MYSQL_TIMESTAMP_DATETIME)
+  if (ltime->time_type != MYSQL_TIMESTAMP_TIME)
     ltime->year= ltime->month= ltime->day= 0;
   ltime->time_type= MYSQL_TIMESTAMP_TIME;
-  return res;
-}
-
-
-longlong Item_time_typecast::val_int()
-{
-  MYSQL_TIME ltime;
-  if (get_time(&ltime))
-  {
-    null_value= 1;
-    return 0;
-  }
-  return ltime.hour * 10000L + ltime.minute * 100 + ltime.second;
-}
-
-String *Item_time_typecast::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_time(&ltime) &&
-      !make_datetime(ltime.second_part ? TIME_MICROSECOND : TIME_ONLY,
-		     &ltime, str))
-    return str;
-
-  null_value=1;
   return 0;
 }
 
 
 bool Item_date_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  bool res= get_arg0_date(ltime, fuzzy_date);
+  if (get_arg0_date(ltime, fuzzy_date & ~TIME_TIME_ONLY))
+    return 1;
   ltime->hour= ltime->minute= ltime->second= ltime->second_part= 0;
   ltime->time_type= MYSQL_TIMESTAMP_DATE;
-  return res;
-}
-
-
-bool Item_date_typecast::get_time(MYSQL_TIME *ltime)
-{
-  bzero((char *)ltime, sizeof(MYSQL_TIME));
-  return args[0]->null_value;
-}
-
 
-String *Item_date_typecast::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (!get_arg0_date(&ltime, TIME_FUZZY_DATE) &&
-      !str->alloc(MAX_DATE_STRING_REP_LENGTH))
+  int unused;
+  if (check_date(ltime, ltime->year || ltime->month || ltime->day,
+                 fuzzy_date, &unused))
   {
-    make_date((DATE_TIME_FORMAT *) 0, &ltime, str);
-    return str;
+    Lazy_string_time str(ltime);
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_DATE, 0);
+    return (null_value= 1);
   }
-
-  null_value=1;
-  return 0;
+  return (null_value= 0);
 }
 
-longlong Item_date_typecast::val_int()
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-  if ((null_value= args[0]->get_date(&ltime, TIME_FUZZY_DATE)))
-    return 0;
-  return (longlong) (ltime.year * 10000L + ltime.month * 100 + ltime.day);
-}
 
-/**
-  MAKEDATE(a,b) is a date function that creates a date value 
-  from a year and day value.
-
-  NOTES:
-    As arguments are integers, we can't know if the year is a 2 digit or 4 digit year.
-    In this case we treat all years < 100 as 2 digit years. Ie, this is not safe
-    for dates between 0000-01-01 and 0099-12-31
-*/
-
-String *Item_func_makedate::val_str(String *str)
+bool Item_datetime_typecast::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME l_time;
-  long daynr=  (long) args[1]->val_int();
-  long year= (long) args[0]->val_int();
-  long days;
+  if (get_arg0_date(ltime, fuzzy_date & ~TIME_TIME_ONLY))
+    return 1;
 
-  if (args[0]->null_value || args[1]->null_value ||
-      year < 0 || year > 9999 || daynr <= 0)
-    goto err;
+  if (decimals < TIME_SECOND_PART_DIGITS)
+    ltime->second_part= sec_part_truncate(ltime->second_part, decimals);
 
-  if (year < 100)
-    year= year_2000_handling(year);
 
-  days= calc_daynr(year,1,1) + daynr - 1;
-  /* Day number from year 0 to 9999-12-31 */
-  if (days >= 0 && days <= MAX_DAY_NUMBER)
+  /*
+    ltime is valid MYSQL_TYPE_TIME (according to fuzzy_date).
+    But not every valid TIME value is a valid DATETIME value!
+  */
+  if (ltime->time_type == MYSQL_TIMESTAMP_TIME)
   {
-    null_value=0;
-    get_date_from_daynr(days,&l_time.year,&l_time.month,&l_time.day);
-    if (str->alloc(MAX_DATE_STRING_REP_LENGTH))
-      goto err;
-    make_date((DATE_TIME_FORMAT *) 0, &l_time, str);
-    return str;
+    if (ltime->neg)
+    {
+      Lazy_string_time str(ltime);
+      make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                   &str, MYSQL_TIMESTAMP_DATETIME, 0);
+      return (null_value= 1);
+    }
+    
+    uint day= ltime->hour/24;
+    ltime->hour %= 24;
+    ltime->month= day / 31;
+    ltime->day= day % 31;
   }
 
-err:
-  null_value=1;
+  ltime->time_type= MYSQL_TIMESTAMP_DATETIME;
   return 0;
 }
 
 
-/*
+/**
   MAKEDATE(a,b) is a date function that creates a date value 
   from a year and day value.
 
   NOTES:
-    As arguments are integers, we can't know if the year is a 2 digit or 4 digit year.
-    In this case we treat all years < 100 as 2 digit years. Ie, this is not safe
-    for dates between 0000-01-01 and 0099-12-31
+    As arguments are integers, we can't know if the year is a 2 digit
+    or 4 digit year.  In this case we treat all years < 100 as 2 digit
+    years. Ie, this is not safe for dates between 0000-01-01 and
+    0099-12-31
 */
 
-longlong Item_func_makedate::val_int()
+bool Item_func_makedate::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME l_time;
   long daynr=  (long) args[1]->val_int();
   long year= (long) args[0]->val_int();
   long days;
@@ -2762,25 +2389,23 @@ longlong Item_func_makedate::val_int()
 
   days= calc_daynr(year,1,1) + daynr - 1;
   /* Day number from year 0 to 9999-12-31 */
-  if (days >= 0 && days < MAX_DAY_NUMBER)
+  if (days >= 0 && days <= MAX_DAY_NUMBER)
   {
-    null_value=0;
-    get_date_from_daynr(days,&l_time.year,&l_time.month,&l_time.day);
-    return (longlong) (l_time.year * 10000L + l_time.month * 100 + l_time.day);
+    bzero(ltime, sizeof(*ltime));
+    ltime->time_type= MYSQL_TIMESTAMP_DATE;
+    get_date_from_daynr(days, &ltime->year, &ltime->month, &ltime->day);
+    return (null_value= 0);
   }
 
 err:
-  null_value= 1;
-  return 0;
+  return (null_value= 1);
 }
 
 
 void Item_func_add_time::fix_length_and_dec()
 {
   enum_field_types arg0_field_type;
-  decimals=0;
-  max_length=MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  maybe_null= 1;
+  decimals= max(args[0]->decimals, args[1]->decimals);
 
   /*
     The field type for the result of an Item_func_add_time function is defined
@@ -2800,6 +2425,7 @@ void Item_func_add_time::fix_length_and_dec()
     cached_field_type= MYSQL_TYPE_DATETIME;
   else if (arg0_field_type == MYSQL_TYPE_TIME)
     cached_field_type= MYSQL_TYPE_TIME;
+  Item_temporal_func::fix_length_and_dec();
 }
 
 /**
@@ -2812,38 +2438,37 @@ void Item_func_add_time::fix_length_and_dec()
   Result: Time value or datetime value
 */
 
-String *Item_func_add_time::val_str(String *str)
+bool Item_func_add_time::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME l_time1, l_time2, l_time3;
+  MYSQL_TIME l_time1, l_time2;
   bool is_time= 0;
   long days, microseconds;
   longlong seconds;
-  int l_sign= sign;
+  int l_sign= sign, was_cut= 0;
 
-  null_value=0;
   if (is_date)                        // TIMESTAMP function
   {
     if (get_arg0_date(&l_time1, TIME_FUZZY_DATE) || 
         args[1]->get_time(&l_time2) ||
         l_time1.time_type == MYSQL_TIMESTAMP_TIME || 
         l_time2.time_type != MYSQL_TIMESTAMP_TIME)
-      goto null_date;
+      return (null_value= 1);
   }
   else                                // ADDTIME function
   {
     if (args[0]->get_time(&l_time1) || 
         args[1]->get_time(&l_time2) ||
         l_time2.time_type == MYSQL_TIMESTAMP_DATETIME)
-      goto null_date;
+      return (null_value= 1);
     is_time= (l_time1.time_type == MYSQL_TIMESTAMP_TIME);
   }
   if (l_time1.neg != l_time2.neg)
     l_sign= -l_sign;
   
-  bzero((char *)&l_time3, sizeof(l_time3));
+  bzero(ltime, sizeof(*ltime));
   
-  l_time3.neg= calc_time_diff(&l_time1, &l_time2, -l_sign,
+  ltime->neg= calc_time_diff(&l_time1, &l_time2, -l_sign,
 			      &seconds, &microseconds);
 
   /*
@@ -2851,35 +2476,36 @@ String *Item_func_add_time::val_str(String *str)
     is non-zero we need to swap sign to get proper result.
   */
   if (l_time1.neg && (seconds || microseconds))
-    l_time3.neg= 1-l_time3.neg;         // Swap sign of result
+    ltime->neg= 1-ltime->neg;         // Swap sign of result
 
-  if (!is_time && l_time3.neg)
-    goto null_date;
+  if (!is_time && ltime->neg)
+    return (null_value= 1);
 
   days= (long)(seconds/86400L);
 
-  calc_time_from_sec(&l_time3, (long)(seconds%86400L), microseconds);
+  calc_time_from_sec(ltime, (long)(seconds%86400L), microseconds);
+
+  ltime->time_type= is_time ? MYSQL_TIMESTAMP_TIME : MYSQL_TIMESTAMP_DATETIME;
 
   if (!is_time)
   {
-    get_date_from_daynr(days,&l_time3.year,&l_time3.month,&l_time3.day);
-    if (l_time3.day &&
-	!make_datetime(l_time1.second_part || l_time2.second_part ?
-		       DATE_TIME_MICROSECOND : DATE_TIME,
-		       &l_time3, str))
-      return str;
-    goto null_date;
+    get_date_from_daynr(days,&ltime->year,&ltime->month,&ltime->day);
+    if (!ltime->day)
+      return (null_value= 1);
+    return (null_value= 0);
   }
   
-  l_time3.hour+= days*24;
-  if (!make_datetime_with_warn(l_time1.second_part || l_time2.second_part ?
-                               TIME_MICROSECOND : TIME_ONLY,
-                               &l_time3, str))
-    return str;
+  ltime->hour+= days*24;
 
-null_date:
-  null_value=1;
-  return 0;
+  MYSQL_TIME copy= *ltime;
+  Lazy_string_time str(&copy);
+
+  check_time_range(ltime, decimals, &was_cut);
+  if (was_cut)
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_TIME, NullS);
+
+  return (null_value= 0);
 }
 
 
@@ -2912,15 +2538,19 @@ void Item_func_add_time::print(String *str, enum_query_type query_type)
   Result: Time value
 */
 
-String *Item_func_timediff::val_str(String *str)
+bool Item_func_timediff::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
   longlong seconds;
   long microseconds;
-  int l_sign= 1;
-  MYSQL_TIME l_time1 ,l_time2, l_time3;
+  int l_sign= 1, was_cut= 0;
+  MYSQL_TIME l_time1,l_time2,l_time3;
+  Lazy_string_time str(&l_time3);
+
+  /* the following may be true in, for example, date_add(timediff(...), ... */
+  if (fuzzy_date & TIME_NO_ZERO_IN_DATE)
+    goto null_date;
 
-  null_value= 0;  
   if (args[0]->get_time(&l_time1) ||
       args[1]->get_time(&l_time2) ||
       l_time1.time_type != l_time2.time_type)
@@ -2942,16 +2572,30 @@ String *Item_func_timediff::val_str(String *str)
   if (l_time1.neg && (seconds || microseconds))
     l_time3.neg= 1-l_time3.neg;         // Swap sign of result
 
+  /*
+    seconds is longlong, when casted to long it may become a small number
+    even if the original seconds value was too large and invalid.
+    as a workaround we limit seconds by a large invalid long number
+    ("invalid" means > TIME_MAX_SECOND)
+  */
+  set_if_smaller(seconds, INT_MAX32);
+
   calc_time_from_sec(&l_time3, (long) seconds, microseconds);
 
-  if (!make_datetime_with_warn(l_time1.second_part || l_time2.second_part ?
-                               TIME_MICROSECOND : TIME_ONLY,
-                               &l_time3, str))
-    return str;
+  if ((fuzzy_date & TIME_NO_ZERO_DATE) && (seconds == 0) &&
+      (microseconds == 0))
+    goto null_date;
+
+  *ltime= l_time3;
+  check_time_range(ltime, decimals, &was_cut);
+
+  if (was_cut)
+    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 &str, MYSQL_TIMESTAMP_TIME, NullS);
+  return (null_value= 0);
 
 null_date:
-  null_value=1;
-  return 0;
+  return (null_value= 1);
 }
 
 /**
@@ -2960,10 +2604,9 @@ null_date:
   Result: Time value
 */
 
-String *Item_func_maketime::val_str(String *str)
+bool Item_func_maketime::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
   bool overflow= 0;
 
   longlong hour=   args[0]->val_int();
@@ -2974,12 +2617,11 @@ String *Item_func_maketime::val_str(String *str)
                    args[1]->null_value ||
                    args[2]->null_value ||
                    minute < 0 || minute > 59 ||
-                   second < 0 || second > 59 ||
-                   str->alloc(MAX_DATE_STRING_REP_LENGTH))))
-    return 0;
+                   second < 0 || second > 59)))
+    return 1;
 
-  bzero((char *)&ltime, sizeof(ltime));
-  ltime.neg= 0;
+  bzero(ltime, sizeof(*ltime));
+  ltime->time_type= MYSQL_TIMESTAMP_TIME;
 
   /* Check for integer overflows */
   if (hour < 0)
@@ -2987,22 +2629,22 @@ String *Item_func_maketime::val_str(String *str)
     if (args[0]->unsigned_flag)
       overflow= 1;
     else
-      ltime.neg= 1;
+      ltime->neg= 1;
   }
-  if (-hour > UINT_MAX || hour > UINT_MAX)
+  if (-hour > TIME_MAX_HOUR || hour > TIME_MAX_HOUR)
     overflow= 1;
 
   if (!overflow)
   {
-    ltime.hour=   (uint) ((hour < 0 ? -hour : hour));
-    ltime.minute= (uint) minute;
-    ltime.second= (uint) second;
+    ltime->hour=   (uint) ((hour < 0 ? -hour : hour));
+    ltime->minute= (uint) minute;
+    ltime->second= (uint) second;
   }
   else
   {
-    ltime.hour= TIME_MAX_HOUR;
-    ltime.minute= TIME_MAX_MINUTE;
-    ltime.second= TIME_MAX_SECOND;
+    ltime->hour= TIME_MAX_HOUR;
+    ltime->minute= TIME_MAX_MINUTE;
+    ltime->second= TIME_MAX_SECOND;
     char buf[28];
     char *ptr= longlong10_to_str(hour, buf, args[0]->unsigned_flag ? 10 : -10);
     int len = (int)(ptr - buf) +
@@ -3012,12 +2654,7 @@ String *Item_func_maketime::val_str(String *str)
                                  NullS);
   }
 
-  if (make_time_with_warn((DATE_TIME_FORMAT *) 0, &ltime, str))
-  {
-    null_value= 1;
-    return 0;
-  }
-  return str;
+  return 0;
 }
 
 
@@ -3033,7 +2670,7 @@ longlong Item_func_microsecond::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   MYSQL_TIME ltime;
-  if (!get_arg0_time(&ltime))
+  if (!get_arg0_date(&ltime, TIME_TIME_ONLY))
     return ltime.second_part;
   return 0;
 }
@@ -3048,8 +2685,8 @@ longlong Item_func_timestamp_diff::val_int()
   int neg= 1;
 
   null_value= 0;  
-  if (args[0]->get_date(&ltime1, TIME_NO_ZERO_DATE) ||
-      args[1]->get_date(&ltime2, TIME_NO_ZERO_DATE))
+  if (args[0]->get_date(&ltime1, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE) ||
+      args[1]->get_date(&ltime2, TIME_NO_ZERO_DATE | TIME_NO_ZERO_IN_DATE))
     goto null_date;
 
   if (calc_time_diff(&ltime2,&ltime1, 1,
@@ -3299,9 +2936,9 @@ get_date_time_result_type(const char *format, uint length)
           have all types of date-time components and can end our search.
         */
 	return DATE_TIME_MICROSECOND;
+      }
     }
   }
-  }
 
   /* We don't have all three types of date-time components */
   if (frac_second_used)
@@ -3318,40 +2955,39 @@ get_date_time_result_type(const char *format, uint length)
 
 void Item_func_str_to_date::fix_length_and_dec()
 {
-  maybe_null= 1;
-  decimals=0;
-  cached_format_type= DATE_TIME;
   cached_field_type= MYSQL_TYPE_DATETIME;
-  max_length= MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  cached_timestamp_type= MYSQL_TIMESTAMP_NONE;
+  decimals= NOT_FIXED_DEC;
   if ((const_item= args[1]->const_item()))
   {
     char format_buff[64];
     String format_str(format_buff, sizeof(format_buff), &my_charset_bin);
     String *format= args[1]->val_str(&format_str);
+    decimals= 0;
     if (!args[1]->null_value)
     {
-      cached_format_type= get_date_time_result_type(format->ptr(),
-                                                    format->length());
+      date_time_format_types cached_format_type=
+        get_date_time_result_type(format->ptr(), format->length());
       switch (cached_format_type) {
       case DATE_ONLY:
-        cached_timestamp_type= MYSQL_TIMESTAMP_DATE;
         cached_field_type= MYSQL_TYPE_DATE; 
-        max_length= MAX_DATE_WIDTH * MY_CHARSET_BIN_MB_MAXLEN;
         break;
-      case TIME_ONLY:
       case TIME_MICROSECOND:
-        cached_timestamp_type= MYSQL_TIMESTAMP_TIME;
+        decimals= 6;
+        /* fall through */
+      case TIME_ONLY:
         cached_field_type= MYSQL_TYPE_TIME; 
-        max_length= MAX_TIME_WIDTH * MY_CHARSET_BIN_MB_MAXLEN;
         break;
-      default:
-        cached_timestamp_type= MYSQL_TIMESTAMP_DATETIME;
+      case DATE_TIME_MICROSECOND:
+        decimals= 6;
+        /* fall through */
+      case DATE_TIME:
         cached_field_type= MYSQL_TYPE_DATETIME; 
         break;
       }
     }
   }
+  cached_timestamp_type= mysql_type_to_time_type(cached_field_type);
+  Item_temporal_func::fix_length_and_dec();
 }
 
 
@@ -3360,21 +2996,19 @@ bool Item_func_str_to_date::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
   DATE_TIME_FORMAT date_time_format;
   char val_buff[64], format_buff[64];
   String val_string(val_buff, sizeof(val_buff), &my_charset_bin), *val;
-  String format_str(format_buff, sizeof(format_buff), &my_charset_bin), *format;
+  String format_str(format_buff, sizeof(format_buff), &my_charset_bin),
+    *format;
 
   val=    args[0]->val_str(&val_string);
   format= args[1]->val_str(&format_str);
   if (args[0]->null_value || args[1]->null_value)
     goto null_date;
 
-  null_value= 0;
-  bzero((char*) ltime, sizeof(*ltime));
   date_time_format.format.str=    (char*) format->ptr();
   date_time_format.format.length= format->length();
   if (extract_date_time(&date_time_format, val->ptr(), val->length(),
-			ltime, cached_timestamp_type, 0, "datetime") ||
-      ((fuzzy_date & TIME_NO_ZERO_DATE) &&
-       (ltime->year == 0 || ltime->month == 0 || ltime->day == 0)))
+			ltime, cached_timestamp_type, 0, "datetime",
+                        fuzzy_date))
     goto null_date;
   if (cached_timestamp_type == MYSQL_TIMESTAMP_TIME && ltime->day)
   {
@@ -3386,6 +3020,7 @@ bool Item_func_str_to_date::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
     ltime->hour+= ltime->day*24;
     ltime->day= 0;
   }
+  null_value= 0;
   return 0;
 
 null_date:
@@ -3393,22 +3028,6 @@ null_date:
 }
 
 
-String *Item_func_str_to_date::val_str(String *str)
-{
-  DBUG_ASSERT(fixed == 1);
-  MYSQL_TIME ltime;
-
-  if (Item_func_str_to_date::get_date(&ltime, TIME_FUZZY_DATE))
-    return 0;
-
-  if (!make_datetime((const_item ? cached_format_type :
-		     (ltime.second_part ? DATE_TIME_MICROSECOND : DATE_TIME)),
-		     &ltime, str))
-    return str;
-  return 0;
-}
-
-
 bool Item_func_last_day::get_date(MYSQL_TIME *ltime, uint fuzzy_date)
 {
   if (get_arg0_date(ltime, fuzzy_date & ~TIME_FUZZY_DATE) ||
diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h
index 963e28b80e8..8d19e59ddfb 100644
--- a/sql/item_timefunc.h
+++ b/sql/item_timefunc.h
@@ -359,13 +359,30 @@ class Item_func_dayname :public Item_func_weekday
 };
 
 
-class Item_func_unix_timestamp :public Item_int_func
+class Item_func_seconds_hybrid: public Item_func_numhybrid
 {
-  String value;
 public:
-  Item_func_unix_timestamp() :Item_int_func() {}
-  Item_func_unix_timestamp(Item *a) :Item_int_func(a) {}
-  longlong val_int();
+  Item_func_seconds_hybrid() :Item_func_numhybrid() {}
+  Item_func_seconds_hybrid(Item *a) :Item_func_numhybrid(a) {}
+  void fix_num_length_and_dec()
+  {
+    if (arg_count)
+      decimals= args[0]->decimals;
+    set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+    max_length=17 + (decimals ? decimals + 1 : 0);
+  }
+  void find_num_type() { hybrid_type= decimals ? DECIMAL_RESULT : INT_RESULT; }
+  double real_op() { DBUG_ASSERT(0); return 0; }
+  String *str_op(String *str) { DBUG_ASSERT(0); return 0; }
+};
+
+
+class Item_func_unix_timestamp :public Item_func_seconds_hybrid
+{
+  bool get_timestamp_value(my_time_t *seconds, ulong *second_part);
+public:
+  Item_func_unix_timestamp() :Item_func_seconds_hybrid() {}
+  Item_func_unix_timestamp(Item *a) :Item_func_seconds_hybrid(a) {}
   const char *func_name() const { return "unix_timestamp"; }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
   /*
@@ -377,11 +394,6 @@ public:
   {
     return !has_timestamp_args();
   }
-  void fix_length_and_dec()
-  {
-    decimals=0;
-    max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
-  }
   bool check_vcol_func_processor(uchar *int_arg) 
   {
     /*
@@ -390,20 +402,21 @@ public:
     */
     return trace_unsupported_by_check_vcol_func_processor(func_name());
   }
+  longlong int_op();
+  my_decimal *decimal_op(my_decimal* buf);
 };
 
 
-class Item_func_time_to_sec :public Item_int_func
+  
+class Item_func_time_to_sec :public Item_func_seconds_hybrid
 {
 public:
-  Item_func_time_to_sec(Item *item) :Item_int_func(item) {}
-  longlong val_int();
+  Item_func_time_to_sec(Item *item) :Item_func_seconds_hybrid(item) {}
   const char *func_name() const { return "time_to_sec"; }
-  void fix_length_and_dec()
+  void fix_num_length_and_dec()
   {
-    maybe_null= TRUE;
-    decimals=0;
-    max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
+    maybe_null= true;
+    Item_func_seconds_hybrid::fix_num_length_and_dec();
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
   bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
@@ -411,126 +424,98 @@ public:
   {
     return !has_time_args();
   }
+  longlong int_op();
+  my_decimal *decimal_op(my_decimal* buf);
 };
 
 
-/*
-  This can't be a Item_str_func, because the val_real() functions are special
-*/
-
-class Item_date :public Item_func
+class Item_temporal_func: public Item_func
 {
 public:
-  Item_date() :Item_func() {}
-  Item_date(Item *a) :Item_func(a) {}
+  Item_temporal_func() :Item_func() {}
+  Item_temporal_func(Item *a) :Item_func(a) {}
+  Item_temporal_func(Item *a, Item *b) :Item_func(a,b) {}
+  Item_temporal_func(Item *a, Item *b, Item *c) :Item_func(a,b,c) {}
   enum Item_result result_type () const { return STRING_RESULT; }
-  enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
+  enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
   String *val_str(String *str);
   longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  const char *func_name() const { return "date"; }
-  void fix_length_and_dec()
-  { 
-    collation.set(&my_charset_bin);
-    decimals=0;
-    max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
+  double val_real();
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date) { DBUG_ASSERT(0); return 1; }
   my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
+  { return  val_decimal_from_date(decimal_value); }
+  Field *tmp_table_field(TABLE *table)
+  { return tmp_table_field_from_field_type(table, 0); }
   int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
+  { return save_date_in_field(field); }
+  void fix_length_and_dec()
+  { 
+    static const uint max_time_type_width[5]=
+    { MAX_DATETIME_WIDTH, MAX_DATETIME_WIDTH, MAX_DATE_WIDTH,
+      MAX_DATETIME_WIDTH, MIN_TIME_WIDTH };
+
+    maybe_null= true;
+    max_length= max_time_type_width[mysql_type_to_time_type(field_type())+2];
+    if (decimals)
+    {
+      if (decimals == NOT_FIXED_DEC)
+        max_length+= TIME_SECOND_PART_DIGITS + 1;
+      else
+      {
+        set_if_smaller(decimals, TIME_SECOND_PART_DIGITS);
+        max_length+= decimals + 1;
+      }
+    }
+    /*
+      We set maybe_null to 1 as default as any bad argument with date or
+      time can get us to return NULL.
+    */ 
+    maybe_null= 1;
   }
 };
 
 
-class Item_date_func :public Item_str_func
+class Item_datefunc :public Item_temporal_func
 {
 public:
-  Item_date_func() :Item_str_func() {}
-  Item_date_func(Item *a) :Item_str_func(a) {}
-  Item_date_func(Item *a,Item *b) :Item_str_func(a,b) {}
-  Item_date_func(Item *a,Item *b, Item *c) :Item_str_func(a,b,c) {}
-  enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
-  double val_real() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
+  Item_datefunc() :Item_temporal_func() { }
+  Item_datefunc(Item *a) :Item_temporal_func(a) { }
+  enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
 };
 
 
-class Item_str_timefunc :public Item_str_func
+class Item_timefunc :public Item_temporal_func
 {
 public:
-  Item_str_timefunc() :Item_str_func() {}
-  Item_str_timefunc(Item *a) :Item_str_func(a) {}
-  Item_str_timefunc(Item *a,Item *b) :Item_str_func(a,b) {}
-  Item_str_timefunc(Item *a, Item *b, Item *c) :Item_str_func(a, b ,c) {}
+  Item_timefunc() :Item_temporal_func() {}
+  Item_timefunc(Item *a) :Item_temporal_func(a) {}
+  Item_timefunc(Item *a,Item *b) :Item_temporal_func(a,b) {}
+  Item_timefunc(Item *a, Item *b, Item *c) :Item_temporal_func(a, b ,c) {}
   enum_field_types field_type() const { return MYSQL_TYPE_TIME; }
-  void fix_length_and_dec()
-  {
-    decimals= DATETIME_DEC;
-    max_length=MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-  }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_time(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_time_in_field(field);
-  }
-  longlong val_int() { return val_int_from_decimal(); }
-  bool result_as_longlong() { return TRUE; }
 };
 
 
 /* Abstract CURTIME function. Children should define what time zone is used */
 
-class Item_func_curtime :public Item_str_timefunc
+class Item_func_curtime :public Item_timefunc
 {
-  longlong value;
-  char buff[9*2+32];
-  uint buff_length;
+  MYSQL_TIME ltime;
 public:
-  Item_func_curtime() :Item_str_timefunc() {}
-  Item_func_curtime(Item *a) :Item_str_timefunc(a) {}
-  double val_real() { DBUG_ASSERT(fixed == 1); return (double) value; }
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
-  String *val_str(String *str);
-  void fix_length_and_dec();
+  Item_func_curtime(uint dec) :Item_timefunc() { decimals= dec; }
+  bool fix_fields(THD *, Item **);
+  void fix_length_and_dec()
+  {
+    store_now_in_TIME(&ltime);
+    Item_timefunc::fix_length_and_dec();
+    maybe_null= false;
+  }
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   /* 
     Abstract method that defines which time zone is used for conversion.
     Converts time current time in my_time_t representation to broken-down
     MYSQL_TIME representation using UTC-SYSTEM or per-thread time zone.
   */
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
-  bool result_as_longlong() { return TRUE; }
   bool check_vcol_func_processor(uchar *int_arg) 
   {
     return trace_unsupported_by_check_vcol_func_processor(func_name());
@@ -541,8 +526,7 @@ public:
 class Item_func_curtime_local :public Item_func_curtime
 {
 public:
-  Item_func_curtime_local() :Item_func_curtime() {}
-  Item_func_curtime_local(Item *a) :Item_func_curtime(a) {}
+  Item_func_curtime_local(uint dec) :Item_func_curtime(dec) {}
   const char *func_name() const { return "curtime"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -551,8 +535,7 @@ public:
 class Item_func_curtime_utc :public Item_func_curtime
 {
 public:
-  Item_func_curtime_utc() :Item_func_curtime() {}
-  Item_func_curtime_utc(Item *a) :Item_func_curtime(a) {}
+  Item_func_curtime_utc(uint dec) :Item_func_curtime(dec) {}
   const char *func_name() const { return "utc_time"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -560,14 +543,11 @@ public:
 
 /* Abstract CURDATE function. See also Item_func_curtime. */
 
-class Item_func_curdate :public Item_date
+class Item_func_curdate :public Item_datefunc
 {
-  longlong value;
   MYSQL_TIME ltime;
 public:
-  Item_func_curdate() :Item_date() {}
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return (value) ; }
-  String *val_str(String *str);
+  Item_func_curdate() :Item_datefunc() {}
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
@@ -598,21 +578,19 @@ public:
 
 /* Abstract CURRENT_TIMESTAMP function. See also Item_func_curtime */
 
-class Item_func_now :public Item_date_func
+
+class Item_func_now :public Item_temporal_func
 {
-protected:
-  longlong value;
-  char buff[20*2+32];	// +32 to make my_snprintf_{8bit|ucs2} happy
-  uint buff_length;
   MYSQL_TIME ltime;
 public:
-  Item_func_now() :Item_date_func() {}
-  Item_func_now(Item *a) :Item_date_func(a) {}
-  enum Item_result result_type () const { return STRING_RESULT; }
-  longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
-  int save_in_field(Field *to, bool no_conversions);
-  String *val_str(String *str);
-  void fix_length_and_dec();
+  Item_func_now(uint dec) :Item_temporal_func() { decimals= dec; }
+  bool fix_fields(THD *, Item **);
+  void fix_length_and_dec()
+  {
+    store_now_in_TIME(&ltime);
+    Item_temporal_func::fix_length_and_dec();
+    maybe_null= false;
+  }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
   bool check_vcol_func_processor(uchar *int_arg) 
@@ -625,8 +603,7 @@ public:
 class Item_func_now_local :public Item_func_now
 {
 public:
-  Item_func_now_local() :Item_func_now() {}
-  Item_func_now_local(Item *a) :Item_func_now(a) {}
+  Item_func_now_local(uint dec) :Item_func_now(dec) {}
   const char *func_name() const { return "now"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
   virtual enum Functype functype() const { return NOW_FUNC; }
@@ -636,8 +613,7 @@ public:
 class Item_func_now_utc :public Item_func_now
 {
 public:
-  Item_func_now_utc() :Item_func_now() {}
-  Item_func_now_utc(Item *a) :Item_func_now(a) {}
+  Item_func_now_utc(uint dec) :Item_func_now(dec) {}
   const char *func_name() const { return "utc_timestamp"; }
   virtual void store_now_in_TIME(MYSQL_TIME *now_time);
 };
@@ -650,16 +626,10 @@ public:
 class Item_func_sysdate_local :public Item_func_now
 {
 public:
-  Item_func_sysdate_local() :Item_func_now() {}
-  Item_func_sysdate_local(Item *a) :Item_func_now(a) {}
+  Item_func_sysdate_local(uint dec) :Item_func_now(dec) {}
   bool const_item() const { return 0; }
   const char *func_name() const { return "sysdate"; }
   void store_now_in_TIME(MYSQL_TIME *now_time);
-  double val_real();
-  longlong val_int();
-  int save_in_field(Field *to, bool no_conversions);
-  String *val_str(String *str);
-  void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void update_used_tables()
   {
@@ -669,10 +639,10 @@ public:
 };
 
 
-class Item_func_from_days :public Item_date
+class Item_func_from_days :public Item_datefunc
 {
 public:
-  Item_func_from_days(Item *a) :Item_date(a) {}
+  Item_func_from_days(Item *a) :Item_datefunc(a) {}
   const char *func_name() const { return "from_days"; }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
@@ -701,13 +671,11 @@ public:
 };
 
 
-class Item_func_from_unixtime :public Item_date_func
+class Item_func_from_unixtime :public Item_temporal_func
 {
   THD *thd;
  public:
-  Item_func_from_unixtime(Item *a) :Item_date_func(a) {}
-  longlong val_int();
-  String *val_str(String *str);
+  Item_func_from_unixtime(Item *a) :Item_temporal_func(a) {}
   const char *func_name() const { return "from_unixtime"; }
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
@@ -728,7 +696,7 @@ class Time_zone;
   tables can be used during this function calculation for loading time zone
   descriptions.
 */
-class Item_func_convert_tz :public Item_date_func
+class Item_func_convert_tz :public Item_temporal_func
 {
   /*
     If time zone parameters are constants we are caching objects that
@@ -740,9 +708,7 @@ class Item_func_convert_tz :public Item_date_func
   Time_zone *from_tz, *to_tz;
  public:
   Item_func_convert_tz(Item *a, Item *b, Item *c):
-    Item_date_func(a, b, c), from_tz_cached(0), to_tz_cached(0) {}
-  longlong val_int();
-  String *val_str(String *str);
+    Item_temporal_func(a, b, c), from_tz_cached(0), to_tz_cached(0) {}
   const char *func_name() const { return "convert_tz"; }
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
@@ -750,29 +716,21 @@ class Item_func_convert_tz :public Item_date_func
 };
 
 
-class Item_func_sec_to_time :public Item_str_timefunc
+class Item_func_sec_to_time :public Item_timefunc
 {
 public:
-  Item_func_sec_to_time(Item *item) :Item_str_timefunc(item) {}
-  double val_real()
-  {
-    DBUG_ASSERT(fixed == 1);
-    return (double) Item_func_sec_to_time::val_int();
-  }
-  longlong val_int();
-  String *val_str(String *);
+  Item_func_sec_to_time(Item *item) :Item_timefunc(item) {}
+  bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   void fix_length_and_dec()
-  { 
-    Item_str_timefunc::fix_length_and_dec();
-    collation.set(&my_charset_bin);
-    maybe_null=1;
+  {
+    decimals= args[0]->decimals;
+    Item_timefunc::fix_length_and_dec();
   }
   const char *func_name() const { return "sec_to_time"; }
-  bool result_as_longlong() { return TRUE; }
 };
 
 
-class Item_date_add_interval :public Item_date_func
+class Item_date_add_interval :public Item_temporal_func
 {
   String value;
   enum_field_types cached_field_type;
@@ -781,12 +739,10 @@ public:
   const interval_type int_type; // keep it public
   const bool date_sub_interval; // keep it public
   Item_date_add_interval(Item *a,Item *b,interval_type type_arg,bool neg_arg)
-    :Item_date_func(a,b),int_type(type_arg), date_sub_interval(neg_arg) {}
-  String *val_str(String *);
+    :Item_temporal_func(a,b),int_type(type_arg), date_sub_interval(neg_arg) {}
   const char *func_name() const { return "date_add_interval"; }
   void fix_length_and_dec();
   enum_field_types field_type() const { return cached_field_type; }
-  longlong val_int();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   bool eq(const Item *item, bool binary_cmp) const;
   virtual void print(String *str, enum_query_type query_type);
@@ -847,177 +803,84 @@ class Item_extract :public Item_int_func
 };
 
 
-class Item_typecast :public Item_str_func
-{
-public:
-  Item_typecast(Item *a) :Item_str_func(a) {}
-  String *val_str(String *a)
-  {
-    DBUG_ASSERT(fixed == 1);
-    String *tmp=args[0]->val_str(a);
-    null_value=args[0]->null_value;
-    if (tmp)
-      tmp->set_charset(collation.collation);
-    return tmp;
-  }
-  void fix_length_and_dec()
-  {
-    collation.set(&my_charset_bin);
-    max_length=args[0]->max_length;
-  }
-  virtual const char* cast_type() const= 0;
-  virtual void print(String *str, enum_query_type query_type);
-};
-
-
-class Item_typecast_maybe_null :public Item_typecast
+class Item_char_typecast :public Item_str_func
 {
-public:
-  Item_typecast_maybe_null(Item *a) :Item_typecast(a) {}
-  void fix_length_and_dec()
-  {
-    collation.set(&my_charset_bin);
-    max_length=args[0]->max_length;
-    maybe_null= 1;
-  }
-};
-
-
-class Item_char_typecast :public Item_typecast
-{
-  int cast_length;
+  uint cast_length;
   CHARSET_INFO *cast_cs, *from_cs;
   bool charset_conversion;
   String tmp_value;
 public:
-  Item_char_typecast(Item *a, int length_arg, CHARSET_INFO *cs_arg)
-    :Item_typecast(a), cast_length(length_arg), cast_cs(cs_arg) {}
+  Item_char_typecast(Item *a, uint length_arg, CHARSET_INFO *cs_arg)
+    :Item_str_func(a), cast_length(length_arg), cast_cs(cs_arg) {}
   enum Functype functype() const { return CHAR_TYPECAST_FUNC; }
   bool eq(const Item *item, bool binary_cmp) const;
   const char *func_name() const { return "cast_as_char"; }
-  const char* cast_type() const { return "char"; };
   String *val_str(String *a);
   void fix_length_and_dec();
-  virtual void print(String *str, enum_query_type query_type);
+  void print(String *str, enum_query_type query_type);
 };
 
 
-class Item_date_typecast :public Item_typecast_maybe_null
+class Item_temporal_typecast: public Item_temporal_func
 {
 public:
-  Item_date_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_temporal_typecast(Item *a) :Item_temporal_func(a) {}
+  virtual const char *cast_type() const = 0;
+  void print(String *str, enum_query_type query_type);
+  void fix_length_and_dec()
+  {
+    if (decimals == NOT_FIXED_DEC)
+      decimals= args[0]->decimals;
+    Item_temporal_func::fix_length_and_dec();
+  }
+};
+
+class Item_date_typecast :public Item_temporal_typecast
+{
+public:
+  Item_date_typecast(Item *a) :Item_temporal_typecast(a) {}
   const char *func_name() const { return "cast_as_date"; }
-  String *val_str(String *str);
   bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
-  bool get_time(MYSQL_TIME *ltime);
   const char *cast_type() const { return "date"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }  
-  void fix_length_and_dec()
-  {
-    collation.set(&my_charset_bin);
-    max_length= 10;
-    maybe_null= 1;
-  }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
 };
 
 
-class Item_time_typecast :public Item_typecast_maybe_null
+class Item_time_typecast :public Item_temporal_typecast
 {
 public:
-  Item_time_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_time_typecast(Item *a, uint dec_arg)
+    :Item_temporal_typecast(a) { decimals= dec_arg; }
   const char *func_name() const { return "cast_as_time"; }
-  String *val_str(String *str);
-  bool get_time(MYSQL_TIME *ltime);
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
   const char *cast_type() const { return "time"; }
   enum_field_types field_type() const { return MYSQL_TYPE_TIME; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_time(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_time_in_field(field);
-  }
 };
 
 
-class Item_datetime_typecast :public Item_typecast_maybe_null
+class Item_datetime_typecast :public Item_temporal_typecast
 {
 public:
-  Item_datetime_typecast(Item *a) :Item_typecast_maybe_null(a) {}
+  Item_datetime_typecast(Item *a, uint dec_arg)
+    :Item_temporal_typecast(a) { decimals= dec_arg; }
   const char *func_name() const { return "cast_as_datetime"; }
-  String *val_str(String *str);
   const char *cast_type() const { return "datetime"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; }
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
-  void fix_length_and_dec()
-  {
-    collation.set(&my_charset_bin);
-    maybe_null= 1;
-    max_length= MAX_DATETIME_FULL_WIDTH * MY_CHARSET_BIN_MB_MAXLEN;
-    decimals= DATETIME_DEC;
-  }
-  bool result_as_longlong() { return TRUE; }
-  longlong val_int();
-  double val_real() { return val_real_from_decimal(); }
-  double val() { return (double) val_int(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    return  val_decimal_from_date(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    return save_date_in_field(field);
-  }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
-class Item_func_makedate :public Item_date_func
+
+class Item_func_makedate :public Item_temporal_func
 {
 public:
-  Item_func_makedate(Item *a,Item *b) :Item_date_func(a,b) {}
-  String *val_str(String *str);
+  Item_func_makedate(Item *a,Item *b) :Item_temporal_func(a,b) {}
   const char *func_name() const { return "makedate"; }
   enum_field_types field_type() const { return MYSQL_TYPE_DATE; }
-  void fix_length_and_dec()
-  { 
-    decimals=0;
-    max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN;
-    /* It returns NULL when the second argument is less or equal to 0 */
-    maybe_null= 1;
-  }
-  longlong val_int();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
 
-class Item_func_add_time :public Item_str_func
+class Item_func_add_time :public Item_temporal_func
 {
   const bool is_date;
   int sign;
@@ -1025,63 +888,39 @@ class Item_func_add_time :public Item_str_func
 
 public:
   Item_func_add_time(Item *a, Item *b, bool type_arg, bool neg_arg)
-    :Item_str_func(a, b), is_date(type_arg) { sign= neg_arg ? -1 : 1; }
-  String *val_str(String *str);
+    :Item_temporal_func(a, b), is_date(type_arg) { sign= neg_arg ? -1 : 1; }
   enum_field_types field_type() const { return cached_field_type; }
   void fix_length_and_dec();
-
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 0);
-  }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
   virtual void print(String *str, enum_query_type query_type);
   const char *func_name() const { return "add_time"; }
-  double val_real() { return val_real_from_decimal(); }
-  my_decimal *val_decimal(my_decimal *decimal_value)
-  {
-    DBUG_ASSERT(fixed == 1);
-    if (cached_field_type == MYSQL_TYPE_TIME)
-      return  val_decimal_from_time(decimal_value);
-    if (cached_field_type == MYSQL_TYPE_DATETIME)
-      return  val_decimal_from_date(decimal_value);
-    return Item_str_func::val_decimal(decimal_value);
-  }
-  int save_in_field(Field *field, bool no_conversions)
-  {
-    if (cached_field_type == MYSQL_TYPE_TIME)
-      return save_time_in_field(field);
-    if (cached_field_type == MYSQL_TYPE_DATETIME)
-      return save_date_in_field(field);
-    return Item_str_func::save_in_field(field, no_conversions);
-  }
 };
 
-class Item_func_timediff :public Item_str_timefunc
+class Item_func_timediff :public Item_timefunc
 {
 public:
   Item_func_timediff(Item *a, Item *b)
-    :Item_str_timefunc(a, b) {}
-  String *val_str(String *str);
+    :Item_timefunc(a, b) {}
   const char *func_name() const { return "timediff"; }
   void fix_length_and_dec()
   {
-    Item_str_timefunc::fix_length_and_dec();
-    maybe_null= 1;
+    decimals= max(args[0]->decimals, args[1]->decimals);
+    Item_timefunc::fix_length_and_dec();
   }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
-class Item_func_maketime :public Item_str_timefunc
+class Item_func_maketime :public Item_timefunc
 {
 public:
   Item_func_maketime(Item *a, Item *b, Item *c)
-    :Item_str_timefunc(a, b, c) 
-  {
-    maybe_null= TRUE;
-  }
-  String *val_str(String *str);
+    :Item_timefunc(a, b, c) 
+  {}
   const char *func_name() const { return "maketime"; }
+  bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
 };
 
+
 class Item_func_microsecond :public Item_int_func
 {
 public:
@@ -1143,32 +982,26 @@ public:
 };
 
 
-class Item_func_str_to_date :public Item_str_func
+class Item_func_str_to_date :public Item_temporal_func
 {
   enum_field_types cached_field_type;
-  date_time_format_types cached_format_type;
   timestamp_type cached_timestamp_type;
   bool const_item;
 public:
   Item_func_str_to_date(Item *a, Item *b)
-    :Item_str_func(a, b), const_item(false)
+    :Item_temporal_func(a, b), const_item(false)
   {}
-  String *val_str(String *str);
   bool get_date(MYSQL_TIME *ltime, uint fuzzy_date);
   const char *func_name() const { return "str_to_date"; }
   enum_field_types field_type() const { return cached_field_type; }
   void fix_length_and_dec();
-  Field *tmp_table_field(TABLE *table)
-  {
-    return tmp_table_field_from_field_type(table, 1);
-  }
 };
 
 
-class Item_func_last_day :public Item_date
+class Item_func_last_day :public Item_datefunc
 {
 public:
-  Item_func_last_day(Item *a) :Item_date(a) {}
+  Item_func_last_day(Item *a) :Item_datefunc(a) {}
   const char *func_name() const { return "last_day"; }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
 };
diff --git a/sql/key.cc b/sql/key.cc
index fec67b35ebc..b8edb2b3119 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -106,30 +106,46 @@ int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field,
   @param from_record full record to be copied from
   @param key_info    descriptor of the index
   @param key_length  specifies length of all keyparts that will be copied
+  @param with_zerofill  skipped bytes in the key buffer to be filled with 0
 */
 
 void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
-              uint key_length)
+              uint key_length, bool with_zerofill)
 {
   uint length;
   KEY_PART_INFO *key_part;
 
   if (key_length == 0)
     key_length= key_info->key_length;
-  for (key_part= key_info->key_part; (int) key_length > 0; key_part++)
+  for (key_part= key_info->key_part;
+       (int) key_length > 0;
+       key_part++, to_key+= length, key_length-= length)
   {
     if (key_part->null_bit)
     {
       *to_key++= test(from_record[key_part->null_offset] &
 		   key_part->null_bit);
       key_length--;
+      if (to_key[-1])
+      {
+        /*
+          Don't copy data for null values
+          The -1 below is to subtract the null byte which is already handled
+        */
+        length= min(key_length, (uint) key_part->store_length-1);
+        if (with_zerofill)
+          bzero((char*) to_key, length);
+        continue;
+      }
     }
     if (key_part->key_part_flag & HA_BLOB_PART ||
         key_part->key_part_flag & HA_VAR_LENGTH_PART)
     {
       key_length-= HA_KEY_BLOB_LENGTH;
       length= min(key_length, key_part->length);
-      key_part->field->get_key_image(to_key, length, Field::itRAW);
+      uint bytes= key_part->field->get_key_image(to_key, length, Field::itRAW);
+      if (with_zerofill && bytes < length)
+        bzero((char*) to_key + bytes, length - bytes);
       to_key+= HA_KEY_BLOB_LENGTH;
     }
     else
@@ -141,8 +157,6 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
       if (bytes < length)
         cs->cset->fill(cs, (char*) to_key + bytes, length - bytes, ' ');
     }
-    to_key+= length;
-    key_length-= length;
   }
 }
 
@@ -169,16 +183,28 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
   {
     key_length= key_info->key_length;
   }
-  for (key_part= key_info->key_part ; (int) key_length > 0 ; key_part++)
+  for (key_part= key_info->key_part ;
+       (int) key_length > 0 ;
+       key_part++, from_key+= length, key_length-= length)
   {
     uchar used_uneven_bits= 0;
     if (key_part->null_bit)
     {
-      if (*from_key++)
+      bool null_value; 
+      if ((null_value= *from_key++))
 	to_record[key_part->null_offset]|= key_part->null_bit;
       else
 	to_record[key_part->null_offset]&= ~key_part->null_bit;
       key_length--;
+      if (null_value)
+      {
+        /*
+          Don't copy data for null bytes
+          The -1 below is to subtract the null byte which is already handled
+        */
+        length= min(key_length, (uint) key_part->store_length-1);
+        continue;
+      }
     }
     if (key_part->type == HA_KEYTYPE_BIT)
     {
@@ -232,8 +258,6 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
       memcpy(to_record + key_part->offset, from_key + used_uneven_bits
              , (size_t) length - used_uneven_bits);
     }
-    from_key+= length;
-    key_length-= length;
   }
 }
 
@@ -551,3 +575,287 @@ next_loop:
   } while (key_info); /* no more keys to test */
   DBUG_RETURN(0);
 }
+
+
+/*
+  Compare two key tuples.
+
+  @brief
+    Compare two key tuples, i.e. two key values in KeyTupleFormat.
+
+  @param part          KEY_PART_INFO with key description
+  @param key1          First key to compare
+  @param key2          Second key to compare 
+  @param tuple_length  Length of key1 (and key2, they are the same) in bytes.
+
+  @return
+    @retval  0  key1 == key2
+    @retval -1  key1 < key2
+    @retval +1  key1 > key2 
+*/
+
+int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, 
+                  uint tuple_length)
+{
+  uchar *key1_end= key1 + tuple_length;
+  int len;
+  int res;
+  LINT_INIT(len);
+  for (;key1 < key1_end; key1 += len, key2 += len, part++)
+  {
+    len= part->store_length;
+    if (part->null_bit)
+    {
+      if (*key1) // key1 == NULL
+      {
+        if (!*key2) // key1(NULL) < key2(notNULL)
+          return -1;
+        continue;
+      }
+      else if (*key2) // key1(notNULL) > key2 (NULL)
+        return 1;
+      /* Step over the NULL bytes for key_cmp() call */
+      key1++;
+      key2++;
+      len--;
+    }
+    if ((res= part->field->key_cmp(key1, key2)))
+      return res;
+  }
+  return 0;
+}
+
+
+/**
+  Get hash value for the key from a key buffer 
+
+  @param  key_info       the key descriptor
+  @param  used_key_part  number of key parts used for the key
+  @param  key            pointer to the buffer with the key value
+
+  @datails
+  When hashing we should take special care only of:
+  1. NULLs (and keyparts which can be null so one byte reserved for it);
+  2. Strings for which we have to take into account their collations
+  and the values of their lengths in the prefixes.
+
+  @return  hash value calculated for the key
+*/
+
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
+{
+  ulong nr=1, nr2=4;
+  KEY_PART_INFO *key_part= key_info->key_part;
+  KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+  for (; key_part < end_key_part; key_part++)
+  {
+    uchar *pos= (uchar*)key;
+    CHARSET_INFO *cs;
+    uint length, pack_length;
+    bool is_string= TRUE;
+    LINT_INIT(cs);
+    LINT_INIT(length);
+    LINT_INIT(pack_length);
+
+    key+= key_part->length;
+    if (key_part->null_bit)
+    {
+      key++;                       /* Skip null byte */
+      if (*pos)                    /* Found null */
+      {
+        nr^= (nr << 1) | 1;
+        /* Add key pack length to key for VARCHAR segments */
+        switch (key_part->type) {
+        case HA_KEYTYPE_VARTEXT1:
+        case HA_KEYTYPE_VARBINARY1:
+        case HA_KEYTYPE_VARTEXT2:
+        case HA_KEYTYPE_VARBINARY2:
+          key+= 2;
+          break;
+        default:
+          ;
+        }
+    continue;
+      }
+      pos++;                       /* Skip null byte */
+    }
+    /* If it is string set parameters of the string */
+    switch (key_part->type) {
+    case HA_KEYTYPE_TEXT:
+      cs= key_part->field->charset();
+      length= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_BINARY :
+      cs= &my_charset_bin;
+      length= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+      cs= key_part->field->charset();
+      length= uint2korr(pos);
+      pack_length= 2;
+      break;
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      cs= &my_charset_bin;
+      length= uint2korr(pos);
+      pack_length= 2;
+      break;
+    default:
+      is_string= FALSE;
+    }
+
+    if (is_string)
+    {
+      if (cs->mbmaxlen > 1)
+      {
+        uint char_length= my_charpos(cs, pos + pack_length,
+                                     pos + pack_length + length,
+                                     length / cs->mbmaxlen);
+        set_if_smaller(length, char_length);
+      }
+      cs->coll->hash_sort(cs, pos+pack_length, length, &nr, &nr2);
+      key+= pack_length;
+    }
+    else
+    {
+      for (; pos < (uchar*)key ; pos++)
+      {
+        nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos)) + (nr << 8);
+        nr2+=3;
+      }
+    }
+  }
+  DBUG_PRINT("exit", ("hash: %lx", nr));
+  return(nr);
+}
+
+
+/**
+  Check whether two keys in the key buffers are equal
+
+  @param key_info        the key descriptor
+  @param  used_key_part  number of key parts used for the keys
+  @param key1            pointer to the buffer with the first key 
+  @param key2            pointer to the buffer with the second key 
+
+  @detail See details of key_hashnr().
+
+  @retval TRUE  keys in the buffers are NOT equal
+  @retval FALSE keys in the buffers are equal
+*/
+
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+                 const uchar *key1, const uchar *key2)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  KEY_PART_INFO *end_key_part= key_part + used_key_parts;
+
+  for (; key_part < end_key_part; key_part++)
+  {
+    uchar *pos1= (uchar*)key1;
+    uchar *pos2= (uchar*)key2;
+    CHARSET_INFO *cs;
+    uint length1, length2, pack_length;
+    bool is_string= TRUE;
+    LINT_INIT(cs);
+    LINT_INIT(length1);
+    LINT_INIT(length2);
+    LINT_INIT(pack_length);
+
+    key1+= key_part->length;
+    key2+= key_part->length;
+    if (key_part->null_bit)
+    {
+      key1++; key2++;                           /* Skip null byte */
+      if (*pos1 && *pos2)                       /* Both are null */
+      {
+        /* Add key pack length to key for VARCHAR segments */
+        switch (key_part->type) {
+        case HA_KEYTYPE_VARTEXT1:
+        case HA_KEYTYPE_VARBINARY1:
+        case HA_KEYTYPE_VARTEXT2:
+        case HA_KEYTYPE_VARBINARY2:
+          key1+= 2; key2+= 2;
+          break;
+        default:
+          ;
+        }
+        continue;
+      }
+      if (*pos1 != *pos2)
+        return TRUE;
+      pos1++; pos2++;
+    }
+
+    /* If it is string set parameters of the string */
+    switch (key_part->type) {
+    case HA_KEYTYPE_TEXT:
+      cs= key_part->field->charset();
+      length1= length2= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_BINARY :
+      cs= &my_charset_bin;
+      length1= length2= key_part->length;
+      pack_length= 0;
+      break;
+    case HA_KEYTYPE_VARTEXT1:
+    case HA_KEYTYPE_VARTEXT2:
+      cs= key_part->field->charset();
+      length1= uint2korr(pos1);
+      length2= uint2korr(pos2);
+      pack_length= 2;
+      break;
+    case HA_KEYTYPE_VARBINARY1:
+    case HA_KEYTYPE_VARBINARY2:
+      cs= &my_charset_bin;
+      length1= uint2korr(pos1);
+      length2= uint2korr(pos2);
+      pack_length= 2;
+      break;
+    default:
+      is_string= FALSE;
+    }
+
+    if (is_string)
+    {
+      /*
+        Compare the strings taking into account length in characters
+        and collation
+      */
+      uint byte_len1= length1, byte_len2= length2;
+      if (cs->mbmaxlen > 1)
+      {
+        uint char_length1= my_charpos(cs, pos1 + pack_length,
+                                      pos1 + pack_length + length1,
+                                      length1 / cs->mbmaxlen);
+        uint char_length2= my_charpos(cs, pos2 + pack_length,
+                                      pos2 + pack_length + length2,
+                                      length2 / cs->mbmaxlen);
+        set_if_smaller(length1, char_length1);
+        set_if_smaller(length2, char_length2);
+      }
+      if (length1 != length2 ||
+          cs->coll->strnncollsp(cs,
+                                pos1 + pack_length, byte_len1,
+                                pos2 + pack_length, byte_len2,
+                                1))
+        return TRUE;
+      key1+= pack_length; key2+= pack_length;
+    }
+    else
+    {
+      /* it is OK to compare non-string byte per byte */
+      for (; pos1 < (uchar*)key1 ; pos1++, pos2++)
+      {
+        if (pos1[0] != pos2[0])
+          return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
diff --git a/sql/lex.h b/sql/lex.h
index f304e0412fa..5d7546dcdc9 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -107,6 +107,7 @@ static SYMBOL symbols[] = {
   { "CHARACTER",	SYM(CHAR_SYM)},
   { "CHARSET",		SYM(CHARSET)},
   { "CHECK",		SYM(CHECK_SYM)},
+  { "CHECKPOINT",	SYM(CHECKPOINT_SYM)},
   { "CHECKSUM",		SYM(CHECKSUM_SYM)},
   { "CIPHER",		SYM(CIPHER_SYM)},
   { "CLIENT",		SYM(CLIENT_SYM)},
@@ -118,6 +119,12 @@ static SYMBOL symbols[] = {
   { "COLLATION",	SYM(COLLATION_SYM)},
   { "COLUMN",		SYM(COLUMN_SYM)},
   { "COLUMNS",		SYM(COLUMNS)},
+  { "COLUMN_ADD",       SYM(COLUMN_ADD_SYM)},
+  { "COLUMN_CREATE",    SYM(COLUMN_CREATE_SYM)},
+  { "COLUMN_DELETE",    SYM(COLUMN_DELETE_SYM)},
+  { "COLUMN_EXISTS",    SYM(COLUMN_EXISTS_SYM)},
+  { "COLUMN_GET",       SYM(COLUMN_GET_SYM)},
+  { "COLUMN_LIST",      SYM(COLUMN_LIST_SYM)},
   { "COMMENT",		SYM(COMMENT_SYM)},
   { "COMMIT",		SYM(COMMIT_SYM)},
   { "COMMITTED",	SYM(COMMITTED_SYM)},
@@ -234,6 +241,7 @@ static SYMBOL symbols[] = {
   { "GRANTS",	        SYM(GRANTS)},
   { "GROUP",		SYM(GROUP_SYM)},
   { "HANDLER",		SYM(HANDLER_SYM)},
+  { "HARD",		SYM(HARD_SYM)},
   { "HASH",		SYM(HASH_SYM)},
   { "HAVING",		SYM(HAVING)},
   { "HELP",		SYM(HELP_SYM)},
@@ -380,6 +388,7 @@ static SYMBOL symbols[] = {
   { "ON",		SYM(ON)},
   { "ONE",              SYM(ONE_SYM)},
   { "ONE_SHOT",		SYM(ONE_SHOT_SYM)},
+  { "ONLINE",		SYM(ONLINE_SYM)},
   { "OPEN",		SYM(OPEN_SYM)},
   { "OPTIMIZE",		SYM(OPTIMIZE)},
   { "OPTIONS",		SYM(OPTIONS_SYM)},
@@ -491,6 +500,7 @@ static SYMBOL symbols[] = {
   { "SNAPSHOT",         SYM(SNAPSHOT_SYM)},
   { "SMALLINT",		SYM(SMALLINT)},
   { "SOCKET",		SYM(SOCKET_SYM)},
+  { "SOFT",             SYM(SOFT_SYM)},
   { "SOME",             SYM(ANY_SYM)},
   { "SONAME",		SYM(SONAME_SYM)},
   { "SOUNDS",		SYM(SOUNDS_SYM)},
diff --git a/sql/lock.cc b/sql/lock.cc
index 1b66fd3d33f..57ced99417b 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -86,41 +86,10 @@
 
 extern HASH open_cache;
 
-/* flags for get_lock_data */
-#define GET_LOCK_UNLOCK         1
-#define GET_LOCK_STORE_LOCKS    2
-
-static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count,
-				 uint flags, TABLE **write_locked);
-static void reset_lock_data(MYSQL_LOCK *sql_lock);
 static int lock_external(THD *thd, TABLE **table,uint count);
 static int unlock_external(THD *thd, TABLE **table,uint count);
 static void print_lock_error(int error, const char *);
 
-/*
-  Lock tables.
-
-  SYNOPSIS
-    mysql_lock_tables()
-    thd                         The current thread.
-    tables                      An array of pointers to the tables to lock.
-    count                       The number of tables to lock.
-    flags                       Options:
-      MYSQL_LOCK_IGNORE_GLOBAL_READ_LOCK      Ignore a global read lock
-      MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY      Ignore SET GLOBAL READ_ONLY
-      MYSQL_LOCK_IGNORE_FLUSH                 Ignore a flush tables.
-      MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN        Instead of reopening altered
-                                              or dropped tables by itself,
-                                              mysql_lock_tables() should
-                                              notify upper level and rely
-                                              on caller doing this.
-    need_reopen                 Out parameter, TRUE if some tables were altered
-                                or deleted and should be reopened by caller.
-
-  RETURN
-    A lock structure pointer on success.
-    NULL on error or if some tables should be reopen.
-*/
 
 /* Map the return value of thr_lock to an error from errmsg.txt */
 static int thr_lock_errno_to_mysql[]=
@@ -134,6 +103,7 @@ static int thr_lock_errno_to_mysql[]=
   @param flags Lock flags
   @return 0 if all the check passed, non zero if a check failed.
 */
+
 int mysql_lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
 {
   bool log_table_write_query;
@@ -196,14 +166,39 @@ int mysql_lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
   DBUG_RETURN(0);
 }
 
+
+/*
+  Lock tables.
+
+  SYNOPSIS
+    mysql_lock_tables()
+    thd                         The current thread.
+    tables                      An array of pointers to the tables to lock.
+    count                       The number of tables to lock.
+    flags                       Options:
+      MYSQL_LOCK_IGNORE_GLOBAL_READ_LOCK      Ignore a global read lock
+      MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY      Ignore SET GLOBAL READ_ONLY
+      MYSQL_LOCK_IGNORE_FLUSH                 Ignore a flush tables.
+      MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN        Instead of reopening altered
+                                              or dropped tables by itself,
+                                              mysql_lock_tables() should
+                                              notify upper level and rely
+                                              on caller doing this.
+    need_reopen                 Out parameter, TRUE if some tables were altered
+                                or deleted and should be reopened by caller.
+
+  RETURN
+    A lock structure pointer on success.
+    NULL on error or if some tables should be reopen.
+*/
+
 MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count,
                               uint flags, bool *need_reopen)
 {
   MYSQL_LOCK *sql_lock;
   TABLE *write_lock_used;
   int rc;
-
-  DBUG_ENTER("mysql_lock_tables");
+  DBUG_ENTER("mysql_lock_tables(tables)");
 
   *need_reopen= FALSE;
 
@@ -212,137 +207,177 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count,
 
   for (;;)
   {
-    if (! (sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS,
-                                   &write_lock_used)) ||
-        ! sql_lock->table_count)
+    if (!(sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS,
+                                  &write_lock_used)) ||
+        !sql_lock->table_count)
       break;
+    rc= mysql_lock_tables(thd, sql_lock, write_lock_used != 0, flags,
+                          need_reopen);
+    if (!rc)
+      break;                                    // Got lock
+    my_free(sql_lock, MYF(0));
+    if (rc > 0)
+      DBUG_RETURN(0);                           // Failed
+  }
+  DBUG_RETURN(sql_lock);
+}
 
-    if (global_read_lock && write_lock_used &&
-        ! (flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_LOCK))
+/**
+   Lock a table based on a MYSQL_LOCK structure.
+
+   mysql_lock_tables()
+
+   @param thd			The current thread.
+   @param sql_lock		Tables that should be locked
+   @param write_lock_used 	1 if any of the tables are write locked
+   @param flags			See mysql_lock_tables()
+   @param need_reopen           Out parameter, TRUE if some tables were altered
+                                or deleted and should be reopened by caller.
+
+   @return 0  ok
+   @return 1  fatal error
+   @return -1 retry
+*/
+
+int mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock,
+                      bool write_lock_used,
+                      uint flags, bool *need_reopen)
+{
+  int res= 0;
+  int rc;
+  DBUG_ENTER("mysql_lock_tables(sql_lock)");
+  *need_reopen= FALSE;
+
+  if (write_lock_used && !(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_LOCK))
+  {
+    if (global_read_lock)
     {
       /*
-	Someone has issued LOCK ALL TABLES FOR READ and we want a write lock
-	Wait until the lock is gone
+        Someone has issued LOCK ALL TABLES FOR READ and we want a write lock
+        Wait until the lock is gone
       */
       if (wait_if_global_read_lock(thd, 1, 1))
       {
         /* Clear the lock type of all lock data to avoid reusage. */
-        reset_lock_data(sql_lock);
-	my_free((uchar*) sql_lock,MYF(0));
-	sql_lock=0;
-	break;
+        reset_lock_data(sql_lock, 1);
+        DBUG_RETURN(1);                         // Fatal error
       }
       if (thd->version != refresh_version)
       {
         /* Clear the lock type of all lock data to avoid reusage. */
-        reset_lock_data(sql_lock);
-	my_free((uchar*) sql_lock,MYF(0));
-	goto retry;
+        reset_lock_data(sql_lock, 1);
+        goto retry;
       }
     }
 
-    if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) &&
-        write_lock_used &&
-        opt_readonly &&
+    if (opt_readonly &&
         !(thd->security_ctx->master_access & SUPER_ACL) &&
         !thd->slave_thread)
     {
       /*
-	Someone has issued SET GLOBAL READ_ONLY=1 and we want a write lock.
+        Someone has issued SET GLOBAL READ_ONLY=1 and we want a write lock.
         We do not wait for READ_ONLY=0, and fail.
       */
-      reset_lock_data(sql_lock);
-      my_free((uchar*) sql_lock, MYF(0));
-      sql_lock=0;
       my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
-      break;
+      reset_lock_data(sql_lock, 1);
+      DBUG_RETURN(1);                           // Fatal error
     }
+  }
 
-    thd_proc_info(thd, "System lock");
-    DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
-    if (lock_external(thd, sql_lock->table, sql_lock->table_count))
-    {
-      /* Clear the lock type of all lock data to avoid reusage. */
-      reset_lock_data(sql_lock);
-      my_free((uchar*) sql_lock,MYF(0));
-      sql_lock=0;
-      break;
-    }
-    thd_proc_info(thd, "Table lock");
-    DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
-    thd->locked=1;
-    /* Copy the lock data array. thr_multi_lock() reorders its contens. */
-    memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
-           sql_lock->lock_count * sizeof(*sql_lock->locks));
-    /* Lock on the copied half of the lock data array. */
-    rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
-                                                     sql_lock->lock_count,
-                                                     sql_lock->lock_count,
-                                                     thd->lock_id)];
-    if (rc > 1)                                 /* a timeout or a deadlock */
-    {
+  thd_proc_info(thd, "System lock");
+  if (lock_external(thd, sql_lock->table, sql_lock->table_count))
+  {
+    /* Clear the lock type of all lock data to avoid reusage. */
+    res= 1;                                     // Fatal error
+    goto end;
+  }
+  thd_proc_info(thd, "Table lock");
+  DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
+  /* Copy the lock data array. thr_multi_lock() reorders its contens. */
+  memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
+         sql_lock->lock_count * sizeof(*sql_lock->locks));
+  /* Lock on the copied half of the lock data array. */
+  rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
+                                                   sql_lock->lock_count,
+                                                   sql_lock->lock_count,
+                                                   thd->lock_id)];
+  if (rc)                                     // Locking failed
+  {
+    if (sql_lock->table_count)
       VOID(unlock_external(thd, sql_lock->table, sql_lock->table_count));
+
+    /*
+      reset_lock_data is required here. If thr_multi_lock fails it
+      resets lock type for tables, which were locked before (and
+      including) one that caused error. Lock type for other tables
+      preserved.
+    */
+    reset_lock_data(sql_lock, 0);
+
+    if (rc > 1)
+    {
       my_error(rc, MYF(0));
-      my_free((uchar*) sql_lock,MYF(0));
-      sql_lock= 0;
-      break;
+      DBUG_RETURN(1);
     }
-    else if (rc == 1)                           /* aborted */
+    DBUG_ASSERT(rc == 1);                       // Timeout
+    thd->some_tables_deleted= 1;                // Reopen tables
+    sql_lock->lock_count= 0;                	// Locks are already freed
+    /* Retry */
+  }
+  else
+  {
+    /*
+      Lock worked. Now check that nothing happend while we where waiting
+      to get the lock that would require us to free it.
+    */
+    if (!thd->some_tables_deleted || (flags & MYSQL_LOCK_IGNORE_FLUSH))
     {
-      /*
-        reset_lock_data is required here. If thr_multi_lock fails it
-        resets lock type for tables, which were locked before (and
-        including) one that caused error. Lock type for other tables
-        preserved.
-      */
-      reset_lock_data(sql_lock);
-      thd->some_tables_deleted=1;		// Try again
-      sql_lock->lock_count= 0;                  // Locks are already freed
+      res= 0;
+      goto end; /* Lock was not aborted. Return to upper level */
     }
-    else if (!thd->some_tables_deleted || (flags & MYSQL_LOCK_IGNORE_FLUSH))
+    if (!thd->open_tables && !(flags & MYSQL_LOCK_NOT_TEMPORARY))
     {
       /*
-        Thread was killed or lock aborted. Let upper level close all
-        used tables and retry or give error.
+        Only using temporary tables, no need to unlock.
+        We need the flag as open_tables is not enough to distingush if
+        we are only using temporary tables for tables used trough
+        the HANDLER interface.
+
+        We reset some_tables_deleted as it doesn't make sense to have this
+        one when we are only using temporary tables.
       */
-      thd->locked=0;
-      break;
-    }
-    else if (!thd->open_tables)
-    {
-      // Only using temporary tables, no need to unlock
       thd->some_tables_deleted=0;
-      thd->locked=0;
-      break;
+      goto end;
     }
-    thd_proc_info(thd, 0);
+    /* Free lock and retry */
+  }
+
+  /* some table was altered or deleted. reopen tables marked deleted */
+  mysql_unlock_tables(thd, sql_lock, 0);
 
-    /* some table was altered or deleted. reopen tables marked deleted */
-    mysql_unlock_tables(thd,sql_lock);
-    thd->locked=0;
 retry:
-    sql_lock=0;
-    if (flags & MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN)
-    {
-      *need_reopen= TRUE;
-      break;
-    }
-    if (wait_for_tables(thd))
-      break;					// Couldn't open tables
+  res= -1;                                      // Retry
+  if (flags & MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN)
+  {
+    *need_reopen= TRUE;                         // Upper level will retry
+    DBUG_RETURN(1);                             // Fatal error
   }
+  if (wait_for_tables(thd))
+    res= 1;                                    // Couldn't open tables
+
+end: 
   thd_proc_info(thd, 0);
   if (thd->killed)
   {
     thd->send_kill_message();
-    if (sql_lock)
-    {
-      mysql_unlock_tables(thd,sql_lock);
-      sql_lock=0;
-    }
+    if (res == 0)
+      mysql_unlock_tables(thd,sql_lock,0);
+    else
+      reset_lock_data(sql_lock, 1);
+    res= 1;                                     // Fatal
   }
-
   thd->set_time_after_lock();
-  DBUG_RETURN (sql_lock);
+  DBUG_RETURN(res);
 }
 
 
@@ -382,15 +417,15 @@ static int lock_external(THD *thd, TABLE **tables, uint count)
   DBUG_RETURN(0);
 }
 
-
-void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
+void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock)
 {
   DBUG_ENTER("mysql_unlock_tables");
   if (sql_lock->table_count)
     VOID(unlock_external(thd,sql_lock->table,sql_lock->table_count));
   if (sql_lock->lock_count)
     thr_multi_unlock(sql_lock->locks,sql_lock->lock_count, 0);
-  my_free((uchar*) sql_lock,MYF(0));
+  if (free_lock)
+    my_free((uchar*) sql_lock,MYF(0));
   DBUG_VOID_RETURN;
 }
 
@@ -849,12 +884,12 @@ static int unlock_external(THD *thd, TABLE **table,uint count)
   @param write_lock_used   Store pointer to last table with WRITE_ALLOW_WRITE
 */
 
-static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
-				 uint flags, TABLE **write_lock_used)
+MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
+                          uint flags, TABLE **write_lock_used)
 {
   uint i,tables,lock_count;
   MYSQL_LOCK *sql_lock;
-  THR_LOCK_DATA **locks, **locks_buf, **locks_start;
+  THR_LOCK_DATA **locks, **locks_buf;
   TABLE **to, **table_buf;
   DBUG_ENTER("get_lock_data");
 
@@ -865,8 +900,10 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
   for (i=tables=lock_count=0 ; i < count ; i++)
   {
     TABLE *t= table_ptr[i];
-
-    if (t->s->tmp_table != NON_TRANSACTIONAL_TMP_TABLE)
+    
+    
+    if (t->s->tmp_table != NON_TRANSACTIONAL_TMP_TABLE && 
+        t->s->tmp_table != INTERNAL_TMP_TABLE)
     {
       tables+= t->file->lock_count();
       lock_count++;
@@ -893,8 +930,10 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
   {
     TABLE *table;
     enum thr_lock_type lock_type;
-
-    if ((table=table_ptr[i])->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE)
+    THR_LOCK_DATA **locks_start;
+    table= table_ptr[i];
+    if (table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE ||
+        table->s->tmp_table == INTERNAL_TMP_TABLE) 
       continue;
     lock_type= table->reginfo.lock_type;
     DBUG_ASSERT(lock_type != TL_WRITE_DEFAULT && lock_type != TL_READ_DEFAULT);
@@ -906,12 +945,11 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
 	my_error(ER_OPEN_AS_READONLY,MYF(0),table->alias.c_ptr());
         /* Clear the lock type of the lock data that are stored already. */
         sql_lock->lock_count= (uint) (locks - sql_lock->locks);
-        reset_lock_data(sql_lock);
+        reset_lock_data(sql_lock, 1);
 	my_free((uchar*) sql_lock,MYF(0));
 	DBUG_RETURN(0);
       }
     }
-    THR_LOCK_DATA **org_locks = locks;
     locks_start= locks;
     locks= table->file->store_lock(thd, locks,
                                    (flags & GET_LOCK_UNLOCK) ? TL_IGNORE :
@@ -924,11 +962,14 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
     }
     *to++= table;
     if (locks)
-      for ( ; org_locks != locks ; org_locks++)
+    {
+      for ( ; locks_start != locks ; locks_start++)
       {
-	(*org_locks)->debug_print_param= (void *) table;
-	(*org_locks)->lock->name=         table->alias.c_ptr();
+	(*locks_start)->debug_print_param= (void *) table;
+	(*locks_start)->lock->name=         table->alias.c_ptr();
+	(*locks_start)->org_type=           (*locks_start)->type;
       }
+    }
   }
   /*
     We do not use 'tables', because there are cases where store_lock()
@@ -969,10 +1010,13 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
   Clear the lock type of all lock data. This ensures that the next
   lock request will set its lock type properly.
 
-  @param sql_lock                  The MySQL lock.
+  @param sql_lock               The MySQL lock.
+  @param unlock			If set, then set lock type to TL_UNLOCK,
+  				otherwise set to original lock type from
+				get_store_lock().
 */
 
-static void reset_lock_data(MYSQL_LOCK *sql_lock)
+void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock)
 {
   THR_LOCK_DATA **ldata;
   THR_LOCK_DATA **ldata_end;
@@ -980,10 +1024,7 @@ static void reset_lock_data(MYSQL_LOCK *sql_lock)
   for (ldata= sql_lock->locks, ldata_end= ldata + sql_lock->lock_count;
        ldata < ldata_end;
        ldata++)
-  {
-    /* Reset lock type. */
-    (*ldata)->type= TL_UNLOCK;
-  }
+    (*ldata)->type= unlock ? TL_UNLOCK : (*ldata)->org_type;
 }
 
 
@@ -1446,7 +1487,7 @@ static void print_lock_error(int error, const char *table)
 
 ****************************************************************************/
 
-volatile uint global_read_lock=0;
+volatile uint global_read_lock=0, global_disable_checkpoint= 0;
 volatile uint global_read_lock_blocks_commit=0;
 static volatile uint protect_against_global_read_lock=0;
 static volatile uint waiting_for_read_lock=0;
@@ -1505,6 +1546,14 @@ void unlock_global_read_lock(THD *thd)
   tmp= --global_read_lock;
   if (thd->global_read_lock == MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT)
     --global_read_lock_blocks_commit;
+  if (thd->global_disable_checkpoint)
+  {
+    thd->global_disable_checkpoint= 0;
+    if (!--global_disable_checkpoint)
+    {
+      ha_checkpoint_state(0);                   // Enable checkpoints
+    }
+  }
   pthread_mutex_unlock(&LOCK_global_read_lock);
   /* Send the signal outside the mutex to avoid a context switch */
   if (!tmp)
@@ -1627,6 +1676,24 @@ bool make_global_read_lock_block_commit(THD *thd)
 
 
 /**
+   Disable checkpoints for all handlers
+   This is released in unlock_global_read_lock()
+*/
+
+void disable_checkpoints(THD *thd)
+{
+  pthread_mutex_lock(&LOCK_global_read_lock);
+  if (!thd->global_disable_checkpoint)
+  {
+    thd->global_disable_checkpoint= 1;
+    if (!global_disable_checkpoint++)
+      ha_checkpoint_state(1);                   // Disable checkpoints
+  }
+  pthread_mutex_unlock(&LOCK_global_read_lock);
+}
+
+
+/**
   Broadcast COND_refresh and COND_global_read_lock.
 
     Due to a bug in a threading library it could happen that a signal
diff --git a/sql/log.cc b/sql/log.cc
index 047439f6b73..53d81f22b66 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -38,6 +38,7 @@
 #endif
 
 #include <mysql/plugin.h>
+#include "debug_sync.h"
 
 /* max size of the log message */
 #define MAX_LOG_BUFFER_SIZE 1024
@@ -61,6 +62,39 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
 static int binlog_commit(handlerton *hton, THD *thd, bool all);
 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
+static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
+
+static LEX_STRING const write_error_msg=
+    { C_STRING_WITH_LEN("error writing to the binary log") };
+
+static my_bool opt_optimize_thread_scheduling= TRUE;
+ulong binlog_checksum_options;
+#ifndef DBUG_OFF
+ulong opt_binlog_dbug_fsync_sleep= 0;
+#endif
+
+static my_bool mutexes_inited;
+pthread_mutex_t LOCK_prepare_ordered;
+pthread_mutex_t LOCK_commit_ordered;
+
+static ulonglong binlog_status_var_num_commits;
+static ulonglong binlog_status_var_num_group_commits;
+static char binlog_snapshot_file[FN_REFLEN];
+static ulonglong binlog_snapshot_position;
+
+static SHOW_VAR binlog_status_vars_detail[]=
+{
+  {"commits",
+    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
+  {"group_commits",
+    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
+  {"snapshot_file",
+    (char *)&binlog_snapshot_file, SHOW_CHAR},
+  {"snapshot_position",
+   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
+  {NullS, NullS, SHOW_LONG}
+};
+
 
 /**
   Silence all errors and warnings reported when performing a write
@@ -134,50 +168,17 @@ char *make_once_alloced_filename(const char *basename, const char *ext)
 
 
 /*
-  Helper class to hold a mutex for the duration of the
-  block.
-
-  Eliminates the need for explicit unlocking of mutexes on, e.g.,
-  error returns.  On passing a null pointer, the sentry will not do
-  anything.
- */
-class Mutex_sentry
-{
-public:
-  Mutex_sentry(pthread_mutex_t *mutex)
-    : m_mutex(mutex)
-  {
-    if (m_mutex)
-      pthread_mutex_lock(mutex);
-  }
-
-  ~Mutex_sentry()
-  {
-    if (m_mutex)
-      pthread_mutex_unlock(m_mutex);
-#ifndef DBUG_OFF
-    m_mutex= 0;
-#endif
-  }
-
-private:
-  pthread_mutex_t *m_mutex;
-
-  // It's not allowed to copy this object in any way
-  Mutex_sentry(Mutex_sentry const&);
-  void operator=(Mutex_sentry const&);
-};
-
-/*
   Helper class to store binary log transaction data.
 */
 class binlog_trx_data {
 public:
   binlog_trx_data()
     : at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
-    before_stmt_pos(MY_OFF_T_UNDEF)
+    before_stmt_pos(MY_OFF_T_UNDEF), last_commit_pos_offset(0),
+    using_xa(FALSE), xa_xid(0)
   {
     trans_log.end_of_file= max_binlog_cache_size;
+    last_commit_pos_file[0]= 0;
   }
 
   ~binlog_trx_data()
@@ -229,11 +230,14 @@ public:
     completely.
    */
   void reset() {
-    if (!empty())
+    if (trans_log.type != WRITE_CACHE || !empty())
       truncate(0);
     before_stmt_pos= MY_OFF_T_UNDEF;
     incident= FALSE;
     trans_log.end_of_file= max_binlog_cache_size;
+    using_xa= FALSE;
+    last_commit_pos_file[0]= 0;
+    last_commit_pos_offset= 0;
     DBUG_ASSERT(empty());
   }
 
@@ -278,6 +282,22 @@ public:
     Binlog position before the start of the current statement.
   */
   my_off_t before_stmt_pos;
+  /*
+    Binlog position for current transaction.
+    For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
+    position corresponding to the snapshot taken. During (and after) commit,
+    this is set to the binlog position corresponding to just after the
+    commit (so storage engines can store it in their transaction log).
+  */
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
+
+  /*
+    Flag set true if this transaction is committed with log_xid() as part of
+    XA, false if not.
+  */
+  bool using_xa;
+  my_xid xa_xid;
 };
 
 handlerton *binlog_hton;
@@ -376,7 +396,7 @@ void Log_to_csv_event_handler::cleanup()
 */
 
 bool Log_to_csv_event_handler::
-  log_general(THD *thd, time_t event_time, const char *user_host,
+  log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
               uint user_host_len, int thread_id,
               const char *command_type, uint command_type_len,
               const char *sql_text, uint sql_text_len,
@@ -458,8 +478,8 @@ bool Log_to_csv_event_handler::
 
   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
 
-  ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t)
-                                                        event_time);
+  ((Field_timestamp*) table->field[0])->store_TIME(
+                  hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
 
   /* do a write */
   if (table->field[1]->store(user_host, user_host_len, client_cs) ||
@@ -523,7 +543,6 @@ err:
     log_slow()
     thd               THD of the query
     current_time      current timestamp
-    query_start_arg   command start timestamp
     user_host         the pointer to the string with user@host info
     user_host_len     length of the user_host string. this is computed once
                       and passed to all general log event handlers
@@ -546,7 +565,7 @@ err:
 */
 
 bool Log_to_csv_event_handler::
-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
+  log_slow(THD *thd, my_hrtime_t current_time,
            const char *user_host, uint user_host_len,
            ulonglong query_utime, ulonglong lock_utime, bool is_command,
            const char *sql_text, uint sql_text_len)
@@ -560,6 +579,11 @@ bool Log_to_csv_event_handler::
   Open_tables_state open_tables_backup;
   CHARSET_INFO *client_cs= thd->variables.character_set_client;
   bool save_time_zone_used;
+  long query_time= (long) min(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
+  long lock_time=  (long) min(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
+  long query_time_micro= (long) (query_utime % 1000000);
+  long lock_time_micro=  (long) (lock_utime % 1000000);
+
   DBUG_ENTER("Log_to_csv_event_handler::log_slow");
 
   thd->push_internal_handler(& error_handler);
@@ -601,45 +625,34 @@ bool Log_to_csv_event_handler::
 
   /* store the time and user values */
   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
-  ((Field_timestamp*) table->field[0])->store_timestamp((my_time_t)
-                                                        current_time);
+  ((Field_timestamp*) table->field[0])->store_TIME(
+             hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
   if (table->field[1]->store(user_host, user_host_len, client_cs))
     goto err;
 
-  if (query_start_arg)
-  {
-    longlong query_time= (longlong) (query_utime/1000000);
-    longlong lock_time=  (longlong) (lock_utime/1000000);
-    /*
-      A TIME field can not hold the full longlong range; query_time or
-      lock_time may be truncated without warning here, if greater than
-      839 hours (~35 days)
-    */
-    MYSQL_TIME t;
-    t.neg= 0;
+  /*
+    A TIME field can not hold the full longlong range; query_time or
+    lock_time may be truncated without warning here, if greater than
+    839 hours (~35 days)
+  */
+  MYSQL_TIME t;
+  t.neg= 0;
+
+  /* fill in query_time field */
+  calc_time_from_sec(&t, query_time, query_time_micro);
+  if (table->field[2]->store_time(&t))
+    goto err;
+  /* lock_time */
+  calc_time_from_sec(&t, lock_time, lock_time_micro);
+  if (table->field[3]->store_time(&t))
+    goto err;
+  /* rows_sent */
+  if (table->field[4]->store((longlong) thd->sent_row_count, TRUE))
+    goto err;
+  /* rows_examined */
+  if (table->field[5]->store((longlong) thd->examined_row_count, TRUE))
+    goto err;
 
-    /* fill in query_time field */
-    calc_time_from_sec(&t, (long) min(query_time, (longlong) TIME_MAX_VALUE_SECONDS), 0);
-    if (table->field[2]->store_time(&t, MYSQL_TIMESTAMP_TIME))
-      goto err;
-    /* lock_time */
-    calc_time_from_sec(&t, (long) min(lock_time, (longlong) TIME_MAX_VALUE_SECONDS), 0);
-    if (table->field[3]->store_time(&t, MYSQL_TIMESTAMP_TIME))
-      goto err;
-    /* rows_sent */
-    if (table->field[4]->store((longlong) thd->sent_row_count, TRUE))
-      goto err;
-    /* rows_examined */
-    if (table->field[5]->store((longlong) thd->examined_row_count, TRUE))
-      goto err;
-  }
-  else
-  {
-    table->field[2]->set_null();
-    table->field[3]->set_null();
-    table->field[4]->set_null();
-    table->field[5]->set_null();
-  }
   /* fill database field */
   if (thd->db)
   {
@@ -776,14 +789,14 @@ void Log_to_file_event_handler::init_pthread_objects()
 /** Wrapper around MYSQL_LOG::write() for slow log. */
 
 bool Log_to_file_event_handler::
-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
+  log_slow(THD *thd, my_hrtime_t current_time,
            const char *user_host, uint user_host_len,
            ulonglong query_utime, ulonglong lock_utime, bool is_command,
            const char *sql_text, uint sql_text_len)
 {
   Silence_log_table_errors error_handler;
   thd->push_internal_handler(&error_handler);
-  bool retval= mysql_slow_log.write(thd, current_time, query_start_arg,
+  bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
                                     user_host, user_host_len,
                                     query_utime, lock_utime, is_command,
                                     sql_text, sql_text_len);
@@ -798,7 +811,7 @@ bool Log_to_file_event_handler::
 */
 
 bool Log_to_file_event_handler::
-  log_general(THD *thd, time_t event_time, const char *user_host,
+  log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
               uint user_host_len, int thread_id,
               const char *command_type, uint command_type_len,
               const char *sql_text, uint sql_text_len,
@@ -806,7 +819,8 @@ bool Log_to_file_event_handler::
 {
   Silence_log_table_errors error_handler;
   thd->push_internal_handler(&error_handler);
-  bool retval= mysql_log.write(event_time, user_host, user_host_len,
+  bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
+                               user_host_len,
                                thread_id, command_type, command_type_len,
                                sql_text, sql_text_len);
   thd->pop_internal_handler();
@@ -1015,8 +1029,6 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
 
   if (*slow_log_handler_list)
   {
-    time_t current_time;
-
     /* do not log slow queries from replication threads */
     if (thd->slave_thread && !opt_log_slow_slave_statements)
       return 0;
@@ -1036,16 +1048,12 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
                              sctx->ip ? sctx->ip : "", "]", NullS) -
                     user_host_buff);
 
-    current_time= my_time_possible_from_micro(current_utime);
-    if (thd->start_utime)
-    {
-      query_utime= (current_utime - thd->start_utime);
-      lock_utime=  (thd->utime_after_lock - thd->start_utime);
-    }
-    else
-    {
-      query_utime= lock_utime= 0;
-    }
+    DBUG_ASSERT(thd->start_utime);
+    DBUG_ASSERT(thd->start_time);
+    query_utime= (current_utime - thd->start_utime);
+    lock_utime=  (thd->utime_after_lock - thd->start_utime);
+    my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
+                                thd->start_time_sec_part + query_utime };
 
     if (!query)
     {
@@ -1054,19 +1062,8 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
       query_length= command_name[thd->command].length;
     }
 
-    if (!query_length) 
-    {
-      /*
-        Not a real query; Reset counts for slow query logging
-        (QQ: Wonder if this is really needed)
-      */
-      thd->sent_row_count= thd->examined_row_count= 0;
-      thd->query_plan_flags= QPLAN_INIT;
-      thd->query_plan_fsort_passes= 0;
-    }
-
     for (current_handler= slow_log_handler_list; *current_handler ;)
-      error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
+      error= (*current_handler++)->log_slow(thd, current_time,
                                             user_host_buff, user_host_len,
                                             query_utime, lock_utime, is_command,
                                             query, query_length) || error;
@@ -1084,7 +1081,7 @@ bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
   char user_host_buff[MAX_USER_HOST_SIZE + 1];
   Security_context *sctx= thd->security_ctx;
   uint user_host_len= 0;
-  time_t current_time;
+  my_hrtime_t current_time;
 
   DBUG_ASSERT(thd);
 
@@ -1101,7 +1098,7 @@ bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
                           sctx->ip ? sctx->ip : "", "]", NullS) -
                                                           user_host_buff;
 
-  current_time= my_time(0);
+  current_time= my_hrtime();
   while (*current_handler)
     error|= (*current_handler++)->
       log_general(thd, current_time, user_host_buff,
@@ -1421,6 +1418,7 @@ int binlog_init(void *p)
   binlog_hton->commit= binlog_commit;
   binlog_hton->rollback= binlog_rollback;
   binlog_hton->prepare= binlog_prepare;
+  binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
   return 0;
 }
@@ -1437,91 +1435,118 @@ static int binlog_close_connection(handlerton *hton, THD *thd)
 }
 
 /*
-  End a transaction.
+  End a transaction, writing events to the binary log.
 
   SYNOPSIS
-    binlog_end_trans()
+    binlog_flush_trx_cache()
 
     thd      The thread whose transaction should be ended
     trx_data Pointer to the transaction data to use
-    end_ev   The end event to use, or NULL
     all      True if the entire transaction should be ended, false if
              only the statement transaction should be ended.
+    end_ev   The end event to use (COMMIT, ROLLBACK, or commit XID)
 
   DESCRIPTION
 
     End the currently open transaction. The transaction can be either
-    a real transaction (if 'all' is true) or a statement transaction
-    (if 'all' is false).
+    a real transaction or a statement transaction.
 
-    If 'end_ev' is NULL, the transaction is a rollback of only
-    transactional tables, so the transaction cache will be truncated
-    to either just before the last opened statement transaction (if
-    'all' is false), or reset completely (if 'all' is true).
+    This can be to commit a transaction, with a COMMIT query event or an XA
+    commit XID event. But it can also be to rollback a transaction with a
+    ROLLBACK query event, used for rolling back transactions which also
+    contain updates to non-transactional tables.
  */
 static int
-binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
-                 Log_event *end_ev, bool all)
+binlog_flush_trx_cache(THD *thd, binlog_trx_data *trx_data,
+                       Log_event *end_ev, bool all)
 {
-  DBUG_ENTER("binlog_end_trans");
-  int error=0;
+  DBUG_ENTER("binlog_flush_trx_cache");
   IO_CACHE *trans_log= &trx_data->trans_log;
-  DBUG_PRINT("enter", ("transaction: %s  end_ev: 0x%lx",
-                       all ? "all" : "stmt", (long) end_ev));
   DBUG_PRINT("info", ("thd->options={ %s%s}",
                       FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
                       FLAGSTR(thd->options, OPTION_BEGIN)));
 
+  if (thd->binlog_flush_pending_rows_event(TRUE))
+    DBUG_RETURN(1);
+
   /*
-    NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
-    only transactional tables.  If the transaction contain changes to
-    any non-transactiona tables, we need write the transaction and log
-    a ROLLBACK last.
-  */
-  if (end_ev != NULL)
-  {
-    if (thd->binlog_flush_pending_rows_event(TRUE))
-      DBUG_RETURN(1);
-    /*
-      Doing a commit or a rollback including non-transactional tables,
-      i.e., ending a transaction where we might write the transaction
-      cache to the binary log.
-
-      We can always end the statement when ending a transaction since
-      transactions are not allowed inside stored functions.  If they
-      were, we would have to ensure that we're not ending a statement
-      inside a stored function.
-     */
-    error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
-                               trx_data->has_incident());
-    trx_data->reset();
+    Doing a commit or a rollback including non-transactional tables,
+    i.e., ending a transaction where we might write the transaction
+    cache to the binary log.
+
+    We can always end the statement when ending a transaction since
+    transactions are not allowed inside stored functions.  If they
+    were, we would have to ensure that we're not ending a statement
+    inside a stored function.
+   */
+  int error= mysql_bin_log.write_transaction_to_binlog(thd, trx_data,
+                                                       end_ev, all);
 
-    statistic_increment(binlog_cache_use, &LOCK_status);
-    if (trans_log->disk_writes != 0)
-    {
-      statistic_increment(binlog_cache_disk_use, &LOCK_status);
-      trans_log->disk_writes= 0;
-    }
-  }
-  else
+  trx_data->reset();
+
+  statistic_increment(binlog_cache_use, &LOCK_status);
+  if (trans_log->disk_writes != 0)
   {
-    /*
-      If rolling back an entire transaction or a single statement not
-      inside a transaction, we reset the transaction cache.
+    statistic_increment(binlog_cache_disk_use, &LOCK_status);
+    trans_log->disk_writes= 0;
+  }
 
-      If rolling back a statement in a transaction, we truncate the
-      transaction cache to remove the statement.
-     */
-    thd->binlog_remove_pending_rows_event(TRUE);
-    if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
-    {
-      if (trx_data->has_incident())
-        error= mysql_bin_log.write_incident(thd, TRUE);
-      trx_data->reset();
-    }
-    else                                        // ...statement
-      trx_data->truncate(trx_data->before_stmt_pos);
+  DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
+  DBUG_RETURN(error);
+}
+
+/*
+  Discard a transaction, ie. ROLLBACK with only transactional table updates.
+
+  SYNOPSIS
+    binlog_truncate_trx_cache()
+
+    thd      The thread whose transaction should be ended
+    trx_data Pointer to the transaction data to use
+    all      True if the entire transaction should be ended, false if
+             only the statement transaction should be ended.
+
+  DESCRIPTION
+
+    Rollback (and end) a transaction that only modifies transactional
+    tables. The transaction can be either a real transaction (if 'all' is
+    true) or a statement transaction (if 'all' is false).
+
+    The transaction cache will be truncated to either just before the last
+    opened statement transaction (if 'all' is false), or reset completely (if
+    'all' is true).
+ */
+static int
+binlog_truncate_trx_cache(THD *thd, binlog_trx_data *trx_data, bool all)
+{
+  DBUG_ENTER("binlog_truncate_trx_cache");
+  int error= 0;
+  DBUG_PRINT("enter", ("transaction: %s", all ? "all" : "stmt"));
+  DBUG_PRINT("info", ("thd->options={ %s%s}",
+                      FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
+                      FLAGSTR(thd->options, OPTION_BEGIN)));
+
+  /*
+    ROLLBACK with nothing to replicate: i.e., rollback of only transactional
+    tables.
+  */
+
+  /*
+    If rolling back an entire transaction or a single statement not
+    inside a transaction, we reset the transaction cache.
+
+    If rolling back a statement in a transaction, we truncate the
+    transaction cache to remove the statement.
+   */
+  thd->binlog_remove_pending_rows_event(TRUE);
+  if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+  {
+    if (trx_data->has_incident())
+      error= mysql_bin_log.write_incident(thd);
+    trx_data->reset();
   }
+  else                                        // ...statement
+    trx_data->truncate(trx_data->before_stmt_pos);
 
   DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
   DBUG_RETURN(error);
@@ -1533,7 +1558,7 @@ static int binlog_prepare(handlerton *hton, THD *thd, bool all)
     do nothing.
     just pretend we can do 2pc, so that MySQL won't
     switch to 1pc.
-    real work will be done in MYSQL_BIN_LOG::log_xid()
+    real work will be done in MYSQL_BIN_LOG::log_and_order()
   */
   return 0;
 }
@@ -1584,8 +1609,8 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
       (trans_has_no_stmt_committed(thd, all) &&
        !stmt_has_updated_trans_table(thd) && stmt_has_updated_non_trans_table(thd)))
   {
-    Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
-    error= binlog_end_trans(thd, trx_data, &qev, all);
+    Query_log_event end_ev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+    error= binlog_flush_trx_cache(thd, trx_data, &end_ev, all);
   }
 
   trx_data->at_least_one_stmt_committed = my_b_tell(&trx_data->trans_log) > 0;
@@ -1649,7 +1674,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
         (thd->options & OPTION_KEEP_LOG)) &&
         mysql_bin_log.check_write_error(thd))
       trx_data->set_incident();
-    error= binlog_end_trans(thd, trx_data, 0, all);
+    error= binlog_truncate_trx_cache(thd, trx_data, all);
   }
   else
   {
@@ -1668,8 +1693,8 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
          stmt_has_updated_non_trans_table(thd) &&
          thd->current_stmt_binlog_row_based))
     {
-      Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
-      error= binlog_end_trans(thd, trx_data, &qev, all);
+      Query_log_event end_ev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
+      error= binlog_flush_trx_cache(thd, trx_data, &end_ev, all);
     }
     /*
       Otherwise, we simply truncate the cache as there is no change on
@@ -1677,7 +1702,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
     */
     else if (ending_trans(thd, all) ||
              (!(thd->options & OPTION_KEEP_LOG) && !stmt_has_updated_non_trans_table(thd)))
-      error= binlog_end_trans(thd, trx_data, 0, all);
+      error= binlog_truncate_trx_cache(thd, trx_data, all);
   }
   if (!all)
     trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt rollback
@@ -1773,7 +1798,7 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
       log_query.append(thd->lex->ident.str, thd->lex->ident.length) ||
       log_query.append("`"))
     DBUG_RETURN(1);
-  int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+  int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
   Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
                         TRUE, TRUE, errcode);
   DBUG_RETURN(mysql_bin_log.write(&qinfo));
@@ -1797,7 +1822,7 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
         log_query.append(thd->lex->ident.str, thd->lex->ident.length) ||
         log_query.append("`"))
       DBUG_RETURN(1);
-    int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+    int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
     Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
                           TRUE, TRUE, errcode);
     DBUG_RETURN(mysql_bin_log.write(&qinfo));
@@ -2319,7 +2344,6 @@ err:
 
     thd               THD of the query
     current_time      current timestamp
-    query_start_arg   command start timestamp
     user_host         the pointer to the string with user@host info
     user_host_len     length of the user_host string. this is computed once
                       and passed to all general log event handlers
@@ -2341,7 +2365,7 @@ err:
 */
 
 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
-                            time_t query_start_arg, const char *user_host,
+                            const char *user_host,
                             uint user_host_len, ulonglong query_utime,
                             ulonglong lock_utime, bool is_command,
                             const char *sql_text, uint sql_text_len)
@@ -2518,7 +2542,11 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
 MYSQL_BIN_LOG::MYSQL_BIN_LOG()
   :bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
    need_start_event(TRUE),
+   group_commit_queue(0), group_commit_queue_busy(FALSE),
+   num_commits(0), num_group_commits(0),
    is_relay_log(0),
+   checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
+   relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
    description_event_for_exec(0), description_event_for_queue(0)
 {
   /*
@@ -2574,6 +2602,7 @@ void MYSQL_BIN_LOG::init_pthread_objects()
   (void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index",
                                MYF_NO_DEADLOCK_DETECTION);
   (void) pthread_cond_init(&update_cond, 0);
+  (void) pthread_cond_init(&COND_queue_busy, 0);
 }
 
 
@@ -2753,10 +2782,23 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
         as we won't be able to reset it later
       */
       if (io_cache_type == WRITE_CACHE)
-        s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
+        s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
+      s.checksum_alg= is_relay_log ?
+        /* relay-log */
+        /* inherit master's A descriptor if one has been received */
+        (relay_log_checksum_alg= 
+         (relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
+         relay_log_checksum_alg :
+         /* otherwise use slave's local preference of RL events verification */
+         (opt_slave_sql_verify_checksum == 0) ?
+         (uint8) BINLOG_CHECKSUM_ALG_OFF : (uint8) binlog_checksum_options):
+        /* binlog */
+        (uint8) binlog_checksum_options;
+      DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
       if (!s.is_valid())
         goto err;
       s.dont_set_created= null_created_arg;
+      s.pre_55_writing_direct();
       if (s.write(&log_file))
         goto err;
       bytes_written+= s.data_written;
@@ -2788,6 +2830,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
       /* Don't set log_pos in event header */
       description_event_for_queue->set_artificial_event();
 
+      description_event_for_queue->pre_55_writing_direct();
       if (description_event_for_queue->write(&log_file))
         goto err;
       bytes_written+= description_event_for_queue->data_written;
@@ -2795,6 +2838,11 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
     if (flush_io_cache(&log_file) ||
         my_sync(log_file.file, MYF(MY_WME)))
       goto err;
+    pthread_mutex_lock(&LOCK_commit_ordered);
+    strmake(last_commit_pos_file, log_file_name,
+            sizeof(last_commit_pos_file)-1);
+    last_commit_pos_offset= my_b_tell(&log_file);
+    pthread_mutex_unlock(&LOCK_commit_ordered);
 
     if (write_file_name_to_index_file)
     {
@@ -3922,8 +3970,16 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
         We log the whole file name for log file as the user may decide
         to change base names at some point.
       */
-      Rotate_log_event r(new_name+dirname_length(new_name),
-                         0, LOG_EVENT_OFFSET, is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
+      Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
+                         is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
+      /* 
+         The current relay-log's closing Rotate event must have checksum
+         value computed with an algorithm of the last relay-logged FD event.
+      */
+      if (is_relay_log)
+        r.checksum_alg= relay_log_checksum_alg;
+      r.pre_55_writing_direct();
+      DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
       if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
          (error= r.write(&log_file)))
       {
@@ -3944,7 +4000,12 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
   old_name=name;
   name=0;				// Don't free name
   close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
-
+  if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    DBUG_ASSERT(!is_relay_log);
+    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
+    binlog_checksum_options= checksum_alg_reset;
+  }
   /*
      Note that at this point, log_state != LOG_CLOSED (important for is_open()).
   */
@@ -4023,6 +4084,7 @@ bool MYSQL_BIN_LOG::append(Log_event* ev)
     Log_event::write() is smart enough to use my_b_write() or
     my_b_append() depending on the kind of cache we have.
   */
+  ev->pre_55_writing_direct();
   if (ev->write(&log_file))
   {
     error=1;
@@ -4078,6 +4140,10 @@ bool MYSQL_BIN_LOG::flush_and_sync()
   {
     sync_binlog_counter= 0;
     err=my_sync(fd, MYF(MY_WME));
+#ifndef DBUG_OFF
+    if (opt_binlog_dbug_fsync_sleep > 0)
+      my_sleep(opt_binlog_dbug_fsync_sleep);
+#endif
   }
   return err;
 }
@@ -4269,12 +4335,33 @@ void THD::binlog_set_stmt_begin() {
   trx_data->before_stmt_pos= pos;
 }
 
+static int
+binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
+{
+  int err= 0;
+  binlog_trx_data *trx_data;
+  DBUG_ENTER("binlog_start_consistent_snapshot");
+
+  thd->binlog_setup_trx_data();
+  trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+  /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
+  strmake(trx_data->last_commit_pos_file, mysql_bin_log.last_commit_pos_file,
+          sizeof(trx_data->last_commit_pos_file)-1);
+  trx_data->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
+
+  trans_register_ha(thd, TRUE, hton);
+
+  DBUG_RETURN(err);
+}
 
 /*
-  Write a table map to the binary log.
+  Write a table map to the binary log. If with_annotate != NULL and
+  *with_annotate = TRUE write also Annotate_rows before the table map.
  */
 
-int THD::binlog_write_table_map(TABLE *table, bool is_trans)
+int THD::binlog_write_table_map(TABLE *table, bool is_trans,
+                                my_bool *with_annotate)
 {
   int error;
   DBUG_ENTER("THD::binlog_write_table_map");
@@ -4292,7 +4379,7 @@ int THD::binlog_write_table_map(TABLE *table, bool is_trans)
   if (is_trans && binlog_table_maps == 0)
     binlog_start_trans_and_stmt();
 
-  if ((error= mysql_bin_log.write(&the_event)))
+  if ((error= mysql_bin_log.write(&the_event, with_annotate)))
     DBUG_RETURN(error);
 
   binlog_table_maps++;
@@ -4376,44 +4463,49 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
 
   if (Rows_log_event* pending= trx_data->pending())
   {
-    IO_CACHE *file= &log_file;
-
     /*
       Decide if we should write to the log file directly or to the
       transaction log.
     */
     if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
-      file= &trx_data->trans_log;
-
-    /*
-      If we are not writing to the log file directly, we could avoid
-      locking the log.
-    */
-    pthread_mutex_lock(&LOCK_log);
-
-    /*
-      Write pending event to log file or transaction cache
-    */
-    if (pending->write(file))
     {
-      pthread_mutex_unlock(&LOCK_log);
-      set_write_error(thd);
-      DBUG_RETURN(1);
+      /* Write to transaction log/cache. */
+      if (pending->write(&trx_data->trans_log))
+      {
+        set_write_error(thd);
+        DBUG_RETURN(1);
+      }
     }
-
-    delete pending;
-
-    if (file == &log_file)
+    else
     {
+      /* Write directly to log file. */
+      pthread_mutex_lock(&LOCK_log);
+      pending->pre_55_writing_direct();
+      if (pending->write(&log_file))
+      {
+        pthread_mutex_unlock(&LOCK_log);
+        set_write_error(thd);
+        DBUG_RETURN(1);
+      }
+
       error= flush_and_sync();
       if (!error)
       {
         signal_update();
         error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
       }
+
+      /*
+        Take mutex to protect against a reader seeing partial writes of 64-bit
+        offset on 32-bit CPUs.
+      */
+      pthread_mutex_lock(&LOCK_commit_ordered);
+      last_commit_pos_offset= my_b_tell(&log_file);
+      pthread_mutex_unlock(&LOCK_commit_ordered);
+      pthread_mutex_unlock(&LOCK_log);
     }
 
-    pthread_mutex_unlock(&LOCK_log);
+    delete pending;
   }
 
   thd->binlog_set_pending_rows_event(event);
@@ -4422,13 +4514,16 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
 }
 
 /**
-  Write an event to the binary log.
+  Write an event to the binary log. If with_annotate != NULL and
+  *with_annotate = TRUE write also Annotate_rows before the event
+  (this should happen only if the event is a Table_map).
 */
 
-bool MYSQL_BIN_LOG::write(Log_event *event_info)
+bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
 {
   THD *thd= event_info->thd;
   bool error= 1;
+  uint16 cache_type;
   DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
 
   if (thd->binlog_evt_union.do_union)
@@ -4443,11 +4538,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
   }
 
   /*
-    Flush the pending rows event to the transaction cache or to the
-    log file.  Since this function potentially aquire the LOCK_log
-    mutex, we do this before aquiring the LOCK_log mutex in this
-    function.
-
     We only end the statement if we are in a top-level statement.  If
     we are inside a stored function, we do not end the statement since
     this will close all tables on the slave.
@@ -4457,8 +4547,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
   if (thd->binlog_flush_pending_rows_event(end_stmt))
     DBUG_RETURN(error);
 
-  pthread_mutex_lock(&LOCK_log);
-
   /*
      In most cases this is only called if 'is_open()' is true; in fact this is
      mostly called if is_open() *was* true a few instructions before, but it
@@ -4480,7 +4568,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
          thd->lex->sql_command != SQLCOM_SAVEPOINT &&
          !binlog_filter->db_ok(local_db)))
     {
-      VOID(pthread_mutex_unlock(&LOCK_log));
       DBUG_RETURN(0);
     }
 #endif /* HAVE_REPLICATION */
@@ -4524,15 +4611,26 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
         thd->binlog_start_trans_and_stmt();
         file= trans_log;
       }
-      /*
-        TODO as Mats suggested, for all the cases above where we write to
-        trans_log, it sounds unnecessary to lock LOCK_log. We should rather
-        test first if we want to write to trans_log, and if not, lock
-        LOCK_log.
-      */
     }
 #endif /* USING_TRANSACTIONS */
     DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
+    if (file == &log_file)
+    {
+      pthread_mutex_lock(&LOCK_log);
+      /*
+        We did not want to take LOCK_log unless really necessary.
+        However, now that we hold LOCK_log, we must check is_open() again, lest
+        the log was closed just before.
+      */
+      if (unlikely(!is_open()))
+      {
+        pthread_mutex_unlock(&LOCK_log);
+        DBUG_RETURN(error);
+      }
+      event_info->pre_55_writing_direct();
+    }
+
+    cache_type= event_info->cache_type;
 
     /*
       No check for auto events flag here - this write method should
@@ -4544,6 +4642,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
       of the SQL command
     */
 
+    if (with_annotate && *with_annotate)
+    {
+      DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
+      Annotate_rows_log_event anno(thd, cache_type);
+      /* Annotate event should be written not more than once */
+      *with_annotate= 0;
+      if (anno.write(file))
+        goto err;
+    }
+
     /*
       If row-based binlogging, Insert_id, Rand and other kind of "setting
       context" events are not needed.
@@ -4551,12 +4659,14 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
     {
       if (!thd->current_stmt_binlog_row_based)
       {
+
         if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
         {
           Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
-                             thd->first_successful_insert_id_in_prev_stmt_for_binlog);
+                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
+                             cache_type);
           if (e.write(file))
-            goto err;
+            goto err_unlock;
         }
         if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
         {
@@ -4565,15 +4675,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
                              nb_elements()));
           Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
                              thd->auto_inc_intervals_in_cur_stmt_for_binlog.
-                             minimum());
+                             minimum(), cache_type);
           if (e.write(file))
-            goto err;
+            goto err_unlock;
         }
         if (thd->rand_used)
         {
-          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
+          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
+                           cache_type);
           if (e.write(file))
-            goto err;
+            goto err_unlock;
         }
         if (thd->user_var_events.elements)
         {
@@ -4586,9 +4697,10 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
                                  user_var_event->value,
                                  user_var_event->length,
                                  user_var_event->type,
-                                 user_var_event->charset_number);
+                                 user_var_event->charset_number,
+                                 cache_type);
             if (e.write(file))
-              goto err;
+              goto err_unlock;
           }
         }
       }
@@ -4597,7 +4709,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
     /* Write the SQL command */
     if (event_info->write(file) || 
         DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
-      goto err;
+      goto err_unlock;
 
     if (file == &log_file) // we are writing to the real log (disk)
     {
@@ -4605,20 +4717,33 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
       status_var_add(thd->status_var.binlog_bytes_written, data_written);
 
       if (flush_and_sync())
-	goto err;
+	goto err_unlock;
       signal_update();
       if ((error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED)))
-        goto err;
+        goto err_unlock;
       
     }
     error=0;
 
+err_unlock:
+    if (file == &log_file)
+    {
+      my_off_t offset= my_b_tell(&log_file);
+      /*
+        Take mutex to protect against a reader seeing partial writes of 64-bit
+        offset on 32-bit CPUs.
+      */
+      pthread_mutex_lock(&LOCK_commit_ordered);
+      last_commit_pos_offset= offset;
+      pthread_mutex_unlock(&LOCK_commit_ordered);
+      pthread_mutex_unlock(&LOCK_log);
+    }
+
 err:
     if (error)
       set_write_error(thd);
   }
 
-  pthread_mutex_unlock(&LOCK_log);
   DBUG_RETURN(error);
 }
 
@@ -4723,12 +4848,14 @@ int MYSQL_BIN_LOG::rotate_and_purge(uint flags)
          We give it a shot and try to write an incident event anyway
          to the current log. 
       */
-      if (!write_incident(current_thd, FALSE))
+      if (!write_incident_already_locked(current_thd))
         flush_and_sync();
 
 #ifdef HAVE_REPLICATION
     check_purge= true;
 #endif
+    if (flags & RP_BINLOG_CHECKSUM_ALG_CHANGE)
+      checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF; // done
   }
   if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
     pthread_mutex_unlock(&LOCK_log);
@@ -4757,6 +4884,33 @@ uint MYSQL_BIN_LOG::next_file_id()
 }
 
 
+/**
+  Calculate checksum of possibly a part of an event containing at least
+  the whole common header.
+
+  @param    buf       the pointer to trans cache's buffer
+  @param    off       the offset of the beginning of the event in the buffer
+  @param    event_len no-checksum length of the event
+  @param    length    the current size of the buffer
+
+  @param    crc       [in-out] the checksum
+
+  Event size in incremented by @c BINLOG_CHECKSUM_LEN.
+
+  @return 0 or number of unprocessed yet bytes of the event excluding 
+            the checksum part.
+*/
+  static ulong fix_log_event_crc(uchar *buf, uint off, uint event_len,
+                                 uint length, ha_checksum *crc)
+{
+  ulong ret;
+  uchar *event_begin= buf + off;
+
+  ret= length >= off + event_len ? 0 : off + event_len - length;
+  *crc= my_checksum(*crc, event_begin, event_len - ret); 
+  return ret;
+}
+
 /*
   Write the contents of a cache to the binary log.
 
@@ -4764,24 +4918,33 @@ uint MYSQL_BIN_LOG::next_file_id()
     write_cache()
     thd      Current_thread
     cache    Cache to write to the binary log
-    lock_log True if the LOCK_log mutex should be aquired, false otherwise
-    sync_log True if the log should be flushed and sync:ed
 
   DESCRIPTION
     Write the contents of the cache to the binary log. The cache will
     be reset as a READ_CACHE to be able to read the contents from it.
- */
 
-int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
-                               bool sync_log)
-{
-  Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+    Reading from the trans cache with possible (per @c binlog_checksum_options) 
+    adding checksum value  and then fixing the length and the end_log_pos of 
+    events prior to fill in the binlog cache.
+*/
 
+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
+{
+  safe_mutex_assert_owner(&LOCK_log);
   if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
     return ER_ERROR_ON_WRITE;
   uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
+  ulong remains= 0; // part of unprocessed yet netto length of the event
   long val;
+  ulong end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN 2 t
   uchar header[LOG_EVENT_HEADER_LEN];
+  ha_checksum crc= 0, crc_0= 0; // assignments to keep compiler happy
+  my_bool do_checksum= (binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF);
+  uchar buf[BINLOG_CHECKSUM_LEN];
+
+  // while there is just one alg the following must hold:
+  DBUG_ASSERT(!do_checksum ||
+              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);
 
   /*
     The events in the buffer have incorrect end_log_pos data
@@ -4799,6 +4962,8 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
 
   group= (uint)my_b_tell(&log_file);
   hdr_offs= carry= 0;
+  if (do_checksum)
+    crc= crc_0= my_checksum(0L, NULL, 0);
 
   do
   {
@@ -4812,12 +4977,21 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
       DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
 
       /* assemble both halves */
-      memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
+      memcpy(&header[carry], (char *)cache->read_pos,
+             LOG_EVENT_HEADER_LEN - carry);
 
       /* fix end_log_pos */
-      val= uint4korr(&header[LOG_POS_OFFSET]) + group;
+      val= uint4korr(&header[LOG_POS_OFFSET]) + group +
+        (end_log_pos_inc+= (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
       int4store(&header[LOG_POS_OFFSET], val);
 
+      if (do_checksum)
+      {
+        ulong len= uint4korr(&header[EVENT_LEN_OFFSET]);
+        /* fix len */
+        int4store(&header[EVENT_LEN_OFFSET], len + BINLOG_CHECKSUM_LEN);
+      }
+
       /* write the first half of the split header */
       if (my_b_write(&log_file, header, carry))
         return ER_ERROR_ON_WRITE;
@@ -4827,11 +5001,20 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
         copy fixed second half of header to cache so the correct
         version will be written later.
       */
-      memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
+      memcpy((char *)cache->read_pos, &header[carry],
+             LOG_EVENT_HEADER_LEN - carry);
 
       /* next event header at ... */
-      hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
+      hdr_offs= uint4korr(&header[EVENT_LEN_OFFSET]) - carry -
+        (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
 
+      if (do_checksum)
+      {
+        DBUG_ASSERT(crc == crc_0 && remains == 0);
+        crc= my_checksum(crc, header, carry);
+        remains= uint4korr(header + EVENT_LEN_OFFSET) - carry -
+          BINLOG_CHECKSUM_LEN;
+      }
       carry= 0;
     }
 
@@ -4846,6 +5029,25 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
         very next iteration, just "eventually").
       */
 
+      /* crc-calc the whole buffer */
+      if (do_checksum && hdr_offs >= length)
+      {
+
+        DBUG_ASSERT(remains != 0 && crc != crc_0);
+
+        crc= my_checksum(crc, cache->read_pos, length); 
+        remains -= length;
+        if (my_b_write(&log_file, cache->read_pos, length))
+          return ER_ERROR_ON_WRITE;
+        if (remains == 0)
+        {
+          int4store(buf, crc);
+          if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+            return ER_ERROR_ON_WRITE;
+          crc= crc_0;
+        }
+      }
+
       while (hdr_offs < length)
       {
         /*
@@ -4853,6 +5055,26 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
           we get the rest.
         */
 
+        if (do_checksum)
+        {
+          if (remains != 0)
+          {
+            /*
+              finish off with remains of the last event that crawls
+              from previous into the current buffer
+            */
+            DBUG_ASSERT(crc != crc_0);
+            crc= my_checksum(crc, cache->read_pos, hdr_offs);
+            int4store(buf, crc);
+            remains -= hdr_offs;
+            DBUG_ASSERT(remains == 0);
+            if (my_b_write(&log_file, cache->read_pos, hdr_offs) ||
+                my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+              return ER_ERROR_ON_WRITE;
+            crc= crc_0;
+          }
+        }
+
         if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
         {
           carry= length - hdr_offs;
@@ -4862,17 +5084,38 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
         else
         {
           /* we've got a full event-header, and it came in one piece */
-
-          uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
+          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
+          uint event_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
+          uchar *log_pos= ev + LOG_POS_OFFSET;
 
           /* fix end_log_pos */
-          val= uint4korr(log_pos) + group;
+          val= uint4korr(log_pos) + group +
+            (end_log_pos_inc += (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
           int4store(log_pos, val);
 
+	  /* fix CRC */
+	  if (do_checksum)
+          {
+            /* fix length */
+            int4store(ev + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
+            remains= fix_log_event_crc(cache->read_pos, hdr_offs, event_len,
+                                       length, &crc);
+            if (my_b_write(&log_file, ev, 
+                           remains == 0 ? event_len : length - hdr_offs))
+              return ER_ERROR_ON_WRITE;
+            if (remains == 0)
+            {
+              int4store(buf, crc);
+              if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
+                return ER_ERROR_ON_WRITE;
+              crc= crc_0; // crc is complete
+            }
+          }
+
           /* next event header at ... */
-          log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
-          hdr_offs += uint4korr(log_pos);
+          hdr_offs += event_len; // incr by the netto len
 
+          DBUG_ASSERT(!do_checksum || remains == 0 || hdr_offs >= length);
         }
       }
 
@@ -4888,17 +5131,19 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
     }
 
     /* Write data to the binary log file */
-    if (my_b_write(&log_file, cache->read_pos, length))
-      return ER_ERROR_ON_WRITE;
+    DBUG_EXECUTE_IF("fail_binlog_write_1",
+                    errno= 28; return ER_ERROR_ON_WRITE;);
+    if (!do_checksum)
+      if (my_b_write(&log_file, cache->read_pos, length))
+        return ER_ERROR_ON_WRITE;
     status_var_add(thd->status_var.binlog_bytes_written, length);
 
     cache->read_pos=cache->read_end;		// Mark buffer used up
   } while ((length= my_b_fill(cache)));
 
   DBUG_ASSERT(carry == 0);
-
-  if (sync_log)
-    flush_and_sync();
+  DBUG_ASSERT(!do_checksum || remains == 0);
+  DBUG_ASSERT(!do_checksum || crc == crc_0);
 
   return 0;                                     // All OK
 }
@@ -4910,7 +5155,7 @@ int query_error_code(THD *thd, bool not_killed)
 {
   int error;
   
-  if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
+  if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
   {
     error= thd->is_error() ? thd->main_da.sql_errno() : 0;
 
@@ -4920,7 +5165,7 @@ int query_error_code(THD *thd, bool not_killed)
        caller.
     */
     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
-        error == ER_NEW_ABORTING_CONNECTION)
+        error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
       error= 0;
   }
   else
@@ -4933,31 +5178,51 @@ int query_error_code(THD *thd, bool not_killed)
   return error;
 }
 
-bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+
+bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
 {
   uint error= 0;
-  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
-
-  if (!is_open())
-    DBUG_RETURN(error);
-
-  LEX_STRING const write_error_msg=
-    { C_STRING_WITH_LEN("error writing to the binary log") };
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
   Incident incident= INCIDENT_LOST_EVENTS;
   Incident_log_event ev(thd, incident, write_error_msg);
-  if (lock)
-    pthread_mutex_lock(&LOCK_log);
-  error= ev.write(&log_file);
-  status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
-  if (lock)
+
+  if (likely(is_open()))
   {
-    if (!error && !(error= flush_and_sync()))
+    ev.pre_55_writing_direct();
+    error= ev.write(&log_file);
+    status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
+  }
+
+  DBUG_RETURN(error);
+}
+
+
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
+{
+  uint error= 0;
+  my_off_t offset;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+
+  pthread_mutex_lock(&LOCK_log);
+  if (likely(is_open()))
+  {
+    if (!(error= write_incident_already_locked(thd)) &&
+        !(error= flush_and_sync()))
     {
       signal_update();
       error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
     }
-    pthread_mutex_unlock(&LOCK_log);
+    offset= my_b_tell(&log_file);
+    /*
+      Take mutex to protect against a reader seeing partial writes of 64-bit
+      offset on 32-bit CPUs.
+    */
+    pthread_mutex_lock(&LOCK_commit_ordered);
+    last_commit_pos_offset= offset;
+    pthread_mutex_unlock(&LOCK_commit_ordered);
   }
+  pthread_mutex_unlock(&LOCK_log);
+
   DBUG_RETURN(error);
 }
 
@@ -4985,111 +5250,373 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
     'cache' needs to be reinitialized after this functions returns.
 */
 
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
-                          bool incident)
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+                                           Log_event *end_ev, bool all)
 {
-  DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
+  group_commit_entry entry;
+  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+
+  entry.thd= thd;
+  entry.trx_data= trx_data;
+  entry.error= 0;
+  entry.all= all;
+
+  /*
+    Log "BEGIN" at the beginning of every transaction.  Here, a transaction is
+    either a BEGIN..COMMIT block or a single statement in autocommit mode.
+
+    Create the necessary events here, where we have the correct THD (and
+    thread context).
+
+    Due to group commit the actual writing to binlog may happen in a different
+    thread.
+  */
+  Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+  entry.begin_event= &qinfo;
+  entry.end_event= end_ev;
+  if (trx_data->has_incident())
+  {
+    Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+    entry.incident_event= &inc_ev;
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+  else
+  {
+    entry.incident_event= NULL;
+    DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+  }
+}
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
+{
+  /*
+    To facilitate group commit for the binlog, we first queue up ourselves in
+    the group commit queue. Then the first thread to enter the queue waits for
+    the LOCK_log mutex, and commits for everyone in the queue once it gets the
+    lock. Any other threads in the queue just wait for the first one to finish
+    the commit and wake them up.
+  */
+
+  entry->thd->clear_wakeup_ready();
+  pthread_mutex_lock(&LOCK_prepare_ordered);
+  group_commit_entry *orig_queue= group_commit_queue;
+  entry->next= orig_queue;
+  group_commit_queue= entry;
+
+  if (entry->trx_data->using_xa)
+  {
+    DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
+    run_prepare_ordered(entry->thd, entry->all);
+    DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
+  }
+  pthread_mutex_unlock(&LOCK_prepare_ordered);
+  DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered");
+
+  /*
+    The first in the queue handle group commit for all; the others just wait
+    to be signalled when group commit is done.
+  */
+  if (orig_queue != NULL)
+    entry->thd->wait_for_wakeup_ready();
+  else
+    trx_group_commit_leader(entry);
+
+  if (!opt_optimize_thread_scheduling)
+  {
+    /* For the leader, trx_group_commit_leader() already took the lock. */
+    if (orig_queue != NULL)
+      pthread_mutex_lock(&LOCK_commit_ordered);
+
+    DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
+    ++num_commits;
+    if (entry->trx_data->using_xa && !entry->error)
+      run_commit_ordered(entry->thd, entry->all);
+
+    group_commit_entry *next= entry->next;
+    if (!next)
+    {
+      group_commit_queue_busy= FALSE;
+      pthread_cond_signal(&COND_queue_busy);
+      DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
+    }
+    pthread_mutex_unlock(&LOCK_commit_ordered);
+
+    if (next)
+    {
+      next->thd->signal_wakeup_ready();
+    }
+  }
+
+  if (likely(!entry->error))
+    return 0;
+
+  switch (entry->error)
+  {
+  case ER_ERROR_ON_WRITE:
+    my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
+    break;
+  case ER_ERROR_ON_READ:
+    my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
+             entry->trx_data->trans_log.file_name, entry->commit_errno);
+    break;
+  default:
+    /*
+      There are not (and should not be) any errors thrown not covered above.
+      But just in case one is added later without updating the above switch
+      statement, include a catch-all.
+    */
+    my_printf_error(entry->error,
+                    "Error writing transaction to binary log: %d",
+                    MYF(ME_NOREFRESH), entry->error);
+  }
+
+  /*
+    Since we return error, this transaction XID will not be committed, so
+    we need to mark it as not needed for recovery (unlog() is not called
+    for a transaction if log_xid() fails).
+  */
+  if (entry->trx_data->using_xa && entry->trx_data->xa_xid)
+    mark_xid_done();
+
+  return 1;
+}
+
+/*
+  Do binlog group commit as the lead thread.
+
+  This must be called when this thread/transaction is queued at the start of
+  the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
+  commit all the transactions in the queue (more may have entered while waiting
+  for LOCK_log). After commit is done, all other threads in the queue will be
+  signalled.
+
+ */
+void
+MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
+{
+  uint xid_count= 0;
+  uint write_count= 0;
+  my_off_t commit_offset;
+  group_commit_entry *current;
+  group_commit_entry *last_in_queue;
+  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+  LINT_INIT(commit_offset);
+
+  /*
+    Lock the LOCK_log(), and once we get it, collect any additional writes
+    that queued up while we were waiting.
+  */
   VOID(pthread_mutex_lock(&LOCK_log));
+  DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
 
-  /* NULL would represent nothing to replicate after ROLLBACK */
-  DBUG_ASSERT(commit_event != NULL);
+  pthread_mutex_lock(&LOCK_prepare_ordered);
+  current= group_commit_queue;
+  group_commit_queue= NULL;
+  pthread_mutex_unlock(&LOCK_prepare_ordered);
 
+  /* As the queue is in reverse order of entering, reverse it. */
+  group_commit_entry *queue= NULL;
+  last_in_queue= current;
+  while (current)
+  {
+    group_commit_entry *next= current->next;
+    current->next= queue;
+    queue= current;
+    current= next;
+  }
+  DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
+
+  /* Now we have in queue the list of transactions to be committed in order. */
   DBUG_ASSERT(is_open());
   if (likely(is_open()))                       // Should always be true
   {
     /*
-      We only bother to write to the binary log if there is anything
-      to write.
-     */
-    if (my_b_tell(cache) > 0)
+      Commit every transaction in the queue.
+
+      Note that we are doing this in a different thread than the one running
+      the transaction! So we are limited in the operations we can do. In
+      particular, we cannot call my_error() on behalf of a transaction, as
+      that obtains the THD from thread local storage. Instead, we must set
+      current->error and let the thread do the error reporting itself once
+      we wake it up.
+    */
+    for (current= queue; current != NULL; current= current->next)
     {
-      /*
-        Log "BEGIN" at the beginning of every transaction.  Here, a
-        transaction is either a BEGIN..COMMIT block or a single
-        statement in autocommit mode.
-      */
-      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+      binlog_trx_data *trx_data= current->trx_data;
+      IO_CACHE *cache= &trx_data->trans_log;
 
       /*
-        Now this Query_log_event has artificial log_pos 0. It must be
-        adjusted to reflect the real position in the log. Not doing it
-        would confuse the slave: it would prevent this one from
-        knowing where he is in the master's binlog, which would result
-        in wrong positions being shown to the user, MASTER_POS_WAIT
-        undue waiting etc.
+        We only bother to write to the binary log if there is anything
+        to write.
       */
-      if (qinfo.write(&log_file))
-        goto err;
-      status_var_add(thd->status_var.binlog_bytes_written, qinfo.data_written);
-
-      DBUG_EXECUTE_IF("crash_before_writing_xid",
-                      {
-                        if ((write_error= write_cache(thd, cache, FALSE,
-                                                      TRUE)))
-                          DBUG_PRINT("info", ("error writing binlog cache: %d",
-                                               write_error));
-                        DBUG_PRINT("info", ("crashing before writing xid"));
-                        DBUG_SUICIDE();
-                      });
-
-      if ((write_error= write_cache(thd, cache, FALSE, FALSE)))
-        goto err;
-
-      if (commit_event)
+      if (my_b_tell(cache) > 0)
       {
-        if (commit_event->write(&log_file))
-          goto err;
-        status_var_add(thd->status_var.binlog_bytes_written,
-                       commit_event->data_written);
+        if ((current->error= write_transaction(current)))
+          current->commit_errno= errno;
+
+        write_count++;
       }
 
-      if (incident && write_incident(thd, FALSE))
-        goto err;
+      strmake(trx_data->last_commit_pos_file, log_file_name,
+              sizeof(trx_data->last_commit_pos_file)-1);
+      commit_offset= my_b_write_tell(&log_file);
+      trx_data->last_commit_pos_offset= commit_offset;
+      if (trx_data->using_xa && trx_data->xa_xid)
+        xid_count++;
+    }
 
+    if (write_count > 0)
+    {
       if (flush_and_sync())
-        goto err;
-      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
-      if (cache->error)				// Error on read
       {
-        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
-        write_error=1;				// Don't give more errors
-        goto err;
+        for (current= queue; current != NULL; current= current->next)
+        {
+          if (!current->error)
+          {
+            current->error= ER_ERROR_ON_WRITE;
+            current->commit_errno= errno;
+          }
+        }
+      }
+      else
+      {
+        signal_update();
       }
-      signal_update();
     }
 
     /*
-      if commit_event is Xid_log_event, increase the number of
-      prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
+      if any commit_events are Xid_log_event, increase the number of
+      prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
       if there're prepared xids in it - see the comment in new_file() for
       an explanation.
-      If the commit_event is not Xid_log_event (then it's a Query_log_event)
-      rotate binlog, if necessary.
+      If no Xid_log_events (then it's all Query_log_event) rotate binlog,
+      if necessary.
     */
-    if (commit_event && commit_event->get_type_code() == XID_EVENT)
+    if (xid_count > 0)
     {
-      pthread_mutex_lock(&LOCK_prep_xids);
-      prepared_xids++;
-      pthread_mutex_unlock(&LOCK_prep_xids);
+      mark_xids_active(xid_count);
     }
     else
+    {
       if (rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED))
-        goto err;
+      {
+        /*
+          If we fail to rotate, which thread should get the error?
+          We give the error to the *last* transaction thread; that seems to
+          make the most sense, as it was the last to write to the log.
+        */
+        last_in_queue->error= ER_ERROR_ON_WRITE;
+        last_in_queue->commit_errno= errno;
+      }
+    }
   }
-  VOID(pthread_mutex_unlock(&LOCK_log));
 
-  DBUG_RETURN(0);
+  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
+  pthread_mutex_lock(&LOCK_commit_ordered);
+  last_commit_pos_offset= commit_offset;
+  /*
+    We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
+    otherwise scheduling could allow the next group commit to run ahead of us,
+    messing up the order of commit_ordered() calls. But as soon as
+    LOCK_commit_ordered is obtained, we can let the next group commit start.
+  */
+  pthread_mutex_unlock(&LOCK_log);
+  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
+  ++num_group_commits;
 
-err:
-  if (!write_error)
+  if (!opt_optimize_thread_scheduling)
   {
-    write_error= 1;
-    sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+    /*
+      If we want to run commit_ordered() each in the transaction's own thread
+      context, then we need to mark the queue reserved; we need to finish all
+      threads in one group commit before the next group commit can be allowed
+      to proceed, and we cannot unlock a simple pthreads mutex in a different
+      thread from the one that locked it.
+    */
+
+    while (group_commit_queue_busy)
+      pthread_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
+    group_commit_queue_busy= TRUE;
+
+    /* Note that we return with LOCK_commit_ordered locked! */
+    DBUG_VOID_RETURN;
   }
-  VOID(pthread_mutex_unlock(&LOCK_log));
-  DBUG_RETURN(1);
+
+  /*
+    Wakeup each participant waiting for our group commit, first calling the
+    commit_ordered() methods for any transactions doing 2-phase commit.
+  */
+  current= queue;
+  while (current != NULL)
+  {
+    group_commit_entry *next;
+
+    DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
+    ++num_commits;
+    if (current->trx_data->using_xa && !current->error)
+      run_commit_ordered(current->thd, current->all);
+
+    /*
+      Careful not to access current->next after waking up the other thread! As
+      it may change immediately after wakeup.
+    */
+    next= current->next;
+    if (current != leader)                      // Don't wake up ourself
+      current->thd->signal_wakeup_ready();
+    current= next;
+  }
+  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
+  pthread_mutex_unlock(&LOCK_commit_ordered);
+
+  DBUG_VOID_RETURN;
 }
 
+int
+MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry)
+{
+  binlog_trx_data *trx_data= entry->trx_data;
+  IO_CACHE *cache= &trx_data->trans_log;
+
+  entry->begin_event->pre_55_writing_direct();
+  if (entry->begin_event->write(&log_file))
+    return ER_ERROR_ON_WRITE;
+  status_var_add(entry->thd->status_var.binlog_bytes_written,
+                 entry->begin_event->data_written);
+
+  DBUG_EXECUTE_IF("crash_before_writing_xid",
+                  {
+                    if ((write_cache(entry->thd, cache)))
+                      DBUG_PRINT("info", ("error writing binlog cache"));
+                    else
+                      flush_and_sync();
+
+                    DBUG_PRINT("info", ("crashing before writing xid"));
+                    DBUG_SUICIDE();
+                  });
+
+  if (write_cache(entry->thd, cache))
+    return ER_ERROR_ON_WRITE;
+
+  entry->end_event->pre_55_writing_direct();
+  if (entry->end_event->write(&log_file))
+    return ER_ERROR_ON_WRITE;
+  status_var_add(entry->thd->status_var.binlog_bytes_written,
+                 entry->end_event->data_written);
+
+  if (entry->incident_event)
+  {
+    entry->incident_event->pre_55_writing_direct();
+    if (entry->incident_event->write(&log_file))
+      return ER_ERROR_ON_WRITE;
+  }
+
+  if (cache->error)				// Error on read
+    return ER_ERROR_ON_READ;
+
+  return 0;
+}
 
 /**
   Wait until we get a signal that the binary log has been updated.
@@ -5147,6 +5674,12 @@ void MYSQL_BIN_LOG::close(uint exiting)
 	(exiting & LOG_CLOSE_STOP_EVENT))
     {
       Stop_log_event s;
+      // the checksumming rule for relay-log case is similar to Rotate
+        s.checksum_alg= is_relay_log ?
+          (uint8) relay_log_checksum_alg : (uint8) binlog_checksum_options;
+      DBUG_ASSERT(!is_relay_log ||
+                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+      s.pre_55_writing_direct();
       s.write(&log_file);
       bytes_written+= s.data_written;
       signal_update();
@@ -5482,6 +6015,171 @@ void sql_print_information(const char *format, ...)
 }
 
 
+void
+TC_init()
+{
+  my_pthread_mutex_init(&LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW,
+                        "LOCK_prepare_ordered", MYF(0));
+  my_pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_SLOW,
+                        "LOCK_commit_ordered", MYF(0));
+  mutexes_inited= TRUE;
+}
+
+
+void
+TC_destroy()
+{
+  if (mutexes_inited)
+  {
+    pthread_mutex_destroy(&LOCK_prepare_ordered);
+    pthread_mutex_destroy(&LOCK_commit_ordered);
+    mutexes_inited= FALSE;
+  }
+}
+
+
+void
+TC_LOG::run_prepare_ordered(THD *thd, bool all)
+{
+  Ha_trx_info *ha_info=
+    all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+
+  safe_mutex_assert_owner(&LOCK_prepare_ordered);
+  for (; ha_info; ha_info= ha_info->next())
+  {
+    handlerton *ht= ha_info->ht();
+    if (!ht->prepare_ordered)
+      continue;
+    ht->prepare_ordered(ht, thd, all);
+  }
+}
+
+
+void
+TC_LOG::run_commit_ordered(THD *thd, bool all)
+{
+  Ha_trx_info *ha_info=
+    all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+
+  safe_mutex_assert_owner(&LOCK_commit_ordered);
+  for (; ha_info; ha_info= ha_info->next())
+  {
+    handlerton *ht= ha_info->ht();
+    if (!ht->commit_ordered)
+      continue;
+    ht->commit_ordered(ht, thd, all);
+    DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
+  }
+}
+
+
+int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
+                               bool need_prepare_ordered,
+                               bool need_commit_ordered)
+{
+  int cookie;
+  struct commit_entry entry;
+  bool is_group_commit_leader;
+  LINT_INIT(is_group_commit_leader);
+
+  if (need_prepare_ordered)
+  {
+    pthread_mutex_lock(&LOCK_prepare_ordered);
+    run_prepare_ordered(thd, all);
+    if (need_commit_ordered)
+    {
+      /*
+        Must put us in queue so we can run_commit_ordered() in same sequence
+        as we did run_prepare_ordered().
+      */
+      thd->clear_wakeup_ready();
+      entry.thd= thd;
+      commit_entry *previous_queue= commit_ordered_queue;
+      entry.next= previous_queue;
+      commit_ordered_queue= &entry;
+      is_group_commit_leader= (previous_queue == NULL);
+    }
+    pthread_mutex_unlock(&LOCK_prepare_ordered);
+  }
+
+  cookie= 0;
+  if (xid)
+    cookie= log_one_transaction(xid);
+
+  if (need_commit_ordered)
+  {
+    if (need_prepare_ordered)
+    {
+      /*
+        We did the run_prepare_ordered() serialised, then ran the log_xid() in
+        parallel. Now we have to do run_commit_ordered() serialised in the
+        same sequence as run_prepare_ordered().
+
+        We do this starting from the head of the queue, each thread doing
+        run_commit_ordered() and signalling the next in queue.
+      */
+      if (is_group_commit_leader)
+      {
+        /* The first in queue starts the ball rolling. */
+        pthread_mutex_lock(&LOCK_prepare_ordered);
+        while (commit_ordered_queue_busy)
+          pthread_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
+        commit_entry *queue= commit_ordered_queue;
+        commit_ordered_queue= NULL;
+        /*
+          Mark the queue busy while we bounce it from one thread to the
+          next.
+        */
+        commit_ordered_queue_busy= true;
+        pthread_mutex_unlock(&LOCK_prepare_ordered);
+
+        /* Reverse the queue list so we get correct order. */
+        commit_entry *prev= NULL;
+        while (queue)
+        {
+          commit_entry *next= queue->next;
+          queue->next= prev;
+          prev= queue;
+          queue= next;
+        }
+        DBUG_ASSERT(prev == &entry && prev->thd == thd);
+      }
+      else
+      {
+        /* Not first in queue; just wait until previous thread wakes us up. */
+        thd->wait_for_wakeup_ready();
+      }
+    }
+
+    /* Only run commit_ordered() if log_xid was successful. */
+    if (cookie)
+    {
+      pthread_mutex_lock(&LOCK_commit_ordered);
+      run_commit_ordered(thd, all);
+      pthread_mutex_unlock(&LOCK_commit_ordered);
+    }
+
+    if (need_prepare_ordered)
+    {
+      commit_entry *next= entry.next;
+      if (next)
+      {
+        next->thd->signal_wakeup_ready();
+      }
+      else
+      {
+        pthread_mutex_lock(&LOCK_prepare_ordered);
+        commit_ordered_queue_busy= false;
+        pthread_cond_signal(&COND_queue_busy);
+        pthread_mutex_unlock(&LOCK_prepare_ordered);
+      }
+    }
+  }
+
+  return cookie;
+}
+
+
 /********* transaction coordinator log for 2pc - mmap() based solution *******/
 
 /*
@@ -5618,6 +6316,7 @@ int TC_LOG_MMAP::open(const char *opt_name)
   pthread_mutex_init(&LOCK_pool,    MY_MUTEX_INIT_FAST);
   pthread_cond_init(&COND_active, 0);
   pthread_cond_init(&COND_pool, 0);
+  pthread_cond_init(&COND_queue_busy, 0);
 
   inited=6;
 
@@ -5625,6 +6324,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
   active=pages;
   pool=pages+1;
   pool_last=pages+npages-1;
+  commit_ordered_queue= NULL;
+  commit_ordered_queue_busy= false;
 
   return 0;
 
@@ -5730,7 +6431,7 @@ int TC_LOG_MMAP::overflow()
     to the position in memory where xid was logged to.
 */
 
-int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
+int TC_LOG_MMAP::log_one_transaction(my_xid xid)
 {
   int err;
   PAGE *p;
@@ -5900,6 +6601,8 @@ void TC_LOG_MMAP::close()
     pthread_mutex_destroy(&LOCK_active);
     pthread_mutex_destroy(&LOCK_pool);
     pthread_cond_destroy(&COND_pool);
+    pthread_cond_destroy(&COND_active);
+    pthread_cond_destroy(&COND_queue_busy);
   case 5:
     data[0]='A'; // garble the first (signature) byte, in case my_delete fails
   case 4:
@@ -6078,7 +6781,8 @@ int TC_LOG_BINLOG::open(const char *opt_name)
       goto err;
     }
 
-    if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
+    if ((ev= Log_event::read_log_event(&log, 0, &fdle,
+                                       opt_master_verify_checksum)) &&
         ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
         ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
     {
@@ -6108,39 +6812,98 @@ void TC_LOG_BINLOG::close()
   pthread_cond_destroy (&COND_prep_xids);
 }
 
-/**
-  @todo
-  group commit
-
-  @retval
-    0    error
-  @retval
-    1    success
+/*
+  Do a binlog log_xid() for a group of transactions, linked through
+  thd->next_commit_ordered.
 */
-int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
+int
+TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
+                             bool need_prepare_ordered __attribute__((unused)),
+                             bool need_commit_ordered __attribute__((unused)))
 {
-  DBUG_ENTER("TC_LOG_BINLOG::log");
-  Xid_log_event xle(thd, xid);
-  binlog_trx_data *trx_data=
+  int err;
+  DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
+
+  binlog_trx_data *const trx_data=
     (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
-  /*
-    We always commit the entire transaction when writing an XID. Also
-    note that the return value is inverted.
-   */
-  DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE));
+
+  trx_data->using_xa= TRUE;
+  trx_data->xa_xid= xid;
+  if (xid)
+  {
+    Xid_log_event xid_event(thd, xid);
+    err= binlog_flush_trx_cache(thd, trx_data, &xid_event, all);
+  }
+  else
+  {
+    /*
+      Empty xid occurs in XA COMMIT ... ONE PHASE.
+      In this case, we do not have a MySQL xid for the transaction, and the
+      external XA transaction coordinator will have to handle recovery if
+      needed. So we end the transaction with a plain COMMIT query event.
+    */
+    Query_log_event end_event(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+    err= binlog_flush_trx_cache(thd, trx_data, &end_event, all);
+  }
+
+  DEBUG_SYNC(thd, "binlog_after_log_and_order");
+
+  DBUG_RETURN(!err);
 }
 
-int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+/*
+  After an XID is logged, we need to hold on to the current binlog file until
+  it is fully committed in the storage engine. The reason is that crash
+  recovery only looks at the latest binlog, so we must make sure there are no
+  outstanding prepared (but not committed) transactions before rotating the
+  binlog.
+
+  To handle this, we keep a count of outstanding XIDs. This function is used
+  to increase this count when committing one or more transactions to the
+  binary log.
+*/
+void
+TC_LOG_BINLOG::mark_xids_active(uint xid_count)
 {
-  DBUG_ENTER("TC_LOG_BINLOG::unlog");
+  DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
+  DBUG_PRINT("info", ("xid_count=%u", xid_count));
+  pthread_mutex_lock(&LOCK_prep_xids);
+  prepared_xids+= xid_count;
+  pthread_mutex_unlock(&LOCK_prep_xids);
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Once an XID is committed, it is safe to rotate the binary log, as it can no
+  longer be needed during crash recovery.
+
+  This function is called to mark an XID this way. It needs to decrease the
+  count of pending XIDs, and signal the log rotator thread when it reaches zero.
+*/
+void
+TC_LOG_BINLOG::mark_xid_done()
+{
+  my_bool send_signal;
+
+  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
   pthread_mutex_lock(&LOCK_prep_xids);
   DBUG_ASSERT(prepared_xids > 0);
-  if (--prepared_xids == 0) {
+  send_signal= !--prepared_xids;
+  pthread_mutex_unlock(&LOCK_prep_xids);
+  if (send_signal) {
     DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
     pthread_cond_signal(&COND_prep_xids);
   }
-  pthread_mutex_unlock(&LOCK_prep_xids);
-  DBUG_RETURN(rotate_and_purge(0));     // as ::write() did not rotate
+  DBUG_VOID_RETURN;
+}
+
+int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+{
+  DBUG_ENTER("TC_LOG_BINLOG::unlog");
+  if (xid)
+    mark_xid_done();
+  /* As ::write_transaction_to_binlog() did not rotate, do it here. */
+  DBUG_RETURN(rotate_and_purge(0));
 }
 
 int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
@@ -6158,7 +6921,9 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
 
   fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
 
-  while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
+  while ((ev= Log_event::read_log_event(log, 0, fdle,
+                                        opt_master_verify_checksum))
+         && ev->is_valid())
   {
     if (ev->get_type_code() == XID_EVENT)
     {
@@ -6209,9 +6974,153 @@ ulonglong mysql_bin_log_file_pos(void)
 {
   return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
 }
+/*
+  Get the current position of the MySQL binlog for transaction currently being
+  committed.
+
+  This is valid to call from within storage engine commit_ordered() and
+  commit() methods only.
+
+  Since it stores the position inside THD, it is safe to call without any
+  locking.
+*/
+void
+mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
+{
+  binlog_trx_data *const trx_data=
+    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+  if (trx_data)
+  {
+    *out_file= trx_data->last_commit_pos_file;
+    *out_pos= (ulonglong)(trx_data->last_commit_pos_offset);
+  }
+  else
+  {
+    *out_file= NULL;
+    *out_pos= 0;
+  }
+}
 #endif /* INNODB_COMPATIBILITY_HOOKS */
 
 
+static void
+binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
+                       void *var_ptr, const void *save)
+{
+  ulong value=  *((ulong *)save);
+
+  pthread_mutex_lock(mysql_bin_log.get_log_lock());
+  if(mysql_bin_log.is_open())
+  {
+    uint flags= RP_FORCE_ROTATE | RP_LOCK_LOG_IS_ALREADY_LOCKED |
+      (binlog_checksum_options != (uint) value?
+       RP_BINLOG_CHECKSUM_ALG_CHANGE : 0);
+    if (flags & RP_BINLOG_CHECKSUM_ALG_CHANGE)
+      mysql_bin_log.checksum_alg_reset= (uint8) value;
+    mysql_bin_log.rotate_and_purge(flags);
+  }
+  else
+  {
+    binlog_checksum_options= value;
+  }
+  DBUG_ASSERT((ulong) binlog_checksum_options == value);
+  DBUG_ASSERT(mysql_bin_log.checksum_alg_reset == BINLOG_CHECKSUM_ALG_UNDEF);
+  pthread_mutex_unlock(mysql_bin_log.get_log_lock());
+}
+
+
+static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
+{
+  mysql_bin_log.set_status_variables(thd);
+  var->type= SHOW_ARRAY;
+  var->value= (char *)&binlog_status_vars_detail;
+  return 0;
+}
+
+static SHOW_VAR binlog_status_vars_top[]= {
+  {"binlog", (char *) &show_binlog_vars, SHOW_FUNC},
+  {NullS, NullS, SHOW_LONG}
+};
+
+static MYSQL_SYSVAR_BOOL(
+  optimize_thread_scheduling,
+  opt_optimize_thread_scheduling,
+  PLUGIN_VAR_READONLY,
+  "Run fast part of group commit in a single thread, to optimize kernel "
+  "thread scheduling. On by default. Disable to run each transaction in group "
+  "commit in its own thread, which can be slower at very high concurrency. "
+  "This option is mostly for testing one algorithm versus the other, and it "
+  "should not normally be necessary to change it.",
+  NULL,
+  NULL,
+  1);
+
+static MYSQL_SYSVAR_ENUM(
+  checksum,
+  binlog_checksum_options,
+  PLUGIN_VAR_RQCMDARG,
+  "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
+  "log events in the binary log. Possible values are NONE and CRC32; "
+  "default is NONE.",
+  NULL,
+  binlog_checksum_update,
+  BINLOG_CHECKSUM_ALG_OFF,
+  &binlog_checksum_typelib);
+
+static struct st_mysql_sys_var *binlog_sys_vars[]=
+{
+  MYSQL_SYSVAR(optimize_thread_scheduling),
+  MYSQL_SYSVAR(checksum),
+  NULL
+};
+
+
+/*
+  Copy out the non-directory part of binlog position filename for the
+  `binlog_snapshot_file' status variable, same way as it is done for
+  SHOW MASTER STATUS.
+*/
+static void
+set_binlog_snapshot_file(const char *src)
+{
+  int dir_len = dirname_length(src);
+  strmake(binlog_snapshot_file, src + dir_len, sizeof(binlog_snapshot_file)-1);
+}
+
+/*
+  Copy out current values of status variables, for SHOW STATUS or
+  information_schema.global_status.
+
+  This is called only under LOCK_status, so we can fill in a static array.
+*/
+void
+TC_LOG_BINLOG::set_status_variables(THD *thd)
+{
+  binlog_trx_data *trx_data;
+
+  if (thd)
+    trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+  else
+    trx_data= NULL;
+
+  bool have_snapshot= (trx_data && trx_data->last_commit_pos_file[0] != 0);
+  pthread_mutex_lock(&LOCK_commit_ordered);
+  binlog_status_var_num_commits= this->num_commits;
+  binlog_status_var_num_group_commits= this->num_group_commits;
+  if (!have_snapshot)
+  {
+    set_binlog_snapshot_file(last_commit_pos_file);
+    binlog_snapshot_position= last_commit_pos_offset;
+  }
+  pthread_mutex_unlock(&LOCK_commit_ordered);
+
+  if (have_snapshot)
+  {
+    set_binlog_snapshot_file(trx_data->last_commit_pos_file);
+    binlog_snapshot_position= trx_data->last_commit_pos_offset;
+  }
+}
+
 struct st_mysql_storage_engine binlog_storage_engine=
 { MYSQL_HANDLERTON_INTERFACE_VERSION };
 
@@ -6226,8 +7135,8 @@ mysql_declare_plugin(binlog)
   binlog_init, /* Plugin Init */
   NULL, /* Plugin Deinit */
   0x0100 /* 1.0 */,
-  NULL,                       /* status variables                */
-  NULL,                       /* system variables                */
+  binlog_status_vars_top,     /* status variables                */
+  binlog_sys_vars,            /* system variables                */
   NULL                        /* config options                  */
 }
 mysql_declare_plugin_end;
@@ -6242,8 +7151,8 @@ maria_declare_plugin(binlog)
   binlog_init, /* Plugin Init */
   NULL, /* Plugin Deinit */
   0x0100 /* 1.0 */,
-  NULL,                       /* status variables                */
-  NULL,                       /* system variables                */
+  binlog_status_vars_top,     /* status variables                */
+  binlog_sys_vars,            /* system variables                */
   "1.0",                      /* string version */
   MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
 }
diff --git a/sql/log.h b/sql/log.h
index 4b32ecfdc02..07eff783ab6 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -40,17 +40,58 @@ class TC_LOG
 
   virtual int open(const char *opt_name)=0;
   virtual void close()=0;
-  virtual int log_xid(THD *thd, my_xid xid)=0;
+  virtual int log_and_order(THD *thd, my_xid xid, bool all,
+                            bool need_prepare_ordered,
+                            bool need_commit_ordered) = 0;
   virtual int unlog(ulong cookie, my_xid xid)=0;
+
+protected:
+  /*
+    These methods are meant to be invoked from log_and_order() implementations
+    to run any prepare_ordered() respectively commit_ordered() methods in
+    participating handlers.
+
+    They must be called using suitable thread syncronisation to ensure that
+    they are each called in the correct commit order among all
+    transactions. However, it is only necessary to call them if the
+    corresponding flag passed to log_and_order is set (it is safe, but not
+    required, to call them when the flag is false).
+
+    The caller must be holding LOCK_prepare_ordered respectively
+    LOCK_commit_ordered when calling these methods.
+  */
+  void run_prepare_ordered(THD *thd, bool all);
+  void run_commit_ordered(THD *thd, bool all);
 };
 
+/*
+  Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
+  and TC_LOG::run_commit_ordered(), or any other code that calls handler
+  prepare_ordered() or commit_ordered() methods.
+*/
+extern pthread_mutex_t LOCK_prepare_ordered;
+extern pthread_mutex_t LOCK_commit_ordered;
+
+extern void TC_init();
+extern void TC_destroy();
+
 class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
 {
 public:
   TC_LOG_DUMMY() {}
   int open(const char *opt_name)        { return 0; }
   void close()                          { }
-  int log_xid(THD *thd, my_xid xid)         { return 1; }
+  /*
+    TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
+    only use internal XA during commit when >= 2 XA-capable engines
+    participate.
+  */
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered)
+  {
+    DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+    return 1;
+  }
   int unlog(ulong cookie, my_xid xid)  { return 0; }
 };
 
@@ -76,6 +117,13 @@ class TC_LOG_MMAP: public TC_LOG
     pthread_cond_t  cond; // to wait for a sync
   } PAGE;
 
+  /* List of THDs for which to invoke commit_ordered(), in order. */
+  struct commit_entry
+  {
+    struct commit_entry *next;
+    THD *thd;
+  };
+
   char logname[FN_REFLEN];
   File fd;
   my_off_t file_length;
@@ -90,16 +138,38 @@ class TC_LOG_MMAP: public TC_LOG
   */
   pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
   pthread_cond_t COND_pool, COND_active;
+  /*
+    Queue of threads that need to call commit_ordered().
+    Access to this queue must be protected by LOCK_prepare_ordered.
+  */
+  commit_entry *commit_ordered_queue;
+  /*
+    This flag and condition is used to reserve the queue while threads in it
+    each run the commit_ordered() methods one after the other. Only once the
+    last commit_ordered() in the queue is done can we start on a new queue
+    run.
+
+    Since we start this process in the first thread in the queue and finish in
+    the last (and possibly different) thread, we need a condition variable for
+    this (we cannot unlock a mutex in a different thread than the one who
+    locked it).
+
+    The condition is used together with the LOCK_prepare_ordered mutex.
+  */
+  my_bool commit_ordered_queue_busy;
+  pthread_cond_t COND_queue_busy;
 
   public:
   TC_LOG_MMAP(): inited(0) {}
   int open(const char *opt_name);
   void close();
-  int log_xid(THD *thd, my_xid xid);
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered);
   int unlog(ulong cookie, my_xid xid);
   int recover();
 
   private:
+  int log_one_transaction(my_xid xid);
   void get_active_from_pool();
   int sync();
   int overflow();
@@ -213,7 +283,7 @@ public:
              uint user_host_len, int thread_id,
              const char *command_type, uint command_type_len,
              const char *sql_text, uint sql_text_len);
-  bool write(THD *thd, time_t current_time, time_t query_start_arg,
+  bool write(THD *thd, time_t current_time,
              const char *user_host, uint user_host_len,
              ulonglong query_utime, ulonglong lock_utime, bool is_command,
              const char *sql_text, uint sql_text_len);
@@ -234,9 +304,31 @@ private:
   time_t last_time;
 };
 
+class binlog_trx_data;
 class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
 {
  private:
+  struct group_commit_entry
+  {
+    struct group_commit_entry *next;
+    THD *thd;
+    binlog_trx_data *trx_data;
+    /*
+      Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+      written during group commit. The incident_event is only valid if
+      trx_data->has_incident() is true.
+    */
+    Log_event *begin_event;
+    Log_event *end_event;
+    Log_event *incident_event;
+    /* Set during group commit to record any per-thread error. */
+    int error;
+    int commit_errno;
+    /* This is the `all' parameter for ha_commit_ordered(). */
+    bool all;
+    /* True if we come in through XA log_and_order(), false otherwise. */
+  };
+
   /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
   pthread_mutex_t LOCK_index;
   pthread_mutex_t LOCK_prep_xids;
@@ -278,6 +370,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
     In 5.0 it's 0 for relay logs too!
   */
   bool no_auto_events;
+  /* Queue of transactions queued up to participate in group commit. */
+  group_commit_entry *group_commit_queue;
+  /*
+    Condition variable to mark that the group commit queue is busy.
+    Used when each thread does it's own commit_ordered() (when
+    binlog_optimize_thread_scheduling=1).
+    Used with the LOCK_commit_ordered mutex.
+  */
+  my_bool group_commit_queue_busy;
+  pthread_cond_t COND_queue_busy;
+  /* Total number of committed transactions. */
+  ulonglong num_commits;
+  /* Number of group commits done. */
+  ulonglong num_group_commits;
 
   int write_to_file(IO_CACHE *cache);
   /*
@@ -287,6 +393,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
   */
   int new_file_without_locking();
   int new_file_impl(bool need_lock);
+  int write_transaction(group_commit_entry *entry);
+  bool write_transaction_to_binlog_events(group_commit_entry *entry);
+  void trx_group_commit_leader(group_commit_entry *leader);
+  void mark_xid_done();
+  void mark_xids_active(uint xid_count);
 
 public:
   MYSQL_LOG::generate_name;
@@ -294,7 +405,41 @@ public:
 
   /* This is relay log */
   bool is_relay_log;
-
+  uint8 checksum_alg_reset; // to contain a new value when binlog is rotated
+  /*
+    Holds the last seen in Relay-Log FD's checksum alg value.
+    The initial value comes from the slave's local FD that heads
+    the very first Relay-Log file. In the following the value may change
+    with each received master's FD_m.
+    Besides to be used in verification events that IO thread receives
+    (except the 1st fake Rotate, see @c Master_info:: checksum_alg_before_fd), 
+    the value specifies if/how to compute checksum for slave's local events
+    and the first fake Rotate (R_f^1) coming from the master.
+    R_f^1 needs logging checksum-compatibly with the RL's heading FD_s.
+
+    Legends for the checksum related comments:
+
+    FD     - Format-Description event,
+    R      - Rotate event
+    R_f    - the fake Rotate event
+    E      - an arbirary event
+
+    The underscore indexes for any event
+    `_s'   indicates the event is generated by Slave
+    `_m'   - by Master
+
+    Two special underscore indexes of FD:
+    FD_q   - Format Description event for queuing   (relay-logging)
+    FD_e   - Format Description event for executing (relay-logging)
+
+    Upper indexes:
+    E^n    - n:th event is a sequence
+
+    RL     - Relay Log
+    (A)    - checksum algorithm descriptor value
+    FD.(A) - the value of (A) in FD
+  */
+  uint8 relay_log_checksum_alg;
   /*
     These describe the log's format. This is used only for relay logs.
     _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
@@ -305,6 +450,12 @@ public:
   */
   Format_description_log_event *description_event_for_exec,
     *description_event_for_queue;
+  /*
+    Binlog position of last commit (or non-transactional write) to the binlog.
+    Access to this is protected by LOCK_commit_ordered.
+  */
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
 
   MYSQL_BIN_LOG();
   /*
@@ -315,7 +466,8 @@ public:
 
   int open(const char *opt_name);
   void close();
-  int log_xid(THD *thd, my_xid xid);
+  int log_and_order(THD *thd, my_xid xid, bool all,
+                    bool need_prepare_ordered, bool need_commit_ordered);
   int unlog(ulong cookie, my_xid xid);
   int recover(IO_CACHE *log, Format_description_log_event *fdle);
 #if !defined(MYSQL_CLIENT)
@@ -359,11 +511,14 @@ public:
   int new_file();
 
   void reset_gathered_updates(THD *thd);
-  bool write(Log_event* event_info); // binary log write
-  bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
-  bool write_incident(THD *thd, bool lock);
-  int  write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
-                   bool flush_and_sync);
+  bool write(Log_event* event_info,
+             my_bool *with_annotate= 0); // binary log write
+  bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+                                   Log_event *end_ev, bool all);
+
+  bool write_incident_already_locked(THD *thd);
+  bool write_incident(THD *thd);
+  int  write_cache(THD *thd, IO_CACHE *cache);
   void set_write_error(THD *thd);
   bool check_write_error(THD *thd);
 
@@ -418,6 +573,7 @@ public:
   inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
   inline IO_CACHE *get_index_file() { return &index_file;}
   inline uint32 get_open_count() { return open_count; }
+  void set_status_variables(THD *thd);
 };
 
 class Log_event_handler
@@ -427,14 +583,14 @@ public:
   virtual bool init()= 0;
   virtual void cleanup()= 0;
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len)= 0;
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args)= 0;
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
@@ -456,14 +612,14 @@ public:
   virtual bool init();
   virtual void cleanup();
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len);
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args);
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
@@ -488,14 +644,14 @@ public:
   virtual bool init();
   virtual void cleanup();
 
-  virtual bool log_slow(THD *thd, time_t current_time,
-                        time_t query_start_arg, const char *user_host,
+  virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+                        const char *user_host,
                         uint user_host_len, ulonglong query_utime,
                         ulonglong lock_utime, bool is_command,
                         const char *sql_text, uint sql_text_len);
   virtual bool log_error(enum loglevel level, const char *format,
                          va_list args);
-  virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+  virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 927b61eb391..f9c827aec6c 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -40,6 +40,31 @@
 #include <base64.h>
 #include <my_bitmap.h>
 
+
+/**
+  BINLOG_CHECKSUM variable.
+*/
+const char *binlog_checksum_type_names[]= {
+  "NONE",
+  "CRC32",
+  NullS
+};
+
+unsigned int binlog_checksum_type_length[]= {
+  sizeof("NONE") - 1,
+  sizeof("CRC32") - 1,
+  0
+};
+
+TYPELIB binlog_checksum_typelib=
+{
+  array_elements(binlog_checksum_type_names) - 1, "",
+  binlog_checksum_type_names,
+  binlog_checksum_type_length
+};
+
+
+
 #define log_cs	&my_charset_latin1
 
 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
@@ -53,6 +78,24 @@
 */
 #define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
 
+/* 
+   replication event checksum is introduced in the following "checksum-home" version.
+   The checksum-aware servers extract FD's version to decide whether the FD event
+   carries checksum info.
+
+   TODO: correct the constant when it has been determined 
+   (which main tree to push and when) 
+*/
+const uchar checksum_version_split_mysql[3]= {5, 6, 1};
+const ulong checksum_version_product_mysql=
+  (checksum_version_split_mysql[0] * 256 +
+   checksum_version_split_mysql[1]) * 256 +
+  checksum_version_split_mysql[2];
+const uchar checksum_version_split_mariadb[3]= {5, 3, 0};
+const ulong checksum_version_product_mariadb=
+  (checksum_version_split_mariadb[0] * 256 +
+   checksum_version_split_mariadb[1]) * 256 +
+  checksum_version_split_mariadb[2];
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd);
@@ -401,6 +444,7 @@ inline bool unexpected_error_code(int unexpected_error)
   case ER_NET_READ_ERROR:
   case ER_NET_ERROR_ON_WRITE:
   case ER_QUERY_INTERRUPTED:
+  case ER_CONNECTION_KILLED:
   case ER_SERVER_SHUTDOWN:
   case ER_NEW_ABORTING_CONNECTION:
     return(TRUE);
@@ -617,7 +661,6 @@ static void print_set_option(IO_CACHE* file, uint32 bits_changed,
   }
 }
 #endif
-
 /**************************************************************************
 	Log_event methods (= the parent class of all events)
 **************************************************************************/
@@ -656,6 +699,7 @@ const char* Log_event::get_type_str(Log_event_type type)
   case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query";
   case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query";
   case INCIDENT_EVENT: return "Incident";
+  case ANNOTATE_ROWS_EVENT: return "Annotate_rows";
   default: return "Unknown";				/* impossible */
   }
 }
@@ -672,10 +716,13 @@ const char* Log_event::get_type_str()
 
 #ifndef MYSQL_CLIENT
 Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
-  :log_pos(0), temp_buf(0), exec_time(0), flags(flags_arg), thd(thd_arg)
+  :log_pos(0), temp_buf(0), exec_time(0), flags(flags_arg),
+   cache_type(EVENT_INVALID_CACHE), crc(0), thd(thd_arg),
+   checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
   server_id=	thd->server_id;
-  when=		thd->start_time;
+  when=         thd->start_time;
+  when_sec_part=thd->start_time_sec_part;
   cache_stmt=	using_trans;
 }
 
@@ -689,14 +736,16 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
 
 Log_event::Log_event()
   :temp_buf(0), exec_time(0), flags(0), cache_stmt(0),
-   thd(0)
+   cache_type(EVENT_INVALID_CACHE), crc(0),
+   thd(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
   server_id=	::server_id;
   /*
     We can't call my_time() here as this would cause a call before
     my_init() is called
   */
-  when=		0;
+  when=         0;
+  when_sec_part=0;
   log_pos=	0;
 }
 #endif /* !MYSQL_CLIENT */
@@ -708,12 +757,14 @@ Log_event::Log_event()
 
 Log_event::Log_event(const char* buf,
                      const Format_description_log_event* description_event)
-  :temp_buf(0), cache_stmt(0)
+  :temp_buf(0), cache_stmt(0), cache_type(EVENT_INVALID_CACHE),
+    crc(0), checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF)
 {
 #ifndef MYSQL_CLIENT
   thd = 0;
 #endif
   when = uint4korr(buf);
+  when_sec_part= 0;
   server_id = uint4korr(buf + SERVER_ID_OFFSET);
   data_written= uint4korr(buf + EVENT_LEN_OFFSET);
   if (description_event->binlog_version==1)
@@ -735,7 +786,7 @@ Log_event::Log_event(const char* buf,
     logs are in 4.0 format, until it finds a Format_desc).
   */
   if (description_event->binlog_version==3 &&
-      buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
+      (uchar)buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
   {
       /*
         If log_pos=0, don't change it. log_pos==0 is a marker to mean
@@ -753,8 +804,8 @@ Log_event::Log_event(const char* buf,
   DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
 
   flags= uint2korr(buf + FLAGS_OFFSET);
-  if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
-      (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
+  if (((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
+      ((uchar)buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
   {
     /*
       These events always have a header which stops here (i.e. their
@@ -802,21 +853,13 @@ int Log_event::do_update_pos(Relay_log_info *rli)
     DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
                     if (debug_not_change_ts_if_art_event == 1
                         && is_artificial_event())
-                    {
-                      debug_not_change_ts_if_art_event= 0;
-                    });
-#ifndef DBUG_OFF
-    rli->stmt_done(log_pos, 
-                   is_artificial_event() &&
-                   debug_not_change_ts_if_art_event > 0 ? 0 : when);
-#else
-    rli->stmt_done(log_pos, is_artificial_event()? 0 : when);
-#endif
+                      debug_not_change_ts_if_art_event= 0; );
+    rli->stmt_done(log_pos, is_artificial_event()
+                   IF_DBUG(&& debug_not_change_ts_if_art_event > 0) ?
+                     0 : when);
     DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
                     if (debug_not_change_ts_if_art_event == 0)
-                    {
-                      debug_not_change_ts_if_art_event= 2;
-                    });
+                      debug_not_change_ts_if_art_event= 2; );
   }
   return 0;                                   // Cannot fail currently
 }
@@ -893,6 +936,105 @@ void Log_event::init_show_field_list(List<Item>* field_list)
   field_list->push_back(new Item_empty_string("Info", 20));
 }
 
+/**
+   A decider of whether to trigger checksum computation or not.
+   To be invoked in Log_event::write() stack.
+   The decision is positive 
+
+    S,M) if it's been marked for checksumming with @c checksum_alg
+    
+    M) otherwise, if @@global.binlog_checksum is not NONE and the event is 
+       directly written to the binlog file.
+       The to-be-cached event decides at @c write_cache() time.
+
+   Otherwise the decision is negative.
+
+   @note   A side effect of the method is altering Log_event::checksum_alg
+           it the latter was undefined at calling.
+
+   @return true (positive) or false (negative)
+*/
+my_bool Log_event::need_checksum()
+{
+  DBUG_ENTER("Log_event::need_checksum");
+  my_bool ret;
+  /* 
+     few callers of Log_event::write 
+     (incl FD::write, FD constructing code on the slave side, Rotate relay log
+     and Stop event) 
+     provides their checksum alg preference through Log_event::checksum_alg.
+  */
+  ret= ((checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
+        (checksum_alg != BINLOG_CHECKSUM_ALG_OFF) :
+        ((binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF) &&
+         (cache_type == Log_event::EVENT_NO_CACHE)) ?
+        test(binlog_checksum_options) : FALSE);
+
+  /*
+    FD calls the methods before data_written has been calculated.
+    The following invariant claims if the current is not the first
+    call (and therefore data_written is not zero) then `ret' must be
+    TRUE. It may not be null because FD is always checksummed.
+  */
+  
+  DBUG_ASSERT(get_type_code() != FORMAT_DESCRIPTION_EVENT || ret ||
+              data_written == 0);
+
+  if (checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+    checksum_alg= ret ? // calculated value stored
+      (uint8) binlog_checksum_options : (uint8) BINLOG_CHECKSUM_ALG_OFF;
+
+  DBUG_ASSERT(!ret || 
+              ((checksum_alg == binlog_checksum_options ||
+               /* 
+                  Stop event closes the relay-log and its checksum alg
+                  preference is set by the caller can be different
+                  from the server's binlog_checksum_options.
+               */
+               get_type_code() == STOP_EVENT ||
+               /* 
+                  Rotate:s can be checksummed regardless of the server's
+                  binlog_checksum_options. That applies to both
+                  the local RL's Rotate and the master's Rotate
+                  which IO thread instantiates via queue_binlog_ver_3_event.
+               */
+               get_type_code() == ROTATE_EVENT
+               ||  /* FD is always checksummed */
+               get_type_code() == FORMAT_DESCRIPTION_EVENT) && 
+               checksum_alg != BINLOG_CHECKSUM_ALG_OFF));
+
+  DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+
+  DBUG_ASSERT(((get_type_code() != ROTATE_EVENT &&
+                get_type_code() != STOP_EVENT) ||
+               get_type_code() != FORMAT_DESCRIPTION_EVENT) ||
+              cache_type == Log_event::EVENT_NO_CACHE);
+
+  DBUG_RETURN(ret);
+}
+
+bool Log_event::wrapper_my_b_safe_write(IO_CACHE* file, const uchar* buf, ulong size)
+{
+  if (need_checksum() && size != 0)
+    crc= my_checksum(crc, buf, size);
+
+  return my_b_safe_write(file, buf, size);
+}
+
+bool Log_event::write_footer(IO_CACHE* file) 
+{
+  /*
+     footer contains the checksum-algorithm descriptor 
+     followed by the checksum value
+  */
+  if (need_checksum())
+  {
+    uchar buf[BINLOG_CHECKSUM_LEN];
+    int4store(buf, crc);
+    return (my_b_safe_write(file, (uchar*) buf, sizeof(buf)));
+  }
+  return 0;
+}
 
 /*
   Log_event::write()
@@ -902,11 +1044,18 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
 {
   uchar header[LOG_EVENT_HEADER_LEN];
   ulong now;
+  bool ret;
   DBUG_ENTER("Log_event::write_header");
 
   /* Store number of bytes that will be written by this event */
   data_written= event_data_length + sizeof(header);
 
+  if (need_checksum())
+  {
+    crc= my_checksum(0L, NULL, 0);
+    data_written += BINLOG_CHECKSUM_LEN;
+  }
+
   /*
     log_pos != 0 if this is relay-log event. In this case we should not
     change the position
@@ -951,7 +1100,7 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
     log_pos= my_b_safe_tell(file)+data_written;
   }
 
-  now= (ulong) get_time();                              // Query start time
+  now= get_time();                               // Query start time
 
   /*
     Header will be of size LOG_EVENT_HEADER_LEN for all events, except for
@@ -965,9 +1114,36 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
   int4store(header+ SERVER_ID_OFFSET, server_id);
   int4store(header+ EVENT_LEN_OFFSET, data_written);
   int4store(header+ LOG_POS_OFFSET, log_pos);
-  int2store(header+ FLAGS_OFFSET, flags);
-
-  DBUG_RETURN(my_b_safe_write(file, header, sizeof(header)) != 0);
+  /*
+    recording checksum of FD event computed with dropped
+    possibly active LOG_EVENT_BINLOG_IN_USE_F flag.
+    Similar step at verication: the active flag is dropped before
+    checksum computing.
+  */
+  if (header[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT ||
+      !need_checksum() || !(flags & LOG_EVENT_BINLOG_IN_USE_F))
+  {
+    int2store(header+ FLAGS_OFFSET, flags);
+    ret= wrapper_my_b_safe_write(file, header, sizeof(header)) != 0;
+  }
+  else
+  {
+    ret= (wrapper_my_b_safe_write(file, header, FLAGS_OFFSET) != 0);
+    if (!ret)
+    {
+      flags &= ~LOG_EVENT_BINLOG_IN_USE_F;
+      int2store(header + FLAGS_OFFSET, flags);
+      crc= my_checksum(crc, header + FLAGS_OFFSET, sizeof(flags));
+      flags |= LOG_EVENT_BINLOG_IN_USE_F;    
+      int2store(header + FLAGS_OFFSET, flags);
+      ret= (my_b_safe_write(file, header + FLAGS_OFFSET, sizeof(flags)) != 0);
+    }
+    if (!ret)
+      ret= (wrapper_my_b_safe_write(file, header + FLAGS_OFFSET + sizeof(flags),
+                                    sizeof(header)
+                                    - (FLAGS_OFFSET + sizeof(flags))) != 0);
+  }
+  DBUG_RETURN( ret);
 }
 
 
@@ -977,11 +1153,13 @@ bool Log_event::write_header(IO_CACHE* file, ulong event_data_length)
 */
 
 int Log_event::read_log_event(IO_CACHE* file, String* packet,
-			      pthread_mutex_t* log_lock)
+			      pthread_mutex_t* log_lock,
+                              uint8 checksum_alg_arg)
 {
   ulong data_len;
   int result=0;
   char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
+  uchar ev_offset= packet->length();
   DBUG_ENTER("Log_event::read_log_event");
 
   if (log_lock)
@@ -1039,6 +1217,31 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
                (file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO));
       /* Implicit goto end; */
     }
+    else
+    {
+      /* Corrupt the event for Dump thread*/
+      DBUG_EXECUTE_IF("corrupt_read_log_event2",
+	uchar *debug_event_buf_c = (uchar*) packet->ptr() + ev_offset;
+        if (debug_event_buf_c[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
+        {
+          int debug_cor_pos = rand() % (data_len + sizeof(buf) - BINLOG_CHECKSUM_LEN);
+          debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+          DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event: byte on position %d", debug_cor_pos));
+          DBUG_SET("-d,corrupt_read_log_event2");
+	}
+      );                                                                                           
+      /*
+        CRC verification of the Dump thread
+      */
+      if (opt_master_verify_checksum &&
+          event_checksum_test((uchar*) packet->ptr() + ev_offset,
+                              data_len + sizeof(buf),
+                              checksum_alg_arg))
+      {
+        result= LOG_READ_CHECKSUM_FAILURE;
+        goto end;
+      }
+    }
   }
 
 end:
@@ -1064,11 +1267,13 @@ end:
 Log_event* Log_event::read_log_event(IO_CACHE* file,
 				     pthread_mutex_t* log_lock,
                                      const Format_description_log_event
-                                     *description_event)
+                                     *description_event,
+                                     my_bool crc_check)
 #else
 Log_event* Log_event::read_log_event(IO_CACHE* file,
                                      const Format_description_log_event
-                                     *description_event)
+                                     *description_event,
+                                     my_bool crc_check)
 #endif
 {
   DBUG_ENTER("Log_event::read_log_event");
@@ -1132,7 +1337,7 @@ failed my_b_read"));
     error = "read error";
     goto err;
   }
-  if ((res= read_log_event(buf, data_len, &error, description_event)))
+  if ((res= read_log_event(buf, data_len, &error, description_event, crc_check)))
     res->register_temp_buf(buf, TRUE);
 
 err:
@@ -1165,9 +1370,11 @@ err:
 
 Log_event* Log_event::read_log_event(const char* buf, uint event_len,
 				     const char **error,
-                                     const Format_description_log_event *description_event)
+                                     const Format_description_log_event *description_event,
+                                     my_bool crc_check)
 {
   Log_event* ev;
+  uint8 alg;
   DBUG_ENTER("Log_event::read_log_event(char*,...)");
   DBUG_ASSERT(description_event != 0);
   DBUG_PRINT("info", ("binlog_version: %d", description_event->binlog_version));
@@ -1175,14 +1382,68 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
 
   /* Check the integrity */
   if (event_len < EVENT_LEN_OFFSET ||
-      buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
+      (uchar)buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
       (uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET))
   {
     *error="Sanity check failed";		// Needed to free buffer
     DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
   }
 
-  uint event_type= buf[EVENT_TYPE_OFFSET];
+  uint event_type= (uchar)buf[EVENT_TYPE_OFFSET];
+  // all following START events in the current file are without checksum
+  if (event_type == START_EVENT_V3)
+    (const_cast< Format_description_log_event *>(description_event))->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+  /*
+    CRC verification by SQL and Show-Binlog-Events master side.
+    The caller has to provide @description_event->checksum_alg to
+    be the last seen FD's (A) descriptor.
+    If event is FD the descriptor is in it.
+    Notice, FD of the binlog can be only in one instance and therefore
+    Show-Binlog-Events executing master side thread needs just to know
+    the only FD's (A) value -  whereas RL can contain more.
+    In the RL case, the alg is kept in FD_e (@description_event) which is reset 
+    to the newer read-out event after its execution with possibly new alg descriptor.
+    Therefore in a typical sequence of RL:
+    {FD_s^0, FD_m, E_m^1} E_m^1 
+    will be verified with (A) of FD_m.
+
+    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg docs
+    lines (log.h).
+
+    Notice, a pre-checksum FD version forces alg := BINLOG_CHECKSUM_ALG_UNDEF.
+  */
+  alg= (event_type != FORMAT_DESCRIPTION_EVENT) ?
+    description_event->checksum_alg : get_checksum_alg(buf, event_len);
+  // Emulate the corruption during reading an event
+  DBUG_EXECUTE_IF("corrupt_read_log_event_char",
+    if (event_type != FORMAT_DESCRIPTION_EVENT)
+    {
+      char *debug_event_buf_c = (char *)buf;
+      int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
+      debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+      DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event(char*,...): byte on position %d", debug_cor_pos));
+      DBUG_SET("-d,corrupt_read_log_event_char");
+    }
+  );                                                 
+  if (crc_check &&
+      event_checksum_test((uchar *) buf, event_len, alg))
+  {
+#ifdef MYSQL_CLIENT
+    *error= "Event crc check failed! Most likely there is event corruption.";
+    if (force_opt)
+    {
+      ev= new Unknown_log_event(buf, description_event);
+      DBUG_RETURN(ev);
+    }
+    else
+      DBUG_RETURN(NULL);
+#else
+    *error= ER(ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE);
+    sql_print_error("%s", ER(ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE));
+    DBUG_RETURN(NULL);
+#endif
+  }
+
   if (event_type > description_event->number_of_event_types &&
       event_type != FORMAT_DESCRIPTION_EVENT)
   {
@@ -1221,6 +1482,11 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
       event_type= description_event->event_type_permutation[event_type];
     }
 
+    if (alg != BINLOG_CHECKSUM_ALG_UNDEF &&
+        (event_type == FORMAT_DESCRIPTION_EVENT ||
+         alg != BINLOG_CHECKSUM_ALG_OFF))
+      event_len= event_len - BINLOG_CHECKSUM_LEN;
+    
     switch(event_type) {
     case QUERY_EVENT:
       ev  = new Query_log_event(buf, event_len, description_event, QUERY_EVENT);
@@ -1304,6 +1570,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
     case INCIDENT_EVENT:
       ev = new Incident_log_event(buf, event_len, description_event);
       break;
+    case ANNOTATE_ROWS_EVENT:
+      ev = new Annotate_rows_log_event(buf, event_len, description_event);
+      break;
     default:
       DBUG_PRINT("error",("Unknown event code: %d",
                           (int) buf[EVENT_TYPE_OFFSET]));
@@ -1312,6 +1581,14 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
     }
   }
 
+  if (ev)
+  {
+    ev->checksum_alg= alg;
+    if (ev->checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+        ev->checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+      ev->crc= uint4korr(buf + (event_len));
+  }
+
   DBUG_PRINT("read_event", ("%s(type_code: %d; event_len: %d)",
                             ev ? ev->get_type_str() : "<unknown>",
                             buf[EVENT_TYPE_OFFSET],
@@ -1366,6 +1643,18 @@ void Log_event::print_header(IO_CACHE* file,
   my_b_printf(file, " server id %lu  end_log_pos %s ", (ulong) server_id,
               llstr(log_pos,llbuff));
 
+  /* print the checksum */
+
+  if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+      checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    char checksum_buf[BINLOG_CHECKSUM_LEN * 2 + 4]; // to fit to "0x%lx "
+    size_t const bytes_written=
+      my_snprintf(checksum_buf, sizeof(checksum_buf), "0x%08lx ", (ulong) crc);
+    my_b_printf(file, "%s ", get_type(&binlog_checksum_typelib, checksum_alg));
+    my_b_printf(file, checksum_buf, bytes_written);
+  }
+
   /* mysqlbinlog --hexdump */
   if (print_event_info->hexdump_from)
   {
@@ -1734,6 +2023,7 @@ beg:
       uint64 i64= uint8korr(ptr); /* YYYYMMDDhhmmss */
       d= (ulong) (i64 / 1000000);
       t= (ulong) (i64 % 1000000);
+
       my_b_printf(file, "%04d-%02d-%02d %02d:%02d:%02d",
                   (int) (d / 10000), (int) (d % 10000) / 100, (int) (d % 100),
                   (int) (t / 10000), (int) (t % 10000) / 100, (int) t % 100);
@@ -1989,12 +2279,10 @@ end:
   delete td;
 }
 
-#ifdef MYSQL_CLIENT
 void free_table_map_log_event(Table_map_log_event *event)
 {
   delete event;
 }
-#endif
 
 void Log_event::print_base64(IO_CACHE* file,
                              PRINT_EVENT_INFO* print_event_info,
@@ -2031,6 +2319,9 @@ void Log_event::print_base64(IO_CACHE* file,
   if (print_event_info->verbose)
   {
     Rows_log_event *ev= NULL;
+    if (checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF &&
+        checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+      size-= BINLOG_CHECKSUM_LEN; // checksum is displayed through the header
     
     if (ptr[4] == TABLE_MAP_EVENT)
     {
@@ -2074,15 +2365,13 @@ void Log_event::print_base64(IO_CACHE* file,
 void Log_event::print_timestamp(IO_CACHE* file, time_t* ts)
 {
   struct tm *res;
+  time_t my_when= when;
   DBUG_ENTER("Log_event::print_timestamp");
   if (!ts)
-    ts = &when;
-#ifdef MYSQL_SERVER				// This is always false
-  struct tm tm_tmp;
-  localtime_r(ts,(res= &tm_tmp));
-#else
+  {
+    ts = &my_when;
+  }
   res=localtime(ts);
-#endif
 
   my_b_printf(file,"%02d%02d%02d %2d:%02d:%02d",
               res->tm_year % 100,
@@ -2366,6 +2655,15 @@ bool Query_log_event::write(IO_CACHE* file)
       memcpy(start, host.str, host.length);
       start+= host.length;
     }
+
+  }
+
+  if (thd && thd->query_start_sec_part_used)
+  {
+    *start++= Q_HRNOW;
+    get_time();
+    int3store(start, when_sec_part);
+    start+= 3;
   }
   /*
     NOTE: When adding new status vars, please don't forget to update
@@ -2393,12 +2691,13 @@ bool Query_log_event::write(IO_CACHE* file)
   event_length= (uint) (start-buf) + get_post_header_size_for_derived() + db_len + 1 + q_len;
 
   return (write_header(file, event_length) ||
-          my_b_safe_write(file, (uchar*) buf, QUERY_HEADER_LEN) ||
+          wrapper_my_b_safe_write(file, (uchar*) buf, QUERY_HEADER_LEN) ||
           write_post_header_for_derived(file) ||
-          my_b_safe_write(file, (uchar*) start_of_status,
+          wrapper_my_b_safe_write(file, (uchar*) start_of_status,
                           (uint) (start-start_of_status)) ||
-          my_b_safe_write(file, (db) ? (uchar*) db : (uchar*)"", db_len + 1) ||
-          my_b_safe_write(file, (uchar*) query, q_len)) ? 1 : 0;
+          wrapper_my_b_safe_write(file, (db) ? (uchar*) db : (uchar*)"", db_len + 1) ||
+          wrapper_my_b_safe_write(file, (uchar*) query, q_len) ||
+	  write_footer(file)) ? 1 : 0;
 }
 
 /**
@@ -2458,7 +2757,7 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
 
   error_code= errcode;
 
-  time(&end_time);
+  end_time= my_time(0);
   exec_time = (ulong) (end_time  - thd_arg->start_time);
   /**
     @todo this means that if we have no catalog, then it is replicated
@@ -2593,6 +2892,7 @@ code_name(int code)
   case Q_CHARSET_DATABASE_CODE: return "Q_CHARSET_DATABASE_CODE";
   case Q_TABLE_MAP_FOR_UPDATE_CODE: return "Q_TABLE_MAP_FOR_UPDATE_CODE";
   case Q_MASTER_DATA_WRITTEN_CODE: return "Q_MASTER_DATA_WRITTEN_CODE";
+  case Q_HRNOW: return "Q_HRNOW";
   }
   sprintf(buf, "CODE#%d", code);
   return buf;
@@ -2809,6 +3109,14 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
       CHECK_SPACE(pos, end, host.length);
       host.str= (char *)pos;
       pos+= host.length;
+      break;
+    }
+    case Q_HRNOW:
+    {
+      CHECK_SPACE(pos, end, 3);
+      when_sec_part= uint3korr(pos);
+      pos+= 3;
+      break;
     }
     default:
       /* That's why you must write status vars in growing order of code */
@@ -2888,7 +3196,7 @@ void Query_log_event::print_query_header(IO_CACHE* file,
 					 PRINT_EVENT_INFO* print_event_info)
 {
   // TODO: print the catalog ??
-  char buff[40],*end;				// Enough for SET TIMESTAMP
+  char buff[64], *end;				// Enough for SET TIMESTAMP
   bool different_db= 1;
   uint32 tmp;
 
@@ -2915,6 +3223,11 @@ void Query_log_event::print_query_header(IO_CACHE* file,
   }
 
   end=int10_to_str((long) when, strmov(buff,"SET TIMESTAMP="),10);
+  if (when_sec_part)
+  {
+    *end++= '.';
+    end=int10_to_str(when_sec_part, end, 10);
+  }
   end= strmov(end, print_event_info->delimiter);
   *end++='\n';
   my_b_write(file, (uchar*) buff, (uint) (end-buff));
@@ -3176,7 +3489,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
   */
   if (is_trans_keyword() || rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     thd->set_query((char*)query_arg, q_len_arg);
     VOID(pthread_mutex_lock(&LOCK_thread_count));
     thd->query_id = next_query_id();
@@ -3381,7 +3694,7 @@ Default database: '%s'. Query: '%s'",
     {
       DBUG_PRINT("info",("error ignored"));
       clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
-      thd->killed= THD::NOT_KILLED;
+      thd->killed= NOT_KILLED;
       /*
         When an error is expected and matches the actual error the
         slave does not report any error and by consequence changes
@@ -3623,10 +3936,11 @@ bool Start_log_event_v3::write(IO_CACHE* file)
   int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
   memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
   if (!dont_set_created)
-    created= when= get_time();
+    created= get_time(); // this sets when and when_sec_part as a side effect
   int4store(buff + ST_CREATED_OFFSET,created);
   return (write_header(file, sizeof(buff)) ||
-          my_b_safe_write(file, (uchar*) buff, sizeof(buff)));
+          wrapper_my_b_safe_write(file, (uchar*) buff, sizeof(buff)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -3728,6 +4042,7 @@ int Start_log_event_v3::do_apply_event(Relay_log_info const *rli)
                                 old 4.0 (binlog version 2) is not supported;
                                 it should not be used for replication with
                                 5.0.
+  @param server_ver             a string containing the server version.
 */
 
 Format_description_log_event::
@@ -3743,9 +4058,9 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
     common_header_len= LOG_EVENT_HEADER_LEN;
     number_of_event_types= LOG_EVENT_TYPES;
     /* we'll catch my_malloc() error in is_valid() */
-    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
+    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8)
+                                       + BINLOG_CHECKSUM_ALG_DESC_LEN,
                                        MYF(0));
-
     /*
       This long list of assignments is not beautiful, but I see no way to
       make it nicer, as the right members are #defines, not array members, so
@@ -3808,6 +4123,13 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
                       post_header_len[DELETE_ROWS_EVENT-1]= 6;);
       post_header_len[INCIDENT_EVENT-1]= INCIDENT_HEADER_LEN;
 
+      // Set header length of the reserved events to 0
+      memset(post_header_len + MYSQL_EVENTS_END - 1, 0,
+             (MARIA_EVENTS_BEGIN - MYSQL_EVENTS_END)*sizeof(uint8));
+
+      // Set header lengths of Maria events
+      post_header_len[ANNOTATE_ROWS_EVENT-1]= ANNOTATE_ROWS_HEADER_LEN;
+
       // Sanity-check that all post header lengths are initialized.
       IF_DBUG({
           int i;
@@ -3862,6 +4184,7 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
     break;
   }
   calc_server_version_split();
+  checksum_alg= (uint8) BINLOG_CHECKSUM_ALG_UNDEF;
 }
 
 
@@ -3896,14 +4219,26 @@ Format_description_log_event(const char* buf,
   if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN)
     DBUG_VOID_RETURN; /* sanity check */
   number_of_event_types=
-    event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1);
+    event_len - (LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET + 1);
   DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d",
                       common_header_len, number_of_event_types));
   /* If alloc fails, we'll detect it in is_valid() */
+
   post_header_len= (uint8*) my_memdup((uchar*)buf+ST_COMMON_HEADER_LEN_OFFSET+1,
                                       number_of_event_types*
-                                      sizeof(*post_header_len), MYF(0));
+                                      sizeof(*post_header_len),
+                                      MYF(0));
   calc_server_version_split();
+  if (!is_version_before_checksum(&server_version_split))
+  {
+    /* the last bytes are the checksum alg desc and value (or value's room) */
+    number_of_event_types -= BINLOG_CHECKSUM_ALG_DESC_LEN;
+    checksum_alg= post_header_len[number_of_event_types];
+  }
+  else
+  {
+    checksum_alg= (uint8) BINLOG_CHECKSUM_ALG_UNDEF;
+  }
 
   /*
     In some previous versions, the events were given other event type
@@ -4014,21 +4349,59 @@ Format_description_log_event(const char* buf,
 #ifndef MYSQL_CLIENT
 bool Format_description_log_event::write(IO_CACHE* file)
 {
+  bool ret;
+  bool no_checksum;
   /*
     We don't call Start_log_event_v3::write() because this would make 2
     my_b_safe_write().
   */
-  uchar buff[FORMAT_DESCRIPTION_HEADER_LEN];
+  uchar buff[FORMAT_DESCRIPTION_HEADER_LEN + BINLOG_CHECKSUM_ALG_DESC_LEN];
+  size_t rec_size= sizeof(buff);
   int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
   memcpy((char*) buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
   if (!dont_set_created)
-    created= when= get_time();
+    created= get_time();
   int4store(buff + ST_CREATED_OFFSET,created);
   buff[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN;
-  memcpy((char*) buff+ST_COMMON_HEADER_LEN_OFFSET+1, (uchar*) post_header_len,
+  memcpy((char*) buff+ST_COMMON_HEADER_LEN_OFFSET + 1, (uchar*) post_header_len,
          LOG_EVENT_TYPES);
-  return (write_header(file, sizeof(buff)) ||
-          my_b_safe_write(file, buff, sizeof(buff)));
+  /*
+    if checksum is requested
+    record the checksum-algorithm descriptor next to
+    post_header_len vector which will be followed by the checksum value.
+    Master is supposed to trigger checksum computing by binlog_checksum_options,
+    slave does it via marking the event according to
+    FD_queue checksum_alg value.
+  */
+  compile_time_assert(sizeof(BINLOG_CHECKSUM_ALG_DESC_LEN == 1));
+#ifndef DBUG_OFF
+  data_written= 0; // to prepare for need_checksum assert
+#endif
+  buff[FORMAT_DESCRIPTION_HEADER_LEN]= need_checksum() ?
+    checksum_alg : (uint8) BINLOG_CHECKSUM_ALG_OFF;
+  /* 
+     FD of checksum-aware server is always checksum-equipped, (V) is in,
+     regardless of @@global.binlog_checksum policy.
+     Thereby a combination of (A) == 0, (V) != 0 means
+     it's the checksum-aware server's FD event that heads checksum-free binlog
+     file. 
+     Here 0 stands for checksumming OFF to evaluate (V) as 0 is that case.
+     A combination of (A) != 0, (V) != 0 denotes FD of the checksum-aware server
+     heading the checksummed binlog.
+     (A), (V) presence in FD of the checksum-aware server makes the event
+     1 + 4 bytes bigger comparing to the former FD.
+  */
+
+  if ((no_checksum= (checksum_alg == BINLOG_CHECKSUM_ALG_OFF)))
+  {
+    checksum_alg= BINLOG_CHECKSUM_ALG_CRC32;  // Forcing (V) room to fill anyway
+  }
+  ret= (write_header(file, rec_size) ||
+        wrapper_my_b_safe_write(file, buff, rec_size) ||
+        write_footer(file));
+  if (no_checksum)
+    checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+  return ret;
 }
 #endif
 
@@ -4124,6 +4497,30 @@ Format_description_log_event::do_shall_skip(Relay_log_info *rli)
 
 #endif
 
+static inline void
+do_server_version_split(char* version,
+                        Format_description_log_event::master_version_split *split_versions)
+{
+  char *p= version, *r;
+  ulong number;
+  for (uint i= 0; i<=2; i++)
+  {
+    number= strtoul(p, &r, 10);
+    split_versions->ver[i]= (uchar) number;
+    DBUG_ASSERT(number < 256); // fit in uchar
+    p= r;
+    DBUG_ASSERT(!((i == 0) && (*r != '.'))); // should be true in practice
+    if (*r == '.')
+      p++; // skip the dot
+  }
+  if (strinstr(p, "MariaDB") != 0 || strinstr(p, "-maria-") != 0)
+    split_versions->kind=
+      Format_description_log_event::master_version_split::KIND_MARIADB;
+  else
+    split_versions->kind=
+      Format_description_log_event::master_version_split::KIND_MYSQL;
+}
+
 
 /**
    Splits the event's 'server_version' string into three numeric pieces stored
@@ -4136,24 +4533,67 @@ Format_description_log_event::do_shall_skip(Relay_log_info *rli)
 */
 void Format_description_log_event::calc_server_version_split()
 {
-  char *p= server_version, *r;
-  ulong number;
-  for (uint i= 0; i<=2; i++)
-  {
-    number= strtoul(p, &r, 10);
-    server_version_split[i]= (uchar)number;
-    DBUG_ASSERT(number < 256); // fit in uchar
-    p= r;
-    DBUG_ASSERT(!((i == 0) && (*r != '.'))); // should be true in practice
-    if (*r == '.')
-      p++; // skip the dot
-  }
+  do_server_version_split(server_version, &server_version_split);
+
   DBUG_PRINT("info",("Format_description_log_event::server_version_split:"
                      " '%s' %d %d %d", server_version,
-                     server_version_split[0],
-                     server_version_split[1], server_version_split[2]));
+                     server_version_split.ver[0],
+                     server_version_split.ver[1], server_version_split.ver[2]));
+}
+
+static inline ulong
+version_product(const Format_description_log_event::master_version_split* version_split)
+{
+  return ((version_split->ver[0] * 256 + version_split->ver[1]) * 256
+          + version_split->ver[2]);
 }
 
+/**
+   @return TRUE is the event's version is earlier than one that introduced
+   the replication event checksum. FALSE otherwise.
+*/
+bool
+Format_description_log_event::is_version_before_checksum(master_version_split
+                                                         *version_split)
+{
+  return version_product(version_split) <
+    (version_split->kind == master_version_split::KIND_MARIADB ?
+     checksum_version_product_mariadb : checksum_version_product_mysql);
+}
+
+/**
+   @param buf buffer holding serialized FD event
+   @param len netto (possible checksum is stripped off) length of the event buf
+   
+   @return  the version-safe checksum alg descriptor where zero
+            designates no checksum, 255 - the orginator is
+            checksum-unaware (effectively no checksum) and the actuall
+            [1-254] range alg descriptor.
+*/
+uint8 get_checksum_alg(const char* buf, ulong len)
+{
+  uint8 ret;
+  char version[ST_SERVER_VER_LEN];
+  Format_description_log_event::master_version_split version_split;
+
+  DBUG_ENTER("get_checksum_alg");
+  DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);
+
+  memcpy(version, buf +
+         buf[LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET]
+         + ST_SERVER_VER_OFFSET, ST_SERVER_VER_LEN);
+  version[ST_SERVER_VER_LEN - 1]= 0;
+  
+  do_server_version_split(version, &version_split);
+  ret= Format_description_log_event::is_version_before_checksum(&version_split) ?
+    (uint8) BINLOG_CHECKSUM_ALG_UNDEF :
+    * (uint8*) (buf + len - BINLOG_CHECKSUM_LEN - BINLOG_CHECKSUM_ALG_DESC_LEN);
+  DBUG_ASSERT(ret == BINLOG_CHECKSUM_ALG_OFF ||
+              ret == BINLOG_CHECKSUM_ALG_UNDEF ||
+              ret == BINLOG_CHECKSUM_ALG_CRC32);
+  DBUG_RETURN(ret);
+}
+  
 
   /**************************************************************************
         Load_log_event methods
@@ -4452,8 +4892,8 @@ Load_log_event::Load_log_event(const char *buf, uint event_len,
   */
   if (event_len)
     copy_log_event(buf, event_len,
-                   ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
-                    LOAD_HEADER_LEN + 
+                   (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                   LOAD_HEADER_LEN + 
                     description_event->common_header_len :
                     LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
                    description_event);
@@ -4490,7 +4930,7 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
   */
   if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
                                         buf_end,
-                                        buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
+                                        (uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
     DBUG_RETURN(1);
   
   data_len = event_len - body_offset;
@@ -4728,7 +5168,7 @@ int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli,
   */
   if (rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     VOID(pthread_mutex_lock(&LOCK_thread_count));
     thd->query_id = next_query_id();
     VOID(pthread_mutex_unlock(&LOCK_thread_count));
@@ -4997,6 +5437,7 @@ Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg,
   DBUG_PRINT("enter",("new_log_ident: %s  pos: %s  flags: %lu", new_log_ident_arg,
                       llstr(pos_arg, buff), (ulong) flags));
 #endif
+  cache_type= EVENT_NO_CACHE;
   if (flags & DUP_NAME)
     new_log_ident= my_strndup(new_log_ident_arg, ident_len, MYF(MY_WME));
   if (flags & RELAY_LOG)
@@ -5038,9 +5479,11 @@ bool Rotate_log_event::write(IO_CACHE* file)
 {
   char buf[ROTATE_HEADER_LEN];
   int8store(buf + R_POS_OFFSET, pos);
-  return (write_header(file, ROTATE_HEADER_LEN + ident_len) ||
-          my_b_safe_write(file, (uchar*)buf, ROTATE_HEADER_LEN) ||
-          my_b_safe_write(file, (uchar*)new_log_ident, (uint) ident_len));
+  return (write_header(file, ROTATE_HEADER_LEN + ident_len) || 
+          wrapper_my_b_safe_write(file, (uchar*) buf, ROTATE_HEADER_LEN) ||
+          wrapper_my_b_safe_write(file, (uchar*) new_log_ident,
+                                     (uint) ident_len) ||
+          write_footer(file));
 }
 #endif
 
@@ -5209,7 +5652,8 @@ bool Intvar_log_event::write(IO_CACHE* file)
   buf[I_TYPE_OFFSET]= (uchar) type;
   int8store(buf + I_VAL_OFFSET, val);
   return (write_header(file, sizeof(buf)) ||
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -5337,7 +5781,8 @@ bool Rand_log_event::write(IO_CACHE* file)
   int8store(buf + RAND_SEED1_OFFSET, seed1);
   int8store(buf + RAND_SEED2_OFFSET, seed2);
   return (write_header(file, sizeof(buf)) ||
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -5439,8 +5884,9 @@ Xid_log_event(const char* buf,
 bool Xid_log_event::write(IO_CACHE* file)
 {
   DBUG_EXECUTE_IF("do_not_write_xid", return 0;);
-  return write_header(file, sizeof(xid)) ||
-         my_b_safe_write(file, (uchar*) &xid, sizeof(xid));
+  return (write_header(file, sizeof(xid)) ||
+	  wrapper_my_b_safe_write(file, (uchar*) &xid, sizeof(xid)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -5659,10 +6105,11 @@ bool User_var_log_event::write(IO_CACHE* file)
   event_length= sizeof(buf)+ name_len + buf1_length + val_len;
 
   return (write_header(file, event_length) ||
-          my_b_safe_write(file, (uchar*) buf, sizeof(buf))   ||
-	  my_b_safe_write(file, (uchar*) name, name_len)     ||
-	  my_b_safe_write(file, (uchar*) buf1, buf1_length) ||
-	  my_b_safe_write(file, pos, val_len));
+          wrapper_my_b_safe_write(file, (uchar*) buf, sizeof(buf))   ||
+	  wrapper_my_b_safe_write(file, (uchar*) name, name_len)     ||
+	  wrapper_my_b_safe_write(file, (uchar*) buf1, buf1_length) ||
+	  wrapper_my_b_safe_write(file, pos, val_len) ||
+          write_footer(file));
 }
 #endif
 
@@ -6185,7 +6632,7 @@ Create_file_log_event::Create_file_log_event(const char* buf, uint len,
   uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1];
   if (!(event_buf= (char*) my_memdup(buf, len, MYF(MY_WME))) ||
       copy_log_event(event_buf,len,
-                     ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                     (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
                       load_header_len + header_len :
                       (fake_base ? (header_len+load_header_len) :
                        (header_len+load_header_len) +
@@ -6420,8 +6867,9 @@ bool Append_block_log_event::write(IO_CACHE* file)
   uchar buf[APPEND_BLOCK_HEADER_LEN];
   int4store(buf + AB_FILE_ID_OFFSET, file_id);
   return (write_header(file, APPEND_BLOCK_HEADER_LEN + block_len) ||
-          my_b_safe_write(file, buf, APPEND_BLOCK_HEADER_LEN) ||
-	  my_b_safe_write(file, (uchar*) block, block_len));
+          wrapper_my_b_safe_write(file, buf, APPEND_BLOCK_HEADER_LEN) ||
+	  wrapper_my_b_safe_write(file, (uchar*) block, block_len) ||
+	  write_footer(file));
 }
 #endif
 
@@ -6575,7 +7023,8 @@ bool Delete_file_log_event::write(IO_CACHE* file)
  uchar buf[DELETE_FILE_HEADER_LEN];
  int4store(buf + DF_FILE_ID_OFFSET, file_id);
  return (write_header(file, sizeof(buf)) ||
-         my_b_safe_write(file, buf, sizeof(buf)));
+         wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	 write_footer(file));
 }
 #endif
 
@@ -6672,7 +7121,8 @@ bool Execute_load_log_event::write(IO_CACHE* file)
   uchar buf[EXEC_LOAD_HEADER_LEN];
   int4store(buf + EL_FILE_ID_OFFSET, file_id);
   return (write_header(file, sizeof(buf)) || 
-          my_b_safe_write(file, buf, sizeof(buf)));
+          wrapper_my_b_safe_write(file, buf, sizeof(buf)) ||
+	  write_footer(file));
 }
 #endif
 
@@ -6733,16 +7183,17 @@ int Execute_load_log_event::do_apply_event(Relay_log_info const *rli)
                 fname);
     goto err;
   }
-  if (!(lev = (Load_log_event*)Log_event::read_log_event(&file,
-                                                         (pthread_mutex_t*)0,
-                                                         rli->relay_log.description_event_for_exec)) ||
+  if (!(lev= (Load_log_event*)
+        Log_event::read_log_event(&file,
+                                  (pthread_mutex_t*)0,
+                                  rli->relay_log.description_event_for_exec,
+                                  opt_slave_sql_verify_checksum)) ||
       lev->get_type_code() != NEW_LOAD_EVENT)
   {
     rli->report(ERROR_LEVEL, 0, "Error in Exec_load event: "
                     "file '%s' appears corrupted", fname);
     goto err;
   }
-
   lev->thd = thd;
   /*
     lev->do_apply_event should use rli only for errors i.e. should
@@ -6905,7 +7356,7 @@ Execute_load_query_log_event::write_post_header_for_derived(IO_CACHE* file)
   int4store(buf + 4, fn_pos_start);
   int4store(buf + 4 + 4, fn_pos_end);
   *(buf + 4 + 4 + 4)= (uchar) dup_handling;
-  return my_b_safe_write(file, buf, EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN);
+  return wrapper_my_b_safe_write(file, buf, EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN);
 }
 #endif
 
@@ -7141,7 +7592,8 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
     m_width(tbl_arg ? tbl_arg->s->fields : 1),
     m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0) 
 #ifdef HAVE_REPLICATION
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL),
+    m_key(NULL), m_key_info(NULL), m_key_nr(0)
 #endif
 {
   /*
@@ -7189,7 +7641,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
 #endif
     m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL),
+    m_key(NULL), m_key_info(NULL), m_key_nr(0)
 #endif
 {
   DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
@@ -7534,7 +7987,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -7561,7 +8014,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
 
     /*
       Now we are in a statement and will stay in a statement until we
@@ -7871,11 +8324,11 @@ bool Rows_log_event::write_data_header(IO_CACHE *file)
                   {
                     int4store(buf + 0, m_table_id);
                     int2store(buf + 4, m_flags);
-                    return (my_b_safe_write(file, buf, 6));
+                    return (wrapper_my_b_safe_write(file, buf, 6));
                   });
   int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id);
   int2store(buf + RW_FLAGS_OFFSET, m_flags);
-  return (my_b_safe_write(file, buf, ROWS_HEADER_LEN));
+  return (wrapper_my_b_safe_write(file, buf, ROWS_HEADER_LEN));
 }
 
 bool Rows_log_event::write_data_body(IO_CACHE*file)
@@ -7891,10 +8344,10 @@ bool Rows_log_event::write_data_body(IO_CACHE*file)
   DBUG_ASSERT(static_cast<size_t>(sbuf_end - sbuf) <= sizeof(sbuf));
 
   DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf));
-  res= res || my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
+  res= res || wrapper_my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
 
   DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols));
-  res= res || my_b_safe_write(file, (uchar*) m_cols.bitmap,
+  res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols.bitmap,
                               no_bytes_in_map(&m_cols));
   /*
     TODO[refactor write]: Remove the "down cast" here (and elsewhere).
@@ -7903,11 +8356,11 @@ bool Rows_log_event::write_data_body(IO_CACHE*file)
   {
     DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap,
               no_bytes_in_map(&m_cols_ai));
-    res= res || my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
+    res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
                                 no_bytes_in_map(&m_cols_ai));
   }
   DBUG_DUMP("rows", m_rows_buf, data_size);
-  res= res || my_b_safe_write(file, m_rows_buf, (size_t) data_size);
+  res= res || wrapper_my_b_safe_write(file, m_rows_buf, (size_t) data_size);
 
   return res;
 
@@ -7952,6 +8405,144 @@ void Rows_log_event::print_helper(FILE *file,
 #endif
 
 /**************************************************************************
+	Annotate_rows_log_event member functions
+**************************************************************************/
+
+#ifndef MYSQL_CLIENT
+Annotate_rows_log_event::Annotate_rows_log_event(THD *thd,
+                                                 uint16 cache_type_arg)
+  : Log_event(thd, 0, true),
+    m_save_thd_query_txt(0),
+    m_save_thd_query_len(0)
+{
+  m_query_txt= thd->query();
+  m_query_len= thd->query_length();
+  cache_type= cache_type_arg;
+}
+#endif
+
+Annotate_rows_log_event::Annotate_rows_log_event(const char *buf,
+                                                 uint event_len,
+                                      const Format_description_log_event *desc)
+  : Log_event(buf, desc),
+    m_save_thd_query_txt(0),
+    m_save_thd_query_len(0)
+{
+  m_query_len= event_len - desc->common_header_len;
+  m_query_txt= (char*) buf + desc->common_header_len;
+}
+
+Annotate_rows_log_event::~Annotate_rows_log_event()
+{
+#ifndef MYSQL_CLIENT
+  if (m_save_thd_query_txt)
+    thd->set_query(m_save_thd_query_txt, m_save_thd_query_len);
+#endif
+}
+
+int Annotate_rows_log_event::get_data_size()
+{
+  return m_query_len;
+}
+
+Log_event_type Annotate_rows_log_event::get_type_code()
+{
+  return ANNOTATE_ROWS_EVENT;
+}
+
+bool Annotate_rows_log_event::is_valid() const
+{
+  return (m_query_txt != NULL && m_query_len != 0);
+}
+
+#ifndef MYSQL_CLIENT
+bool Annotate_rows_log_event::write_data_header(IO_CACHE *file)
+{ 
+  return 0;
+}
+#endif
+
+#ifndef MYSQL_CLIENT
+bool Annotate_rows_log_event::write_data_body(IO_CACHE *file)
+{
+  return wrapper_my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+void Annotate_rows_log_event::pack_info(Protocol* protocol)
+{
+  if (m_query_txt && m_query_len)
+    protocol->store(m_query_txt, m_query_len, &my_charset_bin);
+}
+#endif
+
+#ifdef MYSQL_CLIENT
+void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
+{
+  if (pinfo->short_form)
+    return;
+
+  print_header(&pinfo->head_cache, pinfo, TRUE);
+  my_b_printf(&pinfo->head_cache, "\tAnnotate_rows:\n");
+
+  char *pbeg;   // beginning of the next line
+  char *pend;   // end of the next line
+  uint cnt= 0;  // characters counter
+
+  for (pbeg= m_query_txt; ; pbeg= pend)
+  {
+    // skip all \r's and \n's at the beginning of the next line
+    for (;; pbeg++)
+    {
+      if (++cnt > m_query_len)
+        return;
+
+      if (*pbeg != '\r' && *pbeg != '\n')
+        break;
+    }
+
+    // find end of the next line
+    for (pend= pbeg + 1;
+         ++cnt <= m_query_len && *pend != '\r' && *pend != '\n';
+         pend++)
+      ;
+
+    // print next line
+    my_b_write(&pinfo->head_cache, (const uchar*) "#Q> ", 4);
+    my_b_write(&pinfo->head_cache, (const uchar*) pbeg, pend - pbeg);
+    my_b_write(&pinfo->head_cache, (const uchar*) "\n", 1);
+  }
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Annotate_rows_log_event::do_apply_event(Relay_log_info const *rli)
+{
+  m_save_thd_query_txt= thd->query();
+  m_save_thd_query_len= thd->query_length();
+  thd->set_query(m_query_txt, m_query_len);
+  return 0;
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Annotate_rows_log_event::do_update_pos(Relay_log_info *rli)
+{
+  rli->inc_event_relay_log_pos();
+  return 0;
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+Log_event::enum_skip_reason
+Annotate_rows_log_event::do_shall_skip(Relay_log_info *rli)
+{
+  return continue_group(rli);
+}
+#endif
+
+/**************************************************************************
 	Table_map_log_event member functions and support functions
 **************************************************************************/
 
@@ -8573,11 +9164,11 @@ bool Table_map_log_event::write_data_header(IO_CACHE *file)
                   {
                     int4store(buf + 0, m_table_id);
                     int2store(buf + 4, m_flags);
-                    return (my_b_safe_write(file, buf, 6));
+                    return (wrapper_my_b_safe_write(file, buf, 6));
                   });
   int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id);
   int2store(buf + TM_FLAGS_OFFSET, m_flags);
-  return (my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
+  return (wrapper_my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
 }
 
 bool Table_map_log_event::write_data_body(IO_CACHE *file)
@@ -8601,15 +9192,15 @@ bool Table_map_log_event::write_data_body(IO_CACHE *file)
   uchar mbuf[sizeof(m_field_metadata_size)];
   uchar *const mbuf_end= net_store_length(mbuf, m_field_metadata_size);
 
-  return (my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
-          my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
-          my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
-          my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
-          my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
-          my_b_safe_write(file, m_coltype, m_colcnt) ||
-          my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
-          my_b_safe_write(file, m_field_metadata, m_field_metadata_size),
-          my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
+  return (wrapper_my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
+          wrapper_my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
+          wrapper_my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
+          wrapper_my_b_safe_write(file, m_coltype, m_colcnt) ||
+          wrapper_my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
+          wrapper_my_b_safe_write(file, m_field_metadata, m_field_metadata_size),
+          wrapper_my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
  }
 #endif
 
@@ -9210,6 +9801,86 @@ record_compare_exit:
   return result;
 }
 
+
+/**
+  Find the best key to use when locating the row in @c find_row().
+
+  A primary key is preferred if it exists; otherwise a unique index is
+  preferred. Else we pick the index with the smalles rec_per_key value.
+
+  If a suitable key is found, set @c m_key, @c m_key_nr and @c m_key_info
+  member fields appropriately.
+
+  @returns Error code on failure, 0 on success.
+*/
+int Rows_log_event::find_key()
+{
+  uint i, best_key_nr, last_part;
+  KEY *key, *best_key;
+  ulong best_rec_per_key, tmp;
+  DBUG_ENTER("Rows_log_event::find_key");
+  DBUG_ASSERT(m_table);
+
+  best_key_nr= MAX_KEY;
+  LINT_INIT(best_key);
+  LINT_INIT(best_rec_per_key);
+
+  /*
+    Keys are sorted so that any primary key is first, followed by unique keys,
+    followed by any other. So we will automatically pick the primary key if
+    it exists.
+  */
+  for (i= 0, key= m_table->key_info; i < m_table->s->keys; i++, key++)
+  {
+    if (!m_table->s->keys_in_use.is_set(i))
+      continue;
+    /*
+      We cannot use a unique key with NULL-able columns to uniquely identify
+      a row (but we can still select it for range scan below if nothing better
+      is available).
+    */
+    if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
+    {
+      best_key_nr= i;
+      best_key= key;
+      break;
+    }
+    /*
+      We can only use a non-unique key if it allows range scans (ie. skip
+      FULLTEXT indexes and such).
+    */
+    last_part= key->key_parts - 1;
+    DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu",
+                        key->name, last_part, key->rec_per_key[last_part]));
+    if (!(m_table->file->index_flags(i, last_part, 1) & HA_READ_NEXT))
+      continue;
+
+    tmp= key->rec_per_key[last_part];
+    if (best_key_nr == MAX_KEY || (tmp > 0 && tmp < best_rec_per_key))
+    {
+      best_key_nr= i;
+      best_key= key;
+      best_rec_per_key= tmp;
+    }
+  }
+
+  if (best_key_nr == MAX_KEY)
+  {
+    m_key_info= NULL;
+    DBUG_RETURN(0);
+  }
+
+  // Allocate buffer for key searches
+  m_key= (uchar *) my_malloc(best_key->key_length, MYF(MY_WME));
+  if (m_key == NULL)
+    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+  m_key_info= best_key;
+  m_key_nr= best_key_nr;
+
+  DBUG_RETURN(0);;
+}
+
+
 /**
   Locate the current row in event's table.
 
@@ -9309,12 +9980,17 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
    */ 
   store_record(table,record[1]);    
 
-  if (table->s->keys > 0 && table->s->keys_in_use.is_set(0))
+  if (m_key_info)
   {
-    DBUG_PRINT("info",("locating record using primary key (index_read)"));
+    DBUG_PRINT("info",("locating record using key #%u [%s] (index_read)",
+                       m_key_nr, m_key_info->name));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_wrong_index",
+                    if(0 != strcmp(m_key_info->name,"expected_key")) abort(););
 
-    /* The 0th key is active: search the table using the index */
-    if (!table->file->inited && (error= table->file->ha_index_init(0, FALSE)))
+    /* The key is active: search the table using the index */
+    if (!table->file->inited &&
+        (error= table->file->ha_index_init(m_key_nr, FALSE)))
     {
       DBUG_PRINT("info",("ha_index_init returns error %d",error));
       table->file->print_error(error, MYF(0));
@@ -9324,14 +10000,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     /* Fill key data for the row */
 
     DBUG_ASSERT(m_key);
-    key_copy(m_key, table->record[0], table->key_info, 0);
+    key_copy(m_key, table->record[0], m_key_info, 0);
 
     /*
       Don't print debug messages when running valgrind since they can
       trigger false warnings.
      */
 #ifndef HAVE_valgrind
-    DBUG_DUMP("key data", m_key, table->key_info->key_length);
+    DBUG_DUMP("key data", m_key, m_key_info->key_length);
 #endif
 
     /*
@@ -9417,6 +10093,8 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       record we are looking for is stored in record[1].
      */ 
     DBUG_PRINT("info",("non-unique index, scanning it to find matching record")); 
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_index_scan", abort(););
 
     while (record_compare(table))
     {
@@ -9455,6 +10133,8 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
   else
   {
     DBUG_PRINT("info",("locating record using table scan (rnd_next)"));
+    /* We use this to test that the correct key is used in test cases. */
+    DBUG_EXECUTE_IF("slave_crash_if_table_scan", abort(););
 
     int restart_count= 0; // Number of times scanning has restarted from top
 
@@ -9575,14 +10255,7 @@ Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability
     return 0;
   }
 
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
-  return 0;
+  return find_key();
 }
 
 int 
@@ -9593,6 +10266,7 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability
   m_table->file->ha_index_or_rnd_end();
   my_free(m_key, MYF(MY_ALLOW_ZERO_PTR));
   m_key= NULL;
+  m_key_info= NULL;
 
   return error;
 }
@@ -9695,13 +10369,9 @@ Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len,
 int 
 Update_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
 {
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
+  int err;
+  if ((err= find_key()))
+    return err;
 
   m_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
 
@@ -9716,6 +10386,7 @@ Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability
   m_table->file->ha_index_or_rnd_end();
   my_free(m_key, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc
   m_key= NULL;
+  m_key_info= NULL;
 
   return error;
 }
@@ -9891,13 +10562,25 @@ Incident_log_event::write_data_header(IO_CACHE *file)
   DBUG_PRINT("enter", ("m_incident: %d", m_incident));
   uchar buf[sizeof(int16)];
   int2store(buf, (int16) m_incident);
-  DBUG_RETURN(my_b_safe_write(file, buf, sizeof(buf)));
+#ifndef MYSQL_CLIENT
+  DBUG_RETURN(wrapper_my_b_safe_write(file, buf, sizeof(buf)));
+#else
+   DBUG_RETURN(my_b_safe_write(file, buf, sizeof(buf)));
+#endif
 }
 
 bool
 Incident_log_event::write_data_body(IO_CACHE *file)
 {
+  uchar tmp[1];
   DBUG_ENTER("Incident_log_event::write_data_body");
+  tmp[0]= (uchar) m_message.length;
+  crc= my_checksum(crc, (uchar*) tmp, 1);
+  if (m_message.length > 0)
+  {
+    crc= my_checksum(crc, (uchar*) m_message.str, m_message.length);
+    // todo: report a bug on write_str accepts uint but treats it as uchar
+  }
   DBUG_RETURN(write_str(file, m_message.str, (uint) m_message.length));
 }
 
diff --git a/sql/log_event.h b/sql/log_event.h
index a8b6e95ea2e..1ce17bd7e35 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -74,6 +74,7 @@
 #define LOG_READ_MEM    -5
 #define LOG_READ_TRUNC  -6
 #define LOG_READ_TOO_LARGE -7
+#define LOG_READ_CHECKSUM_FAILURE -8
 
 #define LOG_EVENT_OFFSET 4
 
@@ -252,6 +253,7 @@ struct sql_ex_info
 #define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1)
 #define EXECUTE_LOAD_QUERY_HEADER_LEN  (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN)
 #define INCIDENT_HEADER_LEN    2
+#define ANNOTATE_ROWS_HEADER_LEN  0
 /* 
   Max number of possible extra bytes in a replication event compared to a
   packet (i.e. a query) sent from client to master;
@@ -267,6 +269,7 @@ struct sql_ex_info
                                    1 + 2          /* type, charset_database_number */ + \
                                    1 + 8          /* type, table_map_for_update */ + \
                                    1 + 4          /* type, master_data_written */ + \
+                                   1 + 3          /* type, sec_part of NOW() */ + \
                                    1 + 16 + 1 + 60/* type, user_len, user, host_len, host */)
 #define MAX_LOG_EVENT_HEADER   ( /* in order of Query_log_event::write */ \
   LOG_EVENT_HEADER_LEN + /* write_header */ \
@@ -338,6 +341,8 @@ struct sql_ex_info
 
 #define Q_INVOKER 11
 
+#define Q_HRNOW 128
+
 /* Intvar event post-header */
 
 /* Intvar event data */
@@ -526,6 +531,22 @@ struct sql_ex_info
 #endif
 #undef EXPECTED_OPTIONS         /* You shouldn't use this one */
 
+enum enum_binlog_checksum_alg {
+  BINLOG_CHECKSUM_ALG_OFF= 0,    // Events are without checksum though its generator
+                                 // is checksum-capable New Master (NM).
+  BINLOG_CHECKSUM_ALG_CRC32= 1,  // CRC32 of zlib algorithm.
+  BINLOG_CHECKSUM_ALG_ENUM_END,  // the cut line: valid alg range is [1, 0x7f].
+  BINLOG_CHECKSUM_ALG_UNDEF= 255 // special value to tag undetermined yet checksum
+                                 // or events from checksum-unaware servers
+};
+
+#define CHECKSUM_CRC32_SIGNATURE_LEN 4
+/**
+   defined statically while there is just one alg implemented
+*/
+#define BINLOG_CHECKSUM_LEN CHECKSUM_CRC32_SIGNATURE_LEN
+#define BINLOG_CHECKSUM_ALG_DESC_LEN 1  /* 1 byte checksum alg descriptor */
+
 /**
   @enum Log_event_type
 
@@ -584,8 +605,14 @@ enum Log_event_type
    */
   INCIDENT_EVENT= 26,
 
+  /* New MySQL/Sun events are to be added right above this comment */
+  MYSQL_EVENTS_END,
+
+  MARIA_EVENTS_BEGIN= 160,
+  /* New Maria event numbers start from here */
+  ANNOTATE_ROWS_EVENT= 160,
+
   /*
-    Add new events here - right above this comment!
     Existing events (except ENUM_END_EVENT) should never change their numbers
   */
 
@@ -898,7 +925,8 @@ public:
     execution time, which guarantees good replication (otherwise, we
     could have a query and its event with different timestamps).
   */
-  time_t when;
+  my_time_t when;
+  ulong     when_sec_part;
   /* The number of seconds the query took to run on the master. */
   ulong exec_time;
   /* Number of bytes written by write() function */
@@ -918,6 +946,27 @@ public:
   uint16 flags;
 
   bool cache_stmt;
+  /*
+    The revid:alfranio.correia@sun.com-20091103190256-637o8qxlveikrt3i commit
+    ("WL#2687 WL#5072 BUG#40278 BUG#47175") in MySQL 5.5 changes the bool
+    cache_stmt into an enum cache_type. For the backport of WL#2540 binlog
+    event checksum, we need this event_type member to know if we are writing
+    directly to the log, or into a transaction cache.
+
+    Until the cache_type stuff is merged, we temporarily partially backport
+    the cache_type member, only enough to be able to check if we are writing
+    directly to log or not. Once MySQL 5.5 is merged, this can be removed, and
+    replaced with the MySQL 5.5 code.
+
+    Similarly, in MySQL 5.5 the decision on whether to write directly to log
+    or indirectly through cache is decided differently, and the
+    pre_55_writing_direct() (and all calls to it) are not needed and can be
+    removed once 5.5 is merged.
+  */
+  enum enum_event_cache_type { EVENT_INVALID_CACHE, EVENT_STMT_CACHE,
+                               EVENT_TRANSACTIONAL_CACHE, EVENT_NO_CACHE };
+  uint16 cache_type;
+  void pre_55_writing_direct() { cache_type= EVENT_NO_CACHE; }
 
   /**
     A storage to cache the global system variable's value.
@@ -925,6 +974,10 @@ public:
   */
   ulong slave_exec_mode;
 
+  /**
+    Placeholder for event checksum while writing to binlog.
+   */
+  ha_checksum crc;
 #ifndef MYSQL_CLIENT
   THD* thd;
 
@@ -944,9 +997,10 @@ public:
   static Log_event* read_log_event(IO_CACHE* file,
 				   pthread_mutex_t* log_lock,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event,
+                                   my_bool crc_check);
   static int read_log_event(IO_CACHE* file, String* packet,
-			    pthread_mutex_t* log_lock);
+			    pthread_mutex_t* log_lock, uint8 checksum_alg_arg);
   /*
     init_show_field_list() prepares the column names and types for the
     output of SHOW BINLOG EVENTS; it is used only by SHOW BINLOG
@@ -973,7 +1027,7 @@ public:
     /* avoid having to link mysqlbinlog against libpthread */
   static Log_event* read_log_event(IO_CACHE* file,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event, my_bool crc_check);
   /* print*() functions are used by mysqlbinlog */
   virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0;
   void print_timestamp(IO_CACHE* file, time_t *ts = 0);
@@ -982,6 +1036,15 @@ public:
   void print_base64(IO_CACHE* file, PRINT_EVENT_INFO* print_event_info,
                     bool is_more);
 #endif
+  /* 
+     The value is set by caller of FD constructor and
+     Log_event::write_header() for the rest.
+     In the FD case it's propagated into the last byte 
+     of post_header_len[] at FD::write().
+     On the slave side the value is assigned from post_header_len[last] 
+     of the last seen FD event.
+  */
+  uint8 checksum_alg;
 
   static void *operator new(size_t size)
   {
@@ -996,29 +1059,46 @@ public:
   /* Placement version of the above operators */
   static void *operator new(size_t, void* ptr) { return ptr; }
   static void operator delete(void*, void*) { }
+  bool wrapper_my_b_safe_write(IO_CACHE* file, const uchar* buf, ulong data_length);
 
 #ifndef MYSQL_CLIENT
   bool write_header(IO_CACHE* file, ulong data_length);
+  bool write_footer(IO_CACHE* file);
+  my_bool need_checksum();
+
   virtual bool write(IO_CACHE* file)
   {
-    return (write_header(file, get_data_size()) ||
-            write_data_header(file) ||
-            write_data_body(file));
+    return(write_header(file, get_data_size()) ||
+	   write_data_header(file) ||
+	   write_data_body(file) ||
+	   write_footer(file));
   }
   virtual bool write_data_header(IO_CACHE* file)
   { return 0; }
   virtual bool write_data_body(IO_CACHE* file __attribute__((unused)))
   { return 0; }
-  inline time_t get_time()
+  inline my_time_t get_time()
   {
     THD *tmp_thd;
     if (when)
       return when;
     if (thd)
-      return thd->start_time;
+    {
+      when= thd->start_time;
+      when_sec_part= thd->start_time_sec_part;
+      return when;
+    }
+    /* thd will only be 0 here at time of log creation */
     if ((tmp_thd= current_thd))
-      return tmp_thd->start_time;
-    return my_time(0);
+    {
+      when= tmp_thd->start_time;
+      when_sec_part= tmp_thd->start_time_sec_part;
+      return when;
+    }
+    my_hrtime_t hrtime= my_hrtime();
+    when= hrtime_to_my_time(hrtime);
+    when_sec_part= hrtime_sec_part(hrtime);
+    return when;
   }
 #endif
   virtual Log_event_type get_type_code() = 0;
@@ -1053,7 +1133,7 @@ public:
   static Log_event* read_log_event(const char* buf, uint event_len,
 				   const char **error,
                                    const Format_description_log_event
-                                   *description_event);
+                                   *description_event, my_bool crc_check);
   /**
     Returns the human readable name of the given event type.
   */
@@ -2232,9 +2312,17 @@ public:
   */
   uint8 common_header_len;
   uint8 number_of_event_types;
-  /* The list of post-headers' lengthes */
+  /* 
+     The list of post-headers' lengths followed 
+     by the checksum alg decription byte
+  */
   uint8 *post_header_len;
-  uchar server_version_split[3];
+  struct master_version_split {
+    enum {KIND_MYSQL, KIND_MARIADB};
+    int kind;
+    uchar ver[3];
+  };
+  master_version_split server_version_split;
   const uint8 *event_type_permutation;
 
   Format_description_log_event(uint8 binlog_ver, const char* server_ver=0);
@@ -2266,7 +2354,7 @@ public:
   }
 
   void calc_server_version_split();
-
+  static bool is_version_before_checksum(master_version_split *version_split);
 protected:
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
   virtual int do_apply_event(Relay_log_info const *rli);
@@ -2321,9 +2409,10 @@ public:
   uchar type;
 
 #ifndef MYSQL_CLIENT
-  Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg)
-    :Log_event(thd_arg,0,0),val(val_arg),type(type_arg)
-  {}
+  Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg,
+                   uint16 cache_type_arg)
+    :Log_event(thd_arg,0,0), val(val_arg), type(type_arg)
+  { cache_type= cache_type_arg; }
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
@@ -2397,9 +2486,10 @@ class Rand_log_event: public Log_event
   ulonglong seed2;
 
 #ifndef MYSQL_CLIENT
-  Rand_log_event(THD* thd_arg, ulonglong seed1_arg, ulonglong seed2_arg)
-    :Log_event(thd_arg,0,0),seed1(seed1_arg),seed2(seed2_arg)
-  {}
+  Rand_log_event(THD* thd_arg, ulonglong seed1_arg, ulonglong seed2_arg,
+                 uint16 cache_type_arg)
+    :Log_event(thd_arg, 0, 0), seed1(seed1_arg), seed2(seed2_arg)
+    { cache_type= cache_type_arg; }
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
@@ -2443,7 +2533,8 @@ class Xid_log_event: public Log_event
    my_xid xid;
 
 #ifndef MYSQL_CLIENT
-  Xid_log_event(THD* thd_arg, my_xid x): Log_event(thd_arg,0,0), xid(x) {}
+  Xid_log_event(THD* thd_arg, my_xid x): Log_event(thd_arg, 0, 0), xid(x)
+  { cache_type= EVENT_NO_CACHE; }
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
@@ -2490,10 +2581,11 @@ public:
 #ifndef MYSQL_CLIENT
   User_var_log_event(THD* thd_arg, char *name_arg, uint name_len_arg,
                      char *val_arg, ulong val_len_arg, Item_result type_arg,
-		     uint charset_number_arg)
-    :Log_event(), name(name_arg), name_len(name_len_arg), val(val_arg),
-    val_len(val_len_arg), type(type_arg), charset_number(charset_number_arg)
-    { is_null= !val; }
+		     uint charset_number_arg,
+                     uint16 cache_type_arg)
+    :Log_event(thd_arg, 0, 0), name(name_arg), name_len(name_len_arg), val(val_arg),
+     val_len(val_len_arg), type(type_arg), charset_number(charset_number_arg)
+    { is_null= !val; cache_type= cache_type_arg; }
   void pack_info(Protocol* protocol);
 #else
   void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
@@ -2990,6 +3082,59 @@ public:
 char *str_to_hex(char *to, const char *from, uint len);
 
 /**
+  @class Annotate_rows_log_event
+
+  In row-based mode, if binlog_annotate_row_events = ON, each group of
+  Table_map_log_events is preceded by an Annotate_rows_log_event which
+  contains the query which caused the subsequent rows operations.
+
+  The Annotate_rows_log_event has no post-header and its body contains
+  the corresponding query (without trailing zero). Note. The query length
+  is to be calculated as a difference between the whole event length and
+  the common header length.
+*/
+class Annotate_rows_log_event: public Log_event
+{
+public:
+#ifndef MYSQL_CLIENT
+  Annotate_rows_log_event(THD*, uint16 cache_type_arg);
+#endif
+  Annotate_rows_log_event(const char *buf, uint event_len,
+                          const Format_description_log_event*);
+  ~Annotate_rows_log_event();
+
+  virtual int get_data_size();
+  virtual Log_event_type get_type_code();
+  virtual bool is_valid() const;
+
+#ifndef MYSQL_CLIENT
+  virtual bool write_data_header(IO_CACHE*);
+  virtual bool write_data_body(IO_CACHE*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+  virtual void pack_info(Protocol*);
+#endif
+
+#ifdef MYSQL_CLIENT
+  virtual void print(FILE*, PRINT_EVENT_INFO*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+private:
+  virtual int do_apply_event(Relay_log_info const*);
+  virtual int do_update_pos(Relay_log_info*);
+  virtual enum_skip_reason do_shall_skip(Relay_log_info*);
+#endif
+
+private:
+  char *m_query_txt;
+  uint  m_query_len;
+  char *m_save_thd_query_txt;
+  uint  m_save_thd_query_len;
+};
+
+/**
   @class Table_map_log_event
 
   In row-based mode, every row operation event is preceded by a
@@ -3594,7 +3739,10 @@ protected:
   const uchar *m_curr_row;     /* Start of the row being processed */
   const uchar *m_curr_row_end; /* One-after the end of the current row */
   uchar    *m_key;      /* Buffer to keep key value during searches */
+  KEY      *m_key_info; /* Pointer to KEY info for m_key_nr */
+  uint      m_key_nr;   /* Key number */
 
+  int find_key(); // Find a best key to use in find_row()
   int find_row(const Relay_log_info *const);
   int write_row(const Relay_log_info *const, const bool);
 
@@ -3604,7 +3752,8 @@ protected:
     DBUG_ASSERT(m_table);
 
     ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT);
-    int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols,
+    int const result= ::unpack_row(rli, m_table, m_width, m_curr_row,
+                                   m_rows_end, &m_cols,
                                    &m_curr_row_end, &m_master_reclength);
     if (m_curr_row_end > m_rows_end)
       my_error(ER_SLAVE_CORRUPT_EVENT, MYF(0));
@@ -3979,6 +4128,10 @@ bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos,
                            const char **group_relay_log_name,
                            ulonglong *relay_log_pos);
 
+bool event_checksum_test(uchar *buf, ulong event_len, uint8 alg);
+uint8 get_checksum_alg(const char* buf, ulong len);
+extern TYPELIB binlog_checksum_typelib;
+
 /**
   @} (end of group Replication)
 */
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index 94e45bc9ca6..7328d3451f1 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -151,7 +151,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -173,7 +173,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    ev_thd->set_time((time_t)ev->when);
+    ev_thd->set_time(ev->when, ev->when_sec_part);
     /*
       There are a few flags that are replicated with each row event.
       Make sure to set/clear them before executing the main body of
@@ -998,7 +998,8 @@ Write_rows_log_event_old::do_prepare_row(THD *thd_arg,
   int error;
   error= unpack_row_old(const_cast<Relay_log_info*>(rli),
                         table, m_width, table->record[0],
-                        row_start, &m_cols, row_end, &m_master_reclength,
+                        row_start, m_rows_end,
+                        &m_cols, row_end, &m_master_reclength,
                         table->write_set, PRE_GA_WRITE_ROWS_EVENT);
   bitmap_copy(table->read_set, table->write_set);
   return error;
@@ -1085,7 +1086,8 @@ Delete_rows_log_event_old::do_prepare_row(THD *thd_arg,
 
   error= unpack_row_old(const_cast<Relay_log_info*>(rli),
                         table, m_width, table->record[0],
-                        row_start, &m_cols, row_end, &m_master_reclength,
+                        row_start, m_rows_end,
+                        &m_cols, row_end, &m_master_reclength,
                         table->read_set, PRE_GA_DELETE_ROWS_EVENT);
   /*
     If we will access rows using the random access method, m_key will
@@ -1184,13 +1186,15 @@ int Update_rows_log_event_old::do_prepare_row(THD *thd_arg,
   /* record[0] is the before image for the update */
   error= unpack_row_old(const_cast<Relay_log_info*>(rli),
                         table, m_width, table->record[0],
-                        row_start, &m_cols, row_end, &m_master_reclength,
+                        row_start, m_rows_end,
+                        &m_cols, row_end, &m_master_reclength,
                         table->read_set, PRE_GA_UPDATE_ROWS_EVENT);
   row_start = *row_end;
   /* m_after_image is the after image for the update */
   error= unpack_row_old(const_cast<Relay_log_info*>(rli),
                         table, m_width, m_after_image,
-                        row_start, &m_cols, row_end, &m_master_reclength,
+                        row_start, m_rows_end,
+                        &m_cols, row_end, &m_master_reclength,
                         table->write_set, PRE_GA_UPDATE_ROWS_EVENT);
 
   DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
@@ -1650,7 +1654,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
 #ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+    query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock);
 #endif
   }
 
@@ -1674,7 +1678,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    thd->set_time((time_t)when);
+    thd->set_time(when, when_sec_part);
     /*
       There are a few flags that are replicated with each row event.
       Make sure to set/clear them before executing the main body of
diff --git a/sql/log_event_old.h b/sql/log_event_old.h
index da5cf403fdb..8fe2e9e0a75 100644
--- a/sql/log_event_old.h
+++ b/sql/log_event_old.h
@@ -203,7 +203,8 @@ protected:
   { 
     DBUG_ASSERT(m_table);
     ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT);
-    int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols,
+    int const result= ::unpack_row(rli, m_table, m_width, m_curr_row,
+                                   m_rows_end, &m_cols,
                                    &m_curr_row_end, &m_master_reclength);
     ASSERT_OR_RETURN_ERROR(m_curr_row_end <= m_rows_end, HA_ERR_CORRUPT_EVENT);
     return result;
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
new file mode 100644
index 00000000000..e86c9ca251c
--- /dev/null
+++ b/sql/multi_range_read.cc
@@ -0,0 +1,1828 @@
+#include "mysql_priv.h"
+#include <my_bit.h>
+#include "sql_select.h"
+
+/****************************************************************************
+ * Default MRR implementation (MRR to non-MRR converter)
+ ***************************************************************************/
+
+/**
+  Get cost and other information about MRR scan over a known list of ranges
+
+  Calculate estimated cost and other information about an MRR scan for given
+  sequence of ranges.
+
+  @param keyno           Index number
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
+                         can't efficiently determine it
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that is expected to be actually
+                              used, or 0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @note
+    This method (or an overriding one in a derived class) must check for
+    thd->killed and return HA_POS_ERROR if it is not zero. This is required
+    for a user to be able to interrupt the calculation by killing the
+    connection/query.
+
+  @retval
+    HA_POS_ERROR  Error or the engine is unable to perform the requested
+                  scan. Values of OUT parameters are undefined.
+  @retval
+    other         OK, *cost contains cost of the scan, *bufsz and *flags
+                  contain scan parameters.
+*/
+
+ha_rows 
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges_arg,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  KEY_MULTI_RANGE range;
+  range_seq_t seq_it;
+  ha_rows rows, total_rows= 0;
+  uint n_ranges=0;
+  THD *thd= current_thd;
+  
+  /* Default MRR implementation doesn't need buffer */
+  *bufsz= 0;
+
+  seq_it= seq->init(seq_init_param, n_ranges, *flags);
+  while (!seq->next(seq_it, &range))
+  {
+    if (unlikely(thd->killed != 0))
+      return HA_POS_ERROR;
+    
+    n_ranges++;
+    key_range *min_endp, *max_endp;
+    if (range.range_flag & GEOM_FLAG)
+    {
+      /* In this case tmp_min_flag contains the handler-read-function */
+      range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
+      min_endp= &range.start_key;
+      max_endp= NULL;
+    }
+    else
+    {
+      min_endp= range.start_key.length? &range.start_key : NULL;
+      max_endp= range.end_key.length? &range.end_key : NULL;
+    }
+    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
+      rows= 1; /* there can be at most one row */
+    else
+    {
+      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
+                                                        max_endp)))
+      {
+        /* Can't scan one range => can't do MRR scan at all */
+        total_rows= HA_POS_ERROR;
+        break;
+      }
+    }
+    total_rows += rows;
+  }
+  
+  if (total_rows != HA_POS_ERROR)
+  {
+    /* The following calculation is the same as in multi_range_read_info(): */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    cost->zero();
+    cost->avg_io_cost= 1; /* assume random seeks */
+    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
+      cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
+    else
+      cost->io_count= read_time(keyno, n_ranges, total_rows);
+    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+  }
+  return total_rows;
+}
+
+
+/**
+  Get cost and other information about MRR scan over some sequence of ranges
+
+  Calculate estimated cost and other information about an MRR scan for some
+  sequence of ranges.
+
+  The ranges themselves will be known only at execution phase. When this
+  function is called we only know number of ranges and a (rough) E(#records)
+  within those ranges.
+
+  Currently this function is only called for "n-keypart singlepoint" ranges,
+  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
+
+  The flags parameter is a combination of those flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+  @param keyno           Index number
+  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
+                         range sequence.
+  @param n_rows          Estimated total number of records contained within all
+                         of the ranges
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that will be actually used, or
+                              0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @retval
+    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+          parameters.
+  @retval
+    other Error or can't perform the requested scan
+*/
+
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+                                       uint key_parts, uint *bufsz, 
+                                       uint *flags, COST_VECT *cost)
+{
+  /* 
+    Currently we expect this function to be called only in preparation of scan
+    with HA_MRR_SINGLE_POINT property.
+  */
+  DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
+
+  *bufsz= 0; /* Default implementation doesn't need a buffer */
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  cost->zero();
+  cost->avg_io_cost= 1; /* assume random seeks */
+
+  /* Produce the same cost as non-MRR code does */
+  if (*flags & HA_MRR_INDEX_ONLY)
+    cost->io_count= keyread_time(keyno, n_ranges, n_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, n_rows);
+  return 0;
+}
+
+
+/**
+  Initialize the MRR scan
+
+  Initialize the MRR scan. This function may do heavyweight scan 
+  initialization like row prefetching/sorting/etc (NOTE: but better not do
+  it here as we may not need it, e.g. if we never satisfy WHERE clause on
+  previous tables. For many implementations it would be natural to do such
+  initializations in the first multi_read_range_next() call)
+
+  mode is a combination of the following flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
+
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges        Number of ranges in the sequence
+  @param mode            Flags, see the description section for the details
+  @param buf             INOUT: memory buffer to be used
+
+  @note
+    One must have called index_init() before calling this function. Several
+    multi_range_read_init() calls may be made in course of one query.
+
+    Until WL#2623 is done (see its text, section 3.2), the following will 
+    also hold:
+    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
+    This property will only be used by NDB handler until WL#2623 is done.
+     
+    Buffer memory management is done according to the following scenario:
+    The caller allocates the buffer and provides it to the callee by filling
+    the members of HANDLER_BUFFER structure.
+    The callee consumes all or some fraction of the provided buffer space, and
+    sets the HANDLER_BUFFER members accordingly.
+    The callee may use the buffer memory until the next multi_range_read_init()
+    call is made, all records have been read, or until index_end() call is
+    made, whichever comes first.
+
+  @retval 0  OK
+  @retval 1  Error
+*/
+
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  DBUG_ENTER("handler::multi_range_read_init");
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  mrr_funcs= *seq_funcs;
+  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
+  mrr_have_range= FALSE;
+  DBUG_RETURN(0);
+}
+
+/**
+  Get next record in MRR scan
+
+  Default MRR implementation: read the next record
+
+  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
+                          Otherwise, the opaque value associated with the range
+                          that contains the returned record.
+
+  @retval 0      OK
+  @retval other  Error code
+*/
+
+int handler::multi_range_read_next(range_id_t *range_info)
+{
+  int result= HA_ERR_END_OF_FILE;
+  bool range_res;
+  DBUG_ENTER("handler::multi_range_read_next");
+
+  if (!mrr_have_range)
+  {
+    mrr_have_range= TRUE;
+    goto start;
+  }
+
+  do
+  {
+    /* Save a call if there can be only one row in range. */
+    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
+    {
+      result= read_range_next();
+      /* On success or non-EOF errors jump to the end. */
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+    else
+    {
+      if (was_semi_consistent_read())
+      {
+        /*
+          The following assignment is redundant, but for extra safety and to
+          remove the compiler warning:
+        */
+        range_res= FALSE;
+        goto scan_it_again;
+      }
+      /*
+        We need to set this for the last range only, but checking this
+        condition is more expensive than just setting the result code.
+      */
+      result= HA_ERR_END_OF_FILE;
+    }
+
+start:
+    /* Try the next range(s) until one matches a record. */
+    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
+    {
+scan_it_again:
+      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
+                                 &mrr_cur_range.start_key : 0,
+                               mrr_cur_range.end_key.keypart_map ?
+                                 &mrr_cur_range.end_key : 0,
+                               test(mrr_cur_range.range_flag & EQ_RANGE),
+                               mrr_is_output_sorted);
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+  }
+  while ((result == HA_ERR_END_OF_FILE) && !range_res);
+
+  *range_info= mrr_cur_range.ptr;
+  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
+  DBUG_RETURN(result);
+}
+
+/****************************************************************************
+ * Mrr_*_reader classes (building blocks for DS-MRR)
+ ***************************************************************************/
+
+int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                                  void *seq_init_param, uint n_ranges,
+                                  uint mode,  Key_parameters *key_par_arg,
+                                  Lifo_buffer *key_buffer_arg,
+                                  Buffer_manager *buf_manager_arg)
+{
+  HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
+  file= h_arg;
+  return file->handler::multi_range_read_init(seq_funcs, seq_init_param,
+                                              n_ranges, mode, &no_buffer);
+}
+
+
+int Mrr_simple_index_reader::get_next(range_id_t *range_info)
+{
+  int res;
+  while (!(res= file->handler::multi_range_read_next(range_info)))
+  {
+    KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range;
+    if (!file->mrr_funcs.skip_index_tuple ||
+        !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr))
+      break;
+  }
+  if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
+    file->print_error(res, MYF(0));             // Fatal error
+  return res;
+}
+
+
+/**
+  @brief Get next index record
+
+  @param range_info  OUT identifier of range that the returned record belongs to
+  
+  @note
+    We actually iterate over nested sequences:
+    - an ordered sequence of groups of identical keys
+      - each key group has key value, which has multiple matching records 
+        - thus, each record matches all members of the key group
+
+  @retval 0                   OK, next record was successfully read
+  @retval HA_ERR_END_OF_FILE  End of records
+  @retval Other               Some other error; Error is printed
+*/
+
+int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
+{
+  int res;
+  DBUG_ENTER("Mrr_ordered_index_reader::get_next");
+  
+  for(;;)
+  {
+    if (!scanning_key_val_iter)
+    {
+      while ((res= kv_it.init(this)))
+      {
+        if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
+          DBUG_RETURN(res); /* Some fatal error */
+
+        if (key_buffer->is_empty())
+        {
+          DBUG_RETURN(HA_ERR_END_OF_FILE);
+        }
+      }
+      scanning_key_val_iter= TRUE;
+    }
+
+    if ((res= kv_it.get_next(range_info)))
+    {
+      scanning_key_val_iter= FALSE;
+      if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
+        DBUG_RETURN(res);
+      kv_it.move_to_next_key_value();
+      continue;
+    }
+    if (!skip_index_tuple(*range_info) &&
+        !skip_record(*range_info, NULL))
+    {
+      break;
+    }
+    /* Go get another (record, range_id) combination */
+  } /* while */
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Supply index reader with the O(1)space it needs for scan interrupt/restore
+  operation
+*/
+
+bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
+                                                            uint key_len, 
+                                                            uint saved_pk_len,
+                                                            uchar **space_start,
+                                                            uchar *space_end)
+{
+  if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len))
+    return TRUE;
+  support_scan_interruptions= TRUE; 
+  
+  saved_rowid= *space_start;
+  *space_start += rowid_length;
+  
+  if (saved_pk_len)
+  {
+    saved_primary_key= *space_start;
+    *space_start += saved_pk_len;
+  }
+  else
+    saved_primary_key= NULL;
+
+  saved_key_tuple= *space_start;
+  *space_start += key_len;
+
+  have_saved_rowid= FALSE;
+  return FALSE;
+}
+
+void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
+{
+  support_scan_interruptions= FALSE;
+  saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */
+  have_saved_rowid= FALSE;
+}
+
+void Mrr_ordered_index_reader::interrupt_read()
+{
+  DBUG_ASSERT(support_scan_interruptions);
+  TABLE *table= file->get_table();
+  KEY *used_index= &table->key_info[file->active_index];
+  /* Save the current key value */
+  key_copy(saved_key_tuple, table->record[0],
+           used_index, used_index->key_length);
+  
+  if (saved_primary_key)
+  {
+    key_copy(saved_primary_key, table->record[0], 
+             &table->key_info[table->s->primary_key],
+             table->key_info[table->s->primary_key].key_length);
+  }
+
+  /* Save the last rowid */
+  memcpy(saved_rowid, file->ref, file->ref_length);
+  have_saved_rowid= TRUE;
+}
+
+void Mrr_ordered_index_reader::position()
+{
+  if (have_saved_rowid)
+    memcpy(file->ref, saved_rowid, file->ref_length);
+  else
+    Mrr_index_reader::position();
+}
+
+void Mrr_ordered_index_reader::resume_read()
+{
+  TABLE *table= file->get_table();
+  KEY *used_index= &table->key_info[file->active_index];
+  key_restore(table->record[0], saved_key_tuple, 
+              used_index, used_index->key_length);
+  if (saved_primary_key)
+  {
+    key_restore(table->record[0], saved_primary_key, 
+                &table->key_info[table->s->primary_key],
+                table->key_info[table->s->primary_key].key_length);
+  }
+}
+
+
+/**
+  Fill the buffer with (lookup_tuple, range_id) pairs and sort
+
+  @return 
+    0                   OK, the buffer is non-empty and sorted
+    HA_ERR_END_OF_FILE  Source exhausted, the buffer is empty.
+*/
+
+int Mrr_ordered_index_reader::refill_buffer(bool initial)
+{
+  KEY_MULTI_RANGE cur_range;
+  DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
+
+  DBUG_ASSERT(key_buffer->is_empty());
+
+  if (source_exhausted)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  buf_manager->reset_buffer_sizes(buf_manager->arg);
+  key_buffer->reset();
+  key_buffer->setup_writing(keypar.key_size_in_keybuf,
+                            is_mrr_assoc? sizeof(range_id_t) : 0);
+
+  while (key_buffer->can_write() && 
+         !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
+  {
+    DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+
+    /* Put key, or {key, range_id} pair into the buffer */
+    key_buffer->write_ptr1= keypar.use_key_pointers ?
+                              (uchar*)&cur_range.start_key.key : 
+                              (uchar*)cur_range.start_key.key;
+    key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
+    key_buffer->write();
+  }
+  
+  /* Force get_next() to start with kv_it.init() call: */
+  scanning_key_val_iter= FALSE;
+
+  if (source_exhausted && key_buffer->is_empty())
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  if (!initial)
+  {
+    /* This is a non-initial buffer fill and we've got a non-empty buffer */
+    THD *thd= current_thd;
+    status_var_increment(thd->status_var.ha_mrr_key_refills_count);
+  }
+
+  key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? 
+                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse : 
+                     (qsort2_cmp)Mrr_ordered_index_reader::compare_keys, 
+                   this);
+  DBUG_RETURN(0);
+}
+
+
+int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+                                   void *seq_init_param, uint n_ranges,
+                                   uint mode, Key_parameters *key_par_arg,
+                                   Lifo_buffer *key_buffer_arg,
+                                   Buffer_manager *buf_manager_arg)
+{
+  file= h_arg;
+  key_buffer= key_buffer_arg;
+  buf_manager= buf_manager_arg;
+  keypar= *key_par_arg;
+
+  KEY *key_info= &file->get_table()->key_info[file->active_index];
+  keypar.index_ranges_unique= test(key_info->flags & HA_NOSAME && 
+                                   key_info->key_parts == 
+                                   my_count_bits(keypar.key_tuple_map));
+
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+  mrr_funcs= *seq_funcs;
+  source_exhausted= FALSE;
+  if (support_scan_interruptions)
+    bzero(saved_key_tuple, key_info->key_length);
+  have_saved_rowid= FALSE;
+  return 0;
+}
+
+
+static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
+{
+  return - ((handler*)file)->cmp_ref(a, b);
+}
+
+
+int Mrr_ordered_rndpos_reader::init(handler *h_arg, 
+                                    Mrr_index_reader *index_reader_arg,
+                                    uint mode,
+                                    Lifo_buffer *buf)
+{
+  file= h_arg;
+  index_reader= index_reader_arg;
+  rowid_buffer= buf;
+  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+  index_reader_exhausted= FALSE;
+  index_reader_needs_refill= TRUE;
+  return 0;
+}
+
+
+/**
+  DS-MRR: Fill and sort the rowid buffer
+
+  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
+  buffer. When the buffer is full or scan is completed, sort the buffer by 
+  rowid and return.
+
+  When this function returns, either rowid buffer is not empty, or the source
+  of lookup keys (i.e. ranges) is exhaused.
+  
+  @retval 0      OK, the next portion of rowids is in the buffer,
+                 properly ordered
+  @retval other  Error
+*/
+
+int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
+{
+  int res;
+  bool first_call= initial;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
+
+  if (index_reader_exhausted)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  while (initial || index_reader_needs_refill || 
+         (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
+  {
+    if ((res= index_reader->refill_buffer(initial)))
+    {
+      if (res == HA_ERR_END_OF_FILE)
+        index_reader_exhausted= TRUE;
+      break;
+    }
+    initial= FALSE;
+    index_reader_needs_refill= FALSE;
+  }
+
+  if (!first_call && !index_reader_exhausted)
+  {
+    /* Ok, this was a successful buffer refill operation */
+    THD *thd= current_thd;
+    status_var_increment(thd->status_var.ha_mrr_rowid_refills_count);
+  }
+
+  DBUG_RETURN(res);
+}
+
+
+void Mrr_index_reader::position()
+{
+  file->position(file->get_table()->record[0]);
+}
+
+
+/* 
+  @brief Try to refill the rowid buffer without calling
+  index_reader->refill_buffer(). 
+*/
+
+int Mrr_ordered_rndpos_reader::refill_from_index_reader()
+{
+  range_id_t range_info;
+  int res;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");
+
+  DBUG_ASSERT(rowid_buffer->is_empty());
+  index_rowid= index_reader->get_rowid_ptr();
+  rowid_buffer->reset();
+  rowid_buffer->setup_writing(file->ref_length,
+                              is_mrr_assoc? sizeof(range_id_t) : 0);
+
+  last_identical_rowid= NULL;
+
+  index_reader->resume_read();
+  while (rowid_buffer->can_write())
+  {
+    res= index_reader->get_next(&range_info);
+
+    if (res)
+    {
+      if (res != HA_ERR_END_OF_FILE)
+        DBUG_RETURN(res);
+      index_reader_needs_refill=TRUE;
+      break;
+    }
+
+    index_reader->position();
+
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    rowid_buffer->write_ptr1= index_rowid;
+    rowid_buffer->write_ptr2= (uchar*)&range_info;
+    rowid_buffer->write();
+  }
+   
+  index_reader->interrupt_read();
+  /* Sort the buffer contents by rowid */
+  rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);
+
+  rowid_buffer->setup_reading(file->ref_length,
+                              is_mrr_assoc ? sizeof(range_id_t) : 0);
+  DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
+}
+
+
+/*
+  Get the next {record, range_id} using ordered array of rowid+range_id pairs
+
+  @note
+    Since we have sorted rowids, we try not to make multiple rnd_pos() calls
+    with the same rowid value.
+*/
+
+int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
+{
+  int res;
+  
+  /* 
+    First, check if rowid buffer has elements with the same rowid value as
+    the previous.
+  */
+  while (last_identical_rowid)
+  {
+    /*
+      Current record (the one we've returned in previous call) was obtained
+      from a rowid that matched multiple range_ids. Return this record again,
+      with next matching range_id.
+    */
+    (void)rowid_buffer->read();
+
+    if (rowid_buffer->read_ptr1 == last_identical_rowid)
+      last_identical_rowid= NULL; /* reached the last of identical rowids */
+
+    if (!is_mrr_assoc)
+      return 0;
+
+    memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
+    if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
+      return 0;
+  }
+  
+  /* 
+     Ok, last_identical_rowid==NULL, it's time to read next different rowid
+     value and get record for it.
+  */
+  for(;;)
+  {
+    /* Return eof if there are no rowids in the buffer after re-fill attempt */
+    if (rowid_buffer->read())
+      return HA_ERR_END_OF_FILE;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
+      if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
+        continue;
+    }
+
+    res= file->ha_rnd_pos(file->get_table()->record[0], 
+                          rowid_buffer->read_ptr1);
+
+    if (res == HA_ERR_RECORD_DELETED)
+    {
+      /* not likely to get this code with current storage engines, but still */
+      continue;
+    }
+
+    if (res)
+      return res; /* Some fatal error */
+
+    break; /* Got another record */
+  }
+
+  /* 
+    Check if subsequent buffer elements have the same rowid value as this
+    one. If yes, remember this fact so that we don't make any more rnd_pos()
+    calls with this value.
+
+    Note: this implies that SQL layer doesn't touch table->record[0]
+    between calls.
+  */
+  Lifo_buffer_iterator it;
+  it.init(rowid_buffer);
+  while (!it.read())
+  {
+    if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
+      break;
+    last_identical_rowid= it.read_ptr1;
+  }
+  return 0;
+}
+
+
+/****************************************************************************
+ * Top-level DS-MRR implementation functions (the ones called by storage engine)
+ ***************************************************************************/
+
+/**
+  DS-MRR: Initialize and start MRR scan
+
+  Initialize and start the MRR scan. Depending on the mode parameter, this
+  may use default or DS-MRR implementation.
+
+  @param h_arg           Table handler to be used
+  @param key             Index to be used
+  @param seq_funcs       Interval sequence enumeration functions
+  @param seq_init_param  Interval sequence enumeration parameter
+  @param n_ranges        Number of ranges in the sequence.
+  @param mode            HA_MRR_* modes to use
+  @param buf             INOUT Buffer to use
+
+  @retval 0     Ok, Scan started.
+  @retval other Error
+*/
+
+int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                           void *seq_init_param, uint n_ranges, uint mode,
+                           HANDLER_BUFFER *buf)
+{
+  THD *thd= current_thd;
+  int res;
+  Key_parameters keypar;
+  uint key_buff_elem_size;
+  handler *h_idx;
+  Mrr_ordered_rndpos_reader *disk_strategy= NULL;
+  bool do_sort_keys= FALSE;
+  DBUG_ENTER("DsMrr_impl::dsmrr_init");
+  LINT_INIT(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
+  /*
+    index_merge may invoke a scan on an object for which dsmrr_info[_const]
+    has not been called, so set the owner handler here as well.
+  */
+  primary_file= h_arg;
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+
+  strategy_exhausted= FALSE;
+  
+  /* By default, have do-nothing buffer manager */
+  buf_manager.arg= this;
+  buf_manager.reset_buffer_sizes= do_nothing;
+  buf_manager.redistribute_buffer_space= do_nothing;
+
+  if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
+    goto use_default_impl;
+  
+  /*
+    Determine whether we'll need to do key sorting and/or rnd_pos() scan
+  */
+  index_strategy= NULL;
+  if ((mode & HA_MRR_SINGLE_POINT) &&
+      optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+  {
+    do_sort_keys= TRUE;
+    index_strategy= &reader_factory.ordered_index_reader;
+  }
+  else
+    index_strategy= &reader_factory.simple_index_reader;
+
+  strategy= index_strategy;
+  /*
+    We don't need a rowid-to-rndpos step if
+     - We're doing a scan on clustered primary key
+     - [In the future] We're doing an index_only read
+  */
+  DBUG_ASSERT(primary_file->inited == handler::INDEX || 
+              (primary_file->inited == handler::RND && 
+               secondary_file && 
+               secondary_file->inited == handler::INDEX));
+
+  h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
+  keyno= h_idx->active_index;
+
+  if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
+  {
+    strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
+  }
+
+  full_buf= buf->buffer;
+  full_buf_end= buf->buffer_end;
+
+  if (do_sort_keys)
+  {
+    /* Pre-calculate some parameters of key sorting */
+    keypar.use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
+    seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length, 
+                            &keypar.key_tuple_map);
+    keypar.key_size_in_keybuf= keypar.use_key_pointers? 
+                                 sizeof(char*) : keypar.key_tuple_length;
+    key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
+    
+    /* Ordered index reader needs some space to store an index tuple */
+    if (strategy != index_strategy)
+    {
+      uint saved_pk_length=0;
+      if (h_idx->primary_key_is_clustered())
+      {
+        uint pk= h_idx->get_table()->s->primary_key;
+        if (pk != MAX_KEY)
+          saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
+      }
+      
+      KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index];
+      if (reader_factory.ordered_index_reader.
+            set_interruption_temp_buffer(primary_file->ref_length,
+                                         used_index->key_length,
+                                         saved_pk_length,
+                                         &full_buf, full_buf_end))
+        goto use_default_impl;
+    }
+    else
+      reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
+  }
+
+  if (strategy == index_strategy)
+  {
+    /* 
+      Index strategy alone handles the record retrieval. Give all buffer space
+      to it. Key buffer should have forward orientation so we can return the
+      end of it.
+    */
+    key_buffer= &forward_key_buf;
+    key_buffer->set_buffer_space(full_buf, full_buf_end);
+    
+    /* Safety: specify that rowid buffer has zero size: */
+    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
+
+    if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
+      goto use_default_impl;
+
+    if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
+                                   mode, &keypar, key_buffer, &buf_manager)))
+      goto error;
+  }
+  else
+  {
+    /* We'll have both index and rndpos strategies working together */
+    if (do_sort_keys)
+    {
+      /* Both strategies will need buffer space, share the buffer */
+      if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
+        goto use_default_impl;
+
+      buf_manager.reset_buffer_sizes= reset_buffer_sizes;
+      buf_manager.redistribute_buffer_space= redistribute_buffer_space;
+    }
+    else
+    {
+      /* index strategy doesn't need buffer, give all space to rowids*/
+      rowid_buffer.set_buffer_space(full_buf, full_buf_end);
+      if (!rowid_buffer.have_space_for(primary_file->ref_length + 
+                                       (int)is_mrr_assoc * sizeof(range_id_t)))
+        goto use_default_impl;
+    }
+
+    if ((res= setup_two_handlers()))
+      goto error;
+
+    if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
+                                   n_ranges, mode, &keypar, key_buffer, 
+                                   &buf_manager)) || 
+        (res= disk_strategy->init(primary_file, index_strategy, mode, 
+                                  &rowid_buffer)))
+    {
+      goto error;
+    }
+  }
+  
+  /* 
+    At this point, we're sure that we're running a native MRR scan (i.e. we
+    didnt fall back to default implementation for some reason).
+  */
+  status_var_increment(thd->status_var.ha_mrr_init_count);
+
+  res= strategy->refill_buffer(TRUE);
+  if (res)
+  {
+    if (res != HA_ERR_END_OF_FILE)
+      goto error;
+    strategy_exhausted= TRUE;
+  }
+
+  /*
+    If we have scanned through all intervals in *seq, then adjust *buf to 
+    indicate that the remaining buffer space will not be used.
+  */
+//  if (dsmrr_eof) 
+//    buf->end_of_used_area= rowid_buffer.end_of_space();
+
+  
+  DBUG_RETURN(0);
+error:
+  close_second_handler();
+   /* Safety, not really needed but: */
+  strategy= NULL;
+  DBUG_RETURN(res);
+
+use_default_impl:
+  if (primary_file->inited != handler::INDEX)
+  {
+    /* We can get here when 
+       - we've previously successfully done a DS-MRR scan (and so have 
+         secondary_file!= NULL, secondary_file->inited= INDEX, 
+         primary_file->inited=RND)
+       - for this invocation, we haven't got enough buffer space, and so we
+         have to use the default MRR implementation.
+
+      note: primary_file->ha_index_end() will call dsmrr_close() which will
+      close/destroy the secondary_file, this is intentional. 
+      (Yes this is slow, but one can't expect performance with join buffer 
+       so small that it can accomodate one rowid and one index tuple)
+    */
+    if ((res= primary_file->ha_rnd_end()) || 
+        (res= primary_file->ha_index_init(keyno, test(mode & HA_MRR_SORTED))))
+    {
+      DBUG_RETURN(res);
+    }
+  }
+  /* Call correct init function and assign to top level object */
+  Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
+  res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL, 
+               NULL, NULL);
+  strategy= s;
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Whatever the current state is, make it so that we have two handler objects:
+  - primary_file       -  initialized for rnd_pos() scan
+  - secondary_file     -  initialized for scanning the index specified in
+                          this->keyno
+  RETURN 
+    0        OK
+    HA_XXX   Error code
+*/
+
+int DsMrr_impl::setup_two_handlers()
+{
+  int res;
+  THD *thd= primary_file->get_table()->in_use;
+  DBUG_ENTER("DsMrr_impl::setup_two_handlers");
+  if (!secondary_file)
+  {
+    handler *new_h2;
+    Item *pushed_cond= NULL;
+    DBUG_ASSERT(primary_file->inited == handler::INDEX);
+    /* Create a separate handler object to do rnd_pos() calls. */
+    /*
+      ::clone() takes up a lot of stack, especially on 64 bit platforms.
+      The constant 5 is an empiric result.
+    */
+    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
+      DBUG_RETURN(1);
+
+    /* Create a separate handler object to do rnd_pos() calls. */
+    if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
+                                      normalized_path.str,
+                                      thd->mem_root)) || 
+        new_h2->ha_external_lock(thd, F_RDLCK))
+    {
+      delete new_h2;
+      DBUG_RETURN(1);
+    }
+
+    if (keyno == primary_file->pushed_idx_cond_keyno)
+      pushed_cond= primary_file->pushed_idx_cond;
+    
+    Mrr_reader *save_strategy= strategy;
+    strategy= NULL;
+    /*
+      Caution: this call will invoke this->dsmrr_close(). Do not put the
+      created secondary table handler new_h2 into this->secondary_file or it 
+      will delete it. Also, save the picked strategy
+    */
+    res= primary_file->ha_index_end();
+
+    strategy= save_strategy;
+    secondary_file= new_h2;
+
+    if (res || (res= (primary_file->ha_rnd_init(FALSE))))
+      goto error;
+
+    table->prepare_for_position();
+    secondary_file->extra(HA_EXTRA_KEYREAD);
+    secondary_file->mrr_iter= primary_file->mrr_iter;
+
+    if ((res= secondary_file->ha_index_init(keyno, FALSE)))
+      goto error;
+
+    if (pushed_cond)
+      secondary_file->idx_cond_push(keyno, pushed_cond);
+  }
+  else
+  {
+    DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
+    /* 
+      We get here when the access alternates betwen MRR scan(s) and non-MRR
+      scans.
+
+      Calling primary_file->index_end() will invoke dsmrr_close() for this object,
+      which will delete secondary_file. We need to keep it, so put it away and dont
+      let it be deleted:
+    */
+    if (primary_file->inited == handler::INDEX)
+    {
+      handler *save_h2= secondary_file;
+      Mrr_reader *save_strategy= strategy;
+      secondary_file= NULL;
+      strategy= NULL;
+      res= primary_file->ha_index_end();
+      secondary_file= save_h2;
+      strategy= save_strategy;
+      if (res)
+        goto error;
+    }
+    if ((primary_file->inited != handler::RND) && 
+        (res= primary_file->ha_rnd_init(FALSE)))
+      goto error;
+  }
+  DBUG_RETURN(0);
+
+error:
+  DBUG_RETURN(res);
+}
+
+
+void DsMrr_impl::close_second_handler()
+{
+  if (secondary_file)
+  {
+    secondary_file->ha_index_or_rnd_end();
+    secondary_file->ha_external_lock(current_thd, F_UNLCK);
+    secondary_file->ha_close();
+    delete secondary_file;
+    secondary_file= NULL;
+  }
+}
+
+
+void DsMrr_impl::dsmrr_close()
+{
+  DBUG_ENTER("DsMrr_impl::dsmrr_close");
+  close_second_handler();
+  strategy= NULL;
+  DBUG_VOID_RETURN;
+}
+
+
+/* 
+  my_qsort2-compatible static member function to compare key tuples 
+*/
+
+int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg, 
+                                           uchar* key2_arg)
+{
+  Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg;
+  TABLE *table= reader->file->get_table();
+  KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part;
+  uchar *key1, *key2;
+   
+  if (reader->keypar.use_key_pointers)
+  {
+    /* the buffer stores pointers to keys, get to the keys */
+    memcpy(&key1, key1_arg, sizeof(char*));
+    memcpy(&key2, key2_arg, sizeof(char*));
+  }
+  else
+  {
+    key1= key1_arg;
+    key2= key2_arg;
+  }
+
+  return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length);
+}
+
+
+int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1, 
+                                                   uchar* key2)
+{
+  return -compare_keys(arg, key1, key2);
+}
+
+
+/**
+  Set the buffer space to be shared between rowid and key buffer
+
+  @return FALSE  ok 
+  @return TRUE   There is so little buffer space that we won't be able to use
+                 the strategy. 
+                 This happens when we don't have enough space for one rowid 
+                 element and one key element so this is mainly targeted at
+                 testing.
+*/
+
+bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, 
+                                      key_part_map key_tuple_map)
+{
+  long key_buff_elem_size= key_size_in_keybuf + 
+                           (int)is_mrr_assoc * sizeof(range_id_t);
+  
+  KEY *key_info= &primary_file->get_table()->key_info[keyno];
+  /* 
+    Ok if we got here we need to allocate one part of the buffer 
+    for keys and another part for rowids.
+  */
+  ulonglong rowid_buf_elem_size= primary_file->ref_length + 
+                                 (int)is_mrr_assoc * sizeof(range_id_t);
+  
+  /*
+    Use rec_per_key statistics as a basis to find out how many rowids 
+    we'll get for each key value.
+     TODO: what should be the default value to use when there is no 
+           statistics?
+  */
+  uint parts= my_count_bits(key_tuple_map);
+  ulong rpc;
+  ulonglong rowids_size= rowid_buf_elem_size;
+  if ((rpc= key_info->rec_per_key[parts - 1]))
+    rowids_size= rowid_buf_elem_size * rpc;
+
+  double fraction_for_rowids=
+    (ulonglong2double(rowids_size) / 
+     (ulonglong2double(rowids_size) + key_buff_elem_size));
+
+  ptrdiff_t bytes_for_rowids= 
+    (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));
+  
+  ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
+
+  if (bytes_for_keys < key_buff_elem_size + 1)
+  {
+    ptrdiff_t add= key_buff_elem_size + 1 - bytes_for_keys;
+    bytes_for_keys= key_buff_elem_size + 1;
+    bytes_for_rowids -= add;
+  }
+
+  if (bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
+  {
+    ptrdiff_t add= (ptrdiff_t)(rowid_buf_elem_size + 1 - bytes_for_rowids);
+    bytes_for_rowids= (ptrdiff_t)rowid_buf_elem_size + 1;
+    bytes_for_keys -= add;
+  }
+
+  rowid_buffer_end= full_buf + bytes_for_rowids;
+  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+  key_buffer= &backward_key_buf;
+  key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); 
+
+  if (!key_buffer->have_space_for(key_buff_elem_size) ||
+      !rowid_buffer.have_space_for((size_t)rowid_buf_elem_size))
+    return TRUE; /* Failed to provide minimum space for one of the buffers */
+
+  return FALSE;
+}
+
+
+void DsMrr_impl::do_nothing(void *dsmrr_arg)
+{
+  /* Do nothing */
+}
+
+
+void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
+{
+  DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
+  dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf, 
+                                       dsmrr->rowid_buffer_end);
+  dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end, 
+                                      dsmrr->full_buf_end);
+}
+
+
+/*
+  Take unused space from the key buffer and give it to the rowid buffer
+*/
+
+void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
+{
+  DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
+  uchar *unused_start, *unused_end;
+  dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end);
+  dsmrr->rowid_buffer.grow(unused_start, unused_end);
+}
+
+
+/*
+  @brief Initialize the iterator
+  
+  @note
+  Initialize the iterator to produce matches for the key of the first element 
+  in owner_arg->key_buffer
+
+  @retval  0                    OK
+  @retval  HA_ERR_END_OF_FILE   Either the owner->key_buffer is empty or 
+                                no matches for the key we've tried (check
+                                key_buffer->is_empty() to tell these apart)
+  @retval  other code           Fatal error
+*/
+
+int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
+{
+  int res;
+  owner= owner_arg;
+
+  identical_key_it.init(owner->key_buffer);
+  owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
+                                   owner->is_mrr_assoc ? sizeof(void*) : 0);
+
+  if (identical_key_it.read())
+    return HA_ERR_END_OF_FILE;
+
+  uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;
+
+  uchar *index_tuple= key_in_buf;
+  if (owner->keypar.use_key_pointers)
+    memcpy(&index_tuple, key_in_buf, sizeof(char*));
+  
+  /* Check out how many more identical keys are following */
+  while (!identical_key_it.read())
+  {
+    if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf, 
+                                               identical_key_it.read_ptr1))
+      break;
+    last_identical_key_ptr= identical_key_it.read_ptr1;
+  }
+  identical_key_it.init(owner->key_buffer);
+  res= owner->file->ha_index_read_map(owner->file->get_table()->record[0], 
+                                      index_tuple, 
+                                      owner->keypar.key_tuple_map, 
+                                      HA_READ_KEY_EXACT);
+
+  if (res)
+  {
+    /* Failed to find any matching records */
+    move_to_next_key_value();
+    return res;
+  }
+  owner->have_saved_rowid= FALSE;
+  get_next_row= FALSE;
+  return 0;
+}
+
+
+int Key_value_records_iterator::get_next(range_id_t *range_info)
+{
+  int res;
+
+  if (get_next_row)
+  {
+    if (owner->keypar.index_ranges_unique)
+    {
+      /* We're using a full unique key, no point to call index_next_same */
+      return HA_ERR_END_OF_FILE;
+    }
+    
+    handler *h= owner->file;
+    if ((res= h->ha_index_next_same(h->get_table()->record[0], 
+                                    identical_key_it.read_ptr1, 
+                                    owner->keypar.key_tuple_length)))
+    {
+      /* It's either HA_ERR_END_OF_FILE or some other error */
+      return res; 
+    }
+    identical_key_it.init(owner->key_buffer);
+    owner->have_saved_rowid= FALSE;
+    get_next_row= FALSE;
+  }
+
+  identical_key_it.read(); /* This gets us next range_id */
+  memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));
+
+  if (!last_identical_key_ptr || 
+      (identical_key_it.read_ptr1 == last_identical_key_ptr))
+  {
+    /* 
+      We've reached the last of the identical keys that current record is a
+      match for.  Set get_next_row=TRUE so that we read the next index record
+      on the next call to this function.
+    */
+    get_next_row= TRUE;
+  }
+  return 0;
+}
+
+
+void Key_value_records_iterator::move_to_next_key_value()
+{
+  while (!owner->key_buffer->read() && 
+         (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_next() function.
+
+  Calling convention is like multi_range_read_next() has.
+*/
+
+int DsMrr_impl::dsmrr_next(range_id_t *range_info)
+{
+  int res;
+  if (strategy_exhausted)
+    return HA_ERR_END_OF_FILE;
+
+  while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
+  {
+    if ((res= strategy->refill_buffer(FALSE)))
+      break; /* EOF or error */
+  }
+  return res;
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_info() function
+*/
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, 
+                               uint key_parts,
+                               uint *bufsz, uint *flags, COST_VECT *cost)
+{  
+  ha_rows res;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
+                                                    key_parts, &def_bufsz, 
+                                                    &def_flags, cost);
+  DBUG_ASSERT(!res);
+
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
+      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    /* Default implementation is choosen */
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return 0;
+}
+
+
+/**
+  DS-MRR Implementation: multi_range_read_info_const() function
+*/
+
+ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                 void *seq_init_param, uint n_ranges, 
+                                 uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  ha_rows rows;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  rows= primary_file->handler::multi_range_read_info_const(keyno, seq, 
+                                                           seq_init_param,
+                                                           n_ranges, 
+                                                           &def_bufsz, 
+                                                           &def_flags, cost);
+  if (rows == HA_POS_ERROR)
+  {
+    /* Default implementation can't perform MRR scan => we can't either */
+    return rows;
+  }
+
+  /*
+    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
+    use the default MRR implementation (we need it for UPDATE/DELETE).
+    Otherwise, make a choice based on cost and @@optimizer_switch settings
+  */
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
+      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return rows;
+}
+
+
+/**
+  Check if key has partially-covered columns
+
+  We can't use DS-MRR to perform range scans when the ranges are over
+  partially-covered keys, because we'll not have full key part values
+  (we'll have their prefixes from the index) and will not be able to check
+  if we've reached the end the range.
+
+  @param keyno  Key to check
+
+  @todo
+    Allow use of DS-MRR in cases where the index has partially-covered
+    components but they are not used for scanning.
+
+  @retval TRUE   Yes
+  @retval FALSE  No
+*/
+
+bool key_uses_partial_cols(TABLE *table, uint keyno)
+{
+  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
+  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
+  for (; kp != kp_end; kp++)
+  {
+    if (!kp->field->part_of_key.is_set(keyno))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Check if key/flags allow DS-MRR/CPK strategy to be used
+  
+  @param thd
+  @param keyno      Index that will be used
+  @param  mrr_flags  
+  
+  @retval TRUE   DS-MRR/CPK should be used
+  @retval FALSE  Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(THD *thd, uint keyno, uint mrr_flags)
+{
+  return test((mrr_flags & HA_MRR_SINGLE_POINT) &&
+              keyno == table->s->primary_key && 
+              primary_file->primary_key_is_clustered() && 
+              optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS));
+}
+
+
+/*
+  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
+
+  Make the choice between using Default MRR implementation and DS-MRR.
+  This function contains common functionality factored out of dsmrr_info()
+  and dsmrr_info_const(). The function assumes that the default MRR
+  implementation's applicability requirements are satisfied.
+
+  @param keyno       Index number
+  @param rows        E(full rows to be retrieved)
+  @param flags  IN   MRR flags provided by the MRR user
+                OUT  If DS-MRR is choosen, flags of DS-MRR implementation
+                     else the value is not modified
+  @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
+                     else the value is not modified
+  @param cost   IN   Cost of default MRR implementation
+                OUT  If DS-MRR is choosen, cost of DS-MRR scan
+                     else the value is not modified
+
+  @retval TRUE   Default MRR implementation should be used
+  @retval FALSE  DS-MRR implementation should be used
+*/
+
+
+bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
+                                 uint *bufsz, COST_VECT *cost)
+{
+  COST_VECT dsmrr_cost;
+  bool res;
+  THD *thd= current_thd;
+
+  bool doing_cpk_scan= check_cpk_scan(thd, keyno, *flags); 
+  bool using_cpk= test(keyno == table->s->primary_key &&
+                       primary_file->primary_key_is_clustered());
+  *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
+      *flags & HA_MRR_INDEX_ONLY ||
+      (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(table, keyno))
+  {
+    /* Use the default implementation */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
+    return TRUE;
+  }
+
+  uint add_len= table->key_info[keyno].key_length + primary_file->ref_length; 
+  *bufsz -= add_len;
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
+    return TRUE;
+  *bufsz += add_len;
+  
+  bool force_dsmrr;
+  /* 
+    If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
+    DS-MRR and Default implementations cost. This allows one to force use of
+    DS-MRR whenever it is applicable without affecting other cost-based
+    choices.
+  */
+  if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
+      dsmrr_cost.total_cost() > cost->total_cost())
+    dsmrr_cost= *cost;
+
+  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
+  {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
+    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
+    *cost= dsmrr_cost;
+    res= FALSE;
+
+
+    if ((using_cpk && doing_cpk_scan) ||
+        (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
+         *flags & HA_MRR_SINGLE_POINT))
+    {
+      *flags |= DSMRR_IMPL_SORT_KEYS;
+    }
+    
+    if (!(using_cpk && doing_cpk_scan) &&
+        !(*flags & HA_MRR_INDEX_ONLY))
+    {
+      *flags |= DSMRR_IMPL_SORT_ROWIDS;
+    }
+    /*
+    if ((*flags & HA_MRR_SINGLE_POINT) && 
+         optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+      *flags |= HA_MRR_MATERIALIZED_KEYS;
+    */
+  }
+  else
+  {
+    /* Use the default MRR implementation */
+    res= TRUE;
+  }
+  return res;
+}
+
+/*
+  Take the flags we've returned previously and print one of
+  - Key-ordered scan
+  - Rowid-ordered scan
+  - Key-ordered Rowid-ordered scan
+*/
+
+int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
+{
+  const char *key_ordered=   "Key-ordered scan";
+  const char *rowid_ordered= "Rowid-ordered scan";
+  const char *both_ordered=  "Key-ordered Rowid-ordered scan";
+  const char *used_str="";
+  const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);
+
+  if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
+  {
+    if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
+      used_str= both_ordered;
+    else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
+      used_str= key_ordered;
+    else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
+      used_str= rowid_ordered;
+
+    uint used_str_len= strlen(used_str);
+    uint copy_len= min(used_str_len, size);
+    memcpy(str, used_str, size);
+    return copy_len;
+  }
+  return 0;
+}
+
+
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
+
+
+/**
+  Get cost of DS-MRR scan
+
+  @param keynr              Index to be used
+  @param rows               E(Number of rows to be scanned)
+  @param flags              Scan parameters (HA_MRR_* flags)
+  @param buffer_size INOUT  Buffer size
+  @param cost        OUT    The cost
+
+  @retval FALSE  OK
+  @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
+                 for even 1 rowid)
+*/
+
+bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                                         uint *buffer_size, COST_VECT *cost)
+{
+  ulong max_buff_entries, elem_size;
+  ha_rows rows_in_full_step;
+  ha_rows rows_in_last_step;
+  uint n_full_steps;
+  double index_read_cost;
+
+  elem_size= primary_file->ref_length + 
+             sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
+  max_buff_entries = *buffer_size / elem_size;
+
+  if (!max_buff_entries)
+    return TRUE; /* Buffer has not enough space for even 1 rowid */
+
+  /* Number of iterations we'll make with full buffer */
+  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
+  
+  /* 
+    Get numbers of rows we'll be processing in 
+     - non-last sweep, with full buffer 
+     - last iteration, with non-full buffer
+  */
+  rows_in_full_step= max_buff_entries;
+  rows_in_last_step= rows % max_buff_entries;
+  
+  /* Adjust buffer size if we expect to use only part of the buffer */
+  if (n_full_steps)
+  {
+    get_sort_and_sweep_cost(table, rows_in_full_step, cost);
+    cost->multiply(n_full_steps);
+  }
+  else
+  {
+    cost->zero();
+    *buffer_size= max(*buffer_size, 
+                      (size_t)(1.2*rows_in_last_step) * elem_size + 
+                      primary_file->ref_length + table->key_info[keynr].key_length);
+  }
+  
+  COST_VECT last_step_cost;
+  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
+  cost->add(&last_step_cost);
+ 
+  if (n_full_steps != 0)
+    cost->mem_cost= *buffer_size;
+  else
+    cost->mem_cost= (double)rows_in_last_step * elem_size;
+  
+  /* Total cost of all index accesses */
+  index_read_cost= primary_file->keyread_time(keynr, 1, rows);
+  cost->add_io(index_read_cost, 1 /* Random seeks */);
+  return FALSE;
+}
+
+
+/* 
+  Get cost of one sort-and-sweep step
+  
+  It consists of two parts:
+   - sort an array of #nrows ROWIDs using qsort
+   - read #nrows records from table in a sweep.
+
+  @param table       Table being accessed
+  @param nrows       Number of rows to be sorted and retrieved
+  @param cost   OUT  The cost of scan
+*/
+
+static 
+void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
+{
+  if (nrows)
+  {
+    get_sweep_read_cost(table, nrows, FALSE, cost);
+    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
+    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+    if (cmp_op < 3)
+      cmp_op= 3;
+    cost->cpu_cost += cmp_op * log2(cmp_op);
+  }
+  else
+    cost->zero();
+}
+
+
+/**
+  Get cost of reading nrows table records in a "disk sweep"
+
+  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
+  for an ordered sequence of rowids.
+
+  We assume hard disk IO. The read is performed as follows:
+
+   1. The disk head is moved to the needed cylinder
+   2. The controller waits for the plate to rotate
+   3. The data is transferred
+
+  Time to do #3 is insignificant compared to #2+#1.
+
+  Time to move the disk head is proportional to head travel distance.
+
+  Time to wait for the plate to rotate depends on whether the disk head
+  was moved or not. 
+
+  If disk head wasn't moved, the wait time is proportional to distance
+  between the previous block and the block we're reading.
+
+  If the head was moved, we don't know how much we'll need to wait for the
+  plate to rotate. We assume the wait time to be a variate with a mean of
+  0.5 of full rotation time.
+
+  Our cost units are "random disk seeks". The cost of random disk seek is
+  actually not a constant, it depends one range of cylinders we're going
+  to access. We make it constant by introducing a fuzzy concept of "typical 
+  datafile length" (it's fuzzy as it's hard to tell whether it should
+  include index file, temp.tables etc). Then random seek cost is:
+
+    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
+
+  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
+
+  @param table             Table to be accessed
+  @param nrows             Number of rows to retrieve
+  @param interrupted       TRUE <=> Assume that the disk sweep will be
+                           interrupted by other disk IO. FALSE - otherwise.
+  @param cost         OUT  The cost.
+*/
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost)
+{
+  DBUG_ENTER("get_sweep_read_cost");
+
+  cost->zero();
+  if (table->file->primary_key_is_clustered())
+  {
+    cost->io_count= table->file->read_time(table->s->primary_key,
+                                           (uint) nrows, nrows);
+  }
+  else
+  {
+    double n_blocks=
+      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
+    double busy_blocks=
+      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
+    if (busy_blocks < 1.0)
+      busy_blocks= 1.0;
+
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
+                       busy_blocks));
+    cost->io_count= busy_blocks;
+
+    if (!interrupted)
+    {
+      /* Assume reading is done in one 'sweep' */
+      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
+                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+    }
+  }
+  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+  DBUG_VOID_RETURN;
+}
+
+
+/* **************************************************************************
+ * DS-MRR implementation ends
+ ***************************************************************************/
+
+
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
new file mode 100644
index 00000000000..1b72e71944d
--- /dev/null
+++ b/sql/multi_range_read.h
@@ -0,0 +1,637 @@
+/**
+  @defgroup DS-MRR declarations
+  @{
+*/
+
+/**
+  A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
+
+  This is a "plugin"(*) for storage engines that allows to
+    1. When doing index scans, read table rows in rowid order;
+    2. when making many index lookups, do them in key order and don't
+       lookup the same key value multiple times;
+    3. Do both #1 and #2, when applicable.
+  These changes are expected to speed up query execution for disk-based 
+  storage engines running io-bound loads and "big" queries (ie. queries that
+  do joins and enumerate lots of records).
+
+  (*) - only conceptually. No dynamic loading or binary compatibility of any
+        kind.
+
+  General scheme of things:
+   
+      SQL Layer code
+       |   |   |
+       v   v   v 
+      -|---|---|---- handler->multi_range_read_XXX() function calls
+       |   |   |
+      _____________________________________
+     / DS-MRR module                       \
+     | (order/de-duplicate lookup keys,    |
+     | scan indexes in key order,          |
+     | order/de-duplicate rowids,          |
+     | retrieve full record reads in rowid |
+     | order)                              |
+     \_____________________________________/
+       |   |   |
+      -|---|---|----- handler->read_range_first()/read_range_next(), 
+       |   |   |      handler->index_read(), handler->rnd_pos() calls.
+       |   |   |
+       v   v   v
+      Storage engine internals
+
+
+  Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
+  Potentially it can be used with any table handler that has disk-based data
+  storage and has better performance when reading data in rowid order.
+*/
+
+#include "sql_lifo_buffer.h"
+
+class DsMrr_impl;
+class Mrr_ordered_index_reader;
+
+
+/* A structure with key parameters that's shared among several classes */
+class Key_parameters
+{
+public:
+  uint         key_tuple_length; /* Length of index lookup tuple, in bytes */
+  key_part_map key_tuple_map;    /* keyparts used in index lookup tuples */
+
+  /*
+    This is 
+      = key_tuple_length   if we copy keys to buffer
+      = sizeof(void*)      if we're using pointers to materialized keys.
+  */
+  uint key_size_in_keybuf;
+
+  /* TRUE <=> don't copy key values, use pointers to them instead.  */
+  bool use_key_pointers;
+
+  /* TRUE <=> We can get at most one index tuple for a lookup key */
+  bool index_ranges_unique;
+};
+
+
+/**
+  A class to enumerate (record, range_id) pairs that match given key value.
+  
+  @note
+
+  The idea is that we have a Lifo_buffer which holds (key, range_id) pairs
+  ordered by key value. From the front of the buffer we see
+
+    (key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN)
+
+  we take the first elements that have the same key value (key_val1 in the
+  example above), and make lookup into the table.  The table will have 
+  multiple matches for key_val1:
+ 
+                  == Table Index ==
+                   ...
+     key_val1 ->  key_val1, index_tuple1
+                  key_val1, index_tuple2
+                   ...
+                  key_val1, index_tupleN
+                   ...
+  
+  Our goal is to produce all possible combinations, i.e. we need:
+  
+    {(key_val1, index_tuple1), range_id1}
+    {(key_val1, index_tuple1), range_id2}
+       ...           ...               |
+    {(key_val1, index_tuple1), range_idN},
+                  
+    {(key_val1, index_tuple2), range_id1}
+    {(key_val1, index_tuple2), range_id2}
+        ...          ...               |
+    {(key_val1, index_tuple2), range_idN},
+
+        ...          ...          ...                          
+
+    {(key_val1, index_tupleK), range_idN}
+*/
+
+class Key_value_records_iterator
+{
+  /* Use this to get table handler, key buffer and other parameters */
+  Mrr_ordered_index_reader *owner;
+
+  /* Iterator to get (key, range_id) pairs from */
+  Lifo_buffer_iterator identical_key_it;
+  
+  /* 
+    Last of the identical key values (when we get this pointer from
+    identical_key_it, it will be time to stop).
+  */
+  uchar *last_identical_key_ptr;
+
+  /*
+    FALSE <=> we're right after the init() call, the record has been already
+    read with owner->file->index_read_map() call
+  */
+  bool get_next_row;
+  
+public:
+  int init(Mrr_ordered_index_reader *owner_arg);
+  int get_next(range_id_t *range_info);
+  void move_to_next_key_value();
+};
+
+
+/*
+  Buffer manager interface. Mrr_reader objects use it to inqure DsMrr_impl
+  to manage buffer space for them.
+*/
+typedef struct st_buffer_manager
+{
+public:
+  /* Opaque value to be passed as the first argument to all member functions */
+  void *arg;
+  
+  /*
+    This is called when we've freed more space from the rowid buffer. The
+    callee will get the unused space from the rowid buffer and give it to the
+    key buffer.
+  */
+  void (*redistribute_buffer_space)(void *arg);
+
+  /* 
+    This is called when both key and rowid buffers are empty, and so it's time 
+    to reset them to their original size (They've lost their original size,
+    because we were dynamically growing rowid buffer and shrinking key buffer).
+  */
+  void (*reset_buffer_sizes)(void *arg);
+
+} Buffer_manager;
+
+
+/* 
+  Mrr_reader - DS-MRR execution strategy abstraction
+
+  A reader produces ([index]_record, range_info) pairs, and requires periodic
+  refill operations.
+
+  - one starts using the reader by calling reader->get_next(),
+  - when a get_next() call returns HA_ERR_END_OF_FILE, one must call 
+    refill_buffer() before they can make more get_next() calls.
+  - when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real
+    end of stream and get_next() should not be called anymore.
+
+  Both functions can return other error codes, these mean unrecoverable errors
+  after which one cannot continue.
+*/
+
+class Mrr_reader 
+{
+public:
+  virtual int get_next(range_id_t *range_info) = 0;
+  virtual int refill_buffer(bool initial) = 0;
+  virtual ~Mrr_reader() {}; /* just to remove compiler warning */
+};
+
+
+/* 
+  A common base for readers that do index scans and produce index tuples 
+*/
+
+class Mrr_index_reader : public Mrr_reader
+{
+protected:
+  handler *file; /* Handler object to use */
+public:
+  virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                   void *seq_init_param, uint n_ranges,
+                   uint mode, Key_parameters *key_par, 
+                   Lifo_buffer *key_buffer, 
+                   Buffer_manager *buf_manager_arg) = 0;
+
+  /* Get pointer to place where every get_next() call will put rowid */
+  virtual uchar *get_rowid_ptr() = 0;
+  /* Get the rowid (call this after get_next() call) */
+  virtual void position();
+  virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0;
+
+  virtual void interrupt_read() {}
+  virtual void resume_read() {}
+};
+
+
+/*
+  A "bypass" index reader that just does and index scan. The index scan is done 
+  by calling default MRR implementation (i.e.  handler::multi_range_read_XXX())
+  functions.
+*/
+
+class Mrr_simple_index_reader : public Mrr_index_reader
+{
+public:
+  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+           void *seq_init_param, uint n_ranges,
+           uint mode, Key_parameters *key_par,
+           Lifo_buffer *key_buffer,
+           Buffer_manager *buf_manager_arg);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial) { return initial? 0: HA_ERR_END_OF_FILE; }
+  uchar *get_rowid_ptr() { return file->ref; }
+  bool skip_record(range_id_t range_id, uchar *rowid)
+  {
+    return (file->mrr_funcs.skip_record &&
+            file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid));
+  }
+};
+
+
+/* 
+  A reader that sorts the key values before it makes the index lookups.
+*/
+
+class Mrr_ordered_index_reader : public Mrr_index_reader
+{
+public:
+  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+           void *seq_init_param, uint n_ranges,
+           uint mode, Key_parameters *key_par,
+           Lifo_buffer *key_buffer,
+           Buffer_manager *buf_manager_arg);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial);
+  uchar *get_rowid_ptr() { return file->ref; }
+  
+  bool skip_record(range_id_t range_info, uchar *rowid)
+  {
+    return (mrr_funcs.skip_record &&
+            mrr_funcs.skip_record(mrr_iter, range_info, rowid));
+  }
+
+  bool skip_index_tuple(range_id_t range_info)
+  {
+    return (mrr_funcs.skip_index_tuple &&
+            mrr_funcs.skip_index_tuple(mrr_iter, range_info));
+  }
+  
+  bool set_interruption_temp_buffer(uint rowid_length, uint key_len, 
+                                    uint saved_pk_len,
+                                    uchar **space_start, uchar *space_end);
+  void set_no_interruption_temp_buffer();
+
+  void interrupt_read();
+  void resume_read();
+  void position();
+private:
+  Key_value_records_iterator kv_it;
+
+  bool scanning_key_val_iter;
+  
+  /* Buffer to store (key, range_id) pairs */
+  Lifo_buffer *key_buffer;
+  
+  /* This manages key buffer allocation and sizing for us */
+  Buffer_manager *buf_manager;
+
+  Key_parameters  keypar; /* index scan and lookup tuple parameters */
+
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+  
+  /* Range sequence iteration members */
+  RANGE_SEQ_IF mrr_funcs;
+  range_seq_t mrr_iter;
+  
+  /* TRUE == reached eof when enumerating ranges */
+  bool source_exhausted;
+   
+  /* 
+    Following members are for interrupt_read()/resume_read(). The idea is that 
+    in some cases index scan that is done by this object is interrupted by
+    rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that
+    we're sharing handler->record[0] with that object, and it destroys its
+    contents.
+    We need to save/restore our current
+    - index tuple (for pushed index condition checks)
+    - clustered primary key values (again, for pushed index condition checks)
+    - rowid of the last record we've retrieved (in case this rowid matches
+      multiple ranges and we'll need to return it again)
+  */ 
+  bool support_scan_interruptions;
+  /* Space where we save the rowid of the last record we've returned */
+  uchar *saved_rowid;
+  
+  /* TRUE <=> saved_rowid has the last saved rowid */
+  bool have_saved_rowid;
+  
+  uchar *saved_key_tuple; /* Saved current key tuple */
+  uchar *saved_primary_key; /* Saved current primary key tuple */
+
+  static int compare_keys(void* arg, uchar* key1, uchar* key2);
+  static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2);
+  
+  friend class Key_value_records_iterator; 
+  friend class DsMrr_impl;
+  friend class Mrr_ordered_rndpos_reader;
+};
+
+
+/* 
+  A reader that gets rowids from an Mrr_index_reader, and then sorts them 
+  before getting full records with handler->rndpos() calls.
+*/
+
+class Mrr_ordered_rndpos_reader : public Mrr_reader 
+{
+public:
+  int init(handler *file, Mrr_index_reader *index_reader, uint mode,
+           Lifo_buffer *buf);
+  int get_next(range_id_t *range_info);
+  int refill_buffer(bool initial);
+private:
+  handler *file; /* Handler to use */
+  
+  /* This what we get (rowid, range_info) pairs from */
+  Mrr_index_reader *index_reader;
+
+  /* index_reader->get_next() puts rowid here */
+  uchar *index_rowid;
+  
+  /* TRUE <=> index_reader->refill_buffer() call has returned EOF */
+  bool index_reader_exhausted;
+  
+  /* 
+    TRUE <=> We should call index_reader->refill_buffer(). This happens if
+    1. we've made index_reader->get_next() call which returned EOF
+    2. we haven't made any index_reader calls (and our first call should 
+       be index_reader->refill_buffer(initial=TRUE)
+  */
+  bool index_reader_needs_refill;
+
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+  
+  /* 
+    When reading from ordered rowid buffer: the rowid element of the last
+    buffer element that has rowid identical to this one.
+  */
+  uchar *last_identical_rowid;
+
+  /* Buffer to store (rowid, range_id) pairs */
+  Lifo_buffer *rowid_buffer;
+  
+  int refill_from_index_reader();
+};
+
+
+/*
+  A primitive "factory" of various Mrr_*_reader classes (the point is to 
+  get various kinds of readers without having to allocate them on the heap)
+*/
+
+class Mrr_reader_factory
+{
+public:
+  Mrr_ordered_rndpos_reader ordered_rndpos_reader;
+  Mrr_ordered_index_reader  ordered_index_reader;
+  Mrr_simple_index_reader   simple_index_reader;
+};
+
+
+#define DSMRR_IMPL_SORT_KEYS   HA_MRR_IMPLEMENTATION_FLAG1
+#define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2
+
+/*
+  DS-MRR implementation for one table. Create/use one object of this class for
+  each ha_{myisam/innobase/etc} object. That object will be further referred to
+  as "the handler"
+
+  DsMrr_impl supports has the following execution strategies:
+
+  - Bypass DS-MRR, pass all calls to default MRR implementation, which is 
+    an MRR-to-non-MRR call converter.
+  - Key-Ordered Retrieval
+  - Rowid-Ordered Retrieval
+
+  DsMrr_impl will use one of the above strategies, or a combination of them, 
+  according to the following diagram:
+
+         (mrr function calls)
+                |
+                +----------------->-----------------+
+                |                                   |
+     ___________v______________      _______________v________________
+    / default: use lookup keys \    / KEY-ORDERED RETRIEVAL:         \
+    | (or ranges) in whatever  |    | sort lookup keys and then make | 
+    | order they are supplied  |    | index lookups in index order   |
+    \__________________________/    \________________________________/
+              | |  |                           |    |
+      +---<---+ |  +--------------->-----------|----+
+      |         |                              |    |
+      |         |              +---------------+    |
+      |   ______v___ ______    |     _______________v_______________
+      |  / default: read   \   |    / ROWID-ORDERED RETRIEVAL:      \
+      |  | table records   |   |    | Before reading table records, |
+      v  | in random order |   v    | sort their rowids and then    |
+      |  \_________________/   |    | read them in rowid order      |
+      |         |              |    \_______________________________/
+      |         |              |                    |
+      |         |              |                    |
+      +-->---+  |  +----<------+-----------<--------+
+             |  |  |                                
+             v  v  v
+      (table records and range_ids)
+
+  The choice of strategy depends on MRR scan properties, table properties
+  (whether we're scanning clustered primary key), and @@optimizer_switch
+  settings.
+  
+  Key-Ordered Retrieval
+  ---------------------
+  The idea is: if MRR scan is essentially a series of lookups on 
+   
+    tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN
+  
+  then it makes sense to collect and order the set of lookup values, i.e.
+   
+     sort(value1, value2, .. valueN)
+
+  and then do index lookups in index order. This results in fewer index page
+  fetch operations, and we also can avoid making multiple index lookups for the
+  same value. That is, if value1=valueN we can easily discover that after
+  sorting and make one index lookup for them instead of two.
+
+  Rowid-Ordered Retrieval
+  -----------------------
+  If we do a regular index scan or a series of index lookups, we'll be hitting
+  table records at random. For disk-based engines, this is much slower than 
+  reading the same records in disk order. We assume that disk ordering of
+  rows is the same as ordering of their rowids (which is provided by 
+  handler::cmp_ref())
+  In order to retrieve records in different order, we must separate index
+  scanning and record fetching, that is, MRR scan uses the following steps:
+
+    1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and 
+        fill a buffer with {rowid, range_id} pairs
+    2. Sort the buffer by rowid value
+    3. for each {rowid, range_id} pair in the buffer
+         get record by rowid and return the {record, range_id} pair
+    4. Repeat the above steps until we've exhausted the list of ranges we're
+       scanning.
+
+  Buffer space management considerations
+  --------------------------------------
+  With regards to buffer/memory management, MRR interface specifies that 
+   - SQL layer provides multi_range_read_init() with buffer of certain size.
+   - MRR implementation may use (i.e. have at its disposal till the end of 
+     the MRR scan) all of the buffer, or return the unused end of the buffer 
+     to SQL layer.
+
+  DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
+  we need to accumulate/sort only keys (or only rowids), it is fairly trivial.
+
+  When we need to accumulate/sort both keys and rowids, efficient buffer use
+  gets complicated. We need to:
+   - First, accumulate keys and sort them
+   - Then use the keys (smaller values go first) to obtain rowids. A key is not
+     needed after we've got matching rowids for it.
+   - Make sure that rowids are accumulated at the front of the buffer, so that we
+     can return the end part of the buffer to SQL layer, should there be too
+     few rowid values to occupy the buffer.
+
+  All of these goals are achieved by using the following scheme:
+
+     |                    |   We get an empty buffer from SQL layer.   
+
+     |                  *-|    
+     |               *----|   First, we fill the buffer with keys. Key_buffer
+     |            *-------|   part grows from end of the buffer space to start
+     |         *----------|   (In this picture, the buffer is big enough to
+     |      *-------------|    accomodate all keys and even have some space left)
+
+     |      *=============|   We want to do key-ordered index scan, so we sort
+                              the keys
+
+     |-x      *===========|   Then we use the keys get rowids. Rowids are 
+     |----x      *========|   stored from start of buffer space towards the end.
+     |--------x     *=====|   The part of the buffer occupied with keys
+     |------------x   *===|   gradually frees up space for rowids. In this
+     |--------------x   *=|   picture we run out of keys before we've ran out
+     |----------------x   |   of buffer space (it can be other way as well).
+
+     |================x   |   Then we sort the rowids.
+                     
+     |                |~~~|   The unused part of the buffer is at the end, so
+                              we can return it to the SQL layer.
+
+     |================*       Sorted rowids are then used to read table records 
+                              in disk order
+
+*/
+
+class DsMrr_impl
+{
+public:
+  typedef void (handler::*range_check_toggle_func_t)(bool on);
+
+  DsMrr_impl()
+    : secondary_file(NULL) {};
+  
+  void init(handler *h_arg, TABLE *table_arg)
+  {
+    primary_file= h_arg; 
+    table= table_arg;
+  }
+  int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                 void *seq_init_param, uint n_ranges, uint mode, 
+                 HANDLER_BUFFER *buf);
+  void dsmrr_close();
+  int dsmrr_next(range_id_t *range_info);
+
+  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, 
+                     uint *bufsz, uint *flags, COST_VECT *cost);
+
+  ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, 
+                            void *seq_init_param, uint n_ranges, uint *bufsz,
+                            uint *flags, COST_VECT *cost);
+
+  int dsmrr_explain_info(uint mrr_mode, char *str, size_t size);
+private:
+  /* Buffer to store (key, range_id) pairs */
+  Lifo_buffer *key_buffer;
+
+  /*
+    The "owner" handler object (the one that is expected to "own" this object
+    and call its functions).
+  */
+  handler *primary_file;
+  TABLE *table; /* Always equal to primary_file->table */
+
+  /*
+    Secondary handler object. (created when needed, we need it when we need 
+    to run both index scan and rnd_pos() scan at the same time)
+  */
+  handler *secondary_file;
+  
+  uint keyno; /* index we're running the scan on */
+  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+
+  Mrr_reader_factory reader_factory;
+
+  Mrr_reader *strategy;
+  bool strategy_exhausted;
+
+  Mrr_index_reader *index_strategy;
+
+  /* The whole buffer space that we're using */
+  uchar *full_buf;
+  uchar *full_buf_end;
+  
+  /* 
+    When using both rowid and key buffers: the boundary between key and rowid
+    parts of the buffer. This is the "original" value, actual memory ranges 
+    used by key and rowid parts may be different because of dynamic space 
+    reallocation between them.
+  */
+  uchar *rowid_buffer_end;
+ 
+  /*
+    One of the following two is used for key buffer: forward is used when 
+    we only need key buffer, backward is used when we need both key and rowid
+    buffers.
+  */
+  Forward_lifo_buffer forward_key_buf;
+  Backward_lifo_buffer backward_key_buf;
+
+  /*
+    Buffer to store (rowid, range_id) pairs, or just rowids if 
+    is_mrr_assoc==FALSE
+  */
+  Forward_lifo_buffer rowid_buffer;
+  
+  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
+                       COST_VECT *cost);
+  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
+                               uint *buffer_size, COST_VECT *cost);
+  bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags);
+
+  bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map);
+
+  /* Buffer_manager and its member functions */
+  Buffer_manager buf_manager;
+  static void redistribute_buffer_space(void *dsmrr_arg);
+  static void reset_buffer_sizes(void *dsmrr_arg);
+  static void do_nothing(void *dsmrr_arg);
+
+  Lifo_buffer* get_key_buffer() { return key_buffer; }
+
+  friend class Key_value_records_iterator;
+  friend class Mrr_ordered_index_reader;
+  friend class Mrr_ordered_rndpos_reader;
+
+  int  setup_two_handlers();
+  void close_second_handler();
+};
+
+/**
+  @} (end of group DS-MRR declarations)
+*/
+
diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc
index 960ccbf3439..44207b9319b 100644
--- a/sql/my_decimal.cc
+++ b/sql/my_decimal.cc
@@ -18,6 +18,10 @@
 #include "mysql_priv.h"
 #include <time.h>
 
+#define DIG_BASE     1000000000
+#define DIG_PER_DEC1 9
+#define ROUND_UP(X)  (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1)
+
 
 #ifndef MYSQL_CLIENT
 /**
@@ -32,21 +36,20 @@
     result
 */
 
-int decimal_operation_results(int result)
+int decimal_operation_results(int result, const char *value, const char *type)
 {
   switch (result) {
   case E_DEC_OK:
     break;
   case E_DEC_TRUNCATED:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-			WARN_DATA_TRUNCATED, ER(WARN_DATA_TRUNCATED),
-			"", (ulong) 0);
+			ER_DATA_TRUNCATED, ER(ER_DATA_TRUNCATED),
+			value, type);
     break;
   case E_DEC_OVERFLOW:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
-                        ER_TRUNCATED_WRONG_VALUE,
-                        ER(ER_TRUNCATED_WRONG_VALUE),
-			"DECIMAL", "");
+                        ER_DATA_OVERFLOW, ER(ER_DATA_OVERFLOW),
+			value, type);
     break;
   case E_DEC_DIV_ZERO:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
@@ -54,9 +57,8 @@ int decimal_operation_results(int result)
     break;
   case E_DEC_BAD_NUM:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
-			ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
-			ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
-			"decimal", "", "", (ulong) 0);
+			ER_BAD_DATA, ER(ER_BAD_DATA),
+			value, type);
     break;
   case E_DEC_OOM:
     my_error(ER_OUT_OF_RESOURCES, MYF(0));
@@ -212,20 +214,69 @@ int str2my_decimal(uint mask, const char *from, uint length,
 }
 
 
+/**
+  converts a decimal into a pair of integers - for integer and fractional parts
+
+  special version, for decimals representing number of seconds.
+  integer part cannot be larger that 1e18 (otherwise it's an overflow).
+  fractional part is microseconds.
+*/
+bool my_decimal2seconds(const my_decimal *d, ulonglong *sec, ulong *microsec)
+{
+  int pos;
+  
+  if (d->intg)
+  {
+    pos= (d->intg-1)/DIG_PER_DEC1;
+    *sec= d->buf[pos];
+    if (pos > 0)
+      *sec+= static_cast<longlong>(d->buf[pos-1]) * DIG_BASE;
+  }
+  else
+  {
+    *sec=0;
+    pos= -1;
+  }
+
+  *microsec= d->frac ? static_cast<longlong>(d->buf[pos+1]) / (DIG_BASE/1000000) : 0;
+
+  if (pos > 1)
+  {
+    for (int i=0; i < pos-1; i++)
+      if (d->buf[i])
+      {
+        *sec= LONGLONG_MAX;
+        break;
+      }
+  }
+  return d->sign();
+}
+
+
+/**
+  converts a pair of integers (seconds, microseconds) into a decimal
+*/
+my_decimal *seconds2my_decimal(bool sign,
+                               ulonglong sec, ulong microsec, my_decimal *d)
+{
+  d->init();
+  longlong2decimal(sec, d); // cannot fail
+  if (microsec)
+  {
+    d->buf[(d->intg-1) / DIG_PER_DEC1 + 1]= microsec * (DIG_BASE/1000000);
+    d->frac= 6;
+  }
+  ((decimal_t *)d)->sign= sign;
+  return d;
+}
+
+
 my_decimal *date2my_decimal(MYSQL_TIME *ltime, my_decimal *dec)
 {
-  longlong date;
-  date = (ltime->year*100L + ltime->month)*100L + ltime->day;
+  longlong date= (ltime->year*100L + ltime->month)*100L + ltime->day;
   if (ltime->time_type > MYSQL_TIMESTAMP_DATE)
     date= ((date*100L + ltime->hour)*100L+ ltime->minute)*100L + ltime->second;
-  if (int2my_decimal(E_DEC_FATAL_ERROR, ltime->neg ? -date : date, FALSE, dec))
-    return dec;
-  if (ltime->second_part)
-  {
-    dec->buf[(dec->intg-1) / 9 + 1]= ltime->second_part * 1000;
-    dec->frac= 6;
-  }
-  return dec;
+  return seconds2my_decimal(ltime->neg, date, ltime->second_part, dec);
 }
 
 
@@ -240,12 +291,37 @@ void my_decimal_trim(ulong *precision, uint *scale)
 }
 
 
+/*
+  Convert a decimal to an ulong with a descriptive error message
+*/
+
+int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag,
+		   longlong *l)
+{
+  int res;
+  my_decimal rounded;
+  /* decimal_round can return only E_DEC_TRUNCATED */
+  decimal_round(d, &rounded, 0, HALF_UP);
+  res= (unsigned_flag ?
+        decimal2ulonglong(&rounded, (ulonglong *) l) :
+        decimal2longlong(&rounded, l));
+  if (res & mask)
+  {
+    char buff[DECIMAL_MAX_STR_LENGTH];
+    int length= sizeof(buff);
+    decimal2string(d, buff, &length, 0, 0, 0);
+
+    decimal_operation_results(res, buff,
+                              unsigned_flag ? "UNSIGNED INT" :
+                              "INT");
+  }
+  return res;
+}
+
+
 #ifndef DBUG_OFF
 /* routines for debugging print */
 
-#define DIG_PER_DEC1 9
-#define ROUND_UP(X)  (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1)
-
 /* print decimal */
 void
 print_decimal(const my_decimal *dec)
@@ -286,7 +362,6 @@ const char *dbug_decimal_as_string(char *buff, const my_decimal *val)
   return buff;
 }
 
-#endif /*DBUG_OFF*/
-
 
+#endif /*DBUG_OFF*/
 #endif /*MYSQL_CLIENT*/
diff --git a/sql/my_decimal.h b/sql/my_decimal.h
index 845aecb972c..814916231fb 100644
--- a/sql/my_decimal.h
+++ b/sql/my_decimal.h
@@ -34,32 +34,7 @@ C_MODE_START
 #include <decimal.h>
 C_MODE_END
 
-#define DECIMAL_LONGLONG_DIGITS 22
-#define DECIMAL_LONG_DIGITS 10
-#define DECIMAL_LONG3_DIGITS 8
-
-/** maximum length of buffer in our big digits (uint32). */
-#define DECIMAL_BUFF_LENGTH 9
-
-/* the number of digits that my_decimal can possibly contain */
-#define DECIMAL_MAX_POSSIBLE_PRECISION (DECIMAL_BUFF_LENGTH * 9)
-
-
-/**
-  maximum guaranteed precision of number in decimal digits (number of our
-  digits * number of decimal digits in one our big digit - number of decimal
-  digits in one our big digit decreased by 1 (because we always put decimal
-  point on the border of our big digits))
-*/
-#define DECIMAL_MAX_PRECISION (DECIMAL_MAX_POSSIBLE_PRECISION - 8*2)
-#define DECIMAL_MAX_SCALE 30
-#define DECIMAL_NOT_SPECIFIED 31
-
-/**
-  maximum length of string representation (number of maximum decimal
-  digits + 1 position for sign + 1 position for decimal point, no terminator)
-*/
-#define DECIMAL_MAX_STR_LENGTH (DECIMAL_MAX_POSSIBLE_PRECISION + 2)
+#include <my_decimal_limits.h>
 
 /**
   maximum size of packet length.
@@ -163,9 +138,10 @@ const char *dbug_decimal_as_string(char *buff, const my_decimal *val);
 #endif
 
 #ifndef MYSQL_CLIENT
-int decimal_operation_results(int result);
+int decimal_operation_results(int result, const char *value, const char *type);
 #else
-inline int decimal_operation_results(int result)
+inline int decimal_operation_results(int result, const char *value,
+                                     const char *type)
 {
   return result;
 }
@@ -187,7 +163,7 @@ inline void max_internal_decimal(my_decimal *to)
 inline int check_result(uint mask, int result)
 {
   if (result & mask)
-    decimal_operation_results(result);
+    decimal_operation_results(result, "", "DECIMAL");
   return result;
 }
 
@@ -324,24 +300,19 @@ int my_decimal2string(uint mask, const my_decimal *d, uint fixed_prec,
 		      uint fixed_dec, char filler, String *str);
 #endif
 
-inline
-int my_decimal2int(uint mask, const my_decimal *d, my_bool unsigned_flag,
-		   longlong *l)
-{
-  my_decimal rounded;
-  /* decimal_round can return only E_DEC_TRUNCATED */
-  decimal_round((decimal_t*)d, &rounded, 0, HALF_UP);
-  return check_result(mask, (unsigned_flag ?
-			     decimal2ulonglong(&rounded, (ulonglong *)l) :
-			     decimal2longlong(&rounded, l)));
-}
+bool my_decimal2seconds(const my_decimal *d, ulonglong *sec, ulong *microsec);
 
+my_decimal *seconds2my_decimal(bool sign, ulonglong sec, ulong microsec,
+                               my_decimal *d);
+
+int my_decimal2int(uint mask, const decimal_t *d, bool unsigned_flag,
+		   longlong *l);
 
 inline
-int my_decimal2double(uint, const my_decimal *d, double *result)
+int my_decimal2double(uint, const decimal_t *d, double *result)
 {
   /* No need to call check_result as this will always succeed */
-  return decimal2double((decimal_t*) d, result);
+  return decimal2double(d, result);
 }
 
 
@@ -384,6 +355,16 @@ int int2my_decimal(uint mask, longlong i, my_bool unsigned_flag, my_decimal *d)
 			     longlong2decimal(i, d)));
 }
 
+inline
+void decimal2my_decimal(decimal_t *from, my_decimal *to)
+{
+  DBUG_ASSERT(to->len >= from->len);
+  to->intg= from->intg;
+  to->frac= from->frac;
+  to->sign(from->sign);
+  memcpy(to->buf, from->buf, to->len*sizeof(decimal_digit_t));
+}
+
 
 inline
 void my_decimal_neg(decimal_t *arg)
@@ -447,7 +428,6 @@ int my_decimal_mod(uint mask, my_decimal *res, const my_decimal *a,
                                    res);
 }
 
-
 /**
   @return
     -1 if a<b, 1 if a>b and 0 if a==b
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 26bb8df9249..94b06add2bd 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -63,12 +63,15 @@ class Parser_state;
 
   QT_ORDINARY -- ordinary SQL query.
   QT_IS -- SQL query to be shown in INFORMATION_SCHEMA (in utf8 and without
-  character set introducers).
+           character set introducers).
+  QT_VIEW_INTERNAL -- view internal representation (like QT_ORDINARY except
+                      ORDER BY clause)
 */
 enum enum_query_type
 {
   QT_ORDINARY,
-  QT_IS
+  QT_IS,
+  QT_VIEW_INTERNAL
 };
 
 /* TODO convert all these three maps to Bitmap classes */
@@ -117,8 +120,6 @@ char *sql_strmake_with_convert(const char *str, size_t arg_length,
 			       CHARSET_INFO *from_cs,
 			       size_t max_res_length,
 			       CHARSET_INFO *to_cs, size_t *result_length);
-uint kill_one_thread(THD *thd, ulong id, bool only_kill_query);
-void sql_kill(THD *thd, ulong id, bool only_kill_query);
 bool net_request_file(NET* net, const char* fname);
 char* query_table_status(THD *thd,const char *db,const char *table_name);
 
@@ -353,7 +354,10 @@ protected:
   Number of comparisons of table rowids equivalent to reading one row from a 
   table.
 */
-#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*2)
+#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*100)
+
+/* cost1 is better that cost2 only if cost1 + COST_EPS < cost2 */
+#define COST_EPS  0.001
 
 /*
   For sequential disk seeks the cost formula is:
@@ -362,11 +366,11 @@ protected:
   The cost of average seek 
     DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
 */
-#define DISK_SEEK_BASE_COST ((double)0.5)
+#define DISK_SEEK_BASE_COST ((double)0.9)
 
 #define BLOCKS_IN_AVG_SEEK  128
 
-#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
+#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)
 
 
 /**
@@ -376,6 +380,12 @@ protected:
 */
 #define MATCHING_ROWS_IN_OTHER_TABLE 10
 
+/*
+  Subquery materialization-related constants
+*/
+#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
+#define DISK_TEMPTABLE_LOOKUP_COST 1.0
+
 /** Don't pack string keys shorter than this (if PACK_KEYS=1 isn't used). */
 #define KEY_DEFAULT_PACK_LENGTH 8
 
@@ -403,7 +413,6 @@ protected:
 #define DELAYED_LIMIT		100		/**< pause after xxx inserts */
 #define DELAYED_QUEUE_SIZE	1000
 #define DELAYED_WAIT_TIMEOUT	5*60		/**< Wait for delayed insert */
-#define FLUSH_TIME		0		/**< Don't flush tables */
 #define MAX_CONNECT_ERRORS	10		///< errors before disabling host
 
 #ifdef __NETWARE__
@@ -413,8 +422,6 @@ protected:
 #endif
 
 #if defined(__WIN__)
-#undef	FLUSH_TIME
-#define FLUSH_TIME	1800			/**< Flush every half hour */
 
 #define INTERRUPT_PRIOR -2
 #define CONNECT_PRIOR	-1
@@ -508,7 +515,6 @@ protected:
 #define OPTION_PROFILING                (ULL(1) << 33)
 
 
-
 /**
   Maximum length of time zone name that we support
   (Time zone name is char(64) in db). mysqlbinlog needs it.
@@ -557,27 +563,59 @@ protected:
 #define OPTIMIZER_SWITCH_INDEX_MERGE_UNION 2
 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
+#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT 16
+#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 32
+#define OPTIMIZER_SWITCH_DERIVED_MERGE 64
+#define OPTIMIZER_SWITCH_DERIVED_WITH_KEYS 128
+#define OPTIMIZER_SWITCH_FIRSTMATCH 256
+#define OPTIMIZER_SWITCH_LOOSE_SCAN 512
+#define OPTIMIZER_SWITCH_MATERIALIZATION 1024
+#define OPTIMIZER_SWITCH_IN_TO_EXISTS (1<<11)
+#define OPTIMIZER_SWITCH_SEMIJOIN (1<<12)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE  (1<<13)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<14)
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<15)
+/** If this is off, MRR is never used. */
+#define OPTIMIZER_SWITCH_MRR                       (1ULL << 16)
+/**
+   If OPTIMIZER_SWITCH_MRR is on and this is on, MRR is used depending on a
+   cost-based choice ("automatic"). If OPTIMIZER_SWITCH_MRR is on and this is
+   off, MRR is "forced" (i.e. used as long as the storage engine is capable of
+   doing it).
+*/
+#define OPTIMIZER_SWITCH_MRR_COST_BASED            (1ULL << 17)
+#define OPTIMIZER_SWITCH_MRR_SORT_KEYS             (1ULL << 18)
+#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE     (1ULL << 19)
+#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE       (1ULL << 20)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL    (1ULL << 21)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED         (1ULL << 22)
+#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA            (1ULL << 23)
+#define OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE (1ULL << 24)
+#define OPTIMIZER_SWITCH_TABLE_ELIMINATION         (1ULL << 25)
+#define OPTIMIZER_SWITCH_LAST                      (1ULL << 26)
 
-#ifdef DBUG_OFF
-#  define OPTIMIZER_SWITCH_LAST 16
-#else
-#  define OPTIMIZER_SWITCH_TABLE_ELIMINATION 16
-#  define OPTIMIZER_SWITCH_LAST 32
-#endif
-
-#ifdef DBUG_OFF 
 /* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
-#  define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
-                                    OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
-                                    OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
-                                    OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)
-#else 
-#  define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
+#define OPTIMIZER_SWITCH_DEFAULT   (OPTIMIZER_SWITCH_INDEX_MERGE | \
                                     OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
                                     OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
                                     OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
-                                    OPTIMIZER_SWITCH_TABLE_ELIMINATION)
-#endif
+                                    OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \
+                                    OPTIMIZER_SWITCH_DERIVED_MERGE | \
+                                    OPTIMIZER_SWITCH_DERIVED_WITH_KEYS | \
+                                    OPTIMIZER_SWITCH_TABLE_ELIMINATION | \
+                                    OPTIMIZER_SWITCH_IN_TO_EXISTS | \
+                                    OPTIMIZER_SWITCH_MATERIALIZATION | \
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
+                                    OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE | \
+                                    OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE | \
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL | \
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_HASHED | \
+                                    OPTIMIZER_SWITCH_JOIN_CACHE_BKA | \
+                                    OPTIMIZER_SWITCH_SUBQUERY_CACHE | \
+                                    OPTIMIZER_SWITCH_SEMIJOIN | \
+                                    OPTIMIZER_SWITCH_FIRSTMATCH | \
+                                    OPTIMIZER_SWITCH_LOOSE_SCAN )
 
 /*
   Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
@@ -622,15 +660,36 @@ protected:
 */
 #define CONTEXT_ANALYSIS_ONLY_DERIVED 4
 
-// uncachable cause
-#define UNCACHEABLE_DEPENDENT   1
-#define UNCACHEABLE_RAND        2
-#define UNCACHEABLE_SIDEEFFECT	4
-/// forcing to save JOIN for explain
-#define UNCACHEABLE_EXPLAIN     8
+/*
+  Don't evaluate constant sub-expressions of virtual column
+  expressions when opening tables
+*/ 
+#define CONTEXT_ANALYSIS_ONLY_VCOL_EXPR 8
+
+/*
+  Uncachable causes:
+
+This subquery has fields from outer query (put by user)
+*/
+#define UNCACHEABLE_DEPENDENT_GENERATED 1
+/* This subquery contains functions with random result */
+#define UNCACHEABLE_RAND                2
+/* This subquery contains functions with side effect */
+#define UNCACHEABLE_SIDEEFFECT	        4
+/* Forcing to save JOIN tables for explain */
+#define UNCACHEABLE_EXPLAIN             8
 /* For uncorrelated SELECT in an UNION with some correlated SELECTs */
-#define UNCACHEABLE_UNITED     16
-#define UNCACHEABLE_CHECKOPTION 32
+#define UNCACHEABLE_UNITED              16
+#define UNCACHEABLE_CHECKOPTION         32
+/*
+  This subquery has fields from outer query injected during
+  transformation process
+*/
+#define UNCACHEABLE_DEPENDENT_INJECTED  64
+
+/* This subquery has fields from outer query (any nature) */
+#define UNCACHEABLE_DEPENDENT (UNCACHEABLE_DEPENDENT_GENERATED | \
+                               UNCACHEABLE_DEPENDENT_INJECTED)
 
 /* Used to check GROUP BY list in the MODE_ONLY_FULL_GROUP_BY mode */
 #define UNDEF_POS (-1)
@@ -638,7 +697,11 @@ protected:
 /* BINLOG_DUMP options */
 
 #define BINLOG_DUMP_NON_BLOCK   1
+#endif /* !MYSQL_CLIENT */
+
+#define BINLOG_SEND_ANNOTATE_ROWS_EVENT   2
 
+#ifndef MYSQL_CLIENT
 /* sql_show.cc:show_log_files() */
 #define SHOW_LOG_STATUS_FREE "FREE"
 #define SHOW_LOG_STATUS_INUSE "IN USE"
@@ -681,7 +744,8 @@ enum enum_parsing_place
   SELECT_LIST,
   IN_WHERE,
   IN_ON,
-  IN_GROUP_BY
+  IN_GROUP_BY,
+  PARSING_PLACE_SIZE /* always should be the last */
 };
 
 struct st_table;
@@ -712,17 +776,6 @@ inline THD *_current_thd(void)
 #endif
 #define current_thd _current_thd()
 
-
-/** 
-  The meat of thd_proc_info(THD*, char*), a macro that packs the last
-  three calling-info parameters. 
-*/
-extern "C"
-const char *set_thd_proc_info(THD *thd, const char *info, 
-                              const char *calling_func, 
-                              const char *calling_file, 
-                              const unsigned int calling_line);
-
 /**
   Enumerate possible types of a table from re-execution
   standpoint.
@@ -758,9 +811,92 @@ typedef my_bool (*qc_engine_callback)(THD *thd, char *table_key,
                                       uint key_length,
                                       ulonglong *engine_data);
 #include "sql_string.h"
+#include "my_decimal.h"
+
+/*
+  to unify the code that differs only in the argument passed to the
+  error message (string vs. number)
+
+  We pass this container around, and only convert the number
+  to a string when necessary.
+*/
+class Lazy_string
+{
+public:
+  Lazy_string() {}
+  virtual ~Lazy_string() {}
+  virtual void copy_to(String *str) const = 0;
+};
+
+class Lazy_string_str : public Lazy_string
+{
+  const char *str;
+  size_t len;
+public:
+  Lazy_string_str(const char *str_arg, size_t len_arg)
+    : Lazy_string(), str(str_arg), len(len_arg) {}
+  void copy_to(String *dst) const
+  { dst->copy(str, (uint32)len, system_charset_info); }
+};
+
+class Lazy_string_num : public Lazy_string
+{
+  longlong num;
+public:
+  Lazy_string_num(longlong num_arg) : Lazy_string(), num(num_arg) {}
+  void copy_to(String *dst) const { dst->set(num, &my_charset_bin); }
+};
+
+class Lazy_string_decimal: public Lazy_string
+{
+  const my_decimal *d;
+public:
+  Lazy_string_decimal(const my_decimal *d_arg)
+    : Lazy_string(), d(d_arg) {}
+  void copy_to(String *dst) const {
+    my_decimal2string(E_DEC_FATAL_ERROR, d, 0, 0, ' ', dst);
+  }
+};
+
+class Lazy_string_double: public Lazy_string
+{
+  double num;
+public:
+  Lazy_string_double(double num_arg) : Lazy_string(), num(num_arg) {}
+  void copy_to(String *dst) const
+  { dst->set_real(num, NOT_FIXED_DEC, &my_charset_bin); }
+};
+
+class Lazy_string_time : public Lazy_string
+{
+  const MYSQL_TIME *ltime;
+public:
+  Lazy_string_time(const MYSQL_TIME *ltime_arg)
+    : Lazy_string(), ltime(ltime_arg) {}
+  void copy_to(String *dst) const
+  {
+    dst->alloc(MAX_DATETIME_FULL_WIDTH);
+    dst->length((uint) my_TIME_to_str(ltime, (char*) dst->ptr(),
+                                      AUTO_SEC_PART_DIGITS));
+    dst->set_charset(&my_charset_bin);
+  }
+};
+
+static inline enum enum_mysql_timestamp_type
+mysql_type_to_time_type(enum enum_field_types mysql_type)
+{
+  switch (mysql_type) {
+  case MYSQL_TYPE_TIME: return MYSQL_TIMESTAMP_TIME;
+  case MYSQL_TYPE_TIMESTAMP:
+  case MYSQL_TYPE_DATETIME: return MYSQL_TIMESTAMP_DATETIME;
+  case MYSQL_TYPE_NEWDATE:
+  case MYSQL_TYPE_DATE: return MYSQL_TIMESTAMP_DATE;
+  default: return MYSQL_TIMESTAMP_ERROR;
+  }
+}
+
 #include "sql_list.h"
 #include "sql_map.h"
-#include "my_decimal.h"
 #include "handler.h"
 #include "parse_file.h"
 #include "table.h"
@@ -784,6 +920,7 @@ typedef Comp_creator* (*chooser_compare_func_creator)(bool invert);
 #endif
 #include "item.h"
 extern my_decimal decimal_zero;
+extern my_decimal max_seconds_for_time_type, time_second_part_factor;
 
 /* sql_parse.cc */
 void free_items(Item *item);
@@ -885,6 +1022,7 @@ bool general_log_write(THD *thd, enum enum_server_command command,
 #ifdef MYSQL_SERVER
 #include "sql_servers.h"
 #include "opt_range.h"
+#include "sql_expression_cache.h"
 
 #ifdef HAVE_QUERY_CACHE
 struct Query_cache_query_flags
@@ -909,6 +1047,7 @@ struct Query_cache_query_flags
   MY_LOCALE *lc_time_names;
 };
 #define QUERY_CACHE_FLAGS_SIZE sizeof(Query_cache_query_flags)
+#define QUERY_CACHE_DB_LENGTH_SIZE 2
 #include "sql_cache.h"
 #define query_cache_store_query(A, B) query_cache.store_query(A, B)
 #define query_cache_destroy() query_cache.destroy()
@@ -917,7 +1056,7 @@ struct Query_cache_query_flags
 #define query_cache_resize(A) query_cache.resize(A)
 #define query_cache_set_min_res_unit(A) query_cache.set_min_res_unit(A)
 #define query_cache_invalidate3(A, B, C) query_cache.invalidate(A, B, C)
-#define query_cache_invalidate1(A) query_cache.invalidate(A)
+#define query_cache_invalidate1(A,B) query_cache.invalidate(A,B)
 #define query_cache_send_result_to_client(A, B, C) \
   query_cache.send_result_to_client(A, B, C)
 #define query_cache_invalidate_by_MyISAM_filename_ref \
@@ -929,6 +1068,7 @@ struct Query_cache_query_flags
   (((L)->sql_command == SQLCOM_SELECT) && (L)->safe_to_cache_query)
 #else
 #define QUERY_CACHE_FLAGS_SIZE 0
+#define QUERY_CACHE_DB_LENGTH_SIZE 0
 #define query_cache_store_query(A, B)     do { } while(0)
 #define query_cache_destroy()             do { } while(0)
 #define query_cache_result_size_limit(A)  do { } while(0)
@@ -936,16 +1076,19 @@ struct Query_cache_query_flags
 #define query_cache_resize(A)             do { } while(0)
 #define query_cache_set_min_res_unit(A)   do { } while(0)
 #define query_cache_invalidate3(A, B, C)  do { } while(0)
-#define query_cache_invalidate1(A)        do { } while(0)
+#define query_cache_invalidate1(A,B)      do { } while(0)
 #define query_cache_send_result_to_client(A, B, C) 0
 #define query_cache_invalidate_by_MyISAM_filename_ref NULL
-
-#define query_cache_abort(A)
-#define query_cache_end_of_result(A)
+#define query_cache_abort(A)              do { } while(0)
+#define query_cache_end_of_result(A)      do { } while(0)
 #define query_cache_maybe_disabled(T) 1
 #define query_cache_is_cacheable_query(L) 0
 #endif /*HAVE_QUERY_CACHE*/
 
+uint kill_one_thread(THD *thd, ulong id, killed_state kill_signal);
+void sql_kill(THD *thd, ulong id, killed_state kill_signal);
+void sql_kill_user(THD *thd, LEX_USER *str, killed_state kill_signal);
+
 /*
   Error injector Macros to enable easy testing of recovery after failures
   in various error cases.
@@ -1225,6 +1368,9 @@ bool mysql_select(THD *thd, Item ***rref_pointer_array,
                   Item *having, ORDER *proc_param, ulonglong select_type, 
                   select_result *result, SELECT_LEX_UNIT *unit, 
                   SELECT_LEX *select_lex);
+
+int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
+                   struct st_table_ref *table_ref);
 void free_underlaid_joins(THD *thd, SELECT_LEX *select);
 bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit,
                          select_result *result);
@@ -1232,11 +1378,9 @@ int mysql_explain_select(THD *thd, SELECT_LEX *sl, char const *type,
 			 select_result *result);
 bool mysql_union(THD *thd, LEX *lex, select_result *result,
                  SELECT_LEX_UNIT *unit, ulong setup_tables_done_option);
-bool mysql_handle_derived(LEX *lex, bool (*processor)(THD *thd,
-                                                      LEX *lex,
-                                                      TABLE_LIST *table));
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *t);
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *t);
+bool mysql_handle_derived(LEX *lex, uint phases);
+bool mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases);
+bool mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *dt_list, uint phases);
 bool check_table_file_presence(char *old_path, char *path,
                                const char *db,
                                const char *table_name,
@@ -1249,6 +1393,12 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
 			bool table_cant_handle_bit_fields,
                         bool make_copy_field,
                         uint convert_blob_length);
+bool open_tmp_table(TABLE *table);
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                                      ENGINE_COLUMNDEF *start_recinfo,
+                                      ENGINE_COLUMNDEF **recinfo,
+                                      ulonglong options);
+
 void sp_prepare_create_field(THD *thd, Create_field *sql_field);
 int prepare_create_field(Create_field *sql_field, 
 			 uint *blob_columns, 
@@ -1268,7 +1418,8 @@ bool mysql_alter_table(THD *thd, char *new_db, char *new_name,
                        HA_CREATE_INFO *create_info,
                        TABLE_LIST *table_list,
                        Alter_info *alter_info,
-                       uint order_num, ORDER *order, bool ignore);
+                       uint order_num, ORDER *order, bool ignore,
+                       bool require_online);
 bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list);
 bool mysql_create_like_table(THD *thd, TABLE_LIST *table,
                              TABLE_LIST *src_table,
@@ -1367,6 +1518,7 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length,
 Field *
 find_field_in_table_sef(TABLE *table, const char *name);
 int update_virtual_fields(THD *thd, TABLE *table, bool ignore_stored= FALSE);
+int dynamic_column_error_message(enum_dyncol_func_result rc);
 
 #endif /* MYSQL_SERVER */
 
@@ -1478,15 +1630,6 @@ void mysqld_stmt_reset(THD *thd, char *packet);
 void mysql_stmt_get_longdata(THD *thd, char *pos, ulong packet_length);
 void reinit_stmt_before_use(THD *thd, LEX *lex);
 
-/* sql_handler.cc */
-bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen);
-bool mysql_ha_close(THD *thd, TABLE_LIST *tables);
-bool mysql_ha_read(THD *, TABLE_LIST *,enum enum_ha_read_modes,char *,
-                   List<Item> *,enum ha_rkey_function,Item *,ha_rows,ha_rows);
-void mysql_ha_flush(THD *thd);
-void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked);
-void mysql_ha_cleanup(THD *thd);
-
 /* sql_base.cc */
 #define TMP_TABLE_KEY_EXTRA 8
 void set_item_name(Item *item,char *pos,uint length);
@@ -1557,17 +1700,21 @@ bool get_key_map_from_key_list(key_map *map, TABLE *table,
 bool insert_fields(THD *thd, Name_resolution_context *context,
 		   const char *db_name, const char *table_name,
                    List_iterator<Item> *it, bool any_privileges);
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+                      bool full_table_list, TABLE_LIST *boundary);
 bool setup_tables(THD *thd, Name_resolution_context *context,
                   List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
-                  TABLE_LIST **leaves, bool select_insert);
+                  List<TABLE_LIST> &leaves, bool select_insert,
+                  bool full_table_list);
 bool setup_tables_and_check_access(THD *thd, 
                                    Name_resolution_context *context,
                                    List<TABLE_LIST> *from_clause, 
                                    TABLE_LIST *tables, 
-                                   TABLE_LIST **leaves, 
+                                   List<TABLE_LIST> &leaves, 
                                    bool select_insert,
                                    ulong want_access_first,
-                                   ulong want_access);
+                                   ulong want_access,
+                                   bool full_table_list);
 int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list, uint wild_num);
 bool setup_fields(THD *thd, Item** ref_pointer_array,
@@ -1586,8 +1733,9 @@ inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array,
   thd->lex->select_lex.no_wrap_view_item= FALSE;
   return res;
 }
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
 		COND **conds);
+void wrap_ident(THD *thd, Item **conds);
 int setup_ftfuncs(SELECT_LEX* select);
 int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order);
 void wait_for_condition(THD *thd, pthread_mutex_t *mutex,
@@ -1608,7 +1756,8 @@ inline int open_and_lock_tables(THD *thd, TABLE_LIST *tables)
 /* simple open_and_lock_tables without derived handling for single table */
 TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l,
                                 thr_lock_type lock_type);
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags);
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+                                    uint dt_phases);
 int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen);
 int decide_logging_format(THD *thd, TABLE_LIST *tables);
 TABLE *open_temporary_table(THD *thd, const char *path, const char *db,
@@ -1638,6 +1787,7 @@ void flush_tables();
 bool is_equal(const LEX_STRING *a, const LEX_STRING *b);
 char *make_default_log_name(char *buff,const char* log_ext);
 char *make_once_alloced_filename(const char *basename, const char *ext);
+void unfix_fields(List<Item> &items);
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
 uint fast_alter_partition_table(THD *thd, TABLE *table,
@@ -1800,7 +1950,7 @@ bool close_cached_connection_tables(THD *thd, bool wait_for_refresh,
                                     bool have_lock = FALSE);
 void copy_field_from_tmp_record(Field *field,int offset);
 bool fill_record(THD *thd, Field **field, List<Item> &values,
-                 bool ignore_errors);
+                 bool ignore_errors, bool use_value);
 bool fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
                                           List<Item> &values,
                                           bool ignore_errors,
@@ -1855,19 +2005,26 @@ void print_cached_tables(void);
 void TEST_filesort(SORT_FIELD *sortorder,uint s_length);
 void print_plan(JOIN* join,uint idx, double record_count, double read_time,
                 double current_read_time, const char *info);
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array);
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm);
 #endif
 void mysql_print_status();
 /* key.cc */
 int find_ref_key(KEY *key, uint key_count, uchar *record, Field *field,
                  uint *key_length, uint *keypart);
-void key_copy(uchar *to_key, uchar *from_record, KEY *key_info, uint key_length);
+void key_copy(uchar *to_key, uchar *from_record, KEY *key_info, uint key_length,
+              bool with_zerofill= FALSE);
 void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
                  uint key_length);
 bool key_cmp_if_same(TABLE *form,const uchar *key,uint index,uint key_length);
 void key_unpack(String *to,TABLE *form,uint index);
 bool is_key_used(TABLE *table, uint idx, const MY_BITMAP *fields);
 int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length);
+ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key);
+bool key_buf_cmp(KEY *key_info, uint used_key_parts,
+                 const uchar *key1, const uchar *key2);
 extern "C" int key_rec_cmp(void *key_info, uchar *a, uchar *b);
+int key_tuple_cmp(KEY_PART_INFO *part, uchar *key1, uchar *key2, uint tuple_length);
 
 bool init_errmessage(void);
 #endif /* MYSQL_SERVER */
@@ -1875,6 +2032,7 @@ void sql_perror(const char *message);
 
 bool fn_format_relative_to_data_home(char * to, const char *name,
 				     const char *dir, const char *extension);
+void set_server_version(void);
 /**
   Test a file path to determine if the path is compatible with the secure file
   path restriction.
@@ -1898,6 +2056,7 @@ void flush_thread_cache();
 /* item_func.cc */
 extern bool check_reserved_words(LEX_STRING *name);
 extern enum_field_types agg_field_type(Item **items, uint nitems);
+Item *find_date_time_item(Item **args, uint nargs, uint col);
 
 /* strfunc.cc */
 ulonglong find_set(TYPELIB *lib, const char *x, uint length, CHARSET_INFO *cs,
@@ -1986,6 +2145,7 @@ extern ulonglong thd_startup_options;
 extern ulong thread_id;
 extern ulong binlog_cache_use, binlog_cache_disk_use;
 extern ulong aborted_threads,aborted_connects;
+extern ulong opt_progress_report_time;
 extern ulong delayed_insert_timeout;
 extern ulong delayed_insert_limit, delayed_queue_size;
 extern ulong delayed_insert_threads, delayed_insert_writes;
@@ -1998,15 +2158,18 @@ extern MYSQL_PLUGIN_IMPORT ulong max_connections;
 extern ulong max_connect_errors, connect_timeout;
 extern ulong extra_max_connections;
 extern ulong slave_net_timeout, slave_trans_retries;
-extern uint max_user_connections;
+extern int max_user_connections;
+extern bool max_user_connections_checking;
 extern ulonglong denied_connections;
 extern ulong what_to_log,flush_time;
 extern ulong query_buff_size;
 extern ulong max_prepared_stmt_count, prepared_stmt_count;
 extern ulong binlog_cache_size, open_files_limit;
 extern ulonglong max_binlog_cache_size;
-extern ulong max_binlog_size, max_relay_log_size;
+extern ulong max_binlog_size, max_relay_log_size, opt_binlog_dbug_fsync_sleep;
 extern ulong opt_binlog_rows_event_max_size;
+extern my_bool opt_master_verify_checksum;
+extern my_bool opt_slave_sql_verify_checksum;
 extern ulong rpl_recovery_rank, thread_cache_size, thread_pool_size;
 extern ulong back_log;
 #endif /* MYSQL_SERVER */
@@ -2016,6 +2179,7 @@ extern ulong MYSQL_PLUGIN_IMPORT specialflag;
 #ifdef MYSQL_SERVER
 extern ulong current_pid;
 extern ulong expire_logs_days, sync_binlog_period, sync_binlog_counter;
+extern ulong binlog_checksum_options;
 extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
 extern ulong tc_log_page_waits;
 extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
@@ -2024,6 +2188,8 @@ extern uint test_flags,select_errors,ha_open_options;
 extern uint protocol_version, mysqld_port, mysqld_extra_port, dropping_tables;
 extern uint delay_key_write_options;
 extern ulong max_long_data_size, max_used_connections;
+extern uint internal_tmp_table_max_key_length;
+extern uint internal_tmp_table_max_key_segments;
 #endif /* MYSQL_SERVER */
 #if defined MYSQL_SERVER || defined INNODB_COMPATIBILITY_HOOKS
 extern MYSQL_PLUGIN_IMPORT uint lower_case_table_names;
@@ -2048,13 +2214,13 @@ extern bool opt_ignore_builtin_innodb;
 extern my_bool opt_character_set_client_handshake;
 extern bool volatile abort_loop, shutdown_in_progress;
 extern bool in_bootstrap;
-extern uint volatile thread_count, thread_running, global_read_lock;
+extern uint volatile thread_count, thread_running, global_read_lock, global_disable_checkpoint;
 extern ulong thread_created;
 extern uint thread_handling;
 extern uint connection_count, extra_connection_count;
 extern my_bool opt_sql_bin_update, opt_safe_user_create, opt_no_mix_types;
 extern my_bool opt_safe_show_db, opt_local_infile, opt_myisam_use_mmap;
-extern my_bool opt_slave_compressed_protocol, use_temp_pool;
+extern my_bool opt_slave_compressed_protocol, use_temp_pool, opt_help;
 extern ulong slave_exec_mode_options;
 extern my_bool opt_readonly, lower_case_file_system;
 extern my_bool opt_userstat_running;
@@ -2062,6 +2228,7 @@ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
 extern my_bool opt_secure_auth, debug_assert_if_crashed_table;
 extern char* opt_secure_file_priv;
 extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements;
+extern my_bool opt_query_cache_strip_comments;
 extern my_bool sp_automatic_privileges, opt_noacl;
 extern my_bool opt_old_style_user_limits, trust_function_creators;
 extern uint opt_crash_binlog_innodb;
@@ -2180,6 +2347,10 @@ extern struct st_VioSSLFd * ssl_acceptor_fd;
 
 MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **table, uint count,
                               uint flags, bool *need_reopen);
+int mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock,
+                      bool write_lock_used,
+                      uint flags, bool *need_reopen);
+
 /* mysql_lock_tables() and open_table() flags bits */
 #define MYSQL_LOCK_IGNORE_GLOBAL_READ_LOCK      0x0001
 #define MYSQL_LOCK_IGNORE_FLUSH                 0x0002
@@ -2187,8 +2358,12 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **table, uint count,
 #define MYSQL_OPEN_TEMPORARY_ONLY               0x0008
 #define MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY      0x0010
 #define MYSQL_LOCK_PERF_SCHEMA                  0x0020
+#define MYSQL_LOCK_NOT_TEMPORARY		0x0040
+/* flags for get_lock_data */
+#define GET_LOCK_UNLOCK         1
+#define GET_LOCK_STORE_LOCKS    2
 
-void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock);
+void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock= 1);
 void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock);
 void mysql_unlock_some_tables(THD *thd, TABLE **table,uint count);
 void mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table,
@@ -2200,15 +2375,19 @@ bool mysql_lock_abort_for_thread(THD *thd, TABLE *table);
 MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b);
 TABLE_LIST *mysql_lock_have_duplicate(THD *thd, TABLE_LIST *needle,
                                       TABLE_LIST *haystack);
+void reset_lock_data(MYSQL_LOCK *sql_lock, bool unlock);
 bool lock_global_read_lock(THD *thd);
 void unlock_global_read_lock(THD *thd);
 bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh,
                               bool is_not_commit);
 void start_waiting_global_read_lock(THD *thd);
 bool make_global_read_lock_block_commit(THD *thd);
+void disable_checkpoints(THD *thd);
 bool set_protect_against_global_read_lock(void);
 void unset_protect_against_global_read_lock(void);
 void broadcast_refresh(void);
+MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
+                          uint flags, TABLE **write_lock_used);
 
 /* Lock based on name */
 int lock_and_wait_for_table_name(THD *thd, TABLE_LIST *table_list);
@@ -2266,17 +2445,34 @@ ulong convert_period_to_month(ulong period);
 ulong convert_month_to_period(ulong month);
 void get_date_from_daynr(long daynr,uint *year, uint *month,
 			 uint *day);
-my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, my_bool *not_exist);
-bool str_to_time_with_warn(const char *str,uint length,MYSQL_TIME *l_time);
+my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error);
+bool str_to_time_with_warn(const char *str,uint length,MYSQL_TIME *l_time,
+                           ulong fuzzydate);
 timestamp_type str_to_datetime_with_warn(const char *str, uint length,
-                                         MYSQL_TIME *l_time, uint flags);
+                                         MYSQL_TIME *l_time, ulong flags);
 void localtime_to_TIME(MYSQL_TIME *to, struct tm *from);
 void calc_time_from_sec(MYSQL_TIME *to, long seconds, long microseconds);
 
-void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
-                                  const char *str_val,
-				  uint str_length, timestamp_type time_type,
+void make_truncated_value_warning(THD *thd,
+                                  MYSQL_ERROR::enum_warning_level level,
+                                  const Lazy_string *str_val,
+                                  timestamp_type time_type,
                                   const char *field_name);
+bool double_to_datetime_with_warn(double value, MYSQL_TIME *ltime,
+                                  ulong fuzzydate, const char *field_name);
+bool decimal_to_datetime_with_warn(const my_decimal *value, MYSQL_TIME *ltime,
+                                   ulong fuzzydate, const char *field_name);
+bool int_to_datetime_with_warn(longlong value, MYSQL_TIME *ltime,
+                               ulong fuzzydate, const char *field_name);
+
+static inline void make_truncated_value_warning(THD *thd,
+                MYSQL_ERROR::enum_warning_level level, const char *str_val,
+                uint str_length, timestamp_type time_type,
+                const char *field_name)
+{
+  const Lazy_string_str str(str_val, str_length);
+  make_truncated_value_warning(thd, level, &str, time_type, field_name);
+}
 
 bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL interval);
 bool calc_time_diff(MYSQL_TIME *l_time1, MYSQL_TIME *l_time2, int l_sign,
@@ -2293,12 +2489,6 @@ const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
 				     timestamp_type type);
 extern bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 			   timestamp_type type, String *str);
-void make_datetime(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-                   String *str);
-void make_date(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-               String *str);
-void make_time(const DATE_TIME_FORMAT *format, const MYSQL_TIME *l_time,
-               String *str);
 int my_time_compare(MYSQL_TIME *a, MYSQL_TIME *b);
 longlong get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
                             Item *warn_item, bool *is_null);
@@ -2316,6 +2506,8 @@ ha_rows filesort(THD *thd, TABLE *form,struct st_sort_field *sortorder,
 		 ha_rows max_rows, bool sort_positions,
                  ha_rows *examined_rows);
 void filesort_free_buffers(TABLE *table, bool full);
+double get_merge_many_buffs_cost(uint *buffer, uint last_n_elems,
+                                 int elem_size);
 void change_double_for_sort(double nr,uchar *to);
 double my_double_round(double value, longlong dec, bool dec_unsigned,
                        bool truncate);
@@ -2497,7 +2689,7 @@ Item * all_any_subquery_creator(Item *left_expr,
 inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
 {
   table->used_fields= 0;
-  table->const_table= 0;
+  table_list->reset_const_table();
   table->null_row= 0;
   table->status= STATUS_NO_RECORD;
   table->maybe_null= table_list->outer_join;
@@ -2513,6 +2705,14 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
   table->force_index_order= table->force_index_group= 0;
   table->covering_keys= table->s->keys_for_keyread;
   table->merge_keys.clear_all();
+  TABLE_LIST *orig= table_list->select_lex ?
+    table_list->select_lex->master_unit()->derived : 0;
+  if (!orig || !orig->is_merged_derived())
+  {
+    /* Tables merged from derived were set up already.*/
+    table->covering_keys= table->s->keys_for_keyread;
+    table->merge_keys.clear_all();
+  }
 }
 
 
@@ -2539,6 +2739,19 @@ inline bool is_user_table(TABLE * table)
   return strncmp(name, tmp_file_prefix, tmp_file_prefix_length);
 }
 
+
+#ifndef HAVE_LOG2
+/*
+  This will be slightly slower and perhaps a tiny bit less accurate than
+  doing it the IEEE754 way but log2() should be available on C99 systems.
+*/
+inline double log2(double x)
+{
+  return (log(x) / M_LN2);
+}
+#endif
+
+
 /*
   Some functions that are different in the embedded library and the normal
   server
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 586c614555d..aa3942521ed 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -148,6 +148,7 @@ static event_handle_t eh;
 static Report_t ref;
 static void *refneb= NULL;
 my_bool event_flag= FALSE;
+
 static int volumeid= -1;
 
   /* NEB event callback */
@@ -335,10 +336,23 @@ TYPELIB sql_mode_typelib= { array_elements(sql_mode_names)-1,"",
 static const char *optimizer_switch_names[]=
 {
   "index_merge","index_merge_union","index_merge_sort_union",
-  "index_merge_intersection",
-#ifndef DBUG_OFF
+  "index_merge_intersection","index_merge_sort_intersection",
+  "index_condition_pushdown",
+  "derived_merge", "derived_with_keys",
+  "firstmatch","loosescan","materialization","in_to_exists","semijoin",
+  "partial_match_rowid_merge",
+  "partial_match_table_scan",
+  "subquery_cache",
+  "mrr",
+  "mrr_cost_based",
+  "mrr_sort_keys",
+  "outer_join_with_cache",
+  "semijoin_with_cache",
+  "join_cache_incremental",
+  "join_cache_hashed",
+  "join_cache_bka",
+  "optimize_join_buffer_size",
   "table_elimination",
-#endif
   "default", NullS
 };
 
@@ -349,9 +363,28 @@ static const unsigned int optimizer_switch_names_len[]=
   sizeof("index_merge_union") - 1,
   sizeof("index_merge_sort_union") - 1,
   sizeof("index_merge_intersection") - 1,
-#ifndef DBUG_OFF
+  sizeof("index_merge_sort_intersection") - 1,
+  sizeof("index_condition_pushdown") - 1,
+  sizeof("derived_merge") - 1,
+  sizeof("derived_with_keys") - 1,
+  sizeof("firstmatch") - 1,
+  sizeof("loosescan") - 1,
+  sizeof("materialization") - 1,
+  sizeof("in_to_exists") - 1,
+  sizeof("semijoin") - 1,
+  sizeof("partial_match_rowid_merge") - 1,
+  sizeof("partial_match_table_scan") - 1,
+  sizeof("subquery_cache") - 1,
+  sizeof("mrr") - 1,
+  sizeof("mrr_cost_based") - 1,
+  sizeof("mrr_sort_keys") - 1,
+  sizeof("outer_join_with_cache") - 1,
+  sizeof("semijoin_with_cache") - 1,
+  sizeof("join_cache_incremental") - 1,
+  sizeof("join_cache_hashed") - 1,
+  sizeof("join_cache_bka") - 1,
+  sizeof("optimize_join_buffer_size") - 1,
   sizeof("table_elimination") - 1,
-#endif
   sizeof("default") - 1
 };
 TYPELIB optimizer_switch_typelib= { array_elements(optimizer_switch_names)-1,"",
@@ -404,14 +437,15 @@ bool opt_large_files= sizeof(my_off_t) > 4;
 /*
   Used with --help for detailed option
 */
-static my_bool opt_help= 0, opt_verbose= 0;
+static my_bool opt_verbose= 0;
 
-arg_cmp_func Arg_comparator::comparator_matrix[5][2] =
+arg_cmp_func Arg_comparator::comparator_matrix[6][2] =
 {{&Arg_comparator::compare_string,     &Arg_comparator::compare_e_string},
  {&Arg_comparator::compare_real,       &Arg_comparator::compare_e_real},
  {&Arg_comparator::compare_int_signed, &Arg_comparator::compare_e_int},
  {&Arg_comparator::compare_row,        &Arg_comparator::compare_e_row},
- {&Arg_comparator::compare_decimal,    &Arg_comparator::compare_e_decimal}};
+ {&Arg_comparator::compare_decimal,    &Arg_comparator::compare_e_decimal},
+ {&Arg_comparator::compare_datetime,   &Arg_comparator::compare_e_datetime}};
 
 const char *log_output_names[] = { "NONE", "FILE", "TABLE", NullS};
 static const unsigned int log_output_names_len[]= { 4, 4, 5, 0 };
@@ -433,7 +467,13 @@ static bool volatile ready_to_exit;
 static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0;
 static my_bool opt_short_log_format= 0;
 static my_bool opt_ignore_wrong_options= 0;
-static my_bool opt_sync= 0;
+static my_bool opt_sync= 0, opt_thread_alarm;
+/*
+  Set this to 1 if you want to that 'strict mode' should affect all date
+  operations. If this is 0, then date checking is only done when storing
+  dates into a table.
+*/
+my_bool strict_date_checking= 0;
 static uint kill_cached_threads, wake_thread;
 ulong thread_created;
 uint thread_handling;
@@ -444,12 +484,30 @@ static const char *sql_mode_str= "OFF";
 /* Text representation for OPTIMIZER_SWITCH_DEFAULT */
 static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
                                         "index_merge_sort_union=on,"
-                                        "index_merge_intersection=on"
-#ifndef DBUG_OFF                                        
-                                        ",table_elimination=on";
-#else
+                                        "index_merge_intersection=on,"
+                                        "index_merge_sort_intersection=off,"
+                                        "index_condition_pushdown=on,"
+                                        "derived_merge=on,"
+                                        "derived_with_keys=on,"
+                                        "firstmatch=on,"
+                                        "loosescan=on,"
+                                        "materialization=on,"
+                                        "in_to_exists=on,"
+                                        "semijoin=on,"
+                                        "partial_match_rowid_merge=on,"
+                                        "partial_match_table_scan=on,"
+                                        "subquery_cache=on,"
+                                        "mrr=off,"
+                                        "mrr_cost_based=off,"
+                                        "mrr_sort_keys=off,"
+                                        "outer_join_with_cache=on,"
+                                        "semijoin_with_cache=on,"
+                                        "join_cache_incremental=on,"
+                                        "join_cache_hashed=on,"
+                                        "join_cache_bka=on,"
+                                        "optimize_join_buffer_size=off,"
+                                        "table_elimination=on";
                                         ;
-#endif
 #ifdef SAFEMALLOC
 my_bool sf_malloc_trough_check= 0;
 #endif
@@ -471,7 +529,7 @@ static pthread_cond_t COND_thread_cache, COND_flush_thread_cache;
 /* Global variables */
 
 bool opt_update_log, opt_bin_log, opt_ignore_builtin_innodb= 0;
-my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table;
+my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table, opt_help= 0;
 my_bool opt_userstat_running;
 ulong log_output_options;
 my_bool opt_log_queries_not_using_indexes= 0;
@@ -516,6 +574,7 @@ my_bool opt_local_infile, opt_slave_compressed_protocol;
 my_bool opt_safe_user_create = 0, opt_no_mix_types = 0;
 my_bool opt_show_slave_auth_info, opt_sql_bin_update = 0;
 my_bool opt_log_slave_updates= 0;
+my_bool opt_replicate_annotate_row_events= 0;
 bool slave_warning_issued = false;
 
 /*
@@ -551,6 +610,7 @@ my_bool opt_secure_auth= 0;
 char* opt_secure_file_priv= 0;
 my_bool opt_log_slow_admin_statements= 0;
 my_bool opt_log_slow_slave_statements= 0;
+my_bool opt_query_cache_strip_comments = 0;
 my_bool lower_case_file_system= 0;
 my_bool opt_large_pages= 0;
 my_bool opt_myisam_use_mmap= 0;
@@ -569,6 +629,8 @@ my_bool opt_noacl;
 my_bool sp_automatic_privileges= 1;
 
 ulong opt_binlog_rows_event_max_size;
+my_bool opt_master_verify_checksum= 0;
+my_bool opt_slave_sql_verify_checksum= 1;
 const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS};
 TYPELIB binlog_format_typelib=
   { array_elements(binlog_format_names) - 1, "",
@@ -614,7 +676,12 @@ ulong extra_max_connections;
 */
 ulong max_long_data_size;
 
-uint  max_user_connections= 0;
+/* Limits for internal temporary tables (MyISAM or Aria) */
+uint internal_tmp_table_max_key_length;
+uint internal_tmp_table_max_key_segments;
+
+int  max_user_connections= 0;
+bool max_user_connections_checking=0;
 ulonglong denied_connections;
 /**
   Limit of the total number of prepared statements in the server.
@@ -647,6 +714,7 @@ char mysql_real_data_home[FN_REFLEN],
      language[FN_REFLEN], reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN],
      *opt_init_file, *opt_tc_log_file,
      def_ft_boolean_syntax[sizeof(ft_boolean_syntax)];
+const char *opt_basename;
 char mysql_unpacked_real_data_home[FN_REFLEN];
 int mysql_unpacked_real_data_home_len;
 uint reg_ext_length;
@@ -671,6 +739,8 @@ const char *in_additional_cond= "<IN COND>";
 const char *in_having_cond= "<IN HAVING>";
 
 my_decimal decimal_zero;
+my_decimal max_seconds_for_time_type, time_second_part_factor;
+
 /* classes for comparation parsing/processing */
 Eq_creator eq_creator;
 Ne_creator ne_creator;
@@ -874,7 +944,6 @@ pthread_handler_t signal_hand(void *arg);
 static int mysql_init_variables(void);
 static int get_options(int *argc,char **argv);
 extern "C" my_bool mysqld_get_one_option(int, const struct my_option *, char *);
-static void set_server_version(void);
 static int init_thread_environment();
 static char *get_relative_path(const char *path);
 static int fix_paths(void);
@@ -1028,7 +1097,7 @@ static void close_connections(void)
     if (tmp->slave_thread)
       continue;
 
-    tmp->killed= THD::KILL_CONNECTION;
+    tmp->killed= KILL_SERVER_HARD;
     thread_scheduler.post_kill_notification(tmp);
     pthread_mutex_lock(&tmp->LOCK_thd_data);
     if (tmp->mysys_var)
@@ -1402,6 +1471,7 @@ void clean_up(bool print_message)
   ha_end();
   if (tc_log)
     tc_log->close();
+  TC_destroy();
   xid_cache_free();
   wt_end();
   delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
@@ -1818,8 +1888,10 @@ static my_socket activate_tcp_port(uint port)
   }
   if (ret < 0)
   {
-    DBUG_PRINT("error",("Got error: %d from bind",socket_errno));
-    sql_perror("Can't start server: Bind on TCP/IP port");
+    char buff[100];
+    sprintf(buff, "Can't start server: Bind on TCP/IP port. Got error: %d",
+            (int) socket_errno);
+    sql_perror(buff);
     sql_print_error("Do you already have another mysqld server running on "
                     "port: %u ?", port);
     unireg_abort(1);
@@ -1974,7 +2046,7 @@ void close_connection(THD *thd, uint errcode, bool lock)
 		      errcode ? ER(errcode) : ""));
   if (lock)
     (void) pthread_mutex_lock(&LOCK_thread_count);
-  thd->killed= THD::KILL_CONNECTION;
+  thd->killed= KILL_CONNECTION;
 
   if (global_system_variables.log_warnings > 3)
   {
@@ -2088,7 +2160,8 @@ static bool cache_thread()
         this thread for handling of new THD object/connection.
       */
       thd->mysys_var->abort= 0;
-      thd->thr_create_utime= my_micro_time();
+      thd->thr_create_utime= microsecond_interval_timer();
+      thd->start_utime= thd->thr_create_utime;
       threads.append(thd);
       return(1);
     }
@@ -2925,41 +2998,25 @@ int my_message_sql(uint error, const char *str, myf MyFlags)
 
     thd->is_slave_error=  1; // needed to catch query errors during replication
 
-    /*
-      thd->lex->current_select == 0 if lex structure is not inited
-      (not query command (COM_QUERY))
-    */
-    if (thd->lex->current_select &&
-	thd->lex->current_select->no_error && !thd->is_fatal_error)
+    if (thd->main_da.is_ok() && !thd->main_da.can_overwrite_status)
     {
-      DBUG_PRINT("error",
-                 ("Error converted to warning: current_select: no_error %d  "
-                  "fatal_error: %d",
-                  (thd->lex->current_select ?
-                   thd->lex->current_select->no_error : 0),
-                  (int) thd->is_fatal_error));
+      /*
+        Client has already got ok packet and we are not in net_flush(), so
+        we write a message to error log.
+        This could happen if we get an error in implicit commit.
+        This should never happen in normal operation, so lets
+        assert here in debug builds.
+      */
+      DBUG_ASSERT(0);
+      func= sql_print_error;
+      MyFlags|= ME_NOREFRESH;
     }
-    else
+    else if (! thd->main_da.is_error()) // Return only first message
     {
-      if (thd->main_da.is_ok() && !thd->main_da.can_overwrite_status)
-      {
-        /*
-          Client has already got ok packet and we are not in net_flush(), so
-          we write a message to error log.
-          This could happen if we get an error in implicit commit.
-          This should never happen in normal operation, so lets
-          assert here in debug builds.
-        */
-        DBUG_ASSERT(0);
-        func= sql_print_error;
-        MyFlags|= ME_NOREFRESH;
-      }
-      else if (! thd->main_da.is_error()) // Return only first message
-      {
-        thd->main_da.set_error_status(thd, error, str);
-      }
-      query_cache_abort(&thd->net);
+      thd->main_da.set_error_status(thd, error, str);
     }
+    query_cache_abort(&thd->net);
+
     /*
       If a continue handler is found, the error message will be cleared
       by the stored procedures code.
@@ -3041,6 +3098,7 @@ const char *load_default_groups[]= {
 #endif
 "mysqld", "server", MYSQL_BASE_VERSION,
 "mariadb", MARIADB_BASE_VERSION,
+"client-server",
 0, 0};
 
 #if defined(__WIN__) && !defined(EMBEDDED_LIBRARY)
@@ -3260,9 +3318,7 @@ static int init_common_variables(const char *conf_file_name, int argc,
 				 char **argv, const char **groups)
 {
   char buff[FN_REFLEN], *s;
-  const char *basename;
   umask(((~my_umask) & 0666));
-  my_decimal_set_zero(&decimal_zero); // set decimal_zero constant;
   tzset();			// Set tzname
 
   max_system_variables.pseudo_thread_id= (ulong)~0;
@@ -3321,13 +3377,13 @@ static int init_common_variables(const char *conf_file_name, int argc,
     strmake(glob_hostname, STRING_WITH_LEN("localhost"));
     sql_print_warning("gethostname failed, using '%s' as hostname",
                         glob_hostname);
-    basename= "mysql";
+    opt_basename= "mysql";
   }
   else
   {
-    basename= glob_hostname;
+    opt_basename= glob_hostname;
   }
-  strmake(pidfile_name, basename, sizeof(pidfile_name)-5);
+  strmake(pidfile_name, opt_basename, sizeof(pidfile_name)-5);
   strmov(fn_ext(pidfile_name),".pid");		// Add proper extension
 
   /*
@@ -3606,8 +3662,7 @@ You should consider changing lower_case_table_names to 1 or 2",
     }
   }
   else if (lower_case_table_names == 2 &&
-           !(lower_case_file_system=
-             (test_if_case_insensitive(mysql_real_data_home) == 1)))
+           !(lower_case_file_system= (lower_case_file_system == 1)))
   {
     if (global_system_variables.log_warnings)
       sql_print_warning("lower_case_table_names was set to 2, even though your "
@@ -3618,8 +3673,7 @@ You should consider changing lower_case_table_names to 1 or 2",
   }
   else
   {
-    lower_case_file_system=
-      (test_if_case_insensitive(mysql_real_data_home) == 1);
+    lower_case_file_system= (lower_case_file_system == 1);
   }
 
   /* Reset table_alias_charset, now that lower_case_table_names is set. */
@@ -4057,11 +4111,13 @@ a file name for --log-bin-index option", opt_binlog_index_name);
         require a name. But as we don't want to break many existing setups, we
         only give warning, not error.
       */
-      sql_print_warning("No argument was provided to --log-bin, and "
-                        "--log-bin-index was not used; so replication "
-                        "may break when this MySQL server acts as a "
-                        "master and has his hostname changed!! Please "
-                        "use '--log-bin=%s' to avoid this problem.", ln);
+      sql_print_warning("No argument was provided to --log-bin and "
+                        "neither --log-basename or --log-bin-index where "
+                        "used;  This may cause repliction to break when this "
+                        "server acts as a master and has its hostname "
+                        "changed!! Please use '--log-basename=%s' or "
+                        "'--log-bin=%s' to avoid this problem.",
+                        opt_basename, ln);
     }
     if (ln == buf)
     {
@@ -4131,6 +4187,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
   if (!errmesg[0][0])
     unireg_abort(1);
 
+  TC_init();
+
   /* We have to initialize the storage engines before CSV logging */
   if (ha_init())
   {
@@ -4218,6 +4276,11 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     sql_print_error("Aria engine is not enabled or did not start. The Aria engine must be enabled to continue as mysqld was configured with --with-aria-tmp-tables");
     unireg_abort(1);
   }
+  internal_tmp_table_max_key_length=   maria_max_key_length();
+  internal_tmp_table_max_key_segments= maria_max_key_segments();
+#else
+  internal_tmp_table_max_key_length=   myisam_max_key_length();
+  internal_tmp_table_max_key_segments= myisam_max_key_segments();
 #endif
 
   tc_log= (total_ha_2pc > 1 ? (opt_bin_log  ?
@@ -4514,13 +4577,14 @@ int main(int argc, char **argv)
 
 #ifndef DBUG_OFF
   test_lc_time_sz();
+  srand((uint) time(NULL)); 
 #endif
 
   /*
     We have enough space for fiddling with the argv, continue
   */
   check_data_home(mysql_real_data_home);
-  if (my_setwd(mysql_real_data_home,MYF(MY_WME)) && !opt_help)
+  if (my_setwd(mysql_real_data_home, opt_help ? 0 : MYF(MY_WME)) && !opt_help)
     unireg_abort(1);				/* purecov: inspected */
   mysql_data_home= mysql_data_home_buff;
   mysql_data_home[0]=FN_CURLIB;		// all paths are relative from here
@@ -5004,9 +5068,9 @@ void handle_connection_in_main_thread(THD *thd)
   safe_mutex_assert_owner(&LOCK_thread_count);
   thread_cache_size=0;			// Safety
   threads.append(thd);
-  thd->start_utime= my_micro_time();
+  thd->set_time(my_hrtime());
   pthread_mutex_unlock(&LOCK_thread_count);
-  thd->start_utime= my_micro_time();
+  thd->start_utime= microsecond_interval_timer();
   handle_one_connection(thd);
 }
 
@@ -5032,7 +5096,7 @@ void create_thread_to_handle_connection(THD *thd)
     thread_created++;
     threads.append(thd);
     DBUG_PRINT("info",(("creating thread %lu"), thd->thread_id));
-    thd->prior_thr_create_utime= thd->start_utime= my_micro_time();
+    thd->prior_thr_create_utime= microsecond_interval_timer();
     if ((error=pthread_create(&thd->real_id,&connection_attrib,
                               handle_one_connection,
                               (void*) thd)))
@@ -5042,7 +5106,7 @@ void create_thread_to_handle_connection(THD *thd)
                  ("Can't create thread to handle request (error %d)",
                   error));
       thread_count--;
-      thd->killed= THD::KILL_CONNECTION;			// Safety
+      thd->killed= KILL_CONNECTION;             // Safety
       (void) pthread_mutex_unlock(&LOCK_thread_count);
 
       pthread_mutex_lock(&LOCK_connection_count);
@@ -5763,7 +5827,9 @@ enum options_mysqld
   OPT_SQL_BIN_UPDATE_SAME,     OPT_REPLICATE_DO_DB,
   OPT_REPLICATE_IGNORE_DB,     OPT_LOG_SLAVE_UPDATES,
   OPT_BINLOG_DO_DB,            OPT_BINLOG_IGNORE_DB,
-  OPT_BINLOG_FORMAT,
+  OPT_BINLOG_FORMAT,           OPT_DEBUG_BINLOG_FSYNC_SLEEP,
+  OPT_BINLOG_ANNOTATE_ROWS_EVENTS,
+  OPT_REPLICATE_ANNOTATE_ROWS_EVENTS,
 #ifndef DBUG_OFF
   OPT_BINLOG_SHOW_XID,
 #endif
@@ -5811,6 +5877,7 @@ enum options_mysqld
   OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN, OPT_FT_BOOLEAN_SYNTAX,
   OPT_FT_MAX_WORD_LEN, OPT_FT_QUERY_EXPANSION_LIMIT, OPT_FT_STOPWORD_FILE,
   OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
+  OPT_JOIN_BUFF_SPACE_LIMIT, OPT_JOIN_CACHE_LEVEL,
   OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE,
   OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD,
   OPT_KEY_CACHE_PARTITIONS,
@@ -5824,7 +5891,7 @@ enum options_mysqld
   OPT_MAX_SEEKS_FOR_KEY, OPT_MAX_TMP_TABLES, OPT_MAX_USER_CONNECTIONS,
   OPT_MAX_LENGTH_FOR_SORT_DATA,
   OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE,
-  OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE,
+  OPT_MAX_ERROR_COUNT, OPT_MRR_BUFFER_SIZE, OPT_MYISAM_DATA_POINTER_SIZE,
 
   OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
   OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE,
@@ -5844,17 +5911,20 @@ enum options_mysqld
   OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
   OPT_RELAY_LOG_PURGE,
   OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
-  OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_DEBUG_FLUSH,
+  OPT_SLAVE_TRANS_RETRIES,
+  OPT_SUBQUERY_CACHE,
+  OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE,
+  OPT_DEBUGGING, OPT_DEBUG_FLUSH,
   OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
   OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
   OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
-  OPT_WAIT_TIMEOUT,
+  OPT_WAIT_TIMEOUT, OPT_PROGRESS_REPORT_TIME,
   OPT_ERROR_LOG_FILE,
   OPT_DEFAULT_WEEK_FORMAT,
   OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS,
   OPT_QUERY_ALLOC_BLOCK_SIZE, OPT_QUERY_PREALLOC_SIZE,
   OPT_TRANS_ALLOC_BLOCK_SIZE, OPT_TRANS_PREALLOC_SIZE,
-  OPT_SYNC_FRM, OPT_SYNC_BINLOG, OPT_SYNC,
+  OPT_SYNC_FRM, OPT_SYNC_BINLOG, OPT_SYNC, OPT_THREAD_ALARM,
   OPT_SYNC_REPLICATION,
   OPT_SYNC_REPLICATION_SLAVE_ID,
   OPT_SYNC_REPLICATION_TIMEOUT,
@@ -5929,7 +5999,10 @@ enum options_mysqld
   OPT_IGNORE_BUILTIN_INNODB,
   OPT_BINLOG_DIRECT_NON_TRANS_UPDATE,
   OPT_DEFAULT_CHARACTER_SET_OLD,
-  OPT_MAX_LONG_DATA_SIZE
+  OPT_MAX_LONG_DATA_SIZE,
+  OPT_MASTER_VERIFY_CHECKSUM,
+  OPT_SLAVE_SQL_VERIFY_CHECKSUM,
+  OPT_QUERY_CACHE_STRIP_COMMENTS
 };
 
 
@@ -5980,6 +6053,12 @@ struct my_option my_long_options[] =
   {"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.",
    &my_bind_addr_str, &my_bind_addr_str, 0, GET_STR,
    REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+  {"debug-binlog-fsync-sleep", OPT_DEBUG_BINLOG_FSYNC_SLEEP,
+    "Extra sleep (in microseconds) to add to binlog fsync(), for debugging",
+    &opt_binlog_dbug_fsync_sleep, &opt_binlog_dbug_fsync_sleep,
+   0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 1, 0},
+#endif
   {"binlog_format", OPT_BINLOG_FORMAT,
    "Does not have any effect without '--log-bin'. "
    "Tell the master the form of binary logging to use: either 'row' for "
@@ -5993,6 +6072,18 @@ struct my_option my_long_options[] =
 #endif
    , &opt_binlog_format, &opt_binlog_format,
    0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"binlog-annotate-row-events", OPT_BINLOG_ANNOTATE_ROWS_EVENTS,
+   "Tells the master to annotate RBR events with the statement that "
+   "caused these events.",
+   (uchar**) &global_system_variables.binlog_annotate_row_events,
+   (uchar**) &max_system_variables.binlog_annotate_row_events,
+   0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"replicate-annotate-row-events", OPT_REPLICATE_ANNOTATE_ROWS_EVENTS,
+   "Tells the slave to write annotate rows events recieved from the master "
+   "to its own binary log. Sensible only in pair with log-slave-updates option.",
+   (uchar**) &opt_replicate_annotate_row_events,
+   (uchar**) &opt_replicate_annotate_row_events,
+   0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"binlog-do-db", OPT_BINLOG_DO_DB,
    "Tells the master it should log updates for the specified database, "
    "and exclude all others not explicitly mentioned.",
@@ -6186,7 +6277,7 @@ struct my_option my_long_options[] =
    "Disable initialization of builtin InnoDB plugin.",
    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"init-connect", OPT_INIT_CONNECT, 
-   "Command(s) that are executed for each new connection.",
+   "Command(s) that are executed for each new connection (but not for SUPER users).",
    &opt_init_connect, &opt_init_connect, 0, GET_STR_ALLOC,
    REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
 #ifndef DISABLE_GRANT_OPTIONS
@@ -6782,6 +6873,19 @@ each time the SQL thread starts.",
    "not stop for operations that are idempotent. In STRICT mode, replication "
    "will stop on any unexpected difference between the master and the slave.",
    &slave_exec_mode_str, &slave_exec_mode_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"master-verify-checksum", OPT_MASTER_VERIFY_CHECKSUM,
+   "Force checksum verification of logged events in binary log before "
+   "sending them to slaves or printing them in output of SHOW BINLOG EVENTS. "
+   "Disabled by default.",
+   &opt_master_verify_checksum, &opt_master_verify_checksum,
+   0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
+  {"slave-sql-verify-checksum", OPT_SLAVE_SQL_VERIFY_CHECKSUM,
+   "Force checksum verification of replication events after reading them "
+   "from relay log. Note: Events are always checksum-verified by slave on "
+   "receiving them from the network before writing them to the relay "
+   "log. Enabled by default.",
+   &opt_slave_sql_verify_checksum, &opt_slave_sql_verify_checksum,
+   0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
 #endif
   {"slow-query-log", OPT_SLOW_LOG,
    "Enable/disable slow query log. See also '--log-slow-queries'",
@@ -6954,7 +7058,7 @@ each time the SQL thread starts.",
   { "flush_time", OPT_FLUSH_TIME,
     "A dedicated thread is created to flush all tables at the given interval.",
     &flush_time, &flush_time, 0, GET_ULONG, REQUIRED_ARG,
-    FLUSH_TIME, 0, LONG_TIMEOUT, 0, 1, 0},
+    0 , 0, LONG_TIMEOUT, 0, 1, 0},
   { "ft_boolean_syntax", OPT_FT_BOOLEAN_SYNTAX,
     "List of operators for MATCH ... AGAINST ( ... IN BOOLEAN MODE).",
     0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
@@ -6988,11 +7092,22 @@ each time the SQL thread starts.",
    &max_system_variables.net_interactive_timeout, 0,
    GET_ULONG, REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0},
   {"join_buffer_size", OPT_JOIN_BUFF_SIZE,
-   "The size of the buffer that is used for full joins.",
-   &global_system_variables.join_buff_size,
-   &max_system_variables.join_buff_size, 0, GET_ULONG,
-   REQUIRED_ARG, 128*1024L, IO_SIZE*2+MALLOC_OVERHEAD, (longlong) ULONG_MAX,
-   MALLOC_OVERHEAD, IO_SIZE, 0},
+   "The size of the buffer that is used for joins.",
+    &global_system_variables.join_buff_size,
+    &max_system_variables.join_buff_size, 0, GET_ULONG,
+   REQUIRED_ARG, 128*1024L, 128+MALLOC_OVERHEAD, (longlong) ULONG_MAX,
+   MALLOC_OVERHEAD, 128, 0},
+  {"join_buffer_space_limit", OPT_JOIN_BUFF_SPACE_LIMIT,
+   "The limit of the space for all join buffers used by a query.",
+   &global_system_variables.join_buff_space_limit,
+   &max_system_variables.join_buff_space_limit, 0, GET_ULL,
+   REQUIRED_ARG, 16*128*1024L, 2048+MALLOC_OVERHEAD, (longlong) ULONGLONG_MAX,
+   MALLOC_OVERHEAD, 2048, 0},
+   {"join_cache_level", OPT_JOIN_CACHE_LEVEL,
+   "Controls what join operations can be executed with join buffers. Odd numbers are used for plain join buffers while even numbers are used for linked buffers",
+   &global_system_variables.join_cache_level,
+   &max_system_variables.join_cache_level,
+   0, GET_ULONG, REQUIRED_ARG, 2, 0, 8, 0, 1, 0},
   {"keep_files_on_create", OPT_KEEP_FILES_ON_CREATE,
    "Don't overwrite stale .MYD and .MYI even if no directory is specified.",
    &global_system_variables.keep_files_on_create,
@@ -7156,9 +7271,9 @@ each time the SQL thread starts.",
    &max_system_variables.max_tmp_tables, 0, GET_ULONG,
    REQUIRED_ARG, 32, 1, (longlong) ULONG_MAX, 0, 1, 0},
   {"max_user_connections", OPT_MAX_USER_CONNECTIONS,
-   "The maximum number of active connections for a single user (0 = no limit).",
-   &max_user_connections, &max_user_connections, 0, GET_UINT,
-   REQUIRED_ARG, 0, 0, UINT_MAX, 0, 1, 0},
+   "The maximum number of active connections for a single user (0 = no limit. In addition global max_user_connections counting and checking is permanently disabled).",
+   &max_user_connections, &max_user_connections, 0, GET_INT,
+   REQUIRED_ARG, 0, 0, INT_MAX, 0, 1, 0},
   {"max_write_lock_count", OPT_MAX_WRITE_LOCK_COUNT,
    "After this many write locks, allow some read locks to run in between.",
    &max_write_lock_count, &max_write_lock_count, 0, GET_ULONG,
@@ -7168,11 +7283,12 @@ each time the SQL thread starts.",
    &global_system_variables.min_examined_row_limit,
    &max_system_variables.min_examined_row_limit, 0, GET_ULONG,
   REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1L, 0},
-  {"multi_range_count", OPT_MULTI_RANGE_COUNT,
-   "Number of key ranges to request at once.",
-   &global_system_variables.multi_range_count,
-   &max_system_variables.multi_range_count, 0,
-   GET_ULONG, REQUIRED_ARG, 256, 1, (longlong) ULONG_MAX, 0, 1, 0},
+  {"mrr_buffer_size", OPT_MRR_BUFFER_SIZE,
+   "Size of buffer to use when using MRR with range access",
+   (uchar**) &global_system_variables.mrr_buff_size,
+   (uchar**) &max_system_variables.mrr_buff_size, 0,
+   GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD,
+   INT_MAX32, MALLOC_OVERHEAD, 1 /* Small to be able to do tests */ , 0},
   {"myisam_block_size", OPT_MYISAM_BLOCK_SIZE,
    "Block size to be used for MyISAM index pages.",
    &opt_myisam_block_size, &opt_myisam_block_size, 0, GET_ULONG, REQUIRED_ARG,
@@ -7278,10 +7394,16 @@ each time the SQL thread starts.",
    0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0},
   {"optimizer_switch", OPT_OPTIMIZER_SWITCH,
    "optimizer_switch=option=val[,option=val...], where option={index_merge, "
-   "index_merge_union, index_merge_sort_union, index_merge_intersection"
-#ifndef DBUG_OFF
+   "index_merge_union, index_merge_sort_union, index_merge_intersection, "
+   "index_merge_sort_intersection, index_condition_pushdown, "
+   "derived_merge, derived_with_keys, "
+   "firstmatch, loosescan, materialization, in_to_exists, "
+   "semijoin, partial_match_rowid_merge, partial_match_table_scan, "
+   "subquery_cache, mrr, mrr_cost_based, mrr_sort_keys, "
+   "outer_join_with_cache, semijoin_with_cache, "
+   "join_cache_incremental, join_cache_hashed, join_cache_bka, "
+   "optimize_join_buffer_size"
    ", table_elimination"
-#endif 
    "} and val={on, off, default}.",
    &optimizer_switch_str, &optimizer_switch_str, 0, GET_STR, REQUIRED_ARG, 
    /*OPTIMIZER_SWITCH_DEFAULT*/0, 0, 0, 0, 0, 0},
@@ -7304,6 +7426,11 @@ each time the SQL thread starts.",
    &global_system_variables.preload_buff_size,
    &max_system_variables.preload_buff_size, 0, GET_ULONG,
    REQUIRED_ARG, 32*1024L, 1024, 1024*1024*1024L, 0, 1, 0},
+  {"progress_report_time", OPT_PROGRESS_REPORT_TIME,
+   "Seconds between sending progress reports to the client for slow commands. Set to 0 to disable progress reporting.",
+   &global_system_variables.progress_report_time,
+   &max_system_variables.progress_report_time,
+   0, GET_ULONG, REQUIRED_ARG, 5, 0, ULONG_MAX, 0, 1, 0},
   {"query_alloc_block_size", OPT_QUERY_ALLOC_BLOCK_SIZE,
    "Allocation block size for query parsing and execution.",
    &global_system_variables.query_alloc_block_size,
@@ -7327,6 +7454,13 @@ each time the SQL thread starts.",
    &query_cache_size, &query_cache_size, 0, GET_ULONG,
    REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1024, 0},
 #ifdef HAVE_QUERY_CACHE
+  {"query_cache_strip_comments", OPT_QUERY_CACHE_STRIP_COMMENTS,
+   "Enable and disable optimisation \"strip comment for query cache\" - "
+   "optimisation strip all comments from query while search query result "
+   "in query cache",
+   (uchar**) &opt_query_cache_strip_comments,
+   (uchar**) &opt_query_cache_strip_comments,
+   0, GET_BOOL, REQUIRED_ARG, 0, 0, 1, 0, 1, 0},
   {"query_cache_type", OPT_QUERY_CACHE_TYPE,
    "0 = OFF = Don't cache or retrieve results. 1 = ON = Cache all results "
    "except SELECT SQL_NO_CACHE ... queries. 2 = DEMAND = Cache only SELECT "
@@ -7351,6 +7485,11 @@ each time the SQL thread starts.",
    &max_system_variables.range_alloc_block_size, 0, GET_ULONG,
    REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE,
    (longlong) ULONG_MAX, 0, 1024, 0},
+  {"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE,
+   "The size of the buffers used [NOT] IN evaluation via partial matching.",
+   (uchar**) &global_system_variables.rowid_merge_buff_size,
+   (uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG,
+   REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0},
   {"read_buffer_size", OPT_RECORD_BUFFER,
    "Each thread that does a sequential scan allocates a buffer of this size "
    "for each table it scans. If you do many sequential scans, you may want "
@@ -7445,6 +7584,10 @@ each time the SQL thread starts.",
    "error. Used only if the connection has active cursors.",
    &table_lock_wait_timeout, &table_lock_wait_timeout,
    0, GET_ULONG, REQUIRED_ARG, 50, 1, 1024 * 1024 * 1024, 0, 1, 0},
+  {"thread-alarm", OPT_THREAD_ALARM,
+   "Enable/disable system thread alarm calls. Should only be turned off when running tests or debugging!!",
+   &opt_thread_alarm, &opt_thread_alarm, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0,
+   0},
   {"thread_cache_size", OPT_THREAD_CACHE_SIZE,
    "How many threads we should keep in a cache for reuse.",
    &thread_cache_size, &thread_cache_size, 0, GET_ULONG,
@@ -7958,16 +8101,27 @@ SHOW_VAR status_vars[]= {
   {"Handler_commit",           (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS},
   {"Handler_delete",           (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
   {"Handler_discover",         (char*) offsetof(STATUS_VAR, ha_discover_count), SHOW_LONG_STATUS},
+
+  {"Handler_icp_attempts ",    (char*) offsetof(STATUS_VAR, ha_icp_attempts), SHOW_LONG_STATUS},
+  {"Handler_icp_match",        (char*) offsetof(STATUS_VAR, ha_icp_match), SHOW_LONG_STATUS},
+
+  {"Handler_mrr_init",         (char*) offsetof(STATUS_VAR, ha_mrr_init_count),  SHOW_LONG_STATUS},
+  {"Handler_mrr_key_refills",   (char*) offsetof(STATUS_VAR, ha_mrr_key_refills_count), SHOW_LONG_STATUS},
+  {"Handler_mrr_rowid_refills", (char*) offsetof(STATUS_VAR, ha_mrr_rowid_refills_count), SHOW_LONG_STATUS},
+
   {"Handler_prepare",          (char*) offsetof(STATUS_VAR, ha_prepare_count),  SHOW_LONG_STATUS},
   {"Handler_read_first",       (char*) offsetof(STATUS_VAR, ha_read_first_count), SHOW_LONG_STATUS},
   {"Handler_read_key",         (char*) offsetof(STATUS_VAR, ha_read_key_count), SHOW_LONG_STATUS},
   {"Handler_read_next",        (char*) offsetof(STATUS_VAR, ha_read_next_count), SHOW_LONG_STATUS},
   {"Handler_read_prev",        (char*) offsetof(STATUS_VAR, ha_read_prev_count), SHOW_LONG_STATUS},
   {"Handler_read_rnd",         (char*) offsetof(STATUS_VAR, ha_read_rnd_count), SHOW_LONG_STATUS},
+  {"Handler_read_rnd_deleted", (char*) offsetof(STATUS_VAR, ha_read_rnd_deleted_count), SHOW_LONG_STATUS},
   {"Handler_read_rnd_next",    (char*) offsetof(STATUS_VAR, ha_read_rnd_next_count), SHOW_LONG_STATUS},
   {"Handler_rollback",         (char*) offsetof(STATUS_VAR, ha_rollback_count), SHOW_LONG_STATUS},
   {"Handler_savepoint",        (char*) offsetof(STATUS_VAR, ha_savepoint_count), SHOW_LONG_STATUS},
   {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS},
+  {"Handler_tmp_update",       (char*) offsetof(STATUS_VAR, ha_tmp_update_count), SHOW_LONG_STATUS},
+  {"Handler_tmp_write",        (char*) offsetof(STATUS_VAR, ha_tmp_write_count), SHOW_LONG_STATUS},
   {"Handler_update",           (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS},
   {"Handler_write",            (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS},
   {"Key",                      (char*) &show_default_keycache, SHOW_FUNC},
@@ -7984,6 +8138,7 @@ SHOW_VAR status_vars[]= {
   {"Prepared_stmt_count",      (char*) &show_prepared_stmt_count, SHOW_FUNC},
   {"Rows_sent",                (char*) offsetof(STATUS_VAR, rows_sent), SHOW_LONGLONG_STATUS},
   {"Rows_read",                (char*) offsetof(STATUS_VAR, rows_read), SHOW_LONGLONG_STATUS},
+  {"Rows_tmp_read",            (char*) offsetof(STATUS_VAR, rows_tmp_read), SHOW_LONGLONG_STATUS},
 #ifdef HAVE_QUERY_CACHE
   {"Qcache_free_blocks",       (char*) &query_cache.free_memory_blocks, SHOW_LONG_NOFLUSH},
   {"Qcache_free_memory",       (char*) &query_cache.free_memory, SHOW_LONG_NOFLUSH},
@@ -8041,6 +8196,12 @@ SHOW_VAR status_vars[]= {
   {"Ssl_version",              (char*) &show_ssl_get_version, SHOW_FUNC},
 #endif /* HAVE_OPENSSL */
   {"Syncs",                    (char*) &my_sync_count,          SHOW_LONG_NOFLUSH},
+  /*
+    Expression cache used only for caching subqueries now, so its statistic
+    variables we call subquery_cache*.
+  */
+  {"Subquery_cache_hit",       (char*) &subquery_cache_hit, SHOW_LONG},
+  {"Subquery_cache_miss",      (char*) &subquery_cache_miss, SHOW_LONG},
   {"Table_locks_immediate",    (char*) &locks_immediate,        SHOW_LONG},
   {"Table_locks_waited",       (char*) &locks_waited,           SHOW_LONG},
 #ifdef HAVE_MMAP
@@ -8182,6 +8343,7 @@ static int mysql_init_variables(void)
   abort_loop= select_thread_in_use= signal_thread_in_use= 0;
   ready_to_exit= shutdown_in_progress= grant_option= 0;
   aborted_threads= aborted_connects= 0;
+  subquery_cache_miss= subquery_cache_hit= 0;
   delayed_insert_threads= delayed_insert_writes= delayed_rows_in_use= 0;
   delayed_insert_errors= thread_created= 0;
   specialflag= 0;
@@ -8253,6 +8415,13 @@ static int mysql_init_variables(void)
   /* set key_cache_hash.default_value = dflt_key_cache */
   multi_keycache_init();
 
+  /* Useful MariaDB variables */
+  double2decimal((double) TIME_MAX_VALUE_SECONDS +
+                 TIME_MAX_SECOND_PART/(double)TIME_SECOND_PART_FACTOR,
+                 &max_seconds_for_time_type);
+  longlong2decimal(TIME_SECOND_PART_FACTOR, &time_second_part_factor);
+  my_decimal_set_zero(&decimal_zero); // set decimal_zero constant;
+
   /* Set directory paths */
   strmake(language, LANGUAGE, sizeof(language)-1);
   strmake(mysql_real_data_home, get_relative_path(MYSQL_DATADIR),
@@ -9258,6 +9427,7 @@ static int get_options(int *argc,char **argv)
   */
   my_disable_locking= myisam_single_user= test(opt_external_locking == 0);
   my_disable_sync= opt_sync == 0;
+  my_disable_thr_alarm= opt_thread_alarm == 0;
   my_default_record_cache_size=global_system_variables.read_buff_size;
   myisam_max_temp_length=
     (my_off_t) global_system_variables.myisam_max_sort_file_size;
@@ -9320,6 +9490,8 @@ static int get_options(int *argc,char **argv)
   if (!max_long_data_size_used)
     max_long_data_size= global_system_variables.max_allowed_packet;
 
+  /* Rember if max_user_connections was 0 at startup */
+  max_user_connections_checking= max_user_connections != 0;
   return 0;
 }
 
@@ -9331,7 +9503,7 @@ static int get_options(int *argc,char **argv)
   (MYSQL_SERVER_SUFFIX is set by the compilation environment)
 */
 
-static void set_server_version(void)
+void set_server_version(void)
 {
   char *end= strxmov(server_version, MYSQL_SERVER_VERSION,
                      MYSQL_SERVER_SUFFIX_STR, NullS);
@@ -9644,7 +9816,8 @@ static int test_if_case_insensitive(const char *dir_name)
   (void) my_delete(buff2, MYF(0));
   if ((file= my_create(buff, 0666, O_RDWR, MYF(0))) < 0)
   {
-    sql_print_warning("Can't create test file %s", buff);
+    if (!opt_help)
+      sql_print_warning("Can't create test file %s", buff);
     DBUG_RETURN(-1);
   }
   my_close(file, MYF(0));
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index 272b1759151..df392616676 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -258,18 +258,20 @@ static int net_data_is_ready(my_socket sd)
 #endif /* EMBEDDED_LIBRARY */
 
 /**
-  Remove unwanted characters from connection
-  and check if disconnected.
+   Intialize NET handler for new reads:
 
-    Read from socket until there is nothing more to read. Discard
-    what is read.
+   - Read from socket until there is nothing more to read. Discard
+     what is read.
+   - Initialize net for new net_read/net_write calls.
 
-    If there is anything when to read 'net_clear' is called this
-    normally indicates an error in the protocol.
+   If there is anything when to read 'net_clear' is called this
+   normally indicates an error in the protocol. Normally one should not
+   need to do clear the communication buffer. If one compiles without
+   -DUSE_NET_CLEAR then one wins one read call / query.
 
-    When connection is properly closed (for TCP it means with
-    a FIN packet), then select() considers a socket "ready to read",
-    in the sense that there's EOF to read, but read() returns 0.
+   When connection is properly closed (for TCP it means with
+   a FIN packet), then select() considers a socket "ready to read",
+   in the sense that there's EOF to read, but read() returns 0.
 
   @param net			NET handler
   @param clear_buffer           if <> 0, then clear all data from comm buff
@@ -277,20 +279,18 @@ static int net_data_is_ready(my_socket sd)
 
 void net_clear(NET *net, my_bool clear_buffer __attribute__((unused)))
 {
-#if !defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)
-  size_t count;
-  int ready;
-#endif
   DBUG_ENTER("net_clear");
 
 /*
-  We don't do a clear in case of DBUG_OFF to catch bugs
-  in the protocol handling
+  We don't do a clear in case of not DBUG_OFF to catch bugs in the
+  protocol handling.
 */
 
-#if !defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)
+#if (!defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)) || defined(USE_NET_CLEAR)
   if (clear_buffer)
   {
+    size_t count;
+    int ready;
     while ((ready= net_data_is_ready(net->vio->sd)) > 0)
     {
       /* The socket is ready */
@@ -687,7 +687,8 @@ net_real_write(NET *net,const uchar *packet, size_t len)
   {
     my_bool old_mode;
     thr_end_alarm(&alarmed);
-    vio_blocking(net->vio, net_blocking, &old_mode);
+    if (!net_blocking)
+      vio_blocking(net->vio, net_blocking, &old_mode);
   }
   net->reading_or_writing=0;
   DBUG_RETURN(((int) (pos != end)));
@@ -978,7 +979,8 @@ end:
   {
     my_bool old_mode;
     thr_end_alarm(&alarmed);
-    vio_blocking(net->vio, net_blocking, &old_mode);
+    if (!net_blocking)
+      vio_blocking(net->vio, net_blocking, &old_mode);
   }
   net->reading_or_writing=0;
 #ifdef DEBUG_DATA_PACKETS
diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc
new file mode 100644
index 00000000000..c2b49d863a1
--- /dev/null
+++ b/sql/opt_index_cond_pushdown.cc
@@ -0,0 +1,430 @@
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/****************************************************************************
+ * Index Condition Pushdown code starts
+ ***************************************************************************/
+/* 
+  Check if given expression uses only table fields covered by the given index
+
+  SYNOPSIS
+    uses_index_fields_only()
+      item           Expression to check
+      tbl            The table having the index
+      keyno          The index number
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Check if given expression only uses fields covered by index #keyno in the
+    table tbl. The expression can use any fields in any other tables.
+    
+    The expression is guaranteed not to be AND or OR - those constructs are 
+    handled outside of this function.
+
+  RETURN
+    TRUE   Yes
+    FALSE  No
+*/
+
+bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
+                            bool other_tbls_ok)
+{
+  if (item->walk(&Item::limit_index_condition_pushdown_processor, FALSE, NULL))
+  {
+    return FALSE;
+  }
+
+  if (item->const_item())
+    return TRUE;
+
+  /* 
+    Don't push down the triggered conditions. Nested outer joins execution 
+    code may need to evaluate a condition several times (both triggered and
+    untriggered), and there is no way to put thi
+    TODO: Consider cloning the triggered condition and using the copies for:
+      1. push the first copy down, to have most restrictive index condition
+         possible
+      2. Put the second copy into tab->select_cond. 
+  */
+  if (item->type() == Item::FUNC_ITEM && 
+      ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
+    return FALSE;
+
+  if (!(item->used_tables() & tbl->map))
+    return other_tbls_ok;
+
+  Item::Type item_type= item->type();
+  switch (item_type) {
+  case Item::FUNC_ITEM:
+    {
+      /* This is a function, apply condition recursively to arguments */
+      Item_func *item_func= (Item_func*)item;
+      Item **child;
+      Item **item_end= (item_func->arguments()) + item_func->argument_count();
+      for (child= item_func->arguments(); child != item_end; child++)
+      {
+        if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::COND_ITEM:
+    {
+      /*
+        This is a AND/OR condition. Regular AND/OR clauses are handled by
+        make_cond_for_index() which will chop off the part that can be
+        checked with index. This code is for handling non-top-level AND/ORs,
+        e.g. func(x AND y).
+      */
+      List_iterator<Item> li(*((Item_cond*)item)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::FIELD_ITEM:
+    {
+      Item_field *item_field= (Item_field*)item;
+      Field *field= item_field->field;
+      if (field->table != tbl)
+        return TRUE;
+      /*
+        The below is probably a repetition - the first part checks the
+        other two, but let's play it safe:
+      */
+      if(!field->part_of_key.is_set(keyno) ||
+         field->type() == MYSQL_TYPE_GEOMETRY ||
+         field->type() == MYSQL_TYPE_BLOB)
+        return FALSE;
+      KEY *key_info= tbl->key_info + keyno;
+      KEY_PART_INFO *key_part= key_info->key_part;
+      KEY_PART_INFO *key_part_end= key_part + key_info->key_parts;
+      for ( ; key_part < key_part_end; key_part++)
+      {
+        if (field->eq(key_part->field))
+	  return !(key_part->key_part_flag & HA_PART_KEY_SEG);          
+      }
+      if ((tbl->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
+          tbl->s->primary_key != MAX_KEY &&
+	  tbl->s->primary_key != keyno)
+      {
+        key_info= tbl->key_info + tbl->s->primary_key;
+        key_part= key_info->key_part;
+        key_part_end= key_part + key_info->key_parts;
+        for ( ; key_part < key_part_end; key_part++)
+        {
+          /* 
+            It does not make sense to use the fact that the engine can read in
+            a full field if the key if the index is built only over a part
+            of this field.
+	  */
+          if (field->eq(key_part->field))
+	    return !(key_part->key_part_flag & HA_PART_KEY_SEG);          
+        }
+      }  
+      return FALSE;
+    }
+  case Item::REF_ITEM:
+    return uses_index_fields_only(item->real_item(), tbl, keyno,
+                                  other_tbls_ok);
+  default:
+    return FALSE; /* Play it safe, don't push unknown non-const items */
+  }
+}
+
+#define ICP_COND_USES_INDEX_ONLY 10
+
+/*
+  Get a part of the condition that can be checked using only index fields
+
+  SYNOPSIS
+    make_cond_for_index()
+      cond           The source condition
+      table          The table that is partially available
+      keyno          The index in the above table. Only fields covered by the index
+                     are available
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Get a part of the condition that can be checked when for the given table 
+    we have values only of fields covered by some index. The condition may
+    refer to other tables, it is assumed that we have values of all of their 
+    fields.
+
+    Example:
+      make_cond_for_index(
+         "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
+          t2, keyno(t2.key1)) 
+      will return
+        "cond(t1.field) AND cond(t2.key2)"
+
+  RETURN
+    Index condition, or NULL if no condition could be inferred.
+*/
+
+Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
+                          bool other_tbls_ok)
+{
+  if (!cond)
+    return NULL;
+  if (cond->type() == Item::COND_ITEM)
+  {
+    uint n_marked= 0;
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      table_map used_tables= 0;
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          used_tables|= fix->used_tables();
+        }
+        if (test(item->marker == ICP_COND_USES_INDEX_ONLY))
+        {
+          n_marked++;
+          item->marker= 0;
+        } 
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+        new_cond->used_tables_cache= used_tables;
+	return new_cond->argument_list()->head();
+      default:
+	new_cond->quick_fix_field();
+        new_cond->used_tables_cache= used_tables;
+	return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        if (test(item->marker == ICP_COND_USES_INDEX_ONLY))
+        {
+          n_marked++;
+          item->marker= 0;
+        } 
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      new_cond->quick_fix_field();
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+    return (COND*) 0;
+  cond->marker= ICP_COND_USES_INDEX_ONLY;
+  return cond;
+}
+
+
+Item *make_cond_remainder(Item *cond, TABLE *table, uint keyno,
+                          bool other_tbls_ok, bool exclude_index)
+{
+  if (cond->type() == Item::COND_ITEM)
+  {
+    table_map tbl_map= 0;
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, table, keyno,
+                                       other_tbls_ok, exclude_index);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          tbl_map |= fix->used_tables();
+        }
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+	return new_cond->argument_list()->head();
+      default:
+	new_cond->quick_fix_field();
+        ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+	return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, table, keyno, 
+                                       other_tbls_ok, FALSE);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        tbl_map |= fix->used_tables();
+      }
+      new_cond->quick_fix_field();
+      ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+  else
+  {
+    if (exclude_index && 
+        uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+      return 0;
+    else
+      return cond;
+  }
+}
+
+
+/*
+  Try to extract and push the index condition
+
+  SYNOPSIS
+    push_index_cond()
+      tab            A join tab that has tab->table->file and its condition
+                     in tab->select_cond
+      keyno          Index for which extract and push the condition
+
+  DESCRIPTION
+    Try to extract and push the index condition down to table handler
+*/
+
+void push_index_cond(JOIN_TAB *tab, uint keyno)
+{
+  DBUG_ENTER("push_index_cond");
+  Item *idx_cond;
+  
+  /*
+  Backported the following from MySQL 5.6:
+    6. The index is not a clustered index. The performance improvement
+       of pushing an index condition on a clustered key is much lower 
+       than on a non-clustered key. This restriction should be 
+       re-evaluated when WL#6061 is implemented.
+  */
+  if ((tab->table->file->index_flags(keyno, 0, 1) &
+      HA_DO_INDEX_COND_PUSHDOWN) &&
+     optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN) &&
+     tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
+     tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
+     tab->type != JT_CONST && tab->type != JT_SYSTEM &&
+     !(keyno == tab->table->s->primary_key &&             // (6)
+       tab->table->file->primary_key_is_clustered()))     // (6)
+
+  {
+    DBUG_EXECUTE("where",
+                 print_where(tab->select_cond, "full cond", QT_ORDINARY););
+
+    idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno,
+                                  tab->icp_other_tables_ok);
+
+    DBUG_EXECUTE("where",
+                 print_where(idx_cond, "idx cond", QT_ORDINARY););
+
+    if (idx_cond)
+    {
+      Item *idx_remainder_cond= 0;
+      tab->pre_idx_push_select_cond= tab->select->cond;
+      /*
+        For BKA cache we store condition to special BKA cache field
+        because evaluation of the condition requires additional operations
+        before the evaluation. This condition is used in 
+        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
+      */
+      if (tab->use_join_cache &&
+          /*
+            if cache is used then the value is TRUE only 
+            for BKA[_UNIQUE] cache (see check_join_cache_usage func).
+          */
+          tab->icp_other_tables_ok &&
+          (idx_cond->used_tables() &
+           ~(tab->table->map | tab->join->const_table_map)))
+        tab->cache_idx_cond= idx_cond;
+      else
+        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
+
+      /*
+        Disable eq_ref's "lookup cache" if we've pushed down an index
+        condition. 
+        TODO: This check happens to work on current ICP implementations, but
+        there may exist a compliant implementation that will not work 
+        correctly with it. Sort this out when we stabilize the condition
+        pushdown APIs.
+      */
+      if (idx_remainder_cond != idx_cond)
+        tab->ref.disable_cache= TRUE;
+
+      Item *row_cond= tab->idx_cond_fact_out ? 
+	                make_cond_remainder(tab->select_cond, tab->table, keyno,
+			                    tab->icp_other_tables_ok, TRUE) :
+	                tab->pre_idx_push_select_cond;
+
+      DBUG_EXECUTE("where",
+                   print_where(row_cond, "remainder cond", QT_ORDINARY););
+      
+      if (row_cond)
+      {
+        if (!idx_remainder_cond)
+          tab->select_cond= row_cond;
+        else
+        {
+          COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond);
+          tab->select_cond= new_cond;
+	  tab->select_cond->quick_fix_field();
+          ((Item_cond_and*)tab->select_cond)->used_tables_cache= 
+            row_cond->used_tables() | idx_remainder_cond->used_tables();
+        }
+      }
+      else
+        tab->select_cond= idx_remainder_cond;
+      if (tab->select)
+      {
+        DBUG_EXECUTE("where",
+                     print_where(tab->select->cond,
+                                 "select_cond",
+                                 QT_ORDINARY););
+
+        tab->select->cond= tab->select_cond;
+        tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
+      }
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 185f3eecd3c..e0841d3a696 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -35,7 +35,7 @@
     
     The lists are returned in form of complicated structure of interlinked
     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
-    See check_quick_keys, find_used_partitions for examples of how to walk 
+    See quick_range_seq_next, find_used_partitions for examples of how to walk 
     this structure.
     All direct "users" of this module are located within this file, too.
 
@@ -60,6 +60,48 @@
 
   Record retrieval code for range/index_merge/groupby-min-max.
     Implementations of QUICK_*_SELECT classes.
+
+  KeyTupleFormat
+  ~~~~~~~~~~~~~~
+  The code in this file (and elsewhere) makes operations on key value tuples.
+  Those tuples are stored in the following format:
+  
+  The tuple is a sequence of key part values. The length of key part value
+  depends only on its type (and not depends on the what value is stored)
+  
+    KeyTuple: keypart1-data, keypart2-data, ...
+  
+  The value of each keypart is stored in the following format:
+  
+    keypart_data: [isnull_byte] keypart-value-bytes
+
+  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
+  be used to check this), then the first byte is a NULL indicator with the 
+  following valid values:
+    1  - keypart has NULL value.
+    0  - keypart has non-NULL value.
+
+  <questionable-statement> If isnull_byte==1 (NULL value), then the following
+  keypart->length bytes must be 0.
+  </questionable-statement>
+
+  keypart-value-bytes holds the value. Its format depends on the field type.
+  The length of keypart-value-bytes may or may not depend on the value being
+  stored. The default is that length is static and equal to 
+  KEY_PART_INFO::length.
+  
+  Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the 
+  value:
+  
+     keypart-value-bytes: value_length value_bytes
+
+  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
+
+  See key_copy() and key_restore() for code to move data between index tuple
+  and table record
+
+  CAUTION: the above description is only sergefp's understanding of the 
+           subject and may omit some details.
 */
 
 #ifdef USE_PRAGMA_IMPLEMENTATION
@@ -263,6 +305,11 @@ public:
   uint8 part;					// Which key part
   uint8 maybe_null;
   /* 
+    The ordinal number the least significant component encountered in
+    the ranges of the SEL_ARG tree (the first component has number 1) 
+  */
+  uint16 max_part_no; 
+  /* 
     Number of children of this element in the RB-tree, plus 1 for this
     element itself.
   */
@@ -295,8 +342,9 @@ public:
   SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value,
 	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
   SEL_ARG(enum Type type_arg)
-    :min_flag(0),elements(1),use_count(1),left(0),right(0),next_key_part(0),
-    color(BLACK), type(type_arg)
+    :min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/, 
+     elements(1),use_count(1),left(0),right(0),
+     next_key_part(0), color(BLACK), type(type_arg)
   {}
   inline bool is_same(SEL_ARG *arg)
   {
@@ -404,6 +452,7 @@ public:
   /* returns a number of keypart values (0 or 1) appended to the key buffer */
   int store_min(uint length, uchar **min_key,uint min_key_flag)
   {
+    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
     if ((min_flag & GEOM_FLAG) ||
         (!(min_flag & NO_MIN_RANGE) &&
 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
@@ -503,6 +552,14 @@ public:
     increment_use_count(1);
     use_count++;
   }
+  void incr_refs_all()
+  {
+    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
+    {
+      pos->increment_use_count(1);
+    }
+    use_count++;
+  }
   void free_tree()
   {
     for (SEL_ARG *pos=first(); pos ; pos=pos->next)
@@ -564,7 +621,100 @@ public:
 
 class SEL_IMERGE;
 
+#define CLONE_KEY1_MAYBE 1
+#define CLONE_KEY2_MAYBE 2
+#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
+
 
+/*
+  While objects of the class SEL_ARG represent ranges for indexes or
+  index infixes (including ranges for index prefixes and index suffixes),
+  objects of the class SEL_TREE represent AND/OR formulas of such ranges.
+  Currently an AND/OR formula represented by a SEL_TREE object can have
+  at most three levels: 
+
+    <SEL_TREE formula> ::= 
+      [ <SEL_RANGE_TREE formula> AND ]
+      [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]
+
+    <SEL_RANGE_TREE formula> ::=
+      <SEL_ARG formula> [ AND  <SEL_ARG_formula> ... ]
+
+    <SEL_IMERGE formula> ::=  
+      <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]
+
+  As we can see from the above definitions:
+   - SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
+   - SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
+   - SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
+     and SEL_IMERGE formulas. 
+  It's required above that a SEL_TREE formula has at least one conjunct.
+
+  Usually we will consider normalized SEL_RANGE_TREE formulas where we use
+  TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.
+  
+  We will call an SEL_TREE object simply 'tree'. 
+  The part of a tree that represents SEL_RANGE_TREE formula is called
+  'range part' of the tree while the remaining part is called 'imerge part'. 
+  If a tree contains only a range part then we call such a tree 'range tree'.
+  Components of a range tree that represent SEL_ARG formulas are called ranges.
+  If a tree does not contain any range part we call such a tree 'imerge tree'.
+  Components of the imerge part of a tree that represent SEL_IMERGE formula
+  are called imerges.
+
+  Usually we'll designate:
+    SEL_TREE formulas         by T_1,...,T_k
+    SEL_ARG formulas          by R_1,...,R_k
+    SEL_RANGE_TREE formulas   by RT_1,...,RT_k
+    SEL_IMERGE formulas       by M_1,...,M_k
+  Accordingly we'll use:
+    t_1,...,t_k - to designate trees representing T_1,...,T_k
+    r_1,...,r_k - to designate ranges representing R_1,...,R_k 
+    rt_1,...,r_tk - to designate range trees representing RT_1,...,RT_k
+    m_1,...,m_k - to designate imerges representing M_1,...,M_k
+
+  SEL_TREE objects are usually built from WHERE conditions or
+  ON expressions.
+  A SEL_TREE object always represents an inference of the condition it is
+  built from. Therefore, if a row satisfies a SEL_TREE formula it also
+  satisfies the condition it is built from.
+
+  The following transformations of tree t representing SEL_TREE formula T 
+  yield a new tree t1 thar represents an inference of T: T=>T1.  
+    (1) remove any of SEL_ARG tree from the range part of t
+    (2) remove any imerge from the tree t 
+    (3) remove any of SEL_ARG tree from any range tree contained
+        in any imerge of tree   
+ 
+  Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE
+  objects in many cases can be effectively used to filter out a big part
+  of table rows that do not satisfy WHERE/IN conditions utilizing
+  only single or multiple range index scans.
+
+  A single range index scan is constructed for a range tree that contains
+  only one SEL_ARG object for an index or an index prefix.
+  An index intersection scan can be constructed for a range tree
+  that contains several SEL_ARG objects. Currently index intersection
+  scans are constructed only for single-point ranges.
+  An index merge scan is constructed for a imerge tree that contains only
+  one imerge. If range trees of this imerge contain only single-point merges
+  than a union of index intersections can be built.
+
+  Usually the tree built by the range optimizer for a query table contains
+  more than one range in the range part, and additionally may contain some
+  imerges in the imerge part. The range optimizer evaluates all of them one
+  by one and chooses the range or the imerge that provides the cheapest
+  single or multiple range index scan of the table.  According to rules 
+  (1)-(3) this scan always filter out only those rows that do not satisfy
+  the query conditions. 
+
+  For any condition the SEL_TREE object for it is built in a bottom up
+  manner starting from the range trees for the predicates. The tree_and
+  function builds a tree for any conjunction of formulas from the trees
+  for its conjuncts. The tree_or function builds a tree for any disjunction
+  of formulas from the trees for its disjuncts.    
+*/ 
+  
 class SEL_TREE :public Sql_alloc
 {
 public:
@@ -580,7 +730,7 @@ public:
     keys_map.clear_all();
     bzero((char*) keys,sizeof(keys));
   }
-  SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param);
+  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
   /*
     Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
     keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
@@ -600,9 +750,15 @@ public:
   key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
   uint    n_ror_scans;     /* number of set bits in ror_scans_map */
 
+  struct st_index_scan_info **index_scans;     /* list of index scans */
+  struct st_index_scan_info **index_scans_end; /* last index scan */
+
   struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
   struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
   /* Note that #records for each key scan is stored in table->quick_rows */
+
+  bool without_ranges() { return keys_map.is_clear_all(); }
+  bool without_imerges() { return merges.is_empty(); }
 };
 
 class RANGE_OPT_PARAM
@@ -633,6 +789,11 @@ public:
   */
   bool using_real_indexes;
   
+  /*
+    Aggressively remove "scans" that do not have conditions on first
+    keyparts. Such scans are usable when doing partition pruning but not
+    regular range optimization.
+  */
   bool remove_jump_scans;
   
   /*
@@ -640,19 +801,27 @@ public:
     using_real_indexes==TRUE
   */
   uint real_keynr[MAX_KEY];
+
+  /*
+    Used to store 'current key tuples', in both range analysis and
+    partitioning (list) analysis
+  */
+  uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
+    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+
   /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
   uint alloced_sel_args; 
+  bool force_default_mrr;
+  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
 };
 
 class PARAM : public RANGE_OPT_PARAM
 {
 public:
-  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
+  ha_rows quick_rows[MAX_KEY];
   longlong baseflag;
   uint max_key_part, range_count;
 
-  uchar min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
-    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
   bool quick;				// Don't calulate possible keys
 
   uint fields_bitmap_size;
@@ -671,13 +840,16 @@ public:
   uint8 first_null_comp; /* first null component if any, 0 - otherwise */
 };
 
+
 class TABLE_READ_PLAN;
   class TRP_RANGE;
   class TRP_ROR_INTERSECT;
   class TRP_ROR_UNION;
-  class TRP_ROR_INDEX_MERGE;
+  class TRP_INDEX_INTERSECT;
+  class TRP_INDEX_MERGE;
   class TRP_GROUP_MIN_MAX;
 
+struct st_index_scan_info;
 struct st_ror_scan_info;
 
 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
@@ -689,20 +861,22 @@ static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
 
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
-static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree,
-                                  bool update_tbl_stats);
-static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
-                                uchar *min_key, uint min_key_flag, int,
-                                uchar *max_key, uint max_key_flag, int);
+static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                                  SEL_ARG *tree, bool update_tbl_stats, 
+                                  uint *mrr_flags, uint *bufsize,
+                                  COST_VECT *cost);
 
 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
-                                     SEL_ARG *key_tree,
-                                     MEM_ROOT *alloc = NULL);
+                                     SEL_ARG *key_tree, uint mrr_flags, 
+                                     uint mrr_buf_size, MEM_ROOT *alloc);
 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool index_read_must_be_used,
                                        bool update_tbl_stats,
                                        double read_time);
 static
+TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
+                                              double read_time);
+static
 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                           double read_time,
                                           bool *are_all_covering);
@@ -713,7 +887,12 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
 static
 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                          double read_time);
-static TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
+static
+TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
+                                        TRP_INDEX_MERGE *imerge_trp,
+                                        double read_time);
+static
+TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
 
 #ifndef DBUG_OFF
 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -724,11 +903,15 @@ static void print_ror_scans_arr(TABLE *table, const char *msg,
 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
 #endif
 
-static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
-static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
+static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
+                          SEL_TREE *tree1, SEL_TREE *tree2);
+static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
+                         SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
-static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2);
-static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
+static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
+                       SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
+                        SEL_ARG *key1, SEL_ARG *key2,
                         uint clone_flag);
 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
@@ -739,7 +922,25 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                              uint length);
-bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+
+#include "opt_range_mrr.cc"
+
+static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, 
+                                       key_map *common_keys);
+static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
+                                          SEL_TREE *tree);
+
+static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
+                                  SEL_TREE *tree1, SEL_TREE *tree2, 
+                                  key_map *common_keys);
+static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
+                                   SEL_TREE *tree1, SEL_TREE *tree2,
+                                   key_map common_keys);
+static int and_range_trees(RANGE_OPT_PARAM *param,
+                           SEL_TREE *tree1, SEL_TREE *tree2,
+                           SEL_TREE *result);
+static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);
 
 
 /*
@@ -771,23 +972,39 @@ public:
     trees_next(trees),
     trees_end(trees + PREALLOCED_TREES)
   {}
-  SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param);
+  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
   int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
-  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
-  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
+  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
+  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, 
+                   SEL_IMERGE *new_imerge);
+  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
+                              uint n_init_trees, 
+                              SEL_TREE *new_tree,
+                              bool is_first_check_pass,
+                              bool *is_last_check_pass);
+  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
+                                uint n_init_trees,
+                                SEL_IMERGE* imerge,
+                                bool is_first_check_pass,
+                                bool *is_last_check_pass);
 };
 
 
 /*
-  Add SEL_TREE to this index_merge without any checks,
+  Add a range tree to the range trees of this imerge 
 
-  NOTES
-    This function implements the following:
-      (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs
+  SYNOPSIS
+    or_sel_tree()
+      param                  Context info for the operation         
+      tree                   SEL_TREE to add to this imerge 
+
+  DESCRIPTION 
+    The function just adds the range tree 'tree' to the range trees
+    of this imerge.
 
   RETURN
-     0 - OK
-    -1 - Out of memory.
+     0   if the operation is success
+    -1   if the function runs out memory
 */
 
 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
@@ -812,96 +1029,302 @@ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
 
 
 /*
-  Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
-  combining new_tree with one of the trees in this SEL_IMERGE if they both
-  have SEL_ARGs for the same key.
+  Check if any of the range trees of this imerge intersects with a given tree 
 
   SYNOPSIS
-    or_sel_tree_with_checks()
-      param    PARAM from SQL_SELECT::test_quick_select
-      new_tree SEL_TREE with type KEY or KEY_SMALLER.
+    have_common_keys()
+      param    Context info for the function
+      tree     SEL_TREE intersection with the imerge range trees is checked for 
 
-  NOTES
-    This does the following:
-    (t_1||...||t_k)||new_tree =
-     either
-       = (t_1||...||t_k||new_tree)
-     or
-       = (t_1||....||(t_j|| new_tree)||...||t_k),
-
-     where t_i, y are SEL_TREEs.
-    new_tree is combined with the first t_j it has a SEL_ARG on common
-    key with. As a consequence of this, choice of keys to do index_merge
-    read may depend on the order of conditions in WHERE part of the query.
+  DESCRIPTION
+    The function checks whether there is any range tree rt_i in this imerge
+    such that there are some indexes for which ranges are defined in both
+    rt_i and the range part of the SEL_TREE tree.  
+    To check this the function calls the function sel_trees_have_common_keys.
 
+  RETURN 
+    TRUE    if there are such range trees in this imerge
+    FALSE   otherwise
+*/
+
+bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
+{
+  for (SEL_TREE** or_tree= trees, **bound= trees_next;
+       or_tree != bound; or_tree++)
+  {
+    key_map common_keys;
+    if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* 
+  Perform AND operation for this imerge and the range part of a tree
+
+  SYNOPSIS
+    and_sel_tree()
+      param           Context info for the operation
+      tree            SEL_TREE for the second operand of the operation
+      new_imerge  OUT imerge for the result of the operation
+
+  DESCRIPTION
+    This function performs AND operation for this imerge m and the
+    range part of the SEL_TREE tree rt. In other words the function
+    pushes rt into this imerge. The resulting imerge is returned in
+    the parameter new_imerge.
+    If this imerge m represent the formula
+      RT_1 OR ... OR RT_k
+    then the resulting imerge of the function represents the formula
+      (RT_1 AND RT) OR ... OR (RT_k AND RT)
+    The function calls the function and_range_trees to construct the
+    range tree representing (RT_i AND RT).
+    
+  NOTE
+    The function may return an empty imerge without any range trees.
+    This happens when each call of and_range_trees returns an 
+    impossible range tree (SEL_TREE::IMPOSSIBLE).
+    Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).
+         
   RETURN
-    0  OK
-    1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
-       and (*this) should be discarded.
-   -1  An error occurred.
+     0  if the operation is a success
+    -1  otherwise: there is not enough memory to perform the operation
 */
 
-int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
+int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
+                             SEL_IMERGE *new_imerge)
 {
-  for (SEL_TREE** tree = trees;
-       tree != trees_next;
-       tree++)
+  for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++) 
   {
-    if (sel_trees_can_be_ored(*tree, new_tree, param))
+    SEL_TREE *res_or_tree= 0;
+    SEL_TREE *and_tree= 0;
+    if (!(res_or_tree= new SEL_TREE()) ||
+        !(and_tree= new SEL_TREE(tree, TRUE, param)))
+      return (-1);
+    if (!and_range_trees(param, *or_tree, and_tree, res_or_tree))
     {
-      *tree = tree_or(param, *tree, new_tree);
-      if (!*tree)
-        return 1;
-      if (((*tree)->type == SEL_TREE::MAYBE) ||
-          ((*tree)->type == SEL_TREE::ALWAYS))
+      if (new_imerge->or_sel_tree(param, res_or_tree))
+        return (-1);
+    }        
+  }
+  return 0;
+}      
+
+
+/*
+  Perform OR operation on this imerge and the range part of a tree
+
+  SYNOPSIS
+    or_sel_tree_with_checks()
+      param                  Context info for the operation 
+      n_trees                Number of trees in this imerge to check for oring        
+      tree                   SEL_TREE whose range part is to be ored 
+      is_first_check_pass    <=> the first call of the function for this imerge  
+      is_last_check_pass OUT <=> no more calls of the function for this imerge
+
+  DESCRIPTION
+    The function performs OR operation on this imerge m and the range part
+    of the SEL_TREE tree rt. It always replaces this imerge with the result
+    of the operation.
+ 
+    The operation can be performed in two different modes: with
+    is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
+    this imerge differently.
+
+    Given this imerge represents the formula
+      RT_1 OR ... OR RT_k:
+
+    1. In the first mode, when is_first_check_pass==TRUE :
+      1.1. If rt must be ored(see the function sel_trees_must_be_ored) with
+           some rt_j (there may be only one such range tree in the imerge)
+           then the function produces an imerge representing the formula
+             RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
+           where the tree for (RT_j OR RT) is built by oring the pairs
+           of SEL_ARG trees for the corresponding indexes
+      1.2. Otherwise the function produces the imerge representing the formula:
+           RT_1 OR ... OR RT_k OR RT.
+
+    2. In the second mode, when is_first_check_pass==FALSE :
+      2.1. For each rt_j in the imerge that can be ored (see the function
+           sel_trees_can_be_ored) with rt the function replaces rt_j for a
+           range tree such that for each index for which ranges are defined
+           in both in rt_j and rt  the tree contains the  result of oring of
+           these ranges.
+      2.2. In other cases the function does not produce any imerge.
+
+    When is_first_check==TRUE the function returns FALSE in the parameter
+    is_last_check_pass if there is no rt_j such that rt_j can be ored with rt,
+    but, at the same time, it's not true that rt_j must be ored with rt.
+    When is_first_check==FALSE the function always returns FALSE in the
+    parameter is_last_check_pass.    
+          
+  RETURN
+    1  The result of oring of rt_j and rt that must be ored returns the
+       the range tree with type==SEL_TREE::ALWAYS
+       (in this case the imerge m should be discarded)
+   -1  The function runs out of memory
+    0  in all other cases 
+*/
+
+int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
+                                        uint n_trees,
+                                        SEL_TREE *tree,
+                                        bool is_first_check_pass,
+                                        bool *is_last_check_pass)
+{
+  bool was_ored= FALSE;
+  *is_last_check_pass= is_first_check_pass;
+  SEL_TREE** or_tree = trees;
+  for (uint i= 0; i < n_trees; i++, or_tree++)
+  {
+    SEL_TREE *result= 0;
+    key_map result_keys;
+    key_map ored_keys;
+    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
+    {
+      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
+                                                ored_keys);
+      if (must_be_ored || !is_first_check_pass)
+      {
+        result_keys.clear_all();
+        result= *or_tree;
+        for (uint key_no= 0; key_no < param->keys; key_no++)
+        {
+          if (!ored_keys.is_set(key_no))
+	  {
+            result->keys[key_no]= 0;
+	    continue;
+          }
+          SEL_ARG *key1= (*or_tree)->keys[key_no];
+          SEL_ARG *key2= tree->keys[key_no];
+          key2->incr_refs();
+          if ((result->keys[key_no]= key_or(param, key1, key2)))
+          {
+            
+            result_keys.set_bit(key_no);
+#ifdef EXTRA_DEBUG
+            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
+	    {
+              key1= result->keys[key_no]; 
+              (key1)->test_use_count(key1);
+            }
+#endif
+          }       
+        }
+      }
+      else if(is_first_check_pass) 
+        *is_last_check_pass= FALSE;
+    } 
+
+    if (result)
+    {
+      result->keys_map= result_keys;
+      if (result_keys.is_clear_all())
+        result->type= SEL_TREE::ALWAYS;
+      if ((result->type == SEL_TREE::MAYBE) ||
+          (result->type == SEL_TREE::ALWAYS))
         return 1;
       /* SEL_TREE::IMPOSSIBLE is impossible here */
-      return 0;
+      *or_tree= result;
+      was_ored= TRUE;
     }
   }
+  if (was_ored)
+    return 0;
 
-  /* New tree cannot be combined with any of existing trees. */
-  return or_sel_tree(param, new_tree);
+  if (is_first_check_pass && !*is_last_check_pass &&
+      !(tree= new SEL_TREE(tree, FALSE, param)))
+    return (-1);
+  return or_sel_tree(param, tree);
 }
 
 
 /*
-  Perform OR operation on this index_merge and supplied index_merge list.
+  Perform OR operation on this imerge and and another imerge
 
+  SYNOPSIS
+    or_sel_imerge_with_checks()
+      param                  Context info for the operation 
+      n_trees           Number of trees in this imerge to check for oring        
+      imerge                 The second operand of the operation 
+      is_first_check_pass    <=> the first call of the function for this imerge  
+      is_last_check_pass OUT <=> no more calls of the function for this imerge
+
+  DESCRIPTION
+    For each range tree rt from 'imerge' the function calls the method
+    SEL_IMERGE::or_sel_tree_with_checks that performs OR operation on this
+    SEL_IMERGE object m and the tree rt. The mode of the operation is
+    specified by the parameter is_first_check_pass. Each call of
+    SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
+    The function returns FALSE in the prameter is_last_check_pass if
+    at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
+    returns FALSE as the value of its last parameter. 
+    
   RETURN
-    0 - OK
-    1 - One of conditions in result is always TRUE and this SEL_IMERGE
-        should be discarded.
-   -1 - An error occurred
+    1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
+       (in this case the imerge m should be discarded)
+   -1  The function runs out of memory
+    0  in all other cases 
 */
 
-int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
-{
-  for (SEL_TREE** tree= imerge->trees;
-       tree != imerge->trees_next;
-       tree++)
-  {
-    if (or_sel_tree_with_checks(param, *tree))
-      return 1;
+int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
+                                          uint n_trees,
+                                          SEL_IMERGE* imerge,
+                                          bool is_first_check_pass,
+                                          bool *is_last_check_pass)
+{
+  *is_last_check_pass= TRUE;
+  SEL_TREE** tree= imerge->trees;
+  SEL_TREE** tree_end= imerge->trees_next;
+  for ( ; tree < tree_end; tree++)
+  {
+    uint rc;
+    bool is_last= TRUE; 
+    rc= or_sel_tree_with_checks(param, n_trees, *tree, 
+                               is_first_check_pass, &is_last);
+    if (!is_last)
+      *is_last_check_pass= FALSE;
+    if (rc)
+      return rc;
   }
   return 0;
 }
 
 
-SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
+/*
+  Copy constructor for SEL_TREE objects
+
+  SYNOPSIS
+    SEL_TREE
+      arg            The source tree for the constructor
+      without_merges <=> only the range part of the tree arg is copied
+      param          Context info for the operation
+
+  DESCRIPTION
+    The constructor creates a full copy of the SEL_TREE arg if
+    the prameter without_merges==FALSE. Otherwise a tree is created
+    that contains the copy only of the range part of the tree arg. 
+*/ 
+
+SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
+                   RANGE_OPT_PARAM *param): Sql_alloc()
 {
   keys_map= arg->keys_map;
   type= arg->type;
-  for (int idx= 0; idx < MAX_KEY; idx++)
+  for (uint idx= 0; idx < param->keys; idx++)
   {
     if ((keys[idx]= arg->keys[idx]))
-      keys[idx]->increment_use_count(1);
+      keys[idx]->incr_refs_all();
   }
 
+  if (without_merges)
+    return;
+
   List_iterator<SEL_IMERGE> it(arg->merges);
   for (SEL_IMERGE *el= it++; el; el= it++)
   {
-    SEL_IMERGE *merge= new SEL_IMERGE(el, param);
+    SEL_IMERGE *merge= new SEL_IMERGE(el, 0, param);
     if (!merge || merge->trees == merge->trees_next)
     {
       merges.empty();
@@ -912,7 +1335,23 @@ SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc()
 }
 
 
-SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
+/*
+  Copy constructor for SEL_IMERGE objects
+
+  SYNOPSIS
+    SEL_IMERGE
+      arg         The source imerge for the constructor
+      cnt         How many trees from arg are to be copied
+      param       Context info for the operation
+
+  DESCRIPTION
+    The cnt==0 then the constructor creates a full copy of the 
+    imerge arg. Otherwise only the first cnt trees of the imerge
+    are copied.
+*/ 
+
+SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
+                       RANGE_OPT_PARAM *param) : Sql_alloc()
 {
   uint elements= (arg->trees_end - arg->trees);
   if (elements > PREALLOCED_TREES)
@@ -924,13 +1363,13 @@ SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc()
   else
     trees= &trees_prealloced[0];
 
-  trees_next= trees;
+  trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
   trees_end= trees + elements;
 
-  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end; 
+  for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_next; 
        tree++, arg_tree++)
   {
-    if (!(*tree= new SEL_TREE(*arg_tree, param)))
+    if (!(*tree= new SEL_TREE(*arg_tree, FALSE, param)))
       goto mem_err;
   }
 
@@ -944,7 +1383,19 @@ mem_err:
 
 
 /*
-  Perform AND operation on two index_merge lists and store result in *im1.
+  Perform AND operation on two imerge lists
+
+  SYNOPSIS
+    imerge_list_and_list()
+      param             Context info for the operation         
+      im1               The first imerge list for the operation
+      im2               The second imerge list for the operation
+
+  DESCRIPTION
+    The function just appends the imerge list im2 to the imerge list im1  
+    
+  RETURN VALUE
+    none
 */
 
 inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
@@ -954,73 +1405,242 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
 
 
 /*
-  Perform OR operation on 2 index_merge lists, storing result in first list.
-
-  NOTES
-    The following conversion is implemented:
-     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
-      => (a_1||b_1).
-
-    i.e. all conjuncts except the first one are currently dropped.
-    This is done to avoid producing N*K ways to do index_merge.
-
-    If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
-    and index_merge is discarded (while it is actually possible to try
-    harder).
-
-    As a consequence of this, choice of keys to do index_merge read may depend
-    on the order of conditions in WHERE part of the query.
+  Perform OR operation on two imerge lists
 
+  SYNOPSIS
+    imerge_list_or_list()
+      param             Context info for the operation         
+      im1               The first imerge list for the operation
+      im2               The second imerge list for the operation
+     
+  DESCRIPTION
+    Assuming that the first imerge list represents the formula
+      F1= M1_1 AND ... AND M1_k1 
+    while the second imerge list represents the formula 
+      F2= M2_1 AND ... AND M2_k2,
+    where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
+    and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
+    the function builds a list of imerges for some formula that can be 
+    inferred from the formula (F1 OR F2).
+
+    More exactly the function builds imerges for the formula (M1_1 OR M2_1).
+    Note that
+      (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
+      AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
+      M1_1 OR M2_1.
+    So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).
+
+    To build imerges for the formula (M1_1 OR M2_1) the function invokes,
+    possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
+    for the imerge m1_1.
+    At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
+    performs OR operation on the imerge m1_1 and the range tree rt2_1_1 by
+    calling SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==TRUE.
+    The resulting imerge of the operation is ored with the next range tree of
+    the imerge m2_1. This oring continues until the last range tree from
+    m2_1 has been ored. 
+    At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
+    performs the same sequence of OR operations, but now calling
+    SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==FALSE.
+
+    The imerges that the operation produces replace those in the list im1   
+       
   RETURN
-    0     OK, result is stored in *im1
-    other Error, both passed lists are unusable
+    0     if the operation is a success 
+   -1     if the function has run out of memory 
 */
 
 int imerge_list_or_list(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *im1,
                         List<SEL_IMERGE> *im2)
 {
+
+  uint rc;
+  bool is_last_check_pass= FALSE;
+
   SEL_IMERGE *imerge= im1->head();
+  uint elems= imerge->trees_next-imerge->trees;
   im1->empty();
   im1->push_back(imerge);
 
-  return imerge->or_sel_imerge_with_checks(param, im2->head());
+  rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
+                                        TRUE, &is_last_check_pass);
+  if (rc)
+  {
+    if (rc == 1)
+    {
+      im1->empty();
+      rc= 0;
+    }
+    return rc;
+  }
+
+  if (!is_last_check_pass)
+  {
+    SEL_IMERGE* new_imerge= new SEL_IMERGE(imerge, elems, param);
+    if (new_imerge)
+    {
+      is_last_check_pass= TRUE;
+      rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
+                                                 FALSE, &is_last_check_pass);
+      if (!rc)
+        im1->push_back(new_imerge); 
+    }
+  }
+  return rc;  
 }
 
 
 /*
-  Perform OR operation on index_merge list and key tree.
+  Perform OR operation for each imerge from a list and the range part of a tree
+
+  SYNOPSIS
+    imerge_list_or_tree()
+      param       Context info for the operation
+      merges      The list of imerges to be ored with the range part of tree          
+      tree        SEL_TREE whose range part is to be ored with the imerges
+
+  DESCRIPTION
+    For each imerge mi from the list 'merges' the function performes OR
+    operation with mi and the range part of 'tree' rt, producing one or
+    two imerges.
 
+    Given the merge mi represent the formula RTi_1 OR ... OR RTi_k, 
+    the function forms the merges by the following rules:
+ 
+    1. If rt cannot be ored with any of the trees rti the function just
+       produces an imerge that represents the formula
+         RTi_1 OR ... RTi_k OR RT.
+    2. If there exist a tree rtj that must be ored with rt the function
+       produces an imerge the represents the formula
+         RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
+       where the range tree for (RTi_j OR RT) is constructed by oring the
+       SEL_ARG trees that must be ored.
+    3. For each rti_j that can be ored with rt the function produces
+       the new tree rti_j' and substitutes rti_j for this new range tree.
+
+    In any case the function removes mi from the list and then adds all
+    produced imerges.
+
+    To build imerges by rules 1-3 the function calls the method
+    SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
+    call it passes TRUE for the third parameter of the function.
+    At this first call imerges by rules 1-2 are built. If the call
+    returns FALSE as the return value of its fourth parameter then the
+    function are called for the second time. At this call the imerge
+    of rule 3 is produced.
+
+    If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
+    then it means that the produced tree contains an always true
+    range tree and the whole imerge can be discarded.
+    
   RETURN
-    0     OK, result is stored in *im1.
-    other Error
+    1     if no imerges are produced
+    0     otherwise
 */
 
+static
 int imerge_list_or_tree(RANGE_OPT_PARAM *param,
-                        List<SEL_IMERGE> *im1,
+                        List<SEL_IMERGE> *merges,
                         SEL_TREE *tree)
 {
+
   SEL_IMERGE *imerge;
-  List_iterator<SEL_IMERGE> it(*im1);
-  bool tree_used= FALSE;
+  List<SEL_IMERGE> additional_merges;
+  List_iterator<SEL_IMERGE> it(*merges);
+  
   while ((imerge= it++))
   {
-    SEL_TREE *or_tree;
-    if (tree_used)
+    bool is_last_check_pass;
+    int rc= 0;
+    int rc1= 0;
+    SEL_TREE *or_tree= new SEL_TREE (tree, FALSE, param);
+    if (or_tree)
     {
-      or_tree= new SEL_TREE (tree, param);
-      if (!or_tree ||
-          (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty()))
-        return FALSE;
+      uint elems= imerge->trees_next-imerge->trees;
+      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
+                                          TRUE, &is_last_check_pass);
+      if (!is_last_check_pass)
+      {
+        SEL_IMERGE *new_imerge= new SEL_IMERGE(imerge, elems, param);
+        if (new_imerge)
+	{ 
+          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
+                                                   FALSE, &is_last_check_pass);
+          if (!rc1)
+            additional_merges.push_back(new_imerge);
+        }
+      }
     }
-    else
-      or_tree= tree;
-
-    if (imerge->or_sel_tree_with_checks(param, or_tree))
+    if (rc || rc1 || !or_tree)
       it.remove();
-    tree_used= TRUE;
   }
-  return im1->is_empty();
+
+  merges->concat(&additional_merges);  
+  return merges->is_empty();
+}
+
+
+/*
+  Perform pushdown operation of the range part of a tree into given imerges 
+
+  SYNOPSIS
+    imerge_list_and_tree()
+      param           Context info for the operation
+      merges   IN/OUT List of imerges to push the range part of 'tree' into
+      tree            SEL_TREE whose range part is to be pushed into imerges
+
+  DESCRIPTION
+    For each imerge from the list merges the function pushes the range part
+    rt of 'tree' into the imerge. 
+    More exactly if the imerge mi from the list represents the formula
+      RTi_1 OR ... OR RTi_k 
+    the function bulds a new imerge that represents the formula
+      (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
+    and adds this imerge to the list merges.
+    To perform this pushdown operation the function calls the method
+    SEL_IMERGE::and_sel_tree. 
+    For any imerge mi the new imerge is not created if for each pair of
+    trees rti_j and rt the intersection of the indexes with defined ranges
+    is empty.
+    If the result of the pushdown operation for the imerge mi returns an
+    imerge with no trees then then not only nothing is added to the list 
+    merges but mi itself is removed from the list. 
+     
+  RETURN
+    1    if no imerges are left in the list merges             
+    0    otherwise
+*/
+
+static
+int imerge_list_and_tree(RANGE_OPT_PARAM *param,
+                         List<SEL_IMERGE> *merges,
+                         SEL_TREE *tree)
+{
+  SEL_IMERGE *imerge;
+  SEL_IMERGE *new_imerge= NULL;
+  List<SEL_IMERGE> new_merges;
+  List_iterator<SEL_IMERGE> it(*merges);
+  
+  while ((imerge= it++))
+  {
+    if (!new_imerge)
+       new_imerge= new SEL_IMERGE();
+    if (imerge->have_common_keys(param, tree) && 
+        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
+    {
+      if (new_imerge->trees == new_imerge->trees_next)
+        it.remove();
+      else
+      {         
+        new_merges.push_back(new_imerge);
+        new_imerge= NULL;
+      }
+    }
+  }
+  imerge_list_and_list(&new_merges, merges);
+  *merges= new_merges;
+  return merges->is_empty();
 }
 
 
@@ -1054,7 +1674,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables,
   select->read_tables=read_tables;
   select->const_tables=const_tables;
   select->head=head;
-  select->cond=conds;
+  select->cond= conds;
 
   if (head->sort.io_cache)
   {
@@ -1068,7 +1688,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables,
 }
 
 
-SQL_SELECT::SQL_SELECT() :quick(0),cond(0),free_cond(0)
+SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0)
 {
   quick_keys.clear_all(); needed_reg.clear_all();
   my_b_clear(&file);
@@ -1102,25 +1722,22 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 {}
 
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
-                                       bool no_alloc, MEM_ROOT *parent_alloc)
-  :dont_free(0),doing_key_read(0),error(0),free_file(0),in_range(0),cur_range(NULL),last_range(0)
+                                       bool no_alloc, MEM_ROOT *parent_alloc,
+                                       bool *create_error)
+  :doing_key_read(0),/*error(0),*/free_file(0),/*in_range(0),*/cur_range(NULL),last_range(0),dont_free(0)
 {
   my_bitmap_map *bitmap;
   DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
 
   in_ror_merged_scan= 0;
-  sorted= 0;
   index= key_nr;
   head=  table;
   key_part_info= head->key_info[index].key_part;
   my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);
 
   /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
-  multi_range_bufsiz= thd->variables.read_rnd_buff_size;
-  multi_range_count= thd->variables.multi_range_count;
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_size= thd->variables.mrr_buff_size;
+  mrr_buf_desc= NULL;
 
   if (!no_alloc && !parent_alloc)
   {
@@ -1135,12 +1752,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
   save_read_set= head->read_set;
   save_write_set= head->write_set;
 
-  /* Allocate a bitmap for used columns */
+  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
   if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
                                            MYF(MY_WME))))
   {
     column_bitmap.bitmap= 0;
-    error= 1;
+    *create_error= 1;
   }
   else
     bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
@@ -1148,6 +1765,20 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
 }
 
 
+void QUICK_RANGE_SELECT::need_sorted_output()
+{
+  if (!(mrr_flags & HA_MRR_SORTED))
+  {
+    /*
+      Native implementation can't produce sorted output. We'll have to
+      switch to default
+    */
+    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; 
+  }
+  mrr_flags |= HA_MRR_SORTED;
+}
+
+
 int QUICK_RANGE_SELECT::init()
 {
   DBUG_ENTER("QUICK_RANGE_SELECT::init");
@@ -1181,7 +1812,7 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
         DBUG_PRINT("info", ("Freeing separate handler 0x%lx (free: %d)", (long) file,
                             free_file));
         file->ha_external_lock(current_thd, F_UNLCK);
-        file->close();
+        file->ha_close();
         delete file;
       }
     }
@@ -1190,17 +1821,22 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
     my_free((char*) column_bitmap.bitmap, MYF(MY_ALLOW_ZERO_PTR));
   }
   head->column_bitmaps_set(save_read_set, save_write_set);
-  x_free(multi_range);
-  x_free(multi_range_buff);
+  x_free(mrr_buf_desc);
   DBUG_VOID_RETURN;
 }
 
+/*
+  QUICK_INDEX_SORT_SELECT works as follows:
+  - Do index scans, accumulate rowids in the Unique object 
+    (Unique will also sort and de-duplicate rowids)
+  - Use rowids from unique to run a disk-ordered sweep
+*/
 
-QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
-                                                   TABLE *table)
+QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
+                                                 TABLE *table)
   :unique(NULL), pk_quick_select(NULL), thd(thd_param)
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
   index= MAX_KEY;
   head= table;
   bzero(&read_record, sizeof(read_record));
@@ -1208,38 +1844,48 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
   DBUG_VOID_RETURN;
 }
 
-int QUICK_INDEX_MERGE_SELECT::init()
+int QUICK_INDEX_SORT_SELECT::init()
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
   DBUG_RETURN(0);
 }
 
-int QUICK_INDEX_MERGE_SELECT::reset()
+int QUICK_INDEX_SORT_SELECT::reset()
 {
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
   DBUG_RETURN(read_keys_and_merge());
 }
 
 bool
-QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
+QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
 {
-  /*
-    Save quick_select that does scan on clustered primary key as it will be
-    processed separately.
-  */
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
   if (head->file->primary_key_is_clustered() &&
       quick_sel_range->index == head->s->primary_key)
+  {
+   /*
+     A quick_select over a clustered primary key is handled specifically
+     Here we assume:
+     - PK columns are included in any other merged index
+     - Scan on the PK is disk-ordered.
+       (not meeting #2 will only cause performance degradation)
+
+       We could treat clustered PK as any other index, but that would
+       be inefficient. There is no point in doing scan on
+       CPK, remembering the rowid, then making rnd_pos() call with
+       that rowid.
+    */
     pk_quick_select= quick_sel_range;
-  else
-    return quick_selects.push_back(quick_sel_range);
-  return 0;
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(quick_selects.push_back(quick_sel_range));
 }
 
-QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
+QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
 {
   List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
   QUICK_RANGE_SELECT* quick;
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
+  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
   delete unique;
   quick_it.rewind();
   while ((quick= quick_it++))
@@ -1253,7 +1899,6 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
   DBUG_VOID_RETURN;
 }
 
-
 QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                        TABLE *table,
                                                        bool retrieve_full_rows,
@@ -1362,7 +2007,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
   if (init() || reset())
   {
     file->ha_external_lock(thd, F_UNLCK);
-    file->close();
+    file->ha_close();
     goto failure;
   }
   free_file= TRUE;
@@ -1541,7 +2186,7 @@ int QUICK_ROR_UNION_SELECT::init()
   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
   if (init_queue(&queue, quick_selects.elements, 0,
                  FALSE , QUICK_ROR_UNION_SELECT::queue_cmp,
-                 (void*) this))
+                 (void*) this, 0, 0))
   {
     bzero(&queue, sizeof(QUEUE));
     DBUG_RETURN(1);
@@ -1665,6 +2310,7 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
   min_value=arg.min_value;
   max_value=arg.max_value;
   next_key_part=arg.next_key_part;
+  max_part_no= arg.max_part_no;
   use_count=1; elements=1;
 }
 
@@ -1682,9 +2328,10 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
   :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
    elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
    max_value((uchar*) max_value_arg), next(0),prev(0),
-   next_key_part(0),color(BLACK),type(KEY_RANGE)
+   next_key_part(0), color(BLACK), type(KEY_RANGE)
 {
   left=right= &null_element;
+  max_part_no= 1;
 }
 
 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
@@ -1695,6 +2342,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
    field(field_), min_value(min_value_), max_value(max_value_),
    next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
 {
+  max_part_no= part+1;
   left=right= &null_element;
 }
 
@@ -1738,6 +2386,7 @@ SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
   increment_use_count(1);
   tmp->color= color;
   tmp->elements= this->elements;
+  tmp->max_part_no= max_part_no;
   return tmp;
 }
 
@@ -1981,9 +2630,11 @@ class TRP_RANGE : public TABLE_READ_PLAN
 public:
   SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
   uint     key_idx; /* key number in PARAM::key */
+  uint     mrr_flags; 
+  uint     mrr_buf_size;
 
-  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
-   : key(key_arg), key_idx(idx_arg)
+  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
+   : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
   {}
   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
 
@@ -1992,7 +2643,8 @@ public:
   {
     DBUG_ENTER("TRP_RANGE::make_quick");
     QUICK_RANGE_SELECT *quick;
-    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
+    if ((quick= get_quick_select(param, key_idx, key,  mrr_flags, 
+                                 mrr_buf_size, parent_alloc)))
     {
       quick->records= records;
       quick->read_time= read_cost;
@@ -2040,6 +2692,26 @@ public:
 
 
 /*
+  Plan for QUICK_INDEX_INTERSECT_SELECT scan.
+  QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
+  is ignored by make_quick.
+*/
+
+class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN
+{
+public:
+  TRP_INDEX_INTERSECT() {}                        /* Remove gcc warning */
+  virtual ~TRP_INDEX_INTERSECT() {}               /* Remove gcc warning */
+  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
+                             MEM_ROOT *parent_alloc);
+  TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
+  TRP_RANGE **range_scans_end; /* end of the array */
+  /* keys whose scans are to be filtered by cpk conditions */
+  key_map filtered_scans;  
+};
+
+
+/*
   Plan for QUICK_INDEX_MERGE_SELECT scan.
   QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
   is ignored by make_quick.
@@ -2106,6 +2778,38 @@ public:
 };
 
 
+typedef struct st_index_scan_info
+{
+  uint      idx;      /* # of used key in param->keys */
+  uint      keynr;    /* # of used key in table */
+  uint      range_count;
+  ha_rows   records;  /* estimate of # records this scan will return */
+
+  /* Set of intervals over key fields that will be used for row retrieval. */
+  SEL_ARG   *sel_arg;
+
+  KEY *key_info;
+  uint used_key_parts;
+
+  /* Estimate of # records filtered out by intersection with cpk */
+  ha_rows   filtered_out;
+  /* Bitmap of fields used in index intersection */ 
+  MY_BITMAP used_fields;
+
+  /* Fields used in the query and covered by ROR scan. */
+  MY_BITMAP covered_fields;
+  uint      used_fields_covered; /* # of set bits in covered_fields */
+  int       key_rec_length; /* length of key record (including rowid) */
+
+  /*
+    Cost of reading all index records with values in sel_arg intervals set
+    (assuming there is no need to access full table records)
+  */
+  double    index_read_cost;
+  uint      first_uncovered_field; /* first unused bit in covered_fields */
+  uint      key_components; /* # of parts in the key */
+} INDEX_SCAN_INFO;
+
 /*
   Fill param->needed_fields with bitmap of fields used in the query.
   SYNOPSIS
@@ -2217,7 +2921,8 @@ static int fill_used_fields_bitmap(PARAM *param)
 
 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 				  table_map prev_tables,
-				  ha_rows limit, bool force_quick_range)
+				  ha_rows limit, bool force_quick_range, 
+                                  bool ordered_output)
 {
   uint idx;
   double scan_time;
@@ -2230,7 +2935,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   quick=0;
   needed_reg.clear_all();
   quick_keys.clear_all();
-  if (keys_to_use.is_clear_all())
+  DBUG_ASSERT(!head->is_filled_at_execution());
+  if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
     DBUG_RETURN(0);
   records= head->file->stats.records;
   if (!records)
@@ -2275,6 +2981,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     param.imerge_cost_buff_size= 0;
     param.using_real_indexes= TRUE;
     param.remove_jump_scans= TRUE;
+    param.force_default_mrr= ordered_output;
 
     thd->no_errors=1;				// Don't warn about NULL
     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
@@ -2381,72 +3088,92 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
         It is possible to use a range-based quick select (but it might be
         slower than 'all' table scan).
       */
-      if (tree->merges.is_empty())
-      {
-        TRP_RANGE         *range_trp;
-        TRP_ROR_INTERSECT *rori_trp;
-        bool can_build_covering= FALSE;
+      TRP_RANGE         *range_trp;
+      TRP_ROR_INTERSECT *rori_trp;
+      TRP_INDEX_INTERSECT *intersect_trp;
+      bool can_build_covering= FALSE;
+      
+      remove_nonrange_trees(&param, tree);
 
-        /* Get best 'range' plan and prepare data for making other plans */
-        if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
-                                             best_read_time)))
-        {
-          best_trp= range_trp;
-          best_read_time= best_trp->read_cost;
-        }
+      /* Get best 'range' plan and prepare data for making other plans */
+      if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
+                                           best_read_time)))
+      {
+        best_trp= range_trp;
+        best_read_time= best_trp->read_cost;
+      }
 
+      /*
+        Simultaneous key scans and row deletes on several handler
+        objects are not allowed so don't use ROR-intersection for
+        table deletes.
+      */
+      if ((thd->lex->sql_command != SQLCOM_DELETE) && 
+           optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+      {
         /*
-          Simultaneous key scans and row deletes on several handler
-          objects are not allowed so don't use ROR-intersection for
-          table deletes.
+          Get best non-covering ROR-intersection plan and prepare data for
+          building covering ROR-intersection.
         */
-        if ((thd->lex->sql_command != SQLCOM_DELETE) && 
-             optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+        if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
+                                              &can_build_covering)))
         {
+          best_trp= rori_trp;
+          best_read_time= best_trp->read_cost;
           /*
-            Get best non-covering ROR-intersection plan and prepare data for
-            building covering ROR-intersection.
+            Try constructing covering ROR-intersect only if it looks possible
+            and worth doing.
           */
-          if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
-                                                &can_build_covering)))
-          {
+          if (!rori_trp->is_covering && can_build_covering &&
+              (rori_trp= get_best_covering_ror_intersect(&param, tree,
+                                                         best_read_time)))
             best_trp= rori_trp;
-            best_read_time= best_trp->read_cost;
-            /*
-              Try constructing covering ROR-intersect only if it looks possible
-              and worth doing.
-            */
-            if (!rori_trp->is_covering && can_build_covering &&
-                (rori_trp= get_best_covering_ror_intersect(&param, tree,
-                                                           best_read_time)))
-              best_trp= rori_trp;
-          }
         }
       }
-      else
+      /*
+        Do not look for an index intersection  plan if there is a covering
+        index. The scan by this covering index will be always cheaper than
+        any index intersection.
+      */
+      if (param.table->covering_keys.is_clear_all() &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT))
+      {
+        if ((intersect_trp= get_best_index_intersect(&param, tree,
+                                                    best_read_time)))
+        {
+          best_trp= intersect_trp;
+          best_read_time= best_trp->read_cost; 
+          set_if_smaller(param.table->quick_condition_rows, 
+                         intersect_trp->records);
+        }
+      }
+
+      if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
       {
-        if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
+        /* Try creating index_merge/ROR-union scan. */
+        SEL_IMERGE *imerge;
+        TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
+        LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
+        DBUG_PRINT("info",("No range reads possible,"
+                           " trying to construct index_merge"));
+        List_iterator_fast<SEL_IMERGE> it(tree->merges);
+        while ((imerge= it++))
         {
-          /* Try creating index_merge/ROR-union scan. */
-          SEL_IMERGE *imerge;
-          TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
-          LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */
-          DBUG_PRINT("info",("No range reads possible,"
-                             " trying to construct index_merge"));
-          List_iterator_fast<SEL_IMERGE> it(tree->merges);
-          while ((imerge= it++))
+          new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
+          if (new_conj_trp)
+            set_if_smaller(param.table->quick_condition_rows, 
+                           new_conj_trp->records);
+          if (new_conj_trp &&
+              (!best_conj_trp || 
+               new_conj_trp->read_cost < best_conj_trp->read_cost))
           {
-            new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
-            if (new_conj_trp)
-              set_if_smaller(param.table->quick_condition_rows, 
-                             new_conj_trp->records);
-            if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost <
-                                   best_conj_trp->read_cost))
-              best_conj_trp= new_conj_trp;
+            best_conj_trp= new_conj_trp;
+            best_read_time= best_conj_trp->read_cost;
           }
-          if (best_conj_trp)
-            best_trp= best_conj_trp;
         }
+        if (best_conj_trp)
+          best_trp= best_conj_trp;
       }
     }
 
@@ -3598,11 +4325,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
   DBUG_ENTER("get_sweep_read_cost");
   if (param->table->file->primary_key_is_clustered())
   {
+    /*
+      We are using the primary key to find the rows.
+      Calculate the cost for this.
+    */
     result= param->table->file->read_time(param->table->s->primary_key,
                                           (uint)records, records);
   }
   else
   {
+    /*
+      Rows will be retreived with rnd_pos(). Caluclate the expected
+      cost for this.
+    */
     double n_blocks=
       ceil(ulonglong2double(param->table->file->stats.data_file_length) /
            IO_SIZE);
@@ -3619,7 +4354,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
       return 1;
     */
     JOIN *join= param->thd->lex->select_lex.join;
-    if (!join || join->tables == 1)
+    if (!join || join->table_count == 1)
     {
       /* No join, assume reading is done in one 'sweep' */
       result= busy_blocks*(DISK_SEEK_BASE_COST +
@@ -3710,7 +4445,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 {
   SEL_TREE **ptree;
   TRP_INDEX_MERGE *imerge_trp= NULL;
-  uint n_child_scans= imerge->trees_next - imerge->trees;
   TRP_RANGE **range_scans;
   TRP_RANGE **cur_child;
   TRP_RANGE **cpk_scan= NULL;
@@ -3730,6 +4464,24 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
   DBUG_ENTER("get_best_disjunct_quick");
   DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
 
+  /*
+    In every tree of imerge remove SEL_ARG trees that do not make ranges.
+    If after this removal some SEL_ARG tree becomes empty discard imerge.  
+  */
+  for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++)
+  {
+    if (remove_nonrange_trees(param, *ptree))
+    {
+      imerge->trees_next= imerge->trees;
+      break;
+    }
+  }
+
+  uint n_child_scans= imerge->trees_next - imerge->trees;
+  
+  if (!n_child_scans)
+    DBUG_RETURN(NULL);
+
   if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
                                              sizeof(TRP_RANGE*)*
                                              n_child_scans)))
@@ -3834,7 +4586,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
   imerge_cost +=
     Unique::get_use_cost(param->imerge_cost_buff, (uint)non_cpk_scan_records,
                          param->table->file->ref_length,
-                         param->thd->variables.sortbuff_size);
+                         param->thd->variables.sortbuff_size,
+                         TIME_FOR_COMPARE_ROWID,
+                         FALSE, NULL);
   DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
                      imerge_cost, read_time));
   if (imerge_cost < read_time)
@@ -3849,6 +4603,13 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
       imerge_trp->range_scans_end= range_scans + n_child_scans;
       read_time= imerge_cost;
     }
+    if (imerge_trp)
+    {
+      TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp,
+                                                   read_time);
+      if (trp != imerge_trp)
+        DBUG_RETURN(trp);
+    }
   }
 
 build_ror_index_merge:
@@ -3864,6 +4625,7 @@ build_ror_index_merge:
                                         sizeof(TABLE_READ_PLAN*)*
                                         n_child_scans)))
     DBUG_RETURN(imerge_trp);
+
 skip_to_ror_scan:
   roru_index_costs= 0.0;
   roru_total_records= 0;
@@ -3947,30 +4709,990 @@ skip_to_ror_scan:
       DBUG_RETURN(roru);
     }
   }
-  DBUG_RETURN(imerge_trp);
+    DBUG_RETURN(imerge_trp);
 }
 
-typedef struct st_ror_scan_info
+
+/*
+  Merge index scans for the same indexes in an index merge plan
+
+  SYNOPSIS
+    merge_same_index_scans()
+      param           Context info for the operation
+      imerge   IN/OUT SEL_IMERGE from which imerge_trp has been extracted          
+      imerge_trp      The index merge plan where index scans for the same
+                      indexes are to be merges
+      read_time       The upper bound for the cost of the plan to be evaluated
+
+  DESRIPTION
+    For the given index merge plan imerge_trp extracted from the SEL_MERGE
+    imerge the function looks for range scans with the same indexes and merges
+    them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function
+    creates a range tree rt_i that contains only r_i. All rt_i are joined
+    into one index merge that replaces the original index merge imerge.
+    The function calls get_best_disjunct_quick for the new index merge to
+    get a new index merge plan that contains index scans only for different
+    indexes.
+    If there are no index scans for the same index in the original index
+    merge plan the function does not change the original imerge and returns
+    imerge_trp as its result.
+
+  RETURN
+    The original or or improved index merge plan                        
+*/
+
+static
+TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
+                                        TRP_INDEX_MERGE *imerge_trp,
+                                        double read_time)
 {
-  uint      idx;      /* # of used key in param->keys */
-  uint      keynr;    /* # of used key in table */
-  ha_rows   records;  /* estimate of # records this scan will return */
+  uint16 first_scan_tree_idx[MAX_KEY];
+  SEL_TREE **tree;
+  TRP_RANGE **cur_child;
+  uint removed_cnt= 0;
 
-  /* Set of intervals over key fields that will be used for row retrieval. */
-  SEL_ARG   *sel_arg;
+  DBUG_ENTER("merge_same_index_scans");
 
-  /* Fields used in the query and covered by this ROR scan. */
-  MY_BITMAP covered_fields;
-  uint      used_fields_covered; /* # of set bits in covered_fields */
-  int       key_rec_length; /* length of key record (including rowid) */
+  bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys);
 
-  /*
-    Cost of reading all index records with values in sel_arg intervals set
-    (assuming there is no need to access full table records)
-  */
-  double    index_read_cost;
-  uint      first_uncovered_field; /* first unused bit in covered_fields */
-  uint      key_components; /* # of parts in the key */
+  for (tree= imerge->trees, cur_child= imerge_trp->range_scans;
+       tree != imerge->trees_next;
+       tree++, cur_child++)
+  {
+    DBUG_ASSERT(tree);
+    uint key_idx= (*cur_child)->key_idx;
+    uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx];
+    if (!*tree_idx_ptr)
+      *tree_idx_ptr= (uint16) (tree-imerge->trees+1);
+    else
+    {
+      SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1);
+      SEL_ARG *key= (*changed_tree)->keys[key_idx];
+      bzero((*changed_tree)->keys,
+            sizeof((*changed_tree)->keys[0])*param->keys);
+      (*changed_tree)->keys_map.clear_all();
+      if (((*changed_tree)->keys[key_idx]=
+             key_or(param, key, (*tree)->keys[key_idx])))
+        (*changed_tree)->keys_map.set_bit(key_idx);
+      *tree= NULL;
+      removed_cnt++;
+    }
+  }
+  if (!removed_cnt)
+    DBUG_RETURN(imerge_trp);
+
+  TABLE_READ_PLAN *trp= NULL;
+  SEL_TREE **new_trees_next= imerge->trees;
+  for (tree= new_trees_next; tree != imerge->trees_next; tree++)
+  {
+    if (!*tree)
+      continue;
+    if (tree > new_trees_next)
+      *new_trees_next= *tree;
+    new_trees_next++;
+  }
+  imerge->trees_next= new_trees_next;
+
+  DBUG_ASSERT(imerge->trees_next>imerge->trees);
+
+  if (imerge->trees_next-imerge->trees > 1)
+    trp= get_best_disjunct_quick(param, imerge, read_time);
+  else
+  {
+    /*
+      This alternative theoretically can be reached when the cost
+      of the index merge for such a formula as
+        (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR
+        (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3
+      is estimated as being cheaper than the cost of index scan for
+      the formula
+        (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4)
+      
+      In the current code this may happen for two reasons:
+      1. for a single index range scan data records are accessed in
+         a random order
+      2. the functions that estimate the cost of a range scan and an
+         index merge retrievals are not well calibrated
+    */
+    trp= get_key_scans_params(param, *imerge->trees, FALSE, TRUE,
+                              read_time);
+  }
+
+  DBUG_RETURN(trp); 
+}
+
+
+/*
+  This structure contains the info common for all steps of a partial
+  index intersection plan. Morever it contains also the info common
+  for index intersect plans. This info is filled in by the function
+  prepare_search_best just before searching for the best index
+  intersection plan.
+*/  
+
+typedef struct st_common_index_intersect_info
+{
+  PARAM *param;           /* context info for range optimizations            */
+  uint key_size;          /* size of a ROWID element stored in Unique object */
+  uint compare_factor;         /* 1/compare - cost to compare two ROWIDs     */
+  ulonglong max_memory_size;   /* maximum space allowed for Unique objects   */   
+  ha_rows table_cardinality;   /* estimate of the number of records in table */
+  double cutoff_cost;        /* discard index intersects with greater costs  */ 
+  INDEX_SCAN_INFO *cpk_scan;  /* clustered primary key used in intersection  */
+
+  bool in_memory;  /* unique object for intersection is completely in memory */
+
+  INDEX_SCAN_INFO **search_scans;    /* scans possibly included in intersect */ 
+  uint n_search_scans;               /* number of elements in search_scans   */
+
+  bool best_uses_cpk;   /* current best intersect uses clustered primary key */
+  double best_cost;       /* cost of the current best index intersection     */
+  /* estimate of the number of records in the current best intersection      */
+  ha_rows best_records;
+  uint best_length;    /* number of indexes in the current best intersection */
+  INDEX_SCAN_INFO **best_intersect;  /* the current best index intersection  */
+  /* scans from the best intersect to be filtrered by cpk conditions         */
+  key_map filtered_scans; 
+
+  uint *buff_elems;        /* buffer to calculate cost of index intersection */
+  
+} COMMON_INDEX_INTERSECT_INFO;
+
+
+/*
+  This structure contains the info specific for one step of an index
+  intersection plan. The structure is filled in by the function 
+   check_index_intersect_extension.
+*/
+
+typedef struct st_partial_index_intersect_info
+{
+  COMMON_INDEX_INTERSECT_INFO *common_info;    /* shared by index intersects */
+  uint length;         /* number of index scans in the partial intersection  */
+  ha_rows records;     /* estimate of the number of records in intersection  */
+  double cost;         /* cost of the partial index intersection             */
+
+  /* estimate of total number of records of all scans of the partial index
+     intersect sent to the Unique object used for the intersection  */
+  ha_rows records_sent_to_unique;
+
+  /* total cost of the scans of indexes from the partial index intersection  */
+  double index_read_cost; 
+
+  bool use_cpk_filter;      /* cpk filter is to be used for this       scan  */  
+  bool in_memory;            /* uses unique object in memory                 */
+  double in_memory_cost;     /* cost of using unique object in memory        */
+
+  key_map filtered_scans;    /* scans to be filtered by cpk conditions       */
+         
+  MY_BITMAP *intersect_fields;     /* bitmap of fields used in intersection  */
+} PARTIAL_INDEX_INTERSECT_INFO;
+
+
+/* Check whether two indexes have the same first n components */
+
+static
+bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts)
+{
+  KEY_PART_INFO *part1= key1->key_part;
+  KEY_PART_INFO *part2= key2->key_part;
+  for(uint i= 0; i < used_parts; i++, part1++, part2++)
+  {
+    if (part1->fieldnr != part2->fieldnr)
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* Create a bitmap for all fields of a table */
+
+static
+bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap)
+{
+  my_bitmap_map *bitmap_buf;
+
+  if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root,
+                                                 param->fields_bitmap_size)))
+    return TRUE;
+  if (bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields, FALSE))
+    return TRUE;
+  
+  return FALSE;
+}
+
+/* Compare two indexes scans for sort before search for the best intersection */
+
+static
+int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b)
+{
+  return (*a)->records < (*b)->records ?
+          -1 : (*a)->records == (*b)->records ? 0 : 1;
+}
+
+
+static inline
+void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap,
+                                       KEY_PART_INFO *key_part,
+                                       uint used_key_parts)
+{
+  bitmap_clear_all(field_bitmap);
+  for (KEY_PART_INFO *key_part_end= key_part+used_key_parts;
+       key_part < key_part_end; key_part++)
+  {
+    bitmap_set_bit(field_bitmap, key_part->fieldnr-1);
+  }
+}
+
+
+/*
+  Round up table cardinality read from statistics provided by engine.
+  This function should go away when mysql test will allow to handle
+  more or less easily in the test suites deviations of InnoDB 
+  statistical data.
+*/
+ 
+static inline
+ha_rows get_table_cardinality_for_index_intersect(TABLE *table)
+{
+  if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)
+    return table->file->stats.records;
+  else
+  {
+    ha_rows d;
+    double q;
+    for (q= (double)table->file->stats.records, d= 1 ; q >= 10; q/= 10, d*= 10 ) ;
+    return (ha_rows) (floor(q+0.5) * d);
+  } 
+}
+
+  
+static
+ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                             INDEX_SCAN_INFO *ext_index_scan);
+
+/*
+  Prepare to search for the best index intersection
+
+  SYNOPSIS
+    prepare_search_best_index_intersect()
+      param         common info about index ranges
+      tree          tree of ranges for indexes than can be intersected
+      common    OUT info needed for search to be filled by the function 
+      init      OUT info for an initial pseudo step of the intersection plans
+      cutoff_cost   cut off cost of the interesting index intersection 
+
+  DESCRIPTION
+    The function initializes all fields of the structure 'common' to be used
+    when searching for the best intersection plan. It also allocates
+    memory to store the most cheap index intersection.
+
+  NOTES
+    When selecting candidates for index intersection we always take only
+    one representative out of any set of indexes that share the same range
+    conditions. These indexes always have the same prefixes and the
+    components of this prefixes are exactly those used in these range
+    conditions.
+    Range conditions over clustered primary key (cpk) is always used only
+    as the condition that filters out some rowids retrieved by the scans
+    for secondary indexes. The cpk index will be handled in special way by
+    the function that search for the best index intersection. 
+
+  RETURN
+    FALSE  in the case of success
+    TRUE   otherwise
+*/
+
+static
+bool prepare_search_best_index_intersect(PARAM *param, 
+                                         SEL_TREE *tree,
+                                         COMMON_INDEX_INTERSECT_INFO *common,
+                                         PARTIAL_INDEX_INTERSECT_INFO *init,
+                                         double cutoff_cost)
+{
+  uint i;
+  uint n_search_scans;
+  double cost;
+  INDEX_SCAN_INFO **index_scan;
+  INDEX_SCAN_INFO **scan_ptr;
+  INDEX_SCAN_INFO *cpk_scan= NULL;
+  TABLE *table= param->table;
+  uint n_index_scans= tree->index_scans_end - tree->index_scans;
+
+  if (!n_index_scans)
+    return 1;
+
+  bzero(init, sizeof(*init));
+  init->common_info= common;
+  init->cost= cutoff_cost;
+
+  common->param= param;
+  common->key_size= table->file->ref_length;
+  common->compare_factor= TIME_FOR_COMPARE_ROWID;
+  common->max_memory_size= param->thd->variables.sortbuff_size;
+  common->cutoff_cost= cutoff_cost;
+  common->cpk_scan= NULL;
+  common->table_cardinality= 
+    get_table_cardinality_for_index_intersect(table);
+
+  if (n_index_scans <= 1)
+    return TRUE;
+
+  if (table->file->primary_key_is_clustered())
+  {
+    INDEX_SCAN_INFO **index_scan_end;
+    index_scan= tree->index_scans;
+    index_scan_end= index_scan+n_index_scans;
+    for ( ; index_scan < index_scan_end; index_scan++)
+    {  
+      if ((*index_scan)->keynr == table->s->primary_key)
+      {
+        common->cpk_scan= cpk_scan= *index_scan;
+        break;
+      }
+    }
+  }
+
+  i= n_index_scans - test(cpk_scan != NULL) + 1;
+
+  if (!(common->search_scans =
+	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
+                                         sizeof(INDEX_SCAN_INFO *) * i)))
+    return TRUE;
+  bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i);
+
+  INDEX_SCAN_INFO **selected_index_scans= common->search_scans;
+    
+  for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++)
+  {
+    uint used_key_parts= (*index_scan)->used_key_parts;
+    KEY *key_info= (*index_scan)->key_info;
+
+    if (*index_scan == cpk_scan)
+      continue;
+    if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts &&
+        same_index_prefix(cpk_scan->key_info, key_info, used_key_parts))
+      continue;
+
+    cost= table->file->keyread_time((*index_scan)->keynr,
+                                    (*index_scan)->range_count,
+                                    (*index_scan)->records);
+    if (cost >= cutoff_cost)
+      continue;
+   
+    for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++)
+    {
+      /*
+        When we have range conditions for two different indexes with the same
+        beginning it does not make sense to consider both of them for index 
+        intersection if the range conditions are covered by common initial
+        components of the indexes. Actually in this case the indexes are
+        guaranteed to have the same range conditions.
+      */
+      if ((*scan_ptr)->used_key_parts == used_key_parts &&
+          same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts))
+        break;
+    }
+    if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost)
+    {
+      *scan_ptr= *index_scan;
+      (*scan_ptr)->index_read_cost= cost;
+    }
+  } 
+
+  ha_rows records_in_scans= 0;
+
+  for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++)
+  {
+    if (create_fields_bitmap(param, &(*scan_ptr)->used_fields))
+      return TRUE;
+    records_in_scans+= (*scan_ptr)->records;
+  }
+  n_search_scans= i;
+
+  if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields))
+    return TRUE;
+  
+  if (!(common->n_search_scans= n_search_scans))
+    return TRUE;
+    
+  common->best_uses_cpk= FALSE;
+  common->best_cost= cutoff_cost + COST_EPS;
+  common->best_length= 0;
+
+  if (!(common->best_intersect=
+	(INDEX_SCAN_INFO **) alloc_root (param->mem_root,
+                                         sizeof(INDEX_SCAN_INFO *) *
+                                         (i + test(cpk_scan != NULL)))))
+    return TRUE;
+
+  size_t calc_cost_buff_size=
+         Unique::get_cost_calc_buff_size((size_t)records_in_scans,
+                                         common->key_size,
+				         common->max_memory_size);
+  if (!(common->buff_elems= (uint *) alloc_root(param->mem_root,
+                                                calc_cost_buff_size)))
+    return TRUE;
+
+  my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *),
+           (qsort_cmp) cmp_intersect_index_scan);
+
+  if (cpk_scan)
+  {
+    PARTIAL_INDEX_INTERSECT_INFO curr;
+    set_field_bitmap_for_index_prefix(&cpk_scan->used_fields,
+                                      cpk_scan->key_info->key_part,
+                                      cpk_scan->used_key_parts);
+    curr.common_info= common;
+    curr.intersect_fields= &cpk_scan->used_fields;
+    curr.records= cpk_scan->records;
+    curr.length= 1;
+    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
+    {
+      ha_rows scan_records= (*scan_ptr)->records;
+      ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr);
+      (*scan_ptr)->filtered_out= records >= scan_records ?
+                                   0 : scan_records-records; 
+    }
+  } 
+  else
+  {
+    for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++)
+      (*scan_ptr)->filtered_out= 0;
+  }
+
+  return FALSE;
+}
+
+
+/*
+  On Estimation of the Number of Records in an Index Intersection 
+  ===============================================================
+
+  Consider query Q over table t. Let C be the WHERE condition of  this query,
+  and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined
+  on table t.
+  Let rt1 and rt2 be the range trees extracted by the range optimizer from C
+  for idx1 and idx2 respectively.
+  Let #t be the estimate of the number of records in table t provided for the
+  optimizer. 
+  Let #r1 and #r2 be the estimates of the number of records in the range trees
+  rt1 and rt2, respectively, obtained by the range optimizer.
+
+  We need to get an estimate for the number of records in the index 
+  intersection of rt1 and rt2. In other words, we need to estimate the
+  cardinality of the set of records that are in both trees. Let's designate
+  this number by #r.
+
+  If we do not make any assumptions then we can only state that
+     #r<=min(#r1,#r2).
+  With this estimate we can't say that the index intersection scan will be 
+  cheaper than the cheapest index scan.
+
+  Let Rt1 and Rt2 be AND/OR conditions representing rt and rt2 respectively.
+  The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t.
+  The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t.
+
+  If we assume that the values in columns of idx1 and idx2 are independent
+  then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t).
+  So in this case we have: #r=#r1*#r2/#t.
+
+  The above assumption of independence of the columns in idx1 and idx2 means
+  that:
+  - all columns are different
+  - values from one column do not correlate with values from any other column.
+
+  We can't help with the case when column correlate with each other.
+  Yet, if they are assumed to be uncorrelated the value of #r theoretically can
+  be evaluated . Unfortunately this evaluation, in general, is rather complex.
+
+  Let's consider two indexes idx1:(dept, manager),  idx2:(dept, building)
+  over table 'employee' and two range conditions over these indexes:
+    Rt1: dept=10 AND manager LIKE 'S%'
+    Rt2: dept=10 AND building LIKE 'L%'.
+  We can state that:
+    sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%')
+    =sel(Rt1)*sel(Rt2)/sel(dept=10).
+  sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #r1_0 of
+  the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality
+  #rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2.
+  The current code does not make an estimate either for #rt1_0, or for #rt2_0,
+  but it can be adjusted to provide those numbers.
+  Alternatively, min(rec_per_key) for (dept) could be used to get an upper 
+  bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
+  now.  
+ 
+  Let's consider two other indexes idx1:(dept, last_name), 
+  idx2:(first_name, last_name) and two range conditions over these indexes:
+    Rt1: dept=5 AND last_name='Sm%'
+    Rt2: first_name='Robert' AND last_name='Sm%'.
+
+  sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm5')*sel(first_name='Robert')
+  =sel(Rt2)*sel(dept=5)
+  Here max(rec_per_key) for (dept) could be used to get an upper bound for
+  the value of sel(Rt1&Rt2).
+  
+  When the intersected indexes have different major columns, but some
+  minor column are common the picture may be more complicated.
+
+  Let's consider the following range conditions for the same indexes as in
+  the previous example:
+    Rt1: (Rt11: dept=5 AND last_name='So%') 
+         OR 
+         (Rt12: dept=7 AND last_name='Saw%')
+    Rt2: (Rt21: first_name='Robert' AND last_name='Saw%')
+         OR
+         (Rt22: first_name='Bob' AND last_name='So%')
+  Here we have:
+  sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) +
+                sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22)
+  Now consider the range condition:
+    Rt1_0: (dept=5 OR dept=7)
+  For this condition we can state that:
+  sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))=
+  sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+
+  sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)=
+  sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+
+  sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) >
+  sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+
+  sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) >
+  sel(Rt1 & Rt2) 
+
+ We've just demonstrated for an example what is intuitively almost obvious
+ in general. We can  remove the ending parts fromrange trees getting less
+ selective range conditions for sub-indexes.
+ So if not a most major component with the number k of an index idx is
+ encountered in the index with which we intersect we can use the sub-index
+ idx_k-1 that includes the components of idx up to the i-th component and
+ the range tree for idx_k-1 to make an upper bound estimate for the number
+  of records in the index intersection.
+ The range tree for idx_k-1 we use here is the subtree of the original range
+  tree for idx that contains only parts from the first k-1 components.
+
+  As it was mentioned above the range optimizer currently does not provide
+  an estimate for the number of records in the ranges for sub-indexes.
+  However, some reasonable upper bound estimate can be obtained.
+
+  Let's consider the following range tree:
+    Rt: (first_name='Robert' AND last_name='Saw%')
+        OR
+        (first_name='Bob' AND last_name='So%')
+  Let #r be the number of records in Rt. Let f_1 be the fan-out of column
+  last_name:
+    f_1 = rec_per_key[first_name]/rec_per_key[last_name].
+  The the number of records in the range tree:
+    Rt_0:  (first_name='Robert' OR first_name='Bob')
+  for the sub-index (first_name) is not greater than max(#r*f_1, #t).
+  Strictly speaking, we can state only that it's not greater than 
+  max(#r*max_f_1, #t), where
+    max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
+  Yet, if #r/#t is big enough (and this is the case of an index intersection,
+  because using this index range with a single index scan is cheaper than
+  the cost of the intersection when #r/#t is small) then almost safely we
+  can use here f_1 instead of max_f_1.
+
+  The above considerations can be used in future development. Now, they are
+  used partly in the function that provides a rough upper bound estimate for
+  the number of records in an index intersection that follow below.
+*/
+
+/*
+  Estimate the number of records selected by an extension a partial intersection
+
+  SYNOPSIS
+    records_in_index_intersect_extension()
+     curr            partial intersection plan to be extended
+     ext_index_scan  the evaluated extension of this partial plan
+
+  DESCRIPTION
+    The function provides an estimate for the number of records in the
+    intersection of the partial index intersection curr with the index
+    ext_index_scan. If all intersected indexes does not have common columns
+    then  the function returns an exact estimate (assuming there are no
+    correlations between values in the columns). If the intersected indexes
+    have common  columns the function returns an upper bound for the number
+    of records in the intersection provided that the intersection of curr
+    with ext_index_scan can is expected to have less records than the expected
+    number of records in the partial intersection curr. In this case the
+    function also assigns the bitmap of the columns in the extended 
+    intersection to ext_index_scan->used_fields.
+    If the function cannot expect that the number of records in the extended
+    intersection is less that the expected number of records #r in curr then
+    the function returns a number bigger than #r.
+
+  NOTES
+   See the comment before the desription of the function that explains the
+   reasoning used  by this function.
+    
+  RETURN
+    The expected number of rows in the extended index intersection
+*/
+
+static
+ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                             INDEX_SCAN_INFO *ext_index_scan)
+{
+  KEY *key_info= ext_index_scan->key_info;
+  KEY_PART_INFO* key_part= key_info->key_part;
+  uint used_key_parts= ext_index_scan->used_key_parts;
+  MY_BITMAP *used_fields= &ext_index_scan->used_fields;
+  
+  if (!curr->length)
+  {
+    /* 
+      If this the first index in the intersection just mark the
+      fields in the used_fields bitmap and return the expected
+      number of records in the range scan for the index provided
+      by the range optimizer.
+    */ 
+    set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts);
+    return ext_index_scan->records;
+  }
+
+  uint i;
+  bool better_selectivity= FALSE;
+  ha_rows records= curr->records;
+  MY_BITMAP *curr_intersect_fields= curr->intersect_fields; 
+  for (i= 0; i < used_key_parts; i++, key_part++)
+  {
+    if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1))
+      break;
+  }
+  if (i)
+  {
+    ha_rows table_cardinality= curr->common_info->table_cardinality;
+    ha_rows ext_records= ext_index_scan->records;
+    if (i < used_key_parts)
+    {
+      ulong *rec_per_key= key_info->rec_per_key+i-1;
+      ulong f1= rec_per_key[0] ? rec_per_key[0] : 1;
+      ulong f2= rec_per_key[1] ? rec_per_key[1] : 1;
+      ext_records= (ha_rows) ((double) ext_records / f2 * f1);
+    }
+    if (ext_records < table_cardinality)
+    {
+      better_selectivity= TRUE;
+      records= (ha_rows) ((double) records / table_cardinality *
+			  ext_records);
+      bitmap_copy(used_fields, curr_intersect_fields);
+      key_part= key_info->key_part;
+      for (uint j= 0; j < used_key_parts; j++, key_part++)
+        bitmap_set_bit(used_fields, key_part->fieldnr-1);
+    }
+  }
+  return !better_selectivity ? records+1 :
+                               !records ? 1 : records;
+}
+
+
+/* 
+  Estimate the cost a binary search within disjoint cpk range intervals
+
+  Number of comparisons to check whether a cpk value satisfies
+  the cpk range condition = log2(cpk_scan->range_count).
+*/ 
+
+static inline
+double get_cpk_filter_cost(ha_rows filtered_records, 
+                           INDEX_SCAN_INFO *cpk_scan,
+                           double compare_factor)
+{
+  return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) *
+           filtered_records;
+}
+
+
+/*
+  Check whether a patial index intersection plan can be extended 
+
+  SYNOPSIS
+    check_index_intersect_extension()
+     curr            partial intersection plan to be extended
+     ext_index_scan  a possible extension of this plan to be checked
+     next       OUT  the structure to be filled for the extended plan 
+
+  DESCRIPTION
+    The function checks whether it makes sense to extend the index
+    intersection plan adding the index ext_index_scan, and, if this
+    the case, the function fills in the structure for the extended plan.
+
+  RETURN
+    TRUE      if it makes sense to extend the given plan 
+    FALSE     otherwise
+*/
+
+static
+bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+                                     INDEX_SCAN_INFO *ext_index_scan,
+                                     PARTIAL_INDEX_INTERSECT_INFO *next)
+{
+  ha_rows records;
+  ha_rows records_sent_to_unique;
+  double cost;
+  ha_rows ext_index_scan_records= ext_index_scan->records;
+  ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out;
+  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
+  double cutoff_cost= common_info->cutoff_cost;
+  uint idx= curr->length;
+  next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost;
+  if (next->index_read_cost > cutoff_cost)
+    return FALSE; 
+
+  if ((next->in_memory= curr->in_memory))
+    next->in_memory_cost= curr->in_memory_cost;
+
+  next->intersect_fields= &ext_index_scan->used_fields;
+  next->filtered_scans= curr->filtered_scans;
+
+  records_sent_to_unique= curr->records_sent_to_unique;
+
+  next->use_cpk_filter= FALSE;
+
+  /* Calculate the cost of using a Unique object for index intersection */
+  if (idx && next->in_memory)
+  { 
+    /* 
+      All rowids received from the first scan are expected in one unique tree
+    */
+    ha_rows elems_in_tree= common_info->search_scans[0]->records-
+                           common_info->search_scans[0]->filtered_out ;
+    next->in_memory_cost+= Unique::get_search_cost(elems_in_tree,
+                                                   common_info->compare_factor)* 
+                             ext_index_scan_records;
+    cost= next->in_memory_cost;
+  }
+  else
+  {
+    uint *buff_elems= common_info->buff_elems;
+    uint key_size= common_info->key_size;
+    uint compare_factor= common_info->compare_factor;         
+    ulonglong max_memory_size= common_info->max_memory_size; 
+    
+    records_sent_to_unique+= ext_index_scan_records;
+    cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size,
+                               max_memory_size, compare_factor, TRUE,
+                               &next->in_memory);
+    if (records_filtered_out_by_cpk)
+    {
+      /* Check whether using cpk filter for this scan is beneficial */
+
+      double cost2;
+      bool in_memory2;
+      ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk;
+      cost2=  Unique::get_use_cost(buff_elems, (size_t) records2, key_size,
+                                   max_memory_size, compare_factor, TRUE,
+                                   &in_memory2);
+      cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan,
+                                  compare_factor);
+      if (cost > cost2 + COST_EPS)
+      {
+        cost= cost2;
+        next->in_memory= in_memory2;
+        next->use_cpk_filter= TRUE;
+        records_sent_to_unique= records2;
+      }
+
+    }   
+    if (next->in_memory)
+      next->in_memory_cost= cost;
+  }
+
+  if (next->use_cpk_filter)
+  {
+    next->filtered_scans.set_bit(ext_index_scan->keynr);
+    bitmap_union(&ext_index_scan->used_fields,
+                 &common_info->cpk_scan->used_fields);
+  }
+  next->records_sent_to_unique= records_sent_to_unique;
+       
+  records= records_in_index_intersect_extension(curr, ext_index_scan);
+  if (idx && records > curr->records)
+    return FALSE;
+  if (next->use_cpk_filter && curr->filtered_scans.is_clear_all())
+    records-= records_filtered_out_by_cpk;
+  next->records= records;
+
+  cost+= next->index_read_cost;
+  if (cost >= cutoff_cost)
+    return FALSE;
+
+  cost+= get_sweep_read_cost(common_info->param, records);
+
+  next->cost= cost;
+  next->length= curr->length+1;
+
+  return TRUE;
+}
+
+
+/*
+  Search for the cheapest extensions of range scans used to access a table    
+
+  SYNOPSIS
+    find_index_intersect_best_extension()
+      curr        partial intersection to evaluate all possible extension for 
+
+  DESCRIPTION
+    The function tries to extend the partial plan curr in all possible ways
+    to look for a cheapest index intersection whose cost less than the 
+    cut off value set in curr->common_info.cutoff_cost. 
+*/
+
+static 
+void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
+{
+  PARTIAL_INDEX_INTERSECT_INFO next;
+  COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
+  INDEX_SCAN_INFO **index_scans= common_info->search_scans;
+  uint idx= curr->length;
+  INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx];
+  double cost= curr->cost;
+
+  if (cost + COST_EPS < common_info->best_cost)
+  {
+    common_info->best_cost= cost;
+    common_info->best_length= curr->length;
+    common_info->best_records= curr->records;
+    common_info->filtered_scans= curr->filtered_scans;
+    /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */
+    common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all();
+    uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length;
+    memcpy(common_info->best_intersect, common_info->search_scans, sz);
+    common_info->cutoff_cost= cost;
+  }   
+
+  if (!(*rem_first_index_scan_ptr))
+    return;  
+
+  next.common_info= common_info;
+ 
+  INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr;
+  for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr;
+       *index_scan_ptr; index_scan_ptr++)
+  {
+    *rem_first_index_scan_ptr= *index_scan_ptr;
+    *index_scan_ptr= rem_first_index_scan;
+    if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next))
+      find_index_intersect_best_extension(&next);
+    *index_scan_ptr= *rem_first_index_scan_ptr;
+    *rem_first_index_scan_ptr= rem_first_index_scan;
+  }
+}
+
+
+/*
+  Get the plan of the best intersection of range scans used to access a table    
+
+  SYNOPSIS
+    get_best_index_intersect()
+      param         common info about index ranges
+      tree          tree of ranges for indexes than can be intersected
+      read_time     cut off value for the evaluated plans 
+
+  DESCRIPTION
+    The function looks for the cheapest index intersection of the range
+    scans to access a table. The info about the ranges for all indexes
+    is provided by the range optimizer and is passed through the
+    parameters param and tree. Any plan whose cost is greater than read_time
+    is rejected. 
+    After the best index intersection is found the function constructs
+    the structure that manages the execution by the chosen plan.
+
+  RETURN
+    Pointer to the generated execution structure if a success,
+    0 - otherwise.
+*/
+
+static
+TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
+                                              double read_time)
+{
+  uint i;
+  uint count;
+  TRP_RANGE **cur_range;
+  TRP_RANGE **range_scans;
+  INDEX_SCAN_INFO *index_scan;
+  COMMON_INDEX_INTERSECT_INFO common;
+  PARTIAL_INDEX_INTERSECT_INFO init;
+  TRP_INDEX_INTERSECT *intersect_trp= NULL;
+  TABLE *table= param->table;
+  
+  
+  DBUG_ENTER("get_best_index_intersect");
+
+  if (prepare_search_best_index_intersect(param, tree, &common, &init,
+                                          read_time))
+    DBUG_RETURN(NULL);
+
+  find_index_intersect_best_extension(&init);
+
+  if (common.best_length <= 1 && !common.best_uses_cpk)
+    DBUG_RETURN(NULL);
+
+  if (common.best_uses_cpk)
+  {
+    memmove((char *) (common.best_intersect+1), (char *) common.best_intersect,
+            sizeof(INDEX_SCAN_INFO *) * common.best_length);
+    common.best_intersect[0]= common.cpk_scan;
+    common.best_length++;
+  }
+
+  count= common.best_length;
+
+  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
+                                            sizeof(TRP_RANGE *)*
+                                            count)))
+    DBUG_RETURN(NULL);
+
+  for (i= 0, cur_range= range_scans; i < count; i++)
+  {
+    index_scan= common.best_intersect[i];
+    if ((*cur_range= new (param->mem_root) TRP_RANGE(index_scan->sel_arg,
+                                                     index_scan->idx, 0)))
+    {  
+      TRP_RANGE *trp= *cur_range;  
+      trp->read_cost= index_scan->index_read_cost;  
+      trp->records= index_scan->records;        
+      trp->is_ror= FALSE;
+      trp->mrr_buf_size= 0;
+      table->intersect_keys.set_bit(index_scan->keynr);
+      cur_range++;
+    }
+  }
+  
+  count= tree->index_scans_end - tree->index_scans;
+  for (i= 0; i < count; i++)
+  {
+    index_scan= tree->index_scans[i]; 
+    if (!table->intersect_keys.is_set(index_scan->keynr))
+    {
+      for (uint j= 0; j < common.best_length; j++)
+      {
+	INDEX_SCAN_INFO *scan= common.best_intersect[j];
+        if (same_index_prefix(index_scan->key_info, scan->key_info,
+                              scan->used_key_parts))
+	{
+          table->intersect_keys.set_bit(index_scan->keynr);
+          break;
+        } 
+      }
+    }
+  }
+      
+  if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT))
+  {
+    intersect_trp->read_cost= common.best_cost;
+    intersect_trp->records= common.best_records;
+    intersect_trp->range_scans= range_scans;
+    intersect_trp->range_scans_end= cur_range;
+    intersect_trp->filtered_scans= common.filtered_scans;
+  }
+  DBUG_RETURN(intersect_trp);
+}
+
+
+typedef struct st_ror_scan_info : INDEX_SCAN_INFO
+{ 
 } ROR_SCAN_INFO;
 
 
@@ -4006,7 +5728,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
   ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
                              param->table->file->ref_length);
   ror_scan->sel_arg= sel_arg;
-  ror_scan->records= param->table->quick_rows[keynr];
+  ror_scan->records= param->quick_rows[keynr];
 
   if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root,
                                                 param->fields_bitmap_size)))
@@ -4026,8 +5748,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
   }
   ror_scan->index_read_cost=
-    param->table->file->keyread_time(ror_scan->keynr, 1,
-                                     param->table->quick_rows[ror_scan->keynr]);
+    param->table->file->keyread_time(ror_scan->keynr, 1, ror_scan->records);
   DBUG_RETURN(ror_scan);
 }
 
@@ -4312,7 +6033,7 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,
   }
   if (!prev_covered)
   {
-    double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) /
+    double tmp= rows2double(info->param->quick_rows[scan->keynr]) /
                 rows2double(prev_records);
     DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
     selectivity_mult *= tmp;
@@ -4391,7 +6112,7 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
   }
   else
   {
-    info->index_records += info->param->table->quick_rows[ror_scan->keynr];
+    info->index_records += info->param->quick_rows[ror_scan->keynr];
     info->index_scan_costs += ror_scan->index_read_cost;
     bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
     if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
@@ -4501,7 +6222,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   ROR_SCAN_INFO **cur_ror_scan;
   ROR_SCAN_INFO *cpk_scan= NULL;
   uint cpk_no;
-  bool cpk_scan_used= FALSE;
 
   if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                      sizeof(ROR_SCAN_INFO*)*
@@ -4513,11 +6233,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
   {
     ROR_SCAN_INFO *scan;
+    uint key_no;
     if (!tree->ror_scans_map.is_set(idx))
       continue;
+    key_no= param->real_keynr[idx];
+    if (key_no != cpk_no &&
+        param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
+    {
+      /* Ignore clustering keys */
+      tree->n_ror_scans--;
+      continue;
+    }
     if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
       return NULL;
-    if (param->real_keynr[idx] == cpk_no)
+    if (key_no == cpk_no)
     {
       cpk_scan= scan;
       tree->n_ror_scans--;
@@ -4603,15 +6332,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
   {
     if (ror_intersect_add(intersect, cpk_scan, TRUE) && 
         (intersect->total_cost < min_cost))
-    {
-      cpk_scan_used= TRUE;
       intersect_best= intersect; //just set pointer here
-    }
   }
+  else
+    cpk_scan= 0;                                // Don't use cpk_scan
 
   /* Ok, return ROR-intersect plan if we have found one */
   TRP_ROR_INTERSECT *trp= NULL;
-  if (min_cost < read_time && (cpk_scan_used || best_num > 1))
+  if (min_cost < read_time && (cpk_scan || best_num > 1))
   {
     if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
       DBUG_RETURN(trp);
@@ -4630,7 +6358,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
     set_if_smaller(param->table->quick_condition_rows, best_rows);
     trp->records= best_rows;
     trp->index_scan_costs= intersect_best->index_scan_costs;
-    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
+    trp->cpk_scan= cpk_scan;
     DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                         "cost %g, records %lu",
                         trp->read_cost, (ulong) trp->records));
@@ -4642,7 +6370,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
 /*
   Get best covering ROR-intersection.
   SYNOPSIS
-    get_best_covering_ror_intersect()
+    get_best_ntersectcovering_ror_intersect()
       param     Parameter from test_quick_select function.
       tree      SEL_TREE with sets of intervals for different keys.
       read_time Don't return table read plans with cost > read_time.
@@ -4814,11 +6542,12 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool update_tbl_stats,
                                        double read_time)
 {
-  int idx;
+  uint idx;
   SEL_ARG **key,**end, **key_to_read= NULL;
   ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
+  uint    UNINIT_VAR(best_mrr_flags),            /* protected by key_to_read */
+          UNINIT_VAR(best_buf_size);             /* protected by key_to_read */
   TRP_RANGE* read_plan= NULL;
-  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
   DBUG_ENTER("get_key_scans_params");
   /*
     Note that there may be trees that have type SEL_TREE::KEY but contain no
@@ -4829,14 +6558,23 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       "tree scans"););
   tree->ror_scans_map.clear_all();
   tree->n_ror_scans= 0;
-  for (idx= 0,key=tree->keys, end=key+param->keys;
-       key != end ;
-       key++,idx++)
+  tree->index_scans= 0;
+  if (!tree->keys_map.is_clear_all())
+  {
+    tree->index_scans=
+      (INDEX_SCAN_INFO **) alloc_root(param->mem_root,
+                                      sizeof(INDEX_SCAN_INFO *) * param->keys);
+  }
+  tree->index_scans_end= tree->index_scans;                                                  
+  for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
   {
-    ha_rows found_records;
-    double found_read_time;
     if (*key)
     {
+      ha_rows found_records;
+      COST_VECT cost;
+      double found_read_time;
+      uint mrr_flags, buf_size;
+      INDEX_SCAN_INFO *index_scan;
       uint keynr= param->real_keynr[idx];
       if ((*key)->type == SEL_ARG::MAYBE_KEY ||
           (*key)->maybe_flag)
@@ -4845,48 +6583,37 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
       bool read_index_only= index_read_must_be_used ? TRUE :
                             (bool) param->table->covering_keys.is_set(keynr);
 
-      found_records= check_quick_select(param, idx, *key, update_tbl_stats);
-      if (param->is_ror_scan)
+      found_records= check_quick_select(param, idx, read_index_only, *key,
+                                        update_tbl_stats, &mrr_flags,
+                                        &buf_size, &cost);
+
+      if (found_records != HA_POS_ERROR && tree->index_scans &&
+          (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root,
+						     sizeof(INDEX_SCAN_INFO))))
+      {
+        index_scan->idx= idx;
+        index_scan->keynr= keynr;
+        index_scan->key_info= &param->table->key_info[keynr];
+        index_scan->used_key_parts= param->max_key_part+1;
+        index_scan->range_count= param->range_count;
+        index_scan->records= found_records;
+        index_scan->sel_arg= *key;
+        *tree->index_scans_end++= index_scan;
+      }        
+      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
       {
         tree->n_ror_scans++;
         tree->ror_scans_map.set_bit(idx);
       }
-      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
-      if (found_records != HA_POS_ERROR && found_records > 2 &&
-          read_index_only &&
-          (param->table->file->index_flags(keynr, param->max_key_part,1) &
-           HA_KEYREAD_ONLY) &&
-          !(pk_is_clustered && keynr == param->table->s->primary_key))
-      {
-        /*
-          We can resolve this by only reading through this key. 
-          0.01 is added to avoid races between range and 'index' scan.
-        */
-        found_read_time= param->table->file->keyread_time(keynr, 1, found_records) +
-                         cpu_cost + 0.01;
-      }
-      else
-      {
-        /*
-          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
-          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
-        */
-	found_read_time= param->table->file->read_time(keynr,
-                                                       param->range_count,
-                                                       found_records) +
-			 cpu_cost + 0.01;
-      }
-      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
-                         param->table->key_info[keynr].name, found_read_time,
-                         read_time));
-
-      if (read_time > found_read_time && found_records != HA_POS_ERROR)
+      if (found_records != HA_POS_ERROR &&
+          read_time > (found_read_time= cost.total_cost()))
       {
         read_time=    found_read_time;
         best_records= found_records;
         key_to_read=  key;
+        best_mrr_flags= mrr_flags;
+        best_buf_size=  buf_size;
       }
-
     }
   }
 
@@ -4895,11 +6622,13 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
   if (key_to_read)
   {
     idx= key_to_read - tree->keys;
-    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
+                                                    best_mrr_flags)))
     {
       read_plan->records= best_records;
       read_plan->is_ror= tree->ror_scans_map.is_set(idx);
       read_plan->read_cost= read_time;
+      read_plan->mrr_buf_size= best_buf_size;
       DBUG_PRINT("info",
                  ("Returning range plan for key %s, cost %g, records %lu",
                   param->table->key_info[param->real_keynr[idx]].name,
@@ -4940,6 +6669,36 @@ QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
   return quick_imerge;
 }
 
+
+QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param,
+                                                bool retrieve_full_rows,
+                                                MEM_ROOT *parent_alloc)
+{
+  QUICK_INDEX_INTERSECT_SELECT *quick_intersect;
+  QUICK_RANGE_SELECT *quick;
+  /* index_merge always retrieves full rows, ignore retrieve_full_rows */
+  if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd, param->table)))
+    return NULL;
+
+  quick_intersect->records= records;
+  quick_intersect->read_time= read_cost;
+  quick_intersect->filtered_scans= filtered_scans;
+  for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end;
+       range_scan++)
+  {
+    if (!(quick= (QUICK_RANGE_SELECT*)
+          ((*range_scan)->make_quick(param, FALSE, &quick_intersect->alloc)))||
+        quick_intersect->push_quick_back(quick))
+    {
+      delete quick;
+      delete quick_intersect;
+      return NULL;
+    }
+  }
+  return quick_intersect;
+}
+
+
 QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                               bool retrieve_full_rows,
                                               MEM_ROOT *parent_alloc)
@@ -4962,7 +6721,9 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     for (; first_scan != last_scan;++first_scan)
     {
       if (!(quick= get_quick_select(param, (*first_scan)->idx,
-                                    (*first_scan)->sel_arg, alloc)) ||
+                                    (*first_scan)->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)) ||
           quick_intrsect->push_quick_back(alloc, quick))
       {
         delete quick_intrsect;
@@ -4972,7 +6733,9 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     if (cpk_scan)
     {
       if (!(quick= get_quick_select(param, cpk_scan->idx,
-                                    cpk_scan->sel_arg, alloc)))
+                                    cpk_scan->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)))
       {
         delete quick_intrsect;
         DBUG_RETURN(NULL);
@@ -5385,11 +7148,10 @@ static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param,
   Item_equal *item_equal= field_item->item_equal;
   if (item_equal)
   {
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
-    while ((item= it++))
+    Item_equal_fields_iterator it(*item_equal);
+    while (it++)
     {
-      Field *f= item->field;
+      Field *f= it.get_curr_field();
       if (field->eq(f))
         continue;
       if (!((ref_tables | f->table->map) & param_comp))
@@ -5452,7 +7214,7 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
     DBUG_RETURN(tree);
   }
   /* Here when simple cond */
-  if (cond->const_item())
+  if (cond->const_item() && !cond->is_expensive())
   {
     /*
       During the cond->val_int() evaluation we can come across a subselect 
@@ -5538,13 +7300,13 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
   case Item_func::MULT_EQUAL_FUNC:
   {
     Item_equal *item_equal= (Item_equal *) cond;    
-    if (!(value= item_equal->get_const()))
+    if (!(value= item_equal->get_const()) || value->is_expensive())
       DBUG_RETURN(0);
-    Item_equal_iterator it(*item_equal);
+    Item_equal_fields_iterator it(*item_equal);
     ref_tables= value->used_tables();
-    while ((field_item= it++))
+    while (it++)
     {
-      Field *field= field_item->field;
+      Field *field= it.get_curr_field();
       Item_result cmp_type= field->cmp_type();
       if (!((ref_tables | field->table->map) & param_comp))
       {
@@ -5571,6 +7333,9 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
     }
     else
       DBUG_RETURN(0);
+    if (value && value->is_expensive())
+      DBUG_RETURN(0);
+
     ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv);
   }
 
@@ -5619,6 +7384,7 @@ get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field,
 	  DBUG_RETURN(0);			// OOM
       }
       sel_arg->part=(uchar) key_part->part;
+      sel_arg->max_part_no= sel_arg->part+1;
       tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg);
       tree->keys_map.set_bit(key_part->key);
     }
@@ -5639,7 +7405,6 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
   SEL_ARG *tree= 0;
   MEM_ROOT *alloc= param->mem_root;
   uchar *str;
-  ulong orig_sql_mode;
   int err;
   DBUG_ENTER("get_mm_leaf");
 
@@ -5807,16 +7572,8 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
     We can't always use indexes when comparing a string index to a number
     cmp_type() is checked to allow compare of dates to numbers
   */
-  if (field->result_type() == STRING_RESULT &&
-      value->result_type() != STRING_RESULT &&
-      field->cmp_type() != value->result_type())
+  if (field->cmp_type() == STRING_RESULT && value->cmp_type() != STRING_RESULT)
     goto end;
-  /* For comparison purposes allow invalid dates like 2000-01-32 */
-  orig_sql_mode= field->table->in_use->variables.sql_mode;
-  if (value->real_item()->type() == Item::STRING_ITEM &&
-      (field->type() == MYSQL_TYPE_DATE ||
-       field->type() == MYSQL_TYPE_DATETIME))
-    field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
   err= value->save_in_field_no_warnings(field, 1);
   if (err > 0)
   {
@@ -5828,7 +7585,6 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
       {
         tree= new (alloc) SEL_ARG(field, 0, 0);
         tree->type= SEL_ARG::IMPOSSIBLE;
-        field->table->in_use->variables.sql_mode= orig_sql_mode;
         goto end;
       }
       else
@@ -5862,10 +7618,7 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
           */
         }
         else
-        {
-          field->table->in_use->variables.sql_mode= orig_sql_mode;
           goto end;
-        }
       }
     }
 
@@ -5888,12 +7641,10 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field,
   }
   else if (err < 0)
   {
-    field->table->in_use->variables.sql_mode= orig_sql_mode;
     /* This happens when we try to insert a NULL field in a not null column */
     tree= &null_element;                        // cmp with NULL is never TRUE
     goto end;
   }
-  field->table->in_use->variables.sql_mode= orig_sql_mode;
 
   /*
     Any sargable predicate except "<=>" involving NULL as a constant is always
@@ -6068,13 +7819,138 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2)
   return root;
 }
 
-#define CLONE_KEY1_MAYBE 1
-#define CLONE_KEY2_MAYBE 2
-#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1)
 
+/* 
+  Build a range tree for the conjunction of the range parts of two trees
 
-static SEL_TREE *
-tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
+  SYNOPSIS
+    and_range_trees()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first conjunct          
+      tree2           SEL_TREE for the second conjunct
+      result          SEL_TREE for the result
+
+  DESCRIPTION
+    This function takes range parts of two trees tree1 and tree2 and builds
+    a range tree for the conjunction of the formulas that these two range parts
+    represent.
+    More exactly: 
+    if the range part of tree1 represents the normalized formula 
+      R1_1 AND ... AND R1_k,
+    and the range part of tree2 represents the normalized formula
+      R2_1 AND ... AND R2_k,
+    then the range part of the result represents the formula:
+     RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i from [1..k]
+
+    The function assumes that tree1 is never equal to tree2. At the same
+    time the tree result can be the same as tree1 (but never as tree2).
+    If result==tree1 then rt replaces the range part of tree1 leaving
+    imerges as they are.
+    if result!=tree1 than it is assumed that the SEL_ARG trees in tree1 and
+    tree2 should be preserved. Otherwise they can be destroyed.
+
+  RETURN 
+    1    if the type the result tree is  SEL_TREE::IMPOSSIBLE
+    0    otherwise    
+*/
+
+static
+int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
+                    SEL_TREE *result)
+{
+  DBUG_ENTER("and_ranges");
+  key_map  result_keys;
+  result_keys.clear_all();
+  key_map anded_keys= tree1->keys_map;
+  anded_keys.merge(tree2->keys_map);
+  int key_no;
+  key_map::Iterator it(anded_keys);
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+  {
+    uint flag=0;
+    SEL_ARG *key1= tree1->keys[key_no];
+    SEL_ARG *key2= tree2->keys[key_no];
+    if (key1 && !key1->simple_key())
+      flag|= CLONE_KEY1_MAYBE;
+    if (key2 && !key2->simple_key())
+      flag|=CLONE_KEY2_MAYBE;
+    if (result != tree1)
+    { 
+      if (key1)
+        key1->incr_refs();
+      if (key2)
+        key2->incr_refs();
+    }
+    SEL_ARG *key;
+    if ((result->keys[key_no]= key =key_and(param, key1, key2, flag)))
+    {
+      if (key && key->type == SEL_ARG::IMPOSSIBLE)
+      {
+	result->type= SEL_TREE::IMPOSSIBLE;
+        DBUG_RETURN(1);
+      }
+      result_keys.set_bit(key_no);
+#ifdef EXTRA_DEBUG
+      if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) 
+        key->test_use_count(key);
+#endif
+    }
+  }
+  result->keys_map= result_keys;
+  DBUG_RETURN(0);
+}
+  
+
+/*
+  Build a SEL_TREE for a conjunction out of such trees for the conjuncts
+
+  SYNOPSIS
+    tree_and()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first conjunct          
+      tree2           SEL_TREE for the second conjunct
+
+  DESCRIPTION
+    This function builds a tree for the formula (A AND B) out of the trees
+    tree1 and tree2 that has been built for the formulas A and B respectively.
+
+    In a general case
+      tree1 represents the formula RT1 AND MT1,
+        where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
+      tree2 represents the formula RT2 AND MT2 
+        where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
+
+    The result tree will represent the formula of the the following structure:
+      RT AND MT1 AND MT2 AND RT1MT2 AND RT2MT1, such that
+        rt is a tree obtained by range intersection of trees tree1 and tree2,
+        RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2,
+        RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1,
+        where rt1m2_i (i=1,...,l2) is the result of the pushdown operation
+        of range tree rt1 into imerge m2_i, while rt2m1_j (j=1,...,l1) is the
+        result of the pushdown operation of range tree rt2 into imerge m1_j.
+
+    RT1MT2/RT2MT is empty if MT2/MT1 is empty.
+ 
+    The range intersection of two range trees is produced by the function
+    and_range_trees. The pushdown of a range tree to a imerge is performed
+    by the function imerge_list_and_tree. This function may produce imerges
+    containing only one range tree. Such trees are intersected with rt and 
+    the result of intersection is returned as the range part of the result
+    tree, while the corresponding imerges are removed altogether from its
+    imerge part. 
+    
+  NOTE.
+    The pushdown operation of range trees into imerges is needed to be able
+    to construct valid imerges for the condition like this:
+      key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2)
+
+  RETURN
+    The result tree, if a success
+    0 - otherwise.        
+*/
+
+static 
+SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2)
 {
   DBUG_ENTER("tree_and");
   if (!tree1)
@@ -6096,87 +7972,216 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
     tree1->type=SEL_TREE::KEY_SMALLER;
     DBUG_RETURN(tree1);
   }
-  key_map  result_keys;
-  result_keys.clear_all();
-  
-  /* Join the trees key per key */
-  SEL_ARG **key1,**key2,**end;
-  for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
-       key1 != end ; key1++,key2++)
+
+  if (!tree1->merges.is_empty())
+    imerge_list_and_tree(param, &tree1->merges, tree2);
+  if (!tree2->merges.is_empty())
+    imerge_list_and_tree(param, &tree2->merges, tree1);
+  if (and_range_trees(param, tree1, tree2, tree1))
+    DBUG_RETURN(tree1);
+  imerge_list_and_list(&tree1->merges, &tree2->merges);
+  eliminate_single_tree_imerges(param, tree1);
+  DBUG_RETURN(tree1);
+}
+
+
+/*
+  Eliminate single tree imerges in a SEL_TREE objects
+
+  SYNOPSIS
+    eliminate_single_tree_imerges()
+      param      Context info for the function
+      tree       SEL_TREE where single tree imerges are to be eliminated 
+
+  DESCRIPTION
+    For each imerge in 'tree' that contains only one disjunct tree, i.e.
+    for any imerge of the form m=rt, the function performs and operation
+    the range part of tree, replaces rt the with the result of anding and
+    removes imerge m from the the merge part of 'tree'.
+
+  RETURN VALUE
+    none          
+*/
+
+static
+void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree)
+{
+  SEL_IMERGE *imerge;
+  List<SEL_IMERGE> merges= tree->merges;
+  List_iterator<SEL_IMERGE> it(merges);
+  tree->merges.empty();
+  while ((imerge= it++))
   {
-    uint flag=0;
-    if (*key1 || *key2)
-    {
-      if (*key1 && !(*key1)->simple_key())
-	flag|=CLONE_KEY1_MAYBE;
-      if (*key2 && !(*key2)->simple_key())
-	flag|=CLONE_KEY2_MAYBE;
-      *key1=key_and(param, *key1, *key2, flag);
-      if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
-      {
-	tree1->type= SEL_TREE::IMPOSSIBLE;
-        DBUG_RETURN(tree1);
-      }
-      result_keys.set_bit(key1 - tree1->keys);
-#ifdef EXTRA_DEBUG
-        if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) 
-          (*key1)->test_use_count(*key1);
-#endif
+    if (imerge->trees+1 == imerge->trees_next)
+    {
+      tree= tree_and(param, tree, *imerge->trees);
+      it.remove();
     }
   }
-  tree1->keys_map= result_keys;
-  /* dispose index_merge if there is a "range" option */
-  if (!result_keys.is_clear_all())
-  {
-    tree1->merges.empty();
-    DBUG_RETURN(tree1);
-  }
+  tree->merges= merges;
+} 
 
-  /* ok, both trees are index_merge trees */
-  imerge_list_and_list(&tree1->merges, &tree2->merges);
-  DBUG_RETURN(tree1);
+
+/*
+  For two trees check that there are indexes with ranges in both of them  
+ 
+  SYNOPSIS
+    sel_trees_have_common_keys()
+      tree1           SEL_TREE for the first tree
+      tree2           SEL_TREE for the second tree
+      common_keys OUT bitmap of all indexes with ranges in both trees
+
+  DESCRIPTION
+    For two trees tree1 and tree1 the function checks if there are indexes
+    in their range parts such that SEL_ARG trees are defined for them in the
+    range parts of both trees. The function returns the bitmap of such 
+    indexes in the parameter common_keys.
+
+  RETURN 
+    TRUE    if there are such indexes (common_keys is nor empty)
+    FALSE   otherwise
+*/
+
+static
+bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, 
+                                key_map *common_keys)
+{
+  *common_keys= tree1->keys_map;
+  common_keys->intersect(tree2->keys_map);
+  return !common_keys->is_clear_all();
 }
 
 
 /*
-  Check if two SEL_TREES can be combined into one (i.e. a single key range
-  read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
-  using index_merge.
+  Check whether range parts of two trees can be ored for some indexes
+
+  SYNOPSIS
+    sel_trees_can_be_ored()
+      param              Context info for the function
+      tree1              SEL_TREE for the first tree
+      tree2              SEL_TREE for the second tree
+      common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees
+                        OUT: bitmap of all indexes that can be ored
+
+  DESCRIPTION
+    For two trees tree1 and tree2 and the bitmap common_keys containing
+    bits for indexes that have SEL_ARG trees in range parts of both trees
+    the function checks if there are indexes for which SEL_ARG trees can
+    be ored. Two SEL_ARG trees for the same index can be ored if the most
+    major components of the index used in these trees coincide. If the 
+    SEL_ARG trees for an index cannot be ored the function clears the bit
+    for this index in the bitmap common_keys.
+
+    The function does not verify that indexes marked in common_keys really
+    have SEL_ARG trees in both tree1 and tree2. It assumes that this is true.
+
+  NOTE
+    The function sel_trees_can_be_ored is usually used in pair with the
+    function sel_trees_have_common_keys.
+
+  RETURN
+    TRUE    if there are indexes for which SEL_ARG trees can be ored 
+    FALSE   otherwise
 */
 
-bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, 
-                           RANGE_OPT_PARAM* param)
+static
+bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
+                           SEL_TREE *tree1, SEL_TREE *tree2, 
+                           key_map *common_keys)
 {
-  key_map common_keys= tree1->keys_map;
   DBUG_ENTER("sel_trees_can_be_ored");
-  common_keys.intersect(tree2->keys_map);
+  if (!sel_trees_have_common_keys(tree1, tree2, common_keys))
+    DBUG_RETURN(FALSE);
+  int key_no;
+  key_map::Iterator it(*common_keys);
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+  {
+    DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]);
+    /* Trees have a common key, check if they refer to the same key part */
+    if (tree1->keys[key_no]->part != tree2->keys[key_no]->part)
+      common_keys->clear_bit(key_no);
+  }
+  DBUG_RETURN(!common_keys->is_clear_all());
+}
+
+/*
+  Check whether range parts of two trees must be ored for some indexes
+
+  SYNOPSIS
+    sel_trees_must_be_ored()
+      param              Context info for the function
+      tree1              SEL_TREE for the first tree
+      tree2              SEL_TREE for the second tree
+      ordable_keys       bitmap of SEL_ARG trees that can be ored
+
+  DESCRIPTION
+    For two trees tree1 and tree2 the function checks whether they must be
+    ored. The function assumes that the bitmap ordable_keys contains bits for
+    those corresponding pairs of SEL_ARG trees from tree1 and tree2 that can
+    be ored.
+    We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees
+    r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both
+    of them are marked in ordable_keys, can be merged.
+    
+  NOTE
+    The function sel_trees_must_be_ored as a rule is used in pair with the
+    function sel_trees_can_be_ored.
+
+  RETURN
+    TRUE    if there are indexes for which SEL_ARG trees must be ored 
+    FALSE   otherwise
+*/
+
+static
+bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
+                            SEL_TREE *tree1, SEL_TREE *tree2,
+                            key_map oredable_keys)
+{
+  key_map tmp;
+  DBUG_ENTER("sel_trees_must_be_ored");
 
-  if (common_keys.is_clear_all())
+  tmp= tree1->keys_map;
+  tmp.merge(tree2->keys_map);
+  tmp.subtract(oredable_keys);
+  if (!tmp.is_clear_all())
     DBUG_RETURN(FALSE);
 
-  /* trees have a common key, check if they refer to same key part */
-  SEL_ARG **key1,**key2;
-  for (uint key_no=0; key_no < param->keys; key_no++)
+  int idx1, idx2;
+  key_map::Iterator it1(oredable_keys);
+  while ((idx1= it1++) != key_map::Iterator::BITMAP_END)
   {
-    if (common_keys.is_set(key_no))
+    KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part;
+    KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no;
+    key_map::Iterator it2(oredable_keys);
+    while ((idx2= it2++) != key_map::Iterator::BITMAP_END)
     {
-      key1= tree1->keys + key_no;
-      key2= tree2->keys + key_no;
-      if ((*key1)->part == (*key2)->part)
-      {
-        DBUG_RETURN(TRUE);
+      if (idx2 <= idx1)
+        continue;
+      
+      KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part;
+      KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no;
+      KEY_PART *part1, *part2;
+      for (part1= key1_init, part2= key2_init;
+           part1 < key1_end && part2 < key2_end;
+           part1++, part2++)
+      { 
+        if (!part1->field->eq(part2->field))
+          DBUG_RETURN(FALSE);
       }
     }
   }
-  DBUG_RETURN(FALSE);
-}
+      
+  DBUG_RETURN(TRUE);
+}  
 
 
 /*
-  Remove the trees that are not suitable for record retrieval.
+  Remove the trees that are not suitable for record retrieval
+
   SYNOPSIS
-    param  Range analysis parameter
-    tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
+    remove_nonrange_trees()
+      param  Context info for the function
+      tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
  
   DESCRIPTION
     This function walks through tree->keys[] and removes the SEL_ARG* trees
@@ -6187,41 +8192,36 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
 
     A SEL_ARG* tree cannot be used to construct quick select if it has
     tree->part != 0. (e.g. it could represent "keypart2 < const").
-
-    WHY THIS FUNCTION IS NEEDED
     
     Normally we allow construction of SEL_TREE objects that have SEL_ARG
-    trees that do not allow quick range select construction. For example for
-    " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
+    trees that do not allow quick range select construction.
+    For example:
+    for " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
     tree1= SEL_TREE { SEL_ARG{keypart1=1} }
     tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
                                                from this
     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
                                    tree.
-    
-    There is an exception though: when we construct index_merge SEL_TREE,
-    any SEL_ARG* tree that cannot be used to construct quick range select can
-    be removed, because current range analysis code doesn't provide any way
-    that tree could be later combined with another tree.
-    Consider an example: we should not construct
-    st1 = SEL_TREE { 
-      merges = SEL_IMERGE { 
-                            SEL_TREE(t.key1part1 = 1), 
-                            SEL_TREE(t.key2part2 = 2)   -- (*)
-                          } 
-                   };
-    because 
-     - (*) cannot be used to construct quick range select, 
-     - There is no execution path that would cause (*) to be converted to 
-       a tree that could be used.
-
-    The latter is easy to verify: first, notice that the only way to convert
-    (*) into a usable tree is to call tree_and(something, (*)).
-
-    Second look at what tree_and/tree_or function would do when passed a
-    SEL_TREE that has the structure like st1 tree has, and conlcude that 
-    tree_and(something, (*)) will not be called.
 
+    Another example:
+    tree3= SEL_TREE { SEL_ARG{key1part1 = 1} }
+    tree4= SEL_TREE { SEL_ARG{key2part2 = 2} }  -- can't make quick range select
+                                               from this
+    call tree_or(tree3, tree4) -- creates a SEL_MERGE ot of which no index
+    merge can be constructed, but it is potentially useful, as anding it with
+    tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that
+    represents the formula
+      key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2 
+    for which an index merge can be built. 
+
+    Any final SEL_TREE may contain SEL_ARG trees for which no quick select
+    can be built. Such SEL_ARG trees should be removed from the range part
+    before different range scans are evaluated. Such SEL_ARG trees also should
+    be removed from all range trees of each index merge before different
+    possible index merge plans are evaluated. If after this removal one
+    of the range trees in the index merge becomes empty the whole index merge
+    must be discarded.
+       
   RETURN
     0  Ok, some suitable trees left
     1  No tree->keys[] left.
@@ -6247,6 +8247,74 @@ static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
 }
 
 
+/*
+  Build a SEL_TREE for a disjunction out of such trees for the disjuncts
+
+  SYNOPSIS
+    tree_or()
+      param           Context info for the operation
+      tree1           SEL_TREE for the first disjunct          
+      tree2           SEL_TREE for the second disjunct
+
+  DESCRIPTION
+    This function builds a tree for the formula (A OR B) out of the trees
+    tree1 and tree2 that has been built for the formulas A and B respectively.
+
+    In a general case
+      tree1 represents the formula RT1 AND MT1,
+        where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1;
+      tree2 represents the formula RT2 AND MT2 
+        where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2.
+
+    The function constructs the result tree according the formula
+      (RT1 OR RT2) AND (MT1 OR RT1) AND (MT2 OR RT2) AND (MT1 OR MT2)
+    that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2).
+
+    To limit the number of produced imerges the function considers
+    a weaker formula than the original one:
+      (RT1 AND M1_1) OR (RT2 AND M2_1) 
+    that is equivalent to:
+      (RT1 OR RT2)                  (1)
+        AND 
+      (M1_1 OR M2_1)                (2)
+        AND
+      (M1_1 OR RT2)                 (3)
+        AND
+      (M2_1 OR RT1)                 (4)
+
+    For the first conjunct (1) the function builds a tree with a range part
+    and, possibly, one imerge. For the other conjuncts (2-4)the function
+    produces sets of imerges. All constructed imerges are included into the
+    result tree.
+    
+    For the formula (1) the function produces the tree representing a formula  
+    of the structure RT [AND M], such that:
+     - the range tree rt contains the result of oring SEL_ARG trees from rt1
+       and rt2
+     - the imerge m consists of two range trees rt1 and rt2.
+    The imerge m is added if it's not true that rt1 and rt2 must be ored
+    If rt1 and rt2 can't be ored rt is empty and only m is produced for (1).
+
+    To produce imerges for the formula (2) the function calls the function
+    imerge_list_or_list passing it the merge parts of tree1 and tree2 as
+    parameters.
+
+    To produce imerges for the formula (3) the function calls the function
+    imerge_list_or_tree passing it the imerge m1_1 and the range tree rt2 as
+    parameters. Similarly, to produce imerges for the formula (4) the function
+    calls the function imerge_list_or_tree passing it the imerge m2_1 and the
+    range tree rt1.
+
+    If rt1 is empty then the trees for (1) and (4) are empty.
+    If rt2 is empty then the trees for (1) and (3) are empty.
+    If mt1 is empty then the trees for (2) and (3) are empty.
+    If mt2 is empty then the trees for (2) and (4) are empty.
+
+  RETURN
+    The result tree for the operation if a success
+    0 - otherwise
+*/
+
 static SEL_TREE *
 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
 {
@@ -6262,74 +8330,100 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
   if (tree2->type == SEL_TREE::MAYBE)
     DBUG_RETURN(tree2);
 
-  SEL_TREE *result= 0;
-  key_map  result_keys;
-  result_keys.clear_all();
-  if (sel_trees_can_be_ored(tree1, tree2, param))
+  SEL_TREE *result= NULL;
+  key_map result_keys;
+  key_map ored_keys;
+  SEL_TREE *rtree[2]= {NULL,NULL};
+  SEL_IMERGE *imerge[2]= {NULL, NULL};
+  bool no_ranges1= tree1->without_ranges();
+  bool no_ranges2= tree2->without_ranges();
+  bool no_merges1= tree1->without_imerges();
+  bool no_merges2= tree2->without_imerges();
+  if (!no_ranges1 && !no_merges2)
   {
-    /* Join the trees key per key */
-    SEL_ARG **key1,**key2,**end;
-    for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
-         key1 != end ; key1++,key2++)
-    {
-      *key1=key_or(param, *key1, *key2);
-      if (*key1)
-      {
-        result=tree1;				// Added to tree1
-        result_keys.set_bit(key1 - tree1->keys);
-#ifdef EXTRA_DEBUG
-        if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) 
-          (*key1)->test_use_count(*key1);
-#endif
-      }
-    }
-    if (result)
-      result->keys_map= result_keys;
+    rtree[0]= new SEL_TREE(tree1, TRUE, param);
+    imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param);
   }
-  else
+  if (!no_ranges2 && !no_merges1)
   {
-    /* ok, two trees have KEY type but cannot be used without index merge */
-    if (tree1->merges.is_empty() && tree2->merges.is_empty())
+    rtree[1]= new SEL_TREE(tree2, TRUE, param);
+    imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param);
+  }
+  bool no_imerge_from_ranges= FALSE;
+  if (!(result= new SEL_TREE()))
+    DBUG_RETURN(result);
+
+  /* Build the range part of the tree for the formula (1) */ 
+  if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys))
+  {
+    bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys);
+    no_imerge_from_ranges= must_be_ored;
+    key_map::Iterator it(ored_keys);
+    int key_no;
+    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
     {
-      if (param->remove_jump_scans)
+      SEL_ARG *key1= tree1->keys[key_no];
+      SEL_ARG *key2= tree2->keys[key_no];
+      if (!must_be_ored)
       {
-        bool no_trees= remove_nonrange_trees(param, tree1);
-        no_trees= no_trees || remove_nonrange_trees(param, tree2);
-        if (no_trees)
-          DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
+        key1->incr_refs();
+        key2->incr_refs();
       }
-      SEL_IMERGE *merge;
-      /* both trees are "range" trees, produce new index merge structure */
-      if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
-          (result->merges.push_back(merge)) ||
-          (merge->or_sel_tree(param, tree1)) ||
-          (merge->or_sel_tree(param, tree2)))
-        result= NULL;
-      else
-        result->type= tree1->type;
-    }
-    else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
-    {
-      if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
-        result= new SEL_TREE(SEL_TREE::ALWAYS);
-      else
-        result= tree1;
+      if ((result->keys[key_no]= key_or(param, key1, key2)))
+        result->keys_map.set_bit(key_no);
     }
-    else
-    {
-      /* one tree is index merge tree and another is range tree */
-      if (tree1->merges.is_empty())
-        swap_variables(SEL_TREE*, tree1, tree2);
+    result->type= tree1->type;
+  }
       
-      if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
-         DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
-      /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
-      if (imerge_list_or_tree(param, &tree1->merges, tree2))
-        result= new SEL_TREE(SEL_TREE::ALWAYS);
-      else
-        result= tree1;
-    }
+  if (no_imerge_from_ranges && no_merges1 && no_merges2)
+  {
+    if (result->keys_map.is_clear_all())
+      result->type= SEL_TREE::ALWAYS;
+    DBUG_RETURN(result);
+  }
+
+  SEL_IMERGE *imerge_from_ranges;
+  if (!(imerge_from_ranges= new SEL_IMERGE()))
+    result= NULL;
+  else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges)
+  {
+    /* Build the imerge part of the tree for the formula (1) */
+    SEL_TREE *rt1= tree1;
+    SEL_TREE *rt2= tree2;
+    if (no_merges1)
+      rt1= new SEL_TREE(tree1, TRUE, param);
+    if (no_merges2)
+      rt2= new SEL_TREE(tree2, TRUE, param);
+    if (!rt1 || !rt2 ||
+        result->merges.push_back(imerge_from_ranges) ||
+        imerge_from_ranges->or_sel_tree(param, rt1) ||
+        imerge_from_ranges->or_sel_tree(param, rt2))
+      result= NULL;
+  }
+  if (!result)
+    DBUG_RETURN(result);
+
+  result->type= tree1->type;
+
+  if (!no_merges1 && !no_merges2 && 
+      !imerge_list_or_list(param, &tree1->merges, &tree2->merges))
+  {
+    /* Build the imerges for the formula (2) */
+    imerge_list_and_list(&result->merges, &tree1->merges);
   }
+
+  /* Build the imerges for the formulas (3) and (4) */
+  for (uint i=0; i < 2; i++)
+  {
+    List<SEL_IMERGE> merges;
+    SEL_TREE *rt= rtree[i];
+    SEL_IMERGE *im= imerge[1-i];
+    
+    if (rt && im && !merges.push_back(im) && 
+        !imerge_list_or_tree(param, &merges, rt))
+      imerge_list_and_list(&result->merges, &merges);
+  }
+ 
   DBUG_RETURN(result);
 }
 
@@ -6375,6 +8469,7 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
   if (!key1)
     return &null_element;			// Impossible ranges
   key1->use_count++;
+  key1->max_part_no= max(key2->max_part_no, key2->part+1);
   return key1;
 }
 
@@ -6467,6 +8562,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
   key1->use_count--;
   key2->use_count--;
   SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
+  uint max_part_no= max(key1->max_part_no, key2->max_part_no);
 
   while (e1 && e2)
   {
@@ -6504,6 +8600,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
   key2->free_tree();
   if (!new_tree)
     return &null_element;			// Impossible range
+  new_tree->max_part_no= max_part_no;
   return new_tree;
 }
 
@@ -6609,7 +8706,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
   {
     key1->free_tree();
     key2->free_tree();
-    return 0;					// Can't optimize this
+    return 0;                                   // Can't optimize this
   }
 
   // If one of the key is MAYBE_KEY then the found region may be bigger
@@ -6632,247 +8729,555 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
     {
       swap_variables(SEL_ARG *,key1,key2);
     }
-    if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
-      return 0;					// OOM
+    if (key1->use_count > 0 && !(key1=key1->clone_tree(param)))
+      return 0;                                 // OOM
   }
 
   // Add tree at key2 to tree at key1
   bool key2_shared=key2->use_count != 0;
   key1->maybe_flag|=key2->maybe_flag;
 
+  /*
+    Notation for illustrations used in the rest of this function: 
+
+      Range: [--------]
+             ^        ^
+             start    stop
+
+      Two overlapping ranges:
+        [-----]               [----]            [--]
+            [---]     or    [---]       or   [-------]
+
+      Ambiguity: *** 
+        The range starts or stops somewhere in the "***" range.
+        Example: a starts before b and may end before/the same plase/after b
+        a: [----***]
+        b:   [---]
+
+      Adjacent ranges:
+        Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
+        a: ----]
+        b:      [----
+   */
+
+  uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+
   for (key2=key2->first(); key2; )
   {
-    SEL_ARG *tmp=key1->find_range(key2);	// Find key1.min <= key2.min
-    int cmp;
+    /*
+      key1 consists of one or more ranges. tmp is the range currently
+      being handled.
+
+      initialize tmp to the latest range in key1 that starts the same
+      place or before the range in key2 starts
+
+      key2:           [------]
+      key1: [---] [-----] [----]
+                  ^
+                  tmp
+    */
+    SEL_ARG *tmp=key1->find_range(key2);
+
+    /*
+      Used to describe how two key values are positioned compared to
+      each other. Consider key_value_a.<cmp_func>(key_value_b):
+
+        -2: key_value_a is smaller than key_value_b, and they are adjacent
+        -1: key_value_a is smaller than key_value_b (not adjacent)
+         0: the key values are equal
+         1: key_value_a is bigger than key_value_b (not adjacent)
+        -2: key_value_a is bigger than key_value_b, and they are adjacent
+
+      Example: "cmp= tmp->cmp_max_to_min(key2)"
+
+      key2:         [--------            (10 <= x ...)
+      tmp:    -----]                      (... x <  10) => cmp==-2
+      tmp:    ----]                       (... x <=  9) => cmp==-1
+      tmp:    ------]                     (... x  = 10) => cmp== 0
+      tmp:    --------]                   (... x <= 12) => cmp== 1
+      (cmp == 2 does not make sense for cmp_max_to_min())
+     */
+    int cmp= 0;
 
     if (!tmp)
     {
-      tmp=key1->first();			// tmp.min > key2.min
+      /*
+        The range in key2 starts before the first range in key1. Use
+        the first range in key1 as tmp.
+
+        key2:     [--------]
+        key1:            [****--] [----]   [-------]
+                         ^
+                         tmp
+      */
+      tmp=key1->first();
       cmp= -1;
     }
-    else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
-    {						// Found tmp.max < key2.min
+    else if ((cmp= tmp->cmp_max_to_min(key2)) < 0)
+    {
+      /*
+        This is the case:
+        key2:          [-------]
+        tmp:   [----**]
+       */
       SEL_ARG *next=tmp->next;
-      /* key1 on the left of key2 non-overlapping */
       if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
       {
-	// Join near ranges like tmp.max < 0 and key2.min >= 0
-	SEL_ARG *key2_next=key2->next;
-	if (key2_shared)
-	{
-	  if (!(key2=new SEL_ARG(*key2)))
-	    return 0;		// out of memory
-	  key2->increment_use_count(key1->use_count+1);
-	  key2->next=key2_next;			// New copy of key2
-	}
-	key2->copy_min(tmp);
-	if (!(key1=key1->tree_delete(tmp)))
-	{					// Only one key in tree
-	  key1=key2;
-	  key1->make_root();
-	  key2=key2_next;
-	  break;
-	}
+        /*
+          Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
+
+          This is the case:
+          key2:          [-------]
+          tmp:     [----]
+
+          Result:
+          key2:    [-------------]     => inserted into key1 below
+          tmp:                         => deleted
+        */
+        SEL_ARG *key2_next=key2->next;
+        if (key2_shared)
+        {
+          if (!(key2=new SEL_ARG(*key2)))
+            return 0;           // out of memory
+          key2->increment_use_count(key1->use_count+1);
+          key2->next=key2_next;                 // New copy of key2
+        }
+
+        key2->copy_min(tmp);
+        if (!(key1=key1->tree_delete(tmp)))
+        {                                       // Only one key in tree
+          key1=key2;
+          key1->make_root();
+          key2=key2_next;
+          break;
+        }
       }
-      if (!(tmp=next))				// tmp.min > key2.min
-	break;					// Copy rest of key2
+      if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min
+        break;         // No more ranges in key1. Copy rest of key2
     }
+
     if (cmp < 0)
-    {						// tmp.min > key2.min
+    {
+      /*
+        This is the case:
+        key2:  [--***]
+        tmp:       [----]
+      */
       int tmp_cmp;
-      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
+      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0)
       {
-        /* tmp is on the right of key2 non-overlapping */
-	if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
-	{					// ranges are connected
-	  tmp->copy_min_to_min(key2);
-	  key1->merge_flags(key2);
-	  if (tmp->min_flag & NO_MIN_RANGE &&
-	      tmp->max_flag & NO_MAX_RANGE)
-	  {
-	    if (key1->maybe_flag)
-	      return new SEL_ARG(SEL_ARG::MAYBE_KEY);
-	    return 0;
-	  }
-	  key2->increment_use_count(-1);	// Free not used tree
-	  key2=key2->next;
-	  continue;
-	}
-	else
-	{
-	  SEL_ARG *next=key2->next;		// Keys are not overlapping
-	  if (key2_shared)
-	  {
-	    SEL_ARG *cpy= new SEL_ARG(*key2);	// Must make copy
-	    if (!cpy)
-	      return 0;				// OOM
-	    key1=key1->insert(cpy);
-	    key2->increment_use_count(key1->use_count+1);
-	  }
-	  else
-	    key1=key1->insert(key2);		// Will destroy key2_root
-	  key2=next;
-	  continue;
-	}
+        /*
+          This is the case:
+          key2:  [------**]
+          tmp:             [----]
+        */
+        if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
+        {
+          /*
+            Adjacent ranges with equal next_key_part. Merge like this:
+
+            This is the case:
+            key2:    [------]
+            tmp:             [-----]
+
+            Result:
+            key2:    [------]
+            tmp:     [-------------]
+
+            Then move on to next key2 range.
+          */
+          tmp->copy_min_to_min(key2);
+          key1->merge_flags(key2);
+          if (tmp->min_flag & NO_MIN_RANGE &&
+              tmp->max_flag & NO_MAX_RANGE)
+          {
+            if (key1->maybe_flag)
+              return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+            return 0;
+          }
+          key2->increment_use_count(-1);        // Free not used tree
+          key2=key2->next;
+          continue;
+        }
+        else
+        {
+          /*
+            key2 not adjacent to tmp or has different next_key_part.
+            Insert into key1 and move to next range in key2
+            
+            This is the case:
+            key2:  [------**]
+            tmp:             [----]
+
+            Result:
+            key1_  [------**][----]
+                   ^         ^
+                   insert    tmp
+          */
+          SEL_ARG *next=key2->next;
+          if (key2_shared)
+          {
+            SEL_ARG *cpy= new SEL_ARG(*key2);   // Must make copy
+            if (!cpy)
+              return 0;                         // OOM
+            key1=key1->insert(cpy);
+            key2->increment_use_count(key1->use_count+1);
+          }
+          else
+            key1=key1->insert(key2);            // Will destroy key2_root
+          key2=next;
+          continue;
+        }
       }
     }
 
-    /* 
-      tmp.min >= key2.min && tmp.min <= key.max  (overlapping ranges)
-      key2.min <= tmp.min <= key2.max 
-    */  
+    /*
+      The ranges in tmp and key2 are overlapping:
+
+      key2:          [----------] 
+      tmp:        [*****-----*****]
+
+      Corollary: tmp.min <= key2.max
+    */
     if (eq_tree(tmp->next_key_part,key2->next_key_part))
     {
+      // Merge overlapping ranges with equal next_key_part
       if (tmp->is_same(key2))
       {
-        /* 
-          Found exact match of key2 inside key1. 
+        /*
+          Found exact match of key2 inside key1.
           Use the relevant range in key1.
         */
-	tmp->merge_flags(key2);			// Copy maybe flags
-	key2->increment_use_count(-1);		// Free not used tree
+        tmp->merge_flags(key2);                 // Copy maybe flags
+        key2->increment_use_count(-1);          // Free not used tree
       }
       else
       {
-	SEL_ARG *last=tmp;
-        SEL_ARG *first=tmp;
-        /* 
-          Find the last range in tmp that overlaps key2 and has the same 
-          condition on the rest of the keyparts.
+        SEL_ARG *last= tmp;
+        SEL_ARG *first= tmp;
+
+        /*
+          Find the last range in key1 that overlaps key2 and
+          where all ranges first...last have the same next_key_part as
+          key2.
+
+          key2:  [****----------------------*******]
+          key1:     [--]  [----] [---]  [-----] [xxxx]
+                    ^                   ^       ^
+                    first               last    different next_key_part
+
+          Since key2 covers them, the ranges between first and last
+          are merged into one range by deleting first...last-1 from
+          the key1 tree. In the figure, this applies to first and the
+          two consecutive ranges. The range of last is then extended:
+            * last.min: Set to min(key2.min, first.min)
+            * last.max: If there is a last->next that overlaps key2 (i.e.,
+                        last->next has a different next_key_part):
+                                        Set adjacent to last->next.min
+                        Otherwise:      Set to max(key2.max, last.max)
+
+          Result:
+          key2:  [****----------------------*******]
+                    [--]  [----] [---]                   => deleted from key1
+          key1:  [**------------------------***][xxxx]
+                 ^                              ^
+                 tmp=last                       different next_key_part
         */
-	while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
-	       eq_tree(last->next->next_key_part,key2->next_key_part))
-	{
+        while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
+               eq_tree(last->next->next_key_part,key2->next_key_part))
+        {
           /*
-            We've found the last overlapping key1 range in last.
-            This means that the ranges between (and including) the 
-            first overlapping range (tmp) and the last overlapping range
-            (last) are fully nested into the current range of key2 
-            and can safely be discarded. We just need the minimum endpoint
-            of the first overlapping range (tmp) so we can compare it with
-            the minimum endpoint of the enclosing key2 range.
+            last->next is covered by key2 and has same next_key_part.
+            last can be deleted
           */
-	  SEL_ARG *save=last;
-	  last=last->next;
-	  key1=key1->tree_delete(save);
-	}
+          SEL_ARG *save=last;
+          last=last->next;
+          key1=key1->tree_delete(save);
+        }
+        // Redirect tmp to last which will cover the entire range
+        tmp= last;
+
         /*
-          The tmp range (the first overlapping range) could have been discarded
-          by the previous loop. We should re-direct tmp to the new united range 
-          that's taking its place.
+          We need the minimum endpoint of first so we can compare it
+          with the minimum endpoint of the enclosing key2 range.
         */
-        tmp= last;
         last->copy_min(first);
         bool full_range= last->copy_min(key2);
         if (!full_range)
         {
           if (last->next && key2->cmp_max_to_min(last->next) >= 0)
           {
-            last->max_value= last->next->min_value;
-            if (last->next->min_flag & NEAR_MIN)
-              last->max_flag&= ~NEAR_MAX;
-            else
-              last->max_flag|= NEAR_MAX;
+            /*
+              This is the case:
+              key2:    [-------------]
+              key1:  [***------]  [xxxx]
+                     ^            ^
+                     last         different next_key_part
+
+              Extend range of last up to last->next:
+              key2:    [-------------]
+              key1:  [***--------][xxxx]
+            */
+            last->copy_min_to_max(last->next);
           }
           else
+            /*
+              This is the case:
+              key2:    [--------*****]
+              key1:  [***---------]    [xxxx]
+                     ^                 ^
+                     last              different next_key_part
+
+              Extend range of last up to max(last.max, key2.max):
+              key2:    [--------*****]
+              key1:  [***----------**] [xxxx]
+             */
             full_range= last->copy_max(key2);
         }
-	if (full_range)
-	{					// Full range
-	  key1->free_tree();
-	  for (; key2 ; key2=key2->next)
-	    key2->increment_use_count(-1);	// Free not used tree
-	  if (key1->maybe_flag)
-	    return new SEL_ARG(SEL_ARG::MAYBE_KEY);
-	  return 0;
-	}
+        if (full_range)
+        {                                       // Full range
+          key1->free_tree();
+          for (; key2 ; key2=key2->next)
+            key2->increment_use_count(-1);      // Free not used tree
+          if (key1->maybe_flag)
+            return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+          return 0;
+        }
       }
     }
 
     if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
-    {						// tmp.min <= x < key2.min
+    {
+      /*
+        This is the case ("cmp>=0" means that tmp.max >= key2.min):
+        key2:              [----]
+        tmp:     [------------*****]
+      */
+
+      if (!tmp->next_key_part)
+      {
+        /*
+          tmp->next_key_part is empty: cut the range that is covered
+          by tmp from key2. 
+          Reason: (key2->next_key_part OR tmp->next_key_part) will be
+          empty and therefore equal to tmp->next_key_part. Thus, this
+          part of the key2 range is completely covered by tmp.
+        */
+        if (tmp->cmp_max_to_max(key2) >= 0)
+        {
+          /*
+            tmp covers the entire range in key2. 
+            key2:              [----]
+            tmp:     [-----------------]
+
+            Move on to next range in key2
+          */
+          key2->increment_use_count(-1); // Free not used tree
+          key2=key2->next;
+          continue;
+        }
+        else
+        {
+          /*
+            This is the case:
+            key2:           [-------]
+            tmp:     [---------]
+
+            Result:
+            key2:               [---]
+            tmp:     [---------]
+          */
+          if (key2->use_count)
+	  {
+	    SEL_ARG *key2_cpy= new SEL_ARG(*key2);
+            if (key2_cpy)
+              return 0;
+            key2= key2_cpy;
+	  }
+          key2->copy_max_to_min(tmp);
+          continue;
+        }
+      }
+
+      /*
+        The ranges are overlapping but have not been merged because
+        next_key_part of tmp and key2 differ. 
+        key2:              [----]
+        tmp:     [------------*****]
+
+        Split tmp in two where key2 starts:
+        key2:              [----]
+        key1:    [--------][--*****]
+                 ^         ^
+                 insert    tmp
+      */
       SEL_ARG *new_arg=tmp->clone_first(key2);
       if (!new_arg)
-	return 0;				// OOM
-      if ((new_arg->next_key_part= key1->next_key_part))
-	new_arg->increment_use_count(key1->use_count+1);
+        return 0;                               // OOM
+      if ((new_arg->next_key_part= tmp->next_key_part))
+        new_arg->increment_use_count(key1->use_count+1);
       tmp->copy_min_to_min(key2);
       key1=key1->insert(new_arg);
-    }
+    } // tmp.min >= key2.min due to this if()
 
-    // tmp.min >= key2.min && tmp.min <= key2.max
-    SEL_ARG key(*key2);				// Get copy we can modify
+    /*
+      Now key2.min <= tmp.min <= key2.max:
+      key2:   [---------]
+      tmp:    [****---*****]
+     */
+    SEL_ARG key2_cpy(*key2); // Get copy we can modify
     for (;;)
     {
-      if (tmp->cmp_min_to_min(&key) > 0)
-      {						// key.min <= x < tmp.min
-	SEL_ARG *new_arg=key.clone_first(tmp);
-	if (!new_arg)
-	  return 0;				// OOM
-	if ((new_arg->next_key_part=key.next_key_part))
-	  new_arg->increment_use_count(key1->use_count+1);
-	key1=key1->insert(new_arg);
-      }
-      if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
-      {						// tmp.min. <= x <= tmp.max
-	tmp->maybe_flag|= key.maybe_flag;
-	key.increment_use_count(key1->use_count+1);
-	tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
-	if (!cmp)				// Key2 is ready
-	  break;
-	key.copy_max_to_min(tmp);
-	if (!(tmp=tmp->next))
-	{
-	  SEL_ARG *tmp2= new SEL_ARG(key);
-	  if (!tmp2)
-	    return 0;				// OOM
-	  key1=key1->insert(tmp2);
-	  key2=key2->next;
-	  goto end;
-	}
-	if (tmp->cmp_min_to_max(&key) > 0)
-	{
-	  SEL_ARG *tmp2= new SEL_ARG(key);
-	  if (!tmp2)
-	    return 0;				// OOM
-	  key1=key1->insert(tmp2);
-	  break;
-	}
+      if (tmp->cmp_min_to_min(&key2_cpy) > 0)
+      {
+        /*
+          This is the case:
+          key2_cpy:    [------------]
+          key1:                 [-*****]
+                                ^
+                                tmp
+                             
+          Result:
+          key2_cpy:             [---]
+          key1:        [-------][-*****]
+                       ^        ^
+                       insert   tmp
+         */
+        SEL_ARG *new_arg=key2_cpy.clone_first(tmp);
+        if (!new_arg)
+          return 0; // OOM
+        if ((new_arg->next_key_part=key2_cpy.next_key_part))
+          new_arg->increment_use_count(key1->use_count+1);
+        key1=key1->insert(new_arg);
+        key2_cpy.copy_min_to_min(tmp);
+      } 
+      // Now key2_cpy.min == tmp.min
+
+      if ((cmp= tmp->cmp_max_to_max(&key2_cpy)) <= 0)
+      {
+        /*
+          tmp.max <= key2_cpy.max:
+          key2_cpy:   a)  [-------]    or b)     [----]
+          tmp:            [----]                 [----]
+
+          Steps:
+           1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part.
+           2) If case a: Insert range [tmp.max, key2_cpy.max] into key1 using
+                         next_key_part of key2_cpy
+
+           Result:
+           key1:      a)  [----][-]    or b)     [----]
+         */
+        tmp->maybe_flag|= key2_cpy.maybe_flag;
+        key2_cpy.increment_use_count(key1->use_count+1);
+        tmp->next_key_part= key_or(param, tmp->next_key_part,
+                                   key2_cpy.next_key_part);
+
+        if (!cmp)
+          break;                     // case b: done with this key2 range
+
+        // Make key2_cpy the range [tmp.max, key2_cpy.max]
+        key2_cpy.copy_max_to_min(tmp);
+        if (!(tmp=tmp->next))
+        {
+          /*
+            No more ranges in key1. Insert key2_cpy and go to "end"
+            label to insert remaining ranges in key2 if any.
+          */
+          SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+          if (!tmp2)
+            return 0; // OOM
+          key1=key1->insert(tmp2);
+          key2=key2->next;
+          goto end;
+        }
+        if (tmp->cmp_min_to_max(&key2_cpy) > 0)
+        {
+          /*
+            The next range in key1 does not overlap with key2_cpy.
+            Insert this range into key1 and move on to the next range
+            in key2.
+          */
+          SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+          if (!tmp2)
+            return 0;                           // OOM
+          key1=key1->insert(tmp2);
+          break;
+        }
+        /*
+          key2_cpy overlaps with the next range in key1 and the case
+          is now "key2.min <= tmp.min <= key2.max". Go back to for(;;)
+          to handle this situation.
+        */
+        continue;
       }
       else
       {
-	SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max
-	if (!new_arg)
-	  return 0;				// OOM
-	tmp->copy_max_to_min(&key);
-	tmp->increment_use_count(key1->use_count+1);
-	/* Increment key count as it may be used for next loop */
-	key.increment_use_count(1);
-	new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
-	key1=key1->insert(new_arg);
-	break;
+        /*
+          This is the case:
+          key2_cpy:   [-------]
+          tmp:        [------------]
+
+          Result:
+          key1:       [-------][---]
+                      ^        ^
+                      new_arg  tmp
+          Steps:
+           0) If tmp->next_key_part is empty: do nothing. Reason:
+              (key2_cpy->next_key_part OR tmp->next_key_part) will be
+              empty and therefore equal to tmp->next_key_part. Thus,
+              the range in key2_cpy is completely covered by tmp
+           1) Make new_arg with range [tmp.min, key2_cpy.max].
+              new_arg->next_key_part is OR between next_key_part
+              of tmp and key2_cpy
+           2) Make tmp the range [key2.max, tmp.max]
+           3) Insert new_arg into key1
+        */
+        if (!tmp->next_key_part) // Step 0
+        {
+          key2_cpy.increment_use_count(-1);     // Free not used tree
+          break;
+        }
+        SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
+        if (!new_arg)
+          return 0; // OOM
+        tmp->copy_max_to_min(&key2_cpy);
+        tmp->increment_use_count(key1->use_count+1);
+        /* Increment key count as it may be used for next loop */
+        key2_cpy.increment_use_count(1);
+        new_arg->next_key_part= key_or(param, tmp->next_key_part,
+                                       key2_cpy.next_key_part);
+        key1=key1->insert(new_arg);
+        break;
       }
     }
-    key2=key2->next;
+    // Move on to next range in key2
+    key2=key2->next;                            
   }
 
 end:
+  /*
+    Add key2 ranges that are non-overlapping with and higher than the
+    highest range in key1.
+  */
   while (key2)
   {
     SEL_ARG *next=key2->next;
     if (key2_shared)
     {
-      SEL_ARG *tmp=new SEL_ARG(*key2);		// Must make copy
+      SEL_ARG *tmp=new SEL_ARG(*key2);          // Must make copy
       if (!tmp)
-	return 0;
+        return 0;
       key2->increment_use_count(key1->use_count+1);
       key1=key1->insert(tmp);
     }
     else
-      key1=key1->insert(key2);			// Will destroy key2_root
+      key1=key1->insert(key2);                  // Will destroy key2_root
     key2=next;
   }
   key1->use_count++;
+
+  key1->max_part_no= max_part_no;
   return key1;
 }
 
@@ -7348,11 +9753,7 @@ static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
 void SEL_ARG::test_use_count(SEL_ARG *root)
 {
   uint e_count=0;
-  if (this == root && use_count != 1)
-  {
-    sql_print_information("Use_count: Wrong count %lu for root",use_count);
-    return;
-  }
+
   if (this->type != SEL_ARG::KEY_RANGE)
     return;
   for (SEL_ARG *pos=first(); pos ; pos=pos->next)
@@ -7377,324 +9778,130 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
 }
 
 #endif
-
-
 /*
-  Calculate estimate of number records that will be retrieved by a range
-  scan on given index using given SEL_ARG intervals tree.
+  Calculate cost and E(#rows) for a given index and intervals tree 
+
   SYNOPSIS
-    check_quick_select
-      param  Parameter from test_quick_select
-      idx               Number of index to use in tree->keys
-      tree              Transformed selection condition, tree->keys[idx]
-                        holds the range tree to be used for scanning.
-      update_tbl_stats  If true, update table->quick_keys with information
+    check_quick_select()
+      param             Parameter from test_quick_select
+      idx               Number of index to use in PARAM::key SEL_TREE::key
+      index_only        TRUE  - assume only index tuples will be accessed
+                        FALSE - assume full table rows will be read
+      tree              Transformed selection condition, tree->key[idx] holds
+                        the intervals for the given index.
+      update_tbl_stats  TRUE <=> update table->quick_* with information
                         about range scan we've evaluated.
+      mrr_flags   INOUT MRR access flags
+      cost        OUT   Scan cost
 
   NOTES
     param->is_ror_scan is set to reflect if the key scan is a ROR (see
     is_key_scan_ror function for more info)
     param->table->quick_*, param->range_count (and maybe others) are
-    updated with data of given key scan, see check_quick_keys for details.
+    updated with data of given key scan, see quick_range_seq_next for details.
 
   RETURN
     Estimate # of records to be retrieved.
     HA_POS_ERROR if estimate calculation failed due to table handler problems.
-
 */
 
-static ha_rows
-check_quick_select(PARAM *param,uint idx,SEL_ARG *tree, bool update_tbl_stats)
-{
-  ha_rows records;
-  bool    cpk_scan;
-  uint key;
+static
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                           SEL_ARG *tree, bool update_tbl_stats, 
+                           uint *mrr_flags, uint *bufsize, COST_VECT *cost)
+{
+  SEL_ARG_RANGE_SEQ seq;
+  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
+  handler *file= param->table->file;
+  ha_rows rows= HA_POS_ERROR;
+  uint keynr= param->real_keynr[idx];
   DBUG_ENTER("check_quick_select");
+  
+  /* Handle cases when we don't have a valid non-empty list of range */
+  if (!tree)
+    DBUG_RETURN(HA_POS_ERROR);
+  if (tree->type == SEL_ARG::IMPOSSIBLE)
+    DBUG_RETURN(0L);
+  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
+    DBUG_RETURN(HA_POS_ERROR);
 
-  param->is_ror_scan= FALSE;
-  param->first_null_comp= 0;
+  seq.keyno= idx;
+  seq.real_keyno= keynr;
+  seq.param= param;
+  seq.start= tree;
 
-  if (!tree)
-    DBUG_RETURN(HA_POS_ERROR);			// Can't use it
-  param->max_key_part=0;
   param->range_count=0;
-  key= param->real_keynr[idx];
+  param->max_key_part=0;
 
-  if (tree->type == SEL_ARG::IMPOSSIBLE)
-    DBUG_RETURN(0L);				// Impossible select. return
-  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
-    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+  param->is_ror_scan= TRUE;
+  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
+    param->is_ror_scan= FALSE;
+  
+  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
+  /*
+    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
+  */
+  *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
 
-  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
-  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
-  {
-    /* Records are not ordered by rowid for other types of indexes. */
-    cpk_scan= FALSE;
-  }
-  else
-  {
-    /*
-      Clustered PK scan is a special case, check_quick_keys doesn't recognize
-      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
-    */
-    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
-               param->table->file->primary_key_is_clustered());
-    param->is_ror_scan= !cpk_scan;
-  }
-  param->n_ranges= 0;
+  bool pk_is_clustered= file->primary_key_is_clustered();
+  if (index_only && 
+      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
+      !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
+     *mrr_flags |= HA_MRR_INDEX_ONLY;
+  
+  if (param->thd->lex->sql_command != SQLCOM_SELECT)
+    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
 
-  records= check_quick_keys(param, idx, tree,
-                            param->min_key, 0, -1,
-                            param->max_key, 0, -1);
-  if (records != HA_POS_ERROR)
+  *bufsize= param->thd->variables.mrr_buff_size;
+  /*
+    Skip materialized derived table/view result table from MRR check as
+    they aren't contain any data yet.
+  */
+  if (param->table->pos_in_table_list->is_non_derived())
+    rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
+                                            bufsize, mrr_flags, cost);
+  if (rows != HA_POS_ERROR)
   {
+    param->quick_rows[keynr]= rows;
     if (update_tbl_stats)
     {
-      param->table->quick_keys.set_bit(key);
-      param->table->quick_key_parts[key]=param->max_key_part+1;
-      param->table->quick_n_ranges[key]= param->n_ranges;
+      param->table->quick_keys.set_bit(keynr);
+      param->table->quick_key_parts[keynr]= param->max_key_part+1;
+      param->table->quick_n_ranges[keynr]= param->range_count;
       param->table->quick_condition_rows=
-        min(param->table->quick_condition_rows, records);
+        min(param->table->quick_condition_rows, rows);
+      param->table->quick_rows[keynr]= rows;
     }
-    /*
-      Need to save quick_rows in any case as it is used when calculating
-      cost of ROR intersection:
-    */
-    param->table->quick_rows[key]=records;
-    if (cpk_scan)
-      param->is_ror_scan= TRUE;
   }
-  if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
-    param->is_ror_scan= FALSE;
-  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
-  DBUG_RETURN(records);
-}
-
-
-/*
-  Recursively calculate estimate of # rows that will be retrieved by
-  key scan on key idx.
-  SYNOPSIS
-    check_quick_keys()
-      param         Parameter from test_quick select function.
-      idx           Number of key to use in PARAM::keys in list of used keys
-                    (param->real_keynr[idx] holds the key number in table)
-      key_tree      SEL_ARG tree being examined.
-      min_key       Buffer with partial min key value tuple
-      min_key_flag
-      max_key       Buffer with partial max key value tuple
-      max_key_flag
-
-  NOTES
-    The function does the recursive descent on the tree via SEL_ARG::left,
-    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
-    are calculated using records_in_range calls at the leaf nodes and then
-    summed.
-
-    param->min_key and param->max_key are used to hold prefixes of key value
-    tuples.
-
-    The side effects are:
-
-    param->max_key_part is updated to hold the maximum number of key parts used
-      in scan minus 1.
-
-    param->range_count is incremented if the function finds a range that
-      wasn't counted by the caller.
-
-    param->is_ror_scan is cleared if the function detects that the key scan is
-      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
-      function for description of which key scans are ROR scans)
-
-  RETURN
-    #records      E(#records) for given subtree
-    HA_POS_ERROR  if subtree cannot be used for record retrieval
-
-*/
-
-static ha_rows
-check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
-		 uchar *min_key, uint min_key_flag, int min_keypart,
-                 uchar *max_key, uint max_key_flag, int max_keypart)
-{
-  ha_rows records=0, tmp;
-  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
-  uint tmp_min_keypart= min_keypart, tmp_max_keypart= max_keypart;
-  uchar *tmp_min_key, *tmp_max_key;
-  uint8 save_first_null_comp= param->first_null_comp;
-
-  param->max_key_part=max(param->max_key_part,key_tree->part);
-  if (key_tree->left != &null_element)
+  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
+  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
+  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
   {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      All scans are non-ROR scans for those index types.
+      TODO: Don't have this logic here, make table engines return 
+      appropriate flags instead.
     */
     param->is_ror_scan= FALSE;
-    records=check_quick_keys(param, idx, key_tree->left,
-                             min_key, min_key_flag, min_keypart,
-			     max_key, max_key_flag, max_keypart);
-    if (records == HA_POS_ERROR)			// Impossible
-      return records;
   }
-
-  tmp_min_key= min_key;
-  tmp_max_key= max_key;
-  tmp_min_keypart+= key_tree->store_min(param->key[idx][key_tree->part].store_length,
-                                        &tmp_min_key, min_key_flag);
-  tmp_max_keypart+= key_tree->store_max(param->key[idx][key_tree->part].store_length,
-                                        &tmp_max_key, max_key_flag);
-  min_key_length= (uint) (tmp_min_key - param->min_key);
-  max_key_length= (uint) (tmp_max_key - param->max_key);
-
-  if (param->is_ror_scan)
+  else if (param->table->s->primary_key == keynr && pk_is_clustered)
   {
-    /*
-      If the index doesn't cover entire key, mark the scan as non-ROR scan.
-      Actually we're cutting off some ROR scans here.
-    */
-    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
-                    key_part[key_tree->part].fieldnr - 1;
-    if (param->table->field[fieldnr]->key_length() !=
-        param->key[idx][key_tree->part].length)
-      param->is_ror_scan= FALSE;
+    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
+    param->is_ror_scan= TRUE;
   }
-
-  if (!param->first_null_comp && key_tree->is_null_interval())
-    param->first_null_comp= key_tree->part+1;
-
-  if (key_tree->next_key_part &&
-      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
-      key_tree->next_key_part->part == key_tree->part+1)
-  {						// const key as prefix
-    if (min_key_length == max_key_length &&
-	!memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-	!key_tree->min_flag && !key_tree->max_flag)
-    {
-      tmp=check_quick_keys(param,idx,key_tree->next_key_part, tmp_min_key,
-                           min_key_flag | key_tree->min_flag, tmp_min_keypart,
-                           tmp_max_key, max_key_flag | key_tree->max_flag,
-                           tmp_max_keypart);
-      goto end;					// Ugly, but efficient
-    }
-    else
-    {
-      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
-      param->is_ror_scan= FALSE;
-    }
-
-    tmp_min_flag=key_tree->min_flag;
-    tmp_max_flag=key_tree->max_flag;
-    if (!tmp_min_flag)
-      tmp_min_keypart+=
-      key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
-					     &tmp_min_flag);
-    if (!tmp_max_flag)
-      tmp_max_keypart+=
-      key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
-					     &tmp_max_flag);
-    min_key_length= (uint) (tmp_min_key - param->min_key);
-    max_key_length= (uint) (tmp_max_key - param->max_key);
-  }
-  else
-  {
-    tmp_min_flag= min_key_flag | key_tree->min_flag;
-    tmp_max_flag= max_key_flag | key_tree->max_flag;
-  }
-
-  if (unlikely(param->thd->killed != 0))
-    return HA_POS_ERROR;
-
-  keynr=param->real_keynr[idx];
-  param->range_count++;
-  if (!tmp_min_flag && ! tmp_max_flag &&
-      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
-      (param->table->key_info[keynr].flags & HA_NOSAME) &&
-      min_key_length == max_key_length &&
-      !memcmp(param->min_key, param->max_key, min_key_length) &&
-      !param->first_null_comp)
-  {
-    tmp=1;					// Max one record
-    param->n_ranges++;
-  }
-  else
+  else if (param->range_count > 1)
   {
-    if (param->is_ror_scan)
-    {
-      /*
-        If we get here, the condition on the key was converted to form
-        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
-          somecond(keyXpart{key_tree->part})"
-        Check if
-          somecond is "keyXpart{key_tree->part} = const" and
-          uncovered "tail" of KeyX parts is either empty or is identical to
-          first members of clustered primary key.
-      */
-      if (!(min_key_length == max_key_length &&
-            !memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-            !key_tree->min_flag && !key_tree->max_flag &&
-            is_key_scan_ror(param, keynr, key_tree->part + 1)))
-        param->is_ror_scan= FALSE;
-    }
-    param->n_ranges++;
-
-    if (tmp_min_flag & GEOM_FLAG)
-    {
-      key_range min_range;
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      /* In this case tmp_min_flag contains the handler-read-function */
-      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);
-
-      tmp= param->table->file->records_in_range(keynr,
-                                                &min_range, (key_range*) 0);
-    }
-    else
-    {
-      key_range min_range, max_range;
-
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
-                         HA_READ_KEY_EXACT);
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      max_range.key=    param->max_key;
-      max_range.length= max_key_length;
-      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
-                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
-      max_range.keypart_map= make_keypart_map(tmp_max_keypart);
-      tmp=param->table->file->records_in_range(keynr,
-                                               (min_key_length ? &min_range :
-                                                (key_range*) 0),
-                                               (max_key_length ? &max_range :
-                                                (key_range*) 0));
-    }
-  }
- end:
-  if (tmp == HA_POS_ERROR)			// Impossible range
-    return tmp;
-  records+=tmp;
-  if (key_tree->right != &null_element)
-  {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      Scaning multiple key values in the index: the records are ROR
+      for each value, but not between values. E.g, "SELECT ... x IN
+      (1,3)" returns ROR order for all records with x=1, then ROR
+      order for records with x=3
     */
     param->is_ror_scan= FALSE;
-    tmp=check_quick_keys(param, idx, key_tree->right,
-                         min_key, min_key_flag, min_keypart,
-                         max_key, max_key_flag, max_keypart);
-    if (tmp == HA_POS_ERROR)
-      return tmp;
-    records+=tmp;
   }
-  param->first_null_comp= save_first_null_comp;
-  return records;
+
+  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
+  DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
 }
 
 
@@ -7722,13 +9929,14 @@ check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
 
     and the table has a clustered Primary Key defined as 
-
       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) 
     
     i.e. the first key parts of it are identical to uncovered parts ot the 
     key being scanned. This function assumes that the index flags do not
     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
 
+    Check (1) is made in quick_range_seq_next()
+
   RETURN
     TRUE   The scan is ROR-scan
     FALSE  Otherwise
@@ -7741,9 +9949,19 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   KEY_PART_INFO *key_part_end= (table_key->key_part +
                                 table_key->key_parts);
   uint pk_number;
+  
+  for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
+  {
+    uint16 fieldnr= param->table->key_info[keynr].
+                    key_part[kp - table_key->key_part].fieldnr - 1;
+    if (param->table->field[fieldnr]->key_length() != kp->length)
+      return FALSE;
+  }
 
   if (key_part == key_part_end)
     return TRUE;
+
+  key_part= table_key->key_part + nparts;
   pk_number= param->table->s->primary_key;
   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
     return FALSE;
@@ -7768,12 +9986,14 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   SYNOPSIS
     get_quick_select()
       param
-      idx          Index of used key in param->key.
-      key_tree     SEL_ARG tree for the used key
-      parent_alloc If not NULL, use it to allocate memory for
-                   quick select data. Otherwise use quick->alloc.
+      idx            Index of used key in param->key.
+      key_tree       SEL_ARG tree for the used key
+      mrr_flags      MRR parameter for quick select
+      mrr_buf_size   MRR parameter for quick select
+      parent_alloc   If not NULL, use it to allocate memory for
+                     quick select data. Otherwise use quick->alloc.
   NOTES
-    The caller must call QUICK_SELECT::init for returned quick select
+    The caller must call QUICK_SELECT::init for returned quick select.
 
     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
     deallocated when the returned quick select is deleted.
@@ -7784,25 +10004,26 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
 */
 
 QUICK_RANGE_SELECT *
-get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
-                 MEM_ROOT *parent_alloc)
+get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
+                 uint mrr_buf_size, MEM_ROOT *parent_alloc)
 {
   QUICK_RANGE_SELECT *quick;
+  bool create_err= FALSE;
   DBUG_ENTER("get_quick_select");
 
   if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
     quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                       param->real_keynr[idx],
                                       test(parent_alloc),
-                                      parent_alloc);
+                                      parent_alloc, &create_err);
   else
     quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                  param->real_keynr[idx],
-                                 test(parent_alloc));
+                                 test(parent_alloc), NULL, &create_err);
 
   if (quick)
   {
-    if (quick->error ||
+    if (create_err ||
 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
 		       param->max_key,0))
     {
@@ -7811,6 +10032,8 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
     }
     else
     {
+      quick->mrr_flags= mrr_flags;
+      quick->mrr_buf_size= mrr_buf_size;
       quick->key_parts=(KEY_PART*)
         memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                     (char*) param->key[idx],
@@ -7959,7 +10182,20 @@ bool QUICK_RANGE_SELECT::unique_key_range()
 }
 
 
-/* Returns TRUE if any part of the key is NULL */
+
+/*
+  Return TRUE if any part of the key is NULL
+
+  SYNOPSIS
+    null_part_in_key()    
+      key_part  Array of key parts (index description)
+      key       Key values tuple
+      length    Length of key values tuple in bytes.
+
+  RETURN
+    TRUE   The tuple has at least one "keypartX is NULL"
+    FALSE  Otherwise
+*/
 
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
 {
@@ -7979,7 +10215,7 @@ bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields)
   return is_key_used(head, index, fields);
 }
 
-bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields)
+bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields)
 {
   QUICK_RANGE_SELECT *quick;
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
@@ -8016,6 +10252,19 @@ bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
 }
 
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
+{
+  bool create_err= FALSE;
+  FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
+  if (create_err)
+  {
+    delete fts;
+    return NULL;
+  }
+  else
+    return fts;
+}
+
 /*
   Create quick select from ref/ref_or_null scan.
 
@@ -8044,10 +10293,12 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   KEY_PART *key_part;
   QUICK_RANGE *range;
   uint part;
+  bool create_err= FALSE;
+  COST_VECT cost;
 
   old_root= thd->mem_root;
   /* The following call may change thd->mem_root */
-  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0);
+  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
   /* save mem_root set by QUICK_RANGE_SELECT constructor */
   alloc= thd->mem_root;
   /*
@@ -8056,7 +10307,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   */
   thd->mem_root= old_root;
 
-  if (!quick)
+  if (!quick || create_err)
     return 0;			/* no ranges found */
   if (quick->init())
     goto err;
@@ -8110,8 +10361,25 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
       goto err;
   }
 
-  return quick;
+  /* Call multi_range_read_info() to get the MRR flags and buffer size */
+  quick->mrr_flags= HA_MRR_NO_ASSOCIATION | 
+                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
+  if (thd->lex->sql_command != SQLCOM_SELECT)
+    quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
+  if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
+                                           ref->key_length))
+    quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
+#endif
+
+  quick->mrr_buf_size= thd->variables.mrr_buff_size;
+  if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
+                                         ~0, 
+                                         &quick->mrr_buf_size,
+                                         &quick->mrr_flags, &cost))
+    goto err;
 
+  return quick;
 err:
   delete quick;
   return 0;
@@ -8135,13 +10403,23 @@ err:
     other error
 */
 
-int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
+int read_keys_and_merge_scans(THD *thd,
+                              TABLE *head,
+                              List<QUICK_RANGE_SELECT> quick_selects,
+                              QUICK_RANGE_SELECT *pk_quick_select,
+                              READ_RECORD *read_record,
+                              bool intersection,
+                              key_map *filtered_scans,
+                              Unique **unique_ptr)
 {
   List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
   QUICK_RANGE_SELECT* cur_quick;
   int result;
+  Unique *unique= *unique_ptr;
   handler *file= head->file;
-  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
+  bool with_cpk_filter= pk_quick_select != NULL;
+
+  DBUG_ENTER("read_keys_and_merge");
 
   /* We're going to just read rowids. */
   if (!head->key_read)
@@ -8152,6 +10430,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
 
   cur_quick_it.rewind();
   cur_quick= cur_quick_it++;
+  bool first_quick= TRUE;
   DBUG_ASSERT(cur_quick != 0);
   
   /*
@@ -8169,9 +10448,11 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
 
     unique= new Unique(refpos_order_cmp, (void *)file,
                        file->ref_length,
-                       thd->variables.sortbuff_size);
+                       thd->variables.sortbuff_size,
+		       intersection ? quick_selects.elements : 0);                     
     if (!unique)
       goto err;
+    *unique_ptr= unique;
   }
   else
     unique->reset();
@@ -8183,6 +10464,14 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
   {
     while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
     {
+      if (intersection)
+        with_cpk_filter= filtered_scans->is_set(cur_quick->index);
+      if (first_quick)
+      {
+        first_quick= FALSE;
+        if (intersection && unique->is_in_memory())
+          unique->close_for_expansion();
+      }
       cur_quick->range_end();
       cur_quick= cur_quick_it++;
       if (!cur_quick)
@@ -8207,8 +10496,8 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     if (thd->killed)
       goto err;
 
-    /* skip row if it will be retrieved by clustered PK scan */
-    if (pk_quick_select && pk_quick_select->row_in_ranges())
+    if (with_cpk_filter &&
+        pk_quick_select->row_in_ranges() != intersection )
       continue;
 
     cur_quick->file->position(cur_quick->record);
@@ -8222,14 +10511,13 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     sequence.
   */
   result= unique->get(head);
-  doing_pk_scan= FALSE;
   /*
-    index_merge currently doesn't support "using index" at all
+    index merge currently doesn't support "using index" at all
   */
   head->disable_keyread();
-  if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE))
+  if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE))
     result= 1;
-  DBUG_RETURN(result);
+ DBUG_RETURN(result);
 
 err:
   head->disable_keyread();
@@ -8237,6 +10525,17 @@ err:
 }
 
 
+int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
+
+{
+  int result;
+  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
+  result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
+                                    &read_record, FALSE, NULL, &unique);
+  doing_pk_scan= FALSE;
+  DBUG_RETURN(result);
+}
+
 /*
   Get next row for index_merge.
   NOTES
@@ -8273,6 +10572,32 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
   DBUG_RETURN(result);
 }
 
+int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge()
+
+{
+  int result;
+  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge");
+  result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select,
+                                    &read_record, TRUE, &filtered_scans,
+                                    &unique);
+  DBUG_RETURN(result);
+}
+
+int QUICK_INDEX_INTERSECT_SELECT::get_next()
+{
+  int result;
+  DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next");
+
+  if ((result= read_record.read_record(&read_record)) == -1)
+  {
+    result= HA_ERR_END_OF_FILE;
+    end_read_record(&read_record);
+    free_io_cache(head);
+  }
+
+  DBUG_RETURN(result);
+}
+
 
 /*
   Retrieve next record.
@@ -8433,12 +10758,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
       {
         if (error != HA_ERR_END_OF_FILE)
           DBUG_RETURN(error);
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
       }
       else
       {
         quick->save_last_pos();
-        queue_replaced(&queue);
+        queue_replace_top(&queue);
       }
 
       if (!have_prev_rowid)
@@ -8463,12 +10788,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
 
 int QUICK_RANGE_SELECT::reset()
 {
-  uint  mrange_bufsiz;
+  uint  buf_size;
   uchar *mrange_buff;
+  int   error;
+  HANDLER_BUFFER empty_buf;
   DBUG_ENTER("QUICK_RANGE_SELECT::reset");
-  next=0;
   last_range= NULL;
-  in_range= FALSE;
   cur_range= (QUICK_RANGE**) ranges.buffer;
 
   if (file->inited == handler::NONE)
@@ -8479,69 +10804,43 @@ int QUICK_RANGE_SELECT::reset()
         DBUG_RETURN(error);
   }
 
-  /* Do not allocate the buffers twice. */
-  if (multi_range_length)
-  {
-    DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
-    DBUG_RETURN(0);
-  }
-
-  /* Allocate the ranges array. */
-  DBUG_ASSERT(ranges.elements);
-  multi_range_length= min(multi_range_count, ranges.elements);
-  DBUG_ASSERT(multi_range_length > 0);
-  while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*)
-                                  my_malloc(multi_range_length *
-                                            sizeof(KEY_MULTI_RANGE),
-                                            MYF(MY_WME))))
+  /* Allocate buffer if we need one but haven't allocated it yet */
+  if (mrr_buf_size && !mrr_buf_desc)
   {
-    /* Try to shrink the buffers until it is 0. */
-    multi_range_length/= 2;
-  }
-  if (! multi_range)
-  {
-    multi_range_length= 0;
-    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-  }
-
-  /* Allocate the handler buffer if necessary.  */
-  if (file->ha_table_flags() & HA_NEED_READ_RANGE_BUFFER)
-  {
-    mrange_bufsiz= min(multi_range_bufsiz,
-                       ((uint)QUICK_SELECT_I::records + 1)* head->s->reclength);
-
-    while (mrange_bufsiz &&
-           ! my_multi_malloc(MYF(MY_WME),
-                             &multi_range_buff,
-                             (uint) sizeof(*multi_range_buff),
-                             &mrange_buff, (uint) mrange_bufsiz,
-                             NullS))
+    buf_size= mrr_buf_size;
+    while (buf_size && !my_multi_malloc(MYF(MY_WME),
+                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
+                                        &mrange_buff, buf_size,
+                                        NullS))
     {
       /* Try to shrink the buffers until both are 0. */
-      mrange_bufsiz/= 2;
+      buf_size/= 2;
     }
-    if (! multi_range_buff)
-    {
-      my_free((char*) multi_range, MYF(0));
-      multi_range= NULL;
-      multi_range_length= 0;
+    if (!mrr_buf_desc)
       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-    }
 
     /* Initialize the handler buffer. */
-    multi_range_buff->buffer= mrange_buff;
-    multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz;
-    multi_range_buff->end_of_used_area= mrange_buff;
-#ifdef HAVE_valgrind
+    mrr_buf_desc->buffer= mrange_buff;
+    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
+    mrr_buf_desc->end_of_used_area= mrange_buff;
+#ifdef HAVE_purify
     /*
       We need this until ndb will use the buffer efficiently
       (Now ndb stores  complete row in here, instead of only the used fields
       which gives us valgrind warnings in compare_record[])
     */
-    bzero((char*) mrange_buff, mrange_bufsiz);
+    bzero((char*) mrange_buff, buf_size);
 #endif
   }
-  DBUG_RETURN(0);
+
+  if (!mrr_buf_desc)
+    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
+ 
+  RANGE_SEQ_IF seq_funcs= {NULL, quick_range_seq_init, quick_range_seq_next, 0, 0};
+  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
+                                     mrr_flags, mrr_buf_desc? mrr_buf_desc: 
+                                                              &empty_buf);
+  DBUG_RETURN(error);
 }
 
 
@@ -8562,13 +10861,8 @@ int QUICK_RANGE_SELECT::reset()
 
 int QUICK_RANGE_SELECT::get_next()
 {
-  int             result;
-  KEY_MULTI_RANGE *mrange;
+  range_id_t dummy;
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
-  DBUG_ASSERT(multi_range_length && multi_range &&
-              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
-              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
-
   if (in_ror_merged_scan)
   {
     /*
@@ -8578,46 +10872,8 @@ int QUICK_RANGE_SELECT::get_next()
     head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
   }
 
-  for (;;)
-  {
-    if (in_range)
-    {
-      /* We did already start to read this key. */
-      result= file->read_multi_range_next(&mrange);
-      if (result != HA_ERR_END_OF_FILE)
-        goto end;
-    }
-
-    uint count= min(multi_range_length, ranges.elements -
-                    (cur_range - (QUICK_RANGE**) ranges.buffer));
-    if (count == 0)
-    {
-      /* Ranges have already been used up before. None is left for read. */
-      in_range= FALSE;
-      if (in_ror_merged_scan)
-        head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
-      DBUG_RETURN(HA_ERR_END_OF_FILE);
-    }
-    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
-    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
-         mrange_slot < mrange_end;
-         mrange_slot++)
-    {
-      last_range= *(cur_range++);
-      last_range->make_min_endpoint(&mrange_slot->start_key);
-      last_range->make_max_endpoint(&mrange_slot->end_key);
-      mrange_slot->range_flag= last_range->flag;
-    }
+  int result= file->multi_range_read_next(&dummy);
 
-    result= file->read_multi_range_first(&mrange, multi_range, count,
-                                         sorted, multi_range_buff);
-    if (result != HA_ERR_END_OF_FILE)
-      goto end;
-    in_range= FALSE; /* No matching rows; go to next set of ranges. */
-  }
-
-end:
-  in_range= ! result;
   if (in_ror_merged_scan)
   {
     /* Restore bitmaps set on entry */
@@ -8626,6 +10882,7 @@ end:
   DBUG_RETURN(result);
 }
 
+
 /*
   Get the next record with a different prefix.
 
@@ -8664,9 +10921,7 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
     int result;
     if (last_range)
     {
-      /*
-        Read the next record in the same range with prefix after cur_prefix.
-      */
+      /* Read the next record in the same range with prefix after cur_prefix. */
       DBUG_ASSERT(cur_prefix != NULL);
       result= file->ha_index_read_map(record, cur_prefix, keypart_map,
                                       HA_READ_AFTER_KEY);
@@ -8795,7 +11050,8 @@ bool QUICK_RANGE_SELECT::row_in_ranges()
  */
 
 QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
-                                     uint used_key_parts_arg)
+                                     uint used_key_parts_arg,
+                                     bool *create_err)
  :QUICK_RANGE_SELECT(*q), rev_it(rev_ranges),
   used_key_parts (used_key_parts_arg)
 {
@@ -8804,9 +11060,9 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
     Use default MRR implementation for reverse scans. No table engine
     currently can do an MRR scan with output in reverse index order.
   */
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_desc= NULL;
+  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+  mrr_buf_size= 0;
 
   QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
   QUICK_RANGE **end_range= pr + ranges.elements;
@@ -8915,6 +11171,7 @@ int QUICK_SELECT_DESC::get_next()
 /*
   Compare if found key is over max-value
   Returns 0 if key <= range->max_key
+  TODO: Figure out why can't this function be as simple as cmp_prev(). 
 */
 
 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
@@ -8984,30 +11241,53 @@ bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
 }
 
 
-void QUICK_RANGE_SELECT::add_info_string(String *str)
+void QUICK_SELECT_I::add_key_name(String *str, bool *first)
 {
   KEY *key_info= head->key_info + index;
+
+  if (*first)
+    *first= FALSE;
+  else
+    str->append(',');
   str->append(key_info->name);
 }
+ 
+
+void QUICK_RANGE_SELECT::add_info_string(String *str)
+{
+  bool first= TRUE;
+  
+  add_key_name(str, &first);
+}
 
 void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
 {
   QUICK_RANGE_SELECT *quick;
   bool first= TRUE;
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+
   str->append(STRING_WITH_LEN("sort_union("));
   while ((quick= it++))
   {
-    if (!first)
-      str->append(',');
-    else
-      first= FALSE;
-    quick->add_info_string(str);
+    quick->add_key_name(str, &first);
   }
   if (pk_quick_select)
+    pk_quick_select->add_key_name(str, &first);
+  str->append(')');
+}
+
+void QUICK_INDEX_INTERSECT_SELECT::add_info_string(String *str)
+{
+  QUICK_RANGE_SELECT *quick;
+  bool first= TRUE;
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+
+  str->append(STRING_WITH_LEN("sort_intersect("));
+  if (pk_quick_select)
+    pk_quick_select->add_key_name(str, &first);
+  while ((quick= it++))
   {
-    str->append(',');
-    pk_quick_select->add_info_string(str);
+    quick->add_key_name(str, &first);
   }
   str->append(')');
 }
@@ -9017,130 +11297,125 @@ void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
   bool first= TRUE;
   QUICK_SELECT_WITH_RECORD *qr;
   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+
   str->append(STRING_WITH_LEN("intersect("));
   while ((qr= it++))
   {
-    KEY *key_info= head->key_info + qr->quick->index;
-    if (!first)
-      str->append(',');
-    else
-      first= FALSE;
-    str->append(key_info->name);
+    qr->quick->add_key_name(str, &first);
   }
   if (cpk_quick)
-  {
-    KEY *key_info= head->key_info + cpk_quick->index;
-    str->append(',');
-    str->append(key_info->name);
-  }
+    cpk_quick->add_key_name(str, &first);
   str->append(')');
 }
 
+
 void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
 {
-  bool first= TRUE;
   QUICK_SELECT_I *quick;
+  bool first= TRUE;
   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+
   str->append(STRING_WITH_LEN("union("));
   while ((quick= it++))
   {
-    if (!first)
-      str->append(',');
-    else
+    if (first)
       first= FALSE;
+    else
+      str->append(',');
     quick->add_info_string(str);
   }
   str->append(')');
 }
 
 
-void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
-                                              String *used_lengths)
+void QUICK_SELECT_I::add_key_and_length(String *key_names,
+                                        String *used_lengths,
+                                        bool *first)
 {
   char buf[64];
   uint length;
   KEY *key_info= head->key_info + index;
+
+  if (*first)
+    *first= FALSE;
+  else
+  {
+    key_names->append(',');
+    used_lengths->append(',');
+  }
   key_names->append(key_info->name);
   length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
   used_lengths->append(buf, length);
 }
 
+
+void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
+                                              String *used_lengths)
+{
+  bool first= TRUE;
+
+  add_key_and_length(key_names, used_lengths, &first);
+}
+
 void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
                                                     String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  bool first= TRUE;
   QUICK_RANGE_SELECT *quick;
+  bool first= TRUE;
 
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+
   while ((quick= it++))
   {
-    if (first)
-      first= FALSE;
-    else
-    {
-      key_names->append(',');
-      used_lengths->append(',');
-    }
-
-    KEY *key_info= head->key_info + quick->index;
-    key_names->append(key_info->name);
-    length= longlong10_to_str(quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(buf, length);
+    quick->add_key_and_length(key_names, used_lengths, &first);
   }
+
+  if (pk_quick_select)
+    pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
+}
+
+
+void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
+                                                        String *used_lengths)
+{
+  QUICK_RANGE_SELECT *quick;
+  bool first= TRUE;
+
+  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
+
   if (pk_quick_select)
+    pk_quick_select->add_key_and_length(key_names, used_lengths, &first);
+
+  while ((quick= it++))
   {
-    KEY *key_info= head->key_info + pk_quick_select->index;
-    key_names->append(',');
-    key_names->append(key_info->name);
-    length= (longlong10_to_str(pk_quick_select->max_used_key_length, buf, 10)
-             - buf);
-    used_lengths->append(',');
-    used_lengths->append(buf, length);
+    quick->add_key_and_length(key_names, used_lengths, &first);
   }
 }
 
 void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
                                                       String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  bool first= TRUE;
   QUICK_SELECT_WITH_RECORD *qr;
+  bool first= TRUE;
+
   List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
+
   while ((qr= it++))
   {
-    KEY *key_info= head->key_info + qr->quick->index;
-    if (first)
-      first= FALSE;
-    else
-    {
-      key_names->append(',');
-      used_lengths->append(',');
-    }
-    key_names->append(key_info->name);
-    length= longlong10_to_str(qr->quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(buf, length);
+    qr->quick->add_key_and_length(key_names, used_lengths, &first);
   }
-
   if (cpk_quick)
-  {
-    KEY *key_info= head->key_info + cpk_quick->index;
-    key_names->append(',');
-    key_names->append(key_info->name);
-    length= longlong10_to_str(cpk_quick->max_used_key_length, buf, 10) - buf;
-    used_lengths->append(',');
-    used_lengths->append(buf, length);
-  }
+    cpk_quick->add_key_and_length(key_names, used_lengths, &first);
 }
 
 void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
                                                   String *used_lengths)
 {
-  bool first= TRUE;
   QUICK_SELECT_I *quick;
+  bool first= TRUE;
+
   List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
+
   while ((quick= it++))
   {
     if (first)
@@ -9169,7 +11444,7 @@ static bool get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
                        uchar *key_infix, uint *key_infix_len,
                        KEY_PART_INFO **first_non_infix_part);
 static bool
-check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
+check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                                Field::imagetype image_type);
 
 static void
@@ -9335,7 +11610,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
   /* Perform few 'cheap' tests whether this access method is applicable. */
   if (!join)
     DBUG_RETURN(NULL);        /* This is not a select statement. */
-  if ((join->tables != 1) ||  /* The query must reference one table. */
+  if ((join->table_count != 1) ||  /* The query must reference one table. */
       ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
        (!join->select_distinct)) ||
       (join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
@@ -9649,8 +11924,14 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
       cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                            &cur_param_idx);
       /* Check if this range tree can be used for prefix retrieval. */
+      COST_VECT dummy_cost;
+      uint mrr_flags= HA_MRR_USE_DEFAULT_IMPL;
+      uint mrr_bufsize=0;
       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
-                                                    cur_index_tree, TRUE);
+                                                   FALSE /*don't care*/,
+                                                   cur_index_tree, TRUE,
+                                                   &mrr_flags, &mrr_bufsize,
+                                                   &dummy_cost);
     }
     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
                        cur_group_key_parts, tree, cur_index_tree,
@@ -9739,14 +12020,14 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
 */
 
 static bool
-check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
+check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
                                Field::imagetype image_type)
 {
   DBUG_ENTER("check_group_min_max_predicates");
   DBUG_ASSERT(cond && min_max_arg_item);
 
   cond= cond->real_item();
-  Item::Type cond_type= cond->type();
+  Item::Type cond_type= cond->real_type();
   if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
   {
     DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
@@ -9762,16 +12043,27 @@ check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
   }
 
   /*
-    TODO:
-    This is a very crude fix to handle sub-selects in the WHERE clause
-    (Item_subselect objects). With the test below we rule out from the
-    optimization all queries with subselects in the WHERE clause. What has to
-    be done, is that here we should analyze whether the subselect references
-    the MIN/MAX argument field, and disallow the optimization only if this is
-    so.
+    Disallow loose index scan if the MIN/MAX argument field is referenced by
+    a subquery in the WHERE clause.
   */
+
   if (cond_type == Item::SUBSELECT_ITEM)
-    DBUG_RETURN(FALSE);
+  {
+    Item_subselect *subs_cond= (Item_subselect*) cond;
+    if (subs_cond->is_correlated)
+    {
+      DBUG_ASSERT(subs_cond->upper_refs.elements > 0);
+      List_iterator_fast<Item_subselect::Ref_to_outside>
+        li(subs_cond->upper_refs);
+      Item_subselect::Ref_to_outside *dep;
+      while ((dep= li++))
+      {
+        if (dep->item->eq(min_max_arg_item, FALSE))
+          DBUG_RETURN(FALSE);
+      }
+    }
+    DBUG_RETURN(TRUE);
+  }
 
   /*
     Condition of the form 'field' is equivalent to 'field <> 0' and thus
@@ -9918,13 +12210,14 @@ get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
       Find the range tree for the current keypart. We assume that
       index_range_tree points to the leftmost keypart in the index.
     */
-    for (cur_range= index_range_tree; cur_range;
+    for (cur_range= index_range_tree; 
+         cur_range && cur_range->type == SEL_ARG::KEY_RANGE;
          cur_range= cur_range->next_key_part)
     {
       if (cur_range->field->eq(cur_part->field))
         break;
     }
-    if (!cur_range)
+    if (!cur_range || cur_range->type != SEL_ARG::KEY_RANGE)
     {
       if (min_max_arg_part)
         return FALSE; /* The current keypart has no range predicates at all. */
@@ -10238,6 +12531,7 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
       quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                    index_tree,
+                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
                                                    &quick->alloc);
 
     /*
@@ -11294,11 +13588,9 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
 void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                       String *used_lengths)
 {
-  char buf[64];
-  uint length;
-  key_names->append(index_info->name);
-  length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
-  used_lengths->append(buf, length);
+  bool first= TRUE;
+
+  add_key_and_length(key_names, used_lengths, &first);
 }
 
 
@@ -11330,7 +13622,7 @@ static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
     tmp.append(STRING_WITH_LEN("(empty)"));
 
   DBUG_PRINT("info", ("SEL_TREE: 0x%lx (%s)  scans: %s", (long) tree, msg,
-                      tmp.c_ptr()));
+                      tmp.c_ptr_safe()));
 
   DBUG_VOID_RETURN;
 }
@@ -11357,6 +13649,7 @@ static void print_ror_scans_arr(TABLE *table, const char *msg,
   DBUG_VOID_RETURN;
 }
 
+
 /*****************************************************************************
 ** Print a quick range for debugging
 ** TODO:
@@ -11369,7 +13662,6 @@ print_key(KEY_PART *key_part, const uchar *key, uint used_length)
 {
   char buff[1024];
   const uchar *key_end= key+used_length;
-  String tmp(buff,sizeof(buff),&my_charset_bin);
   uint store_length;
   TABLE *table= key_part->field->table;
   my_bitmap_map *old_sets[2];
@@ -11378,6 +13670,7 @@ print_key(KEY_PART *key_part, const uchar *key, uint used_length)
 
   for (; key < key_end; key+=store_length, key_part++)
   {
+    String tmp(buff,sizeof(buff),&my_charset_bin);
     Field *field=      key_part->field;
     store_length= key_part->store_length;
 
@@ -11465,7 +13758,7 @@ void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
   /* purecov: end */    
 }
 
-void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
+void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose)
 {
   List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
   QUICK_RANGE_SELECT *quick;
diff --git a/sql/opt_range.h b/sql/opt_range.h
index eaaf4a2a2a8..57a62bbc143 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -195,13 +195,16 @@ class QUICK_RANGE :public Sql_alloc {
 
   4. Delete the select:
     delete quick;
-
+  
+  NOTE 
+    quick select doesn't use Sql_alloc/MEM_ROOT allocation because "range
+    checked for each record" functionality may create/destroy
+    O(#records_in_some_table) quick selects during query execution.
 */
 
 class QUICK_SELECT_I
 {
 public:
-  bool sorted;
   ha_rows records;  /* estimate of # of records to be retrieved */
   double  read_time; /* time to perform this retrieval          */
   TABLE   *head;
@@ -274,14 +277,21 @@ public:
   virtual bool reverse_sorted() = 0;
   virtual bool unique_key_range() { return false; }
 
+  /*
+    Request that this quick select produces sorted output. Not all quick
+    selects can do it, the caller is responsible for calling this function
+    only for those quick selects that can.
+  */
+  virtual void need_sorted_output() = 0;
   enum {
     QS_TYPE_RANGE = 0,
-    QS_TYPE_INDEX_MERGE = 1,
-    QS_TYPE_RANGE_DESC = 2,
-    QS_TYPE_FULLTEXT   = 3,
-    QS_TYPE_ROR_INTERSECT = 4,
-    QS_TYPE_ROR_UNION = 5,
-    QS_TYPE_GROUP_MIN_MAX = 6
+    QS_TYPE_INDEX_INTERSECT = 1,
+    QS_TYPE_INDEX_MERGE = 2,
+    QS_TYPE_RANGE_DESC = 3,
+    QS_TYPE_FULLTEXT   = 4,
+    QS_TYPE_ROR_INTERSECT = 5,
+    QS_TYPE_ROR_UNION = 6,
+    QS_TYPE_GROUP_MIN_MAX = 7
   };
 
   /* Get type of this quick select - one of the QS_TYPE_* values */
@@ -307,6 +317,10 @@ public:
     Save ROWID of last retrieved row in file->ref. This used in ROR-merging.
   */
   virtual void save_last_pos(){};
+  
+  void add_key_and_length(String *key_names,
+                          String *used_lengths,
+                          bool *first);
 
   /*
     Append comma-separated list of keys this quick select uses to key_names;
@@ -316,13 +330,16 @@ public:
   virtual void add_keys_and_lengths(String *key_names,
                                     String *used_lengths)=0;
 
+  void add_key_name(String *str, bool *first);
+
   /*
     Append text representation of quick select structure (what and how is
     merged) to str. The result is added to "Extra" field in EXPLAIN output.
     This function is implemented only by quick selects that merge other quick
     selects output and/or can produce output suitable for merging.
   */
-  virtual void add_info_string(String *str) {};
+  virtual void add_info_string(String *str) {}
+
   /*
     Return 1 if any index used by this quick select
     uses field which is marked in passed bitmap.
@@ -339,6 +356,12 @@ public:
     Table record buffer used by this quick select.
   */
   uchar    *record;
+
+  virtual void replace_handler(handler *new_file)
+  {
+    DBUG_ASSERT(0); /* Only supported in QUICK_RANGE_SELECT */
+  }
+
 #ifndef DBUG_OFF
   /*
     Print quick select information to DBUG_FILE. Caller is responsible
@@ -353,6 +376,22 @@ struct st_qsel_param;
 class PARAM;
 class SEL_ARG;
 
+
+/*
+  MRR range sequence, array<QUICK_RANGE> implementation: sequence traversal
+  context.
+*/
+typedef struct st_quick_range_seq_ctx
+{
+  QUICK_RANGE **first;
+  QUICK_RANGE **cur;
+  QUICK_RANGE **last;
+} QUICK_RANGE_SEQ_CTX;
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags);
+bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+
+
 /*
   Quick select that does a range scan on a single key. The records are
   returned in key order.
@@ -360,62 +399,47 @@ class SEL_ARG;
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
 protected:
-  bool next,dont_free,in_ror_merged_scan;
   /* true if we enabled key only reads */
   bool doing_key_read;
-public:
-  int error;
-protected:
   handler *file;
-  /*
-    If true, this quick select has its "own" handler object which should be
-    closed no later then this quick select is deleted.
-  */
-  bool free_file;
-  bool in_range;
-  uint multi_range_count; /* copy from thd->variables.multi_range_count */
-  uint multi_range_length; /* the allocated length for the array */
-  uint multi_range_bufsiz; /* copy from thd->variables.read_rnd_buff_size */
-  KEY_MULTI_RANGE *multi_range; /* the multi-range array (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
-  HANDLER_BUFFER *multi_range_buff; /* the handler buffer (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
+
+  /* Members to deal with case when this quick select is a ROR-merged scan */
+  bool in_ror_merged_scan;
   MY_BITMAP column_bitmap, *save_read_set, *save_write_set;
+  bool free_file;   /* TRUE <=> this->file is "owned" by this quick select */
 
-  friend class TRP_ROR_INTERSECT;
-  friend
-  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
-                                               struct st_table_ref *ref,
-                                               ha_rows records);
-  friend bool get_quick_keys(PARAM *param,
-                             QUICK_RANGE_SELECT *quick,KEY_PART *key,
-                             SEL_ARG *key_tree,
-                             uchar *min_key, uint min_key_flag,
-                             uchar *max_key, uint max_key_flag);
-  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
-                                              SEL_ARG *key_tree,
-                                              MEM_ROOT *alloc);
-  friend class QUICK_SELECT_DESC;
-  friend class QUICK_INDEX_MERGE_SELECT;
-  friend class QUICK_ROR_INTERSECT_SELECT;
-  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  /* Range pointers to be used when not using MRR interface */
+  /* Members needed to use the MRR interface */
+  QUICK_RANGE_SEQ_CTX qr_traversal_ctx;
+public:
+  uint mrr_flags; /* Flags to be used with MRR interface */
+protected:
+  uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */  
+  HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */
 
+  /* Info about index we're scanning */
+  
   DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */
   QUICK_RANGE **cur_range;  /* current element in ranges  */
-
+  
   QUICK_RANGE *last_range;
+  
   KEY_PART *key_parts;
   KEY_PART_INFO *key_part_info;
+  
+  bool dont_free; /* Used by QUICK_SELECT_DESC */
+
   int cmp_next(QUICK_RANGE *range);
   int cmp_prev(QUICK_RANGE *range);
   bool row_in_ranges();
 public:
   MEM_ROOT alloc;
 
-  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
-                     MEM_ROOT *parent_alloc=NULL);
+  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc,
+                     MEM_ROOT *parent_alloc, bool *create_err);
   ~QUICK_RANGE_SELECT();
-
+  
+  void need_sorted_output();
   int init();
   int reset(void);
   int get_next();
@@ -433,8 +457,41 @@ public:
 #ifndef DBUG_OFF
   void dbug_dump(int indent, bool verbose);
 #endif
+  virtual void replace_handler(handler *new_file) { file= new_file; }
 private:
   /* Default copy ctor used by QUICK_SELECT_DESC */
+  friend class TRP_ROR_INTERSECT;
+  friend
+  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
+                                               struct st_table_ref *ref,
+                                               ha_rows records);
+  friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick, 
+                             KEY_PART *key, SEL_ARG *key_tree, 
+                             uchar *min_key, uint min_key_flag,
+                             uchar *max_key, uint max_key_flag);
+  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
+                                              SEL_ARG *key_tree,
+                                              uint mrr_flags,
+                                              uint mrr_buf_size,
+                                              MEM_ROOT *alloc);
+  friend class QUICK_SELECT_DESC;
+  friend class QUICK_INDEX_SORT_SELECT;
+  friend class QUICK_INDEX_MERGE_SELECT;
+  friend class QUICK_ROR_INTERSECT_SELECT;
+  friend class QUICK_INDEX_INTERSECT_SELECT;
+  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  friend bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+  friend range_seq_t quick_range_seq_init(void *init_param,
+                                          uint n_ranges, uint flags);
+  friend 
+  int read_keys_and_merge_scans(THD *thd, TABLE *head,
+                                List<QUICK_RANGE_SELECT> quick_selects,
+                                QUICK_RANGE_SELECT *pk_quick_select,
+                                READ_RECORD *read_record,
+                                bool intersection,
+                                key_map *filtered_scans,
+                                Unique **unique_ptr);
+
 };
 
 
@@ -442,48 +499,53 @@ class QUICK_RANGE_SELECT_GEOM: public QUICK_RANGE_SELECT
 {
 public:
   QUICK_RANGE_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg,
-                          bool no_alloc, MEM_ROOT *parent_alloc)
-    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc)
+                          bool no_alloc, MEM_ROOT *parent_alloc, 
+                          bool *create_err)
+    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc,
+    create_err)
     {};
   virtual int get_next();
 };
 
 
 /*
-  QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.
+  QUICK_INDEX_SORT_SELECT is the base class for the common functionality of:
+  - QUICK_INDEX_MERGE_SELECT, access based on multi-index merge/union 
+  - QUICK_INDEX_INTERSECT_SELECT, access based on  multi-index intersection 
+    
 
-    QUICK_INDEX_MERGE_SELECT uses
+    QUICK_INDEX_SORT_SELECT uses
      * QUICK_RANGE_SELECTs to get rows
-     * Unique class to remove duplicate rows
+     * Unique class
+       - to remove duplicate rows for QUICK_INDEX_MERGE_SELECT
+       - to intersect rows for QUICK_INDEX_INTERSECT_SELECT
 
   INDEX MERGE OPTIMIZER
-    Current implementation doesn't detect all cases where index_merge could
+    Current implementation doesn't detect all cases where index merge could
     be used, in particular:
-     * index_merge will never be used if range scan is possible (even if
-       range scan is more expensive)
 
-     * index_merge+'using index' is not supported (this the consequence of
-       the above restriction)
+     * index_merge+'using index' is not supported
 
      * If WHERE part contains complex nested AND and OR conditions, some ways
-       to retrieve rows using index_merge will not be considered. The choice
+       to retrieve rows using index merge will not be considered. The choice
        of read plan may depend on the order of conjuncts/disjuncts in WHERE
        part of the query, see comments near imerge_list_or_list and
        SEL_IMERGE::or_sel_tree_with_checks functions for details.
 
-     * There is no "index_merge_ref" method (but index_merge on non-first
+     * There is no "index_merge_ref" method (but index merge on non-first
        table in join is possible with 'range checked for each record').
 
-    See comments around SEL_IMERGE class and test_quick_select for more
-    details.
 
   ROW RETRIEVAL ALGORITHM
 
-    index_merge uses Unique class for duplicates removal.  index_merge takes
-    advantage of Clustered Primary Key (CPK) if the table has one.
-    The index_merge algorithm consists of two phases:
+    index merge/intersection uses Unique class for duplicates removal. 
+    index merge/intersection takes advantage of Clustered Primary Key (CPK)
+    if the table has one.
+    The index merge/intersection algorithm consists of two phases:
+
+    Phase 1 
+    (implemented by a QUICK_INDEX_MERGE_SELECT::read_keys_and_merge call):
 
-    Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
     prepare()
     {
       activate 'index only';
@@ -497,32 +559,32 @@ public:
       deactivate 'index only';
     }
 
-    Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
-    calls):
+    Phase 2 
+    (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls):
 
     fetch()
     {
-      retrieve all rows from row pointers stored in Unique;
+      retrieve all rows from row pointers stored in Unique
+      (merging/intersecting them);
       free Unique;
-      retrieve all rows for CPK scan;
+      if (! intersection) 
+        retrieve all rows for CPK scan;
     }
 */
 
-class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
+class QUICK_INDEX_SORT_SELECT : public QUICK_SELECT_I
 {
+protected:
   Unique *unique;
 public:
-  QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table);
-  ~QUICK_INDEX_MERGE_SELECT();
+  QUICK_INDEX_SORT_SELECT(THD *thd, TABLE *table);
+  ~QUICK_INDEX_SORT_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
-  int  get_next();
   bool reverse_sorted() { return false; }
   bool unique_key_range() { return false; }
-  int get_type() { return QS_TYPE_INDEX_MERGE; }
-  void add_keys_and_lengths(String *key_names, String *used_lengths);
-  void add_info_string(String *str);
   bool is_keys_used(const MY_BITMAP *fields);
 #ifndef DBUG_OFF
   void dbug_dump(int indent, bool verbose);
@@ -530,21 +592,53 @@ public:
 
   bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
 
-  /* range quick selects this index_merge read consists of */
+  /* range quick selects this index merge/intersect consists of */
   List<QUICK_RANGE_SELECT> quick_selects;
 
   /* quick select that uses clustered primary key (NULL if none) */
   QUICK_RANGE_SELECT* pk_quick_select;
 
+  MEM_ROOT alloc;
+  THD *thd;
+  virtual int read_keys_and_merge()= 0;
+  /* used to get rows collected in Unique */
+  READ_RECORD read_record;
+};
+
+
+
+class QUICK_INDEX_MERGE_SELECT : public QUICK_INDEX_SORT_SELECT
+{
+private:
   /* true if this select is currently doing a clustered PK scan */
   bool  doing_pk_scan;
+protected:
+  int read_keys_and_merge();
 
-  MEM_ROOT alloc;
-  THD *thd;
+public:
+  QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table)
+    :QUICK_INDEX_SORT_SELECT(thd, table) {}
+
+  int get_next();
+  int get_type() { return QS_TYPE_INDEX_MERGE; }
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
+};
+
+class QUICK_INDEX_INTERSECT_SELECT : public QUICK_INDEX_SORT_SELECT
+{
+protected:
   int read_keys_and_merge();
 
-  /* used to get rows collected in Unique */
-  READ_RECORD read_record;
+public:
+  QUICK_INDEX_INTERSECT_SELECT(THD *thd, TABLE *table)
+    :QUICK_INDEX_SORT_SELECT(thd, table) {}
+
+  key_map filtered_scans;
+  int get_next();
+  int get_type() { return QS_TYPE_INDEX_INTERSECT; }
+  void add_keys_and_lengths(String *key_names, String *used_lengths);
+  void add_info_string(String *str);
 };
 
 
@@ -575,6 +669,7 @@ public:
   ~QUICK_ROR_INTERSECT_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -637,6 +732,7 @@ public:
   ~QUICK_ROR_UNION_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -758,6 +854,7 @@ public:
   void adjust_prefix_ranges();
   bool alloc_buffers();
   int init();
+  void need_sorted_output() { /* always do it */ }
   int reset();
   int get_next();
   bool reverse_sorted() { return false; }
@@ -773,7 +870,8 @@ public:
 class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
 {
 public:
-  QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts);
+  QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts, 
+                    bool *create_err);
   int get_next();
   bool reverse_sorted() { return 1; }
   int get_type() { return QS_TYPE_RANGE_DESC; }
@@ -790,6 +888,13 @@ class SQL_SELECT :public Sql_alloc {
  public:
   QUICK_SELECT_I *quick;	// If quick-select used
   COND		*cond;		// where condition
+
+  /*
+    When using Index Condition Pushdown: condition that we've had before
+    extracting and pushing index condition.
+    In other cases, NULL.
+  */
+  Item *pre_idx_push_select_cond;
   TABLE	*head;
   IO_CACHE file;		// Positions to used records
   ha_rows records;		// Records in use if read from file
@@ -806,7 +911,7 @@ class SQL_SELECT :public Sql_alloc {
   {
     key_map tmp;
     tmp.set_all();
-    return test_quick_select(thd, tmp, 0, limit, force_quick_range) < 0;
+    return test_quick_select(thd, tmp, 0, limit, force_quick_range, FALSE) < 0;
   }
   /* 
     RETURN
@@ -822,21 +927,25 @@ class SQL_SELECT :public Sql_alloc {
     return rc;
   }
   int test_quick_select(THD *thd, key_map keys, table_map prev_tables,
-			ha_rows limit, bool force_quick_range);
+			ha_rows limit, bool force_quick_range, 
+                        bool ordered_output);
 };
 
 
-class FT_SELECT: public QUICK_RANGE_SELECT {
+class FT_SELECT: public QUICK_RANGE_SELECT 
+{
 public:
-  FT_SELECT(THD *thd, TABLE *table, uint key) :
-      QUICK_RANGE_SELECT (thd, table, key, 1) { VOID(init()); }
+  FT_SELECT(THD *thd, TABLE *table, uint key, bool *create_err) :
+      QUICK_RANGE_SELECT (thd, table, key, 1, NULL, create_err) 
+  { (void) init(); }
   ~FT_SELECT() { file->ft_end(); }
-  int init() { return error=file->ft_init(); }
+  int init() { return file->ft_init(); }
   int reset() { return 0; }
-  int get_next() { return error= file->ha_ft_read(record); }
+  int get_next() { return file->ha_ft_read(record); }
   int get_type() { return QS_TYPE_FULLTEXT; }
 };
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key);
 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                              struct st_table_ref *ref,
                                              ha_rows records);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
new file mode 100644
index 00000000000..da6086d6cdc
--- /dev/null
+++ b/sql/opt_range_mrr.cc
@@ -0,0 +1,346 @@
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
+ ****************************************************************************/
+
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry 
+{
+  /* 
+    Pointers in min and max keys. They point to right-after-end of key
+    images. The 0-th entry has these pointing to key tuple start.
+  */
+  uchar *min_key, *max_key;
+  
+  /* 
+    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
+    min_key_flag may have NULL_RANGE set.
+  */
+  uint min_key_flag, max_key_flag;
+  
+  /* Number of key parts */
+  uint min_key_parts, max_key_parts;
+  SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
+
+
+/*
+  MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
+{
+  uint keyno;      /* index of used tree in SEL_TREE structure */
+  uint real_keyno; /* Number of the index in tables */
+  PARAM *param;
+  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+  
+  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+  int i; /* Index of last used element in the above array */
+  
+  bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+
+  SYNOPSIS
+    init()
+      init_params  SEL_ARG tree traversal context
+      n_ranges     [ignored] The number of ranges obtained 
+      flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  RETURN
+    Value of init_param
+*/
+
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+  seq->at_start= TRUE;
+  seq->stack[0].key_tree= NULL;
+  seq->stack[0].min_key= seq->param->min_key;
+  seq->stack[0].min_key_flag= 0;
+  seq->stack[0].min_key_parts= 0;
+
+  seq->stack[0].max_key= seq->param->max_key;
+  seq->stack[0].max_key_flag= 0;
+  seq->stack[0].max_key_parts= 0;
+  seq->i= 0;
+  return init_param;
+}
+
+
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+  RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+  RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+  
+  cur->key_tree= key_tree;
+  cur->min_key= prev->min_key;
+  cur->max_key= prev->max_key;
+  cur->min_key_parts= prev->min_key_parts;
+  cur->max_key_parts= prev->max_key_parts;
+
+  uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length;
+  cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key,
+                                            prev->min_key_flag);
+  cur->max_key_parts += key_tree->store_max(stor_length, &cur->max_key,
+                                            prev->max_key_flag);
+
+  cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+  cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
+
+  if (key_tree->is_null_interval())
+    cur->min_key_flag |= NULL_RANGE;
+  (arg->i)++;
+}
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: get the next interval
+  
+  SYNOPSIS
+    sel_arg_range_seq_next()
+      rseq        Value returned from sel_arg_range_seq_init
+      range  OUT  Store information about the range here
+
+  DESCRIPTION
+    This is "get_next" function for Range sequence interface implementation
+    for SEL_ARG* tree.
+
+  IMPLEMENTATION
+    The traversal also updates those param members:
+      - is_ror_scan
+      - range_count
+      - max_key_part
+
+  RETURN
+    FALSE  Ok
+    TRUE   No more ranges in the sequence
+*/
+
+#if (_MSC_FULL_VER == 160030319)
+/*
+   Workaround Visual Studio 2010 RTM compiler backend bug, the function enters 
+   infinite loop.
+ */
+#pragma optimize("g", off)
+#endif
+
+bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  SEL_ARG *key_tree;
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+  if (seq->at_start)
+  {
+    key_tree= seq->start;
+    seq->at_start= FALSE;
+    goto walk_up_n_right;
+  }
+
+  key_tree= seq->stack[seq->i].key_tree;
+  /* Ok, we're at some "full tuple" position in the tree */
+ 
+  /* Step down if we can */
+  if (key_tree->next && key_tree->next != &null_element)
+  {
+    //step down; (update the tuple, we'll step right and stay there)
+    seq->i--;
+    step_down_to(seq, key_tree->next);
+    key_tree= key_tree->next;
+    seq->param->is_ror_scan= FALSE;
+    goto walk_right_n_up;
+  }
+
+  /* Ok, can't step down, walk left until we can step down */
+  while (1)
+  {
+    if (seq->i == 1) // can't step left
+      return 1;
+    /* Step left */
+    seq->i--;
+    key_tree= seq->stack[seq->i].key_tree;
+
+    /* Step down if we can */
+    if (key_tree->next && key_tree->next != &null_element)
+    {
+      // Step down; update the tuple
+      seq->i--;
+      step_down_to(seq, key_tree->next);
+      key_tree= key_tree->next;
+      break;
+    }
+  }
+
+  /*
+    Ok, we've stepped down from the path to previous tuple.
+    Walk right-up while we can
+  */
+walk_right_n_up:
+  while (key_tree->next_key_part && key_tree->next_key_part != &null_element && 
+         key_tree->next_key_part->part == key_tree->part + 1 &&
+         key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+  {
+    {
+      RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+      uint min_key_length= cur->min_key - seq->param->min_key;
+      uint max_key_length= cur->max_key - seq->param->max_key;
+      uint len= cur->min_key - cur[-1].min_key;
+      if (!(min_key_length == max_key_length &&
+            !memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
+            !key_tree->min_flag && !key_tree->max_flag))
+      {
+        seq->param->is_ror_scan= FALSE;
+        if (!key_tree->min_flag)
+          cur->min_key_parts += 
+            key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
+                                                   &cur->min_key,
+                                                   &cur->min_key_flag);
+        if (!key_tree->max_flag)
+          cur->max_key_parts += 
+            key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
+                                                   &cur->max_key,
+                                                   &cur->max_key_flag);
+        break;
+      }
+    }
+  
+    /*
+      Ok, current atomic interval is in form "t.field=const" and there is
+      next_key_part interval. Step right, and walk up from there.
+    */
+    key_tree= key_tree->next_key_part;
+
+walk_up_n_right:
+    while (key_tree->prev && key_tree->prev != &null_element)
+    {
+      /* Step up */
+      key_tree= key_tree->prev;
+    }
+    step_down_to(seq, key_tree);
+  }
+
+  /* Ok got a tuple */
+  RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+  uint min_key_length= cur->min_key - seq->param->min_key;
+  
+  range->ptr= (char*)(intptr)(key_tree->part);
+  if (cur->min_key_flag & GEOM_FLAG)
+  {
+    range->range_flag= cur->min_key_flag;
+
+    /* Here minimum contains also function code bits, and maximum is +inf */
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= min_key_length;
+    range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+  }
+  else
+  {
+    range->range_flag= cur->min_key_flag | cur->max_key_flag;
+    
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= cur->min_key - seq->param->min_key;
+    range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
+    range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : 
+                                                           HA_READ_KEY_EXACT);
+
+    range->end_key.key=    seq->param->max_key;
+    range->end_key.length= cur->max_key - seq->param->max_key;
+    range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : 
+                                                         HA_READ_AFTER_KEY);
+    range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
+
+    if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+        (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+        (seq->param->table->key_info[seq->real_keyno].flags & HA_NOSAME) &&
+        range->start_key.length == range->end_key.length &&
+        !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+      range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
+      
+    if (seq->param->is_ror_scan)
+    {
+      /*
+        If we get here, the condition on the key was converted to form
+        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+          somecond(keyXpart{key_tree->part})"
+        Check if
+          somecond is "keyXpart{key_tree->part} = const" and
+          uncovered "tail" of KeyX parts is either empty or is identical to
+          first members of clustered primary key.
+      */
+      if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+            (range->start_key.length == range->end_key.length) &&
+            !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+            is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+        seq->param->is_ror_scan= FALSE;
+    }
+  }
+  seq->param->range_count++;
+  seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+  return 0;
+}
+
+#if (_MSC_FULL_VER == 160030319)
+/* VS2010 compiler bug workaround */
+#pragma optimize("g", on)
+#endif
+
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks array<QUICK_RANGE>
+ ****************************************************************************/
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: initialize
+  
+  SYNOPSIS
+    quick_range_seq_init()
+      init_param  Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
+      n_ranges    Number of ranges in the sequence (ignored)
+      flags       MRR flags (currently not used) 
+
+  RETURN
+    Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+  quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur + 
+                                  quick->ranges.elements;
+  return &quick->qr_traversal_ctx;
+}
+
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: get next
+  
+  SYNOPSIS
+    quick_range_seq_next()
+      rseq        Value returned from quick_range_seq_init
+      range  OUT  Store information about the range here
+
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
+*/
+
+bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+  if (ctx->cur == ctx->last)
+    return 1; /* no more ranges */
+
+  QUICK_RANGE *cur= *(ctx->cur);
+  cur->make_min_endpoint(&range->start_key);
+  cur->make_max_endpoint(&range->end_key);
+  range->range_flag= cur->flag;
+  ctx->cur++;
+  return 0;
+}
+
+
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
new file mode 100644
index 00000000000..4559b76764e
--- /dev/null
+++ b/sql/opt_subselect.cc
@@ -0,0 +1,5227 @@
+/**
+  @file
+
+  @brief
+    Semi-join subquery optimizations code
+
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#include <my_bit.h>
+
+/*
+  This file contains optimizations for semi-join subqueries.
+  
+  Contents
+  --------
+  1. What is a semi-join subquery
+  2. General idea about semi-join execution
+  2.1 Correlated vs uncorrelated semi-joins
+  2.2 Mergeable vs non-mergeable semi-joins
+  3. Code-level view of semi-join processing
+  3.1 Conversion
+  3.1.1 Merged semi-join TABLE_LIST object
+  3.1.2 Non-merged semi-join data structure
+  3.2 Semi-joins and query optimization
+  3.2.1 Non-merged semi-joins and join optimization
+  3.2.2 Merged semi-joins and join optimization
+  3.3 Semi-joins and query execution
+
+  1. What is a semi-join subquery
+  -------------------------------
+  We use this definition of semi-join:
+
+    outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that
+                                             exist inner_tbl.row, for which 
+                                             cond(outer_tbl.row,inner_tbl.row)
+                                             is satisfied}
+  
+  That is, semi-join operation is similar to inner join operation, with
+  exception that we don't care how many matches a row from outer_tbl has in
+  inner_tbl.
+
+  In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of
+  the WHERE/ON clause.
+
+  2. General idea about semi-join execution
+  -----------------------------------------
+  We can execute semi-join in a way similar to inner join, with exception that
+  we need to somehow ensure that we do not generate record combinations that
+  differ only in rows of inner tables.
+  There is a number of different ways to achieve this property, implemented by
+  a number of semi-join execution strategies.
+  Some strategies can handle any semi-joins, other can be applied only to
+  semi-joins that have certain properties that are described below:
+
+  2.1 Correlated vs uncorrelated semi-joins
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Uncorrelated semi-joins are special in the respect that they allow to
+   - execute the subquery (possible as it's uncorrelated)
+   - somehow make sure that generated set does not have duplicates
+   - perform an inner join with outer tables.
+  
+  or, rephrasing in SQL form:
+
+  SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond)
+    ->
+  SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond)
+
+  2.2 Mergeable vs non-mergeable semi-joins
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Semi-join operation has some degree of commutability with inner join
+  operation: we can join subquery's tables with ouside table(s) and eliminate
+  duplicate record combination after that:
+
+    ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) ->
+              |
+              +-------------------------------+
+                                              v
+    ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*)
+ 
+  In order for this to work, subquery's top-level operation must be join, and
+  grouping or ordering with limit (grouping or ordering with limit are not
+  commutative with duplicate removal). In other words, the conversion is
+  possible when the subquery doesn't have GROUP BY clause, any aggregate
+  functions*, or ORDER BY ... LIMIT clause.
+
+  Definitions:
+  - Subquery whose top-level operation is a join is called *mergeable semi-join*
+  - All other kinds of semi-join subqueries are considered non-mergeable.
+
+  *- this requirement is actually too strong, but its exceptions are too
+  complicated to be considered here.
+
+  3. Code-level view of semi-join processing
+  ------------------------------------------
+  
+  3.1 Conversion and pre-optimization data structures
+  ---------------------------------------------------
+  * When doing JOIN::prepare for the subquery, we detect that it can be
+    converted into a semi-join and register it in parent_join->sj_subselects
+
+  * At the start of parent_join->optimize(), the predicate is converted into 
+    a semi-join node. A semi-join node is a TABLE_LIST object that is linked
+    somewhere in parent_join->join_list (either it is just present there, or
+    it is a descendant of some of its members).
+  
+  There are two kinds of semi-joins:
+  - Merged semi-joins
+  - Non-merged semi-joins
+   
+  3.1.1 Merged semi-join TABLE_LIST object
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Merged semi-join object is a TABLE_LIST that contains a sub-join of 
+  subquery tables and the semi-join ON expression (in this respect it is 
+  very similar to nested outer join representation)
+  Merged semi-join represents this SQL:
+
+    ... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr
+  
+  Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set.
+ 
+  3.1.2 Non-merged semi-join data structure
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery
+  that produces rows. It is similar to a base table and represents this SQL:
+    
+    ... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr
+  
+  Subquery items that were converted into semi-joins are removed from the WHERE
+  clause. (They do remain in PS-saved WHERE clause, and they replace themselves
+  with Item_int(1) on subsequent re-executions).
+
+  3.2 Semi-joins and join optimization
+  ------------------------------------
+  
+  3.2.1 Non-merged semi-joins and join optimization
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  For join optimization purposes, non-merged semi-join nests are similar to
+  base tables - they've got one JOIN_TAB, which can be accessed with one of
+  two methods:
+   - full table scan (representing SJ-Materialization-Scan strategy)
+   - eq_ref-like table lookup (representing SJ-Materialization-Lookup)
+
+  Unlike regular base tables, non-merged semi-joins have:
+   - non-zero JOIN_TAB::startup_cost, and
+   - join_tab->table->is_filled_at_execution()==TRUE, which means one
+     cannot do const table detection or range analysis or other table data-
+     dependent inferences
+  // instead, get_delayed_table_estimates() runs optimization on the nest so that 
+  // we get an idea about temptable size
+  
+  3.2.2 Merged semi-joins and join optimization
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   - optimize_semijoin_nests() does pre-optimization 
+   - during join optimization, the join has one JOIN_TAB (or is it POSITION?) 
+     array, and suffix-based detection is used, see advance_sj_state()
+   - after join optimization is done, get_best_combination() switches 
+     the data-structure to prefix-based, multiple JOIN_TAB ranges format.
+
+  3.3 Semi-joins and query execution
+  ----------------------------------
+  * Join executor has hooks for all semi-join strategies.
+    TODO elaborate.
+
+*/
+
+
+static
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
+static bool replace_where_subcondition(JOIN *join, Item **expr, 
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields);
+static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
+                                 void *arg);
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
+static bool convert_subq_to_jtbm(JOIN *parent_join, 
+                                 Item_in_subselect *subq_pred, bool *remove);
+static TABLE_LIST *alloc_join_nest(THD *thd);
+static uint get_tmp_table_rec_length(Item **p_list, uint elements);
+static double get_tmp_table_lookup_cost(THD *thd, double row_count,
+                                        uint row_size);
+static double get_tmp_table_write_cost(THD *thd, double row_count,
+                                       uint row_size);
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred);
+static void remove_sj_conds(Item **tree);
+static bool is_cond_sj_in_equality(Item *item);
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab);
+static Item *remove_additional_cond(Item* conds);
+static void remove_subq_pushed_predicates(JOIN *join, Item **where);
+
+enum_nested_loop_state 
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+
+
+/*
+  Check if Materialization strategy is allowed for given subquery predicate.
+
+  @param thd           Thread handle
+  @param in_subs       The subquery predicate
+  @param child_select  The select inside predicate (the function will
+                       check it is the only one)
+
+  @return TRUE  - Materialization is applicable 
+          FALSE - Otherwise
+*/
+
+bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs,
+                                   st_select_lex *child_select)
+{
+  st_select_lex_unit* parent_unit= child_select->master_unit();
+  /*
+    Check if the subquery predicate can be executed via materialization.
+    The required conditions are:
+    0. The materialization optimizer switch was set.
+    1. Subquery is a single SELECT (not a UNION).
+       TODO: this is a limitation that can be fixed
+    2. Subquery is not a table-less query. In this case there is no
+       point in materializing.
+    2A The upper query is not a table-less SELECT ... FROM DUAL. We
+       can't do materialization for SELECT .. FROM DUAL because it
+       does not call setup_subquery_materialization(). We could make 
+       SELECT ... FROM DUAL call that function but that doesn't seem
+       to be the case that is worth handling.
+    3. Either the subquery predicate is a top-level predicate, or at
+       least one partial match strategy is enabled. If no partial match
+       strategy is enabled, then materialization cannot be used for
+       non-top-level queries because it cannot handle NULLs correctly.
+    4. Subquery is non-correlated
+       TODO:
+       This condition is too restrictive (limitation). It can be extended to:
+       (Subquery is non-correlated ||
+        Subquery is correlated to any query outer to IN predicate ||
+        (Subquery is correlated to the immediate outer query &&
+         Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+         aggregate functions}) && subquery predicate is not under "NOT IN"))
+
+    (*) The subquery must be part of a SELECT or CREATE TABLE ... SELECT statement.
+        The current condition also excludes multi-table update statements.
+  A note about prepared statements: we want the if-branch to be taken on
+  PREPARE and each EXECUTE. The rewrites are only done once, but we need 
+  select_lex->sj_subselects list to be populated for every EXECUTE. 
+
+  */
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&      // 0
+        !child_select->is_part_of_union() &&                          // 1
+        parent_unit->first_select()->leaf_tables.elements &&          // 2
+        (thd->lex->sql_command == SQLCOM_SELECT ||                     // *
+         thd->lex->sql_command == SQLCOM_CREATE_TABLE) &&              // *
+        child_select->outer_select()->leaf_tables.elements &&           // 2A
+        subquery_types_allow_materialization(in_subs) &&
+        (in_subs->is_top_level_item() ||                               //3
+         optimizer_flag(thd,
+                        OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
+         optimizer_flag(thd,
+                        OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
+        !in_subs->is_correlated)                                       //4
+   {
+     return TRUE;
+   }
+  return FALSE;
+}
+
+
+/*
+  Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them
+
+  SYNOPSIS
+     check_and_do_in_subquery_rewrites()
+       join  Subquery's join
+
+  DESCRIPTION
+    Check if we need to do
+     - subquery -> mergeable semi-join rewrite
+     - if the subquery can be handled with materialization
+     - 'substitution' rewrite for table-less subqueries like "(select 1)"
+     - IN->EXISTS rewrite
+    and, depending on the rewrite, either do it, or record it to be done at a
+    later phase.
+
+  RETURN
+    0      - OK
+    Other  - Some sort of query error
+*/
+
+int check_and_do_in_subquery_rewrites(JOIN *join)
+{
+  THD *thd=join->thd;
+  st_select_lex *select_lex= join->select_lex;
+  st_select_lex_unit* parent_unit= select_lex->master_unit();
+  DBUG_ENTER("check_and_do_in_subquery_rewrites");
+
+  /*
+    IN/ALL/ANY rewrites are not applicable for so called fake select
+    (this select exists only to filter results of union if it is needed).
+  */
+  if (select_lex == select_lex->master_unit()->fake_select_lex)
+    DBUG_RETURN(0);
+
+  /*
+    If 
+      1) this join is inside a subquery (of any type except FROM-clause 
+         subquery) and
+      2) we aren't just normalizing a VIEW
+
+    Then perform early unconditional subquery transformations:
+     - Convert subquery predicate into semi-join, or
+     - Mark the subquery for execution using materialization, or
+     - Perform IN->EXISTS transformation, or
+     - Perform more/less ALL/ANY -> MIN/MAX rewrite
+     - Substitute trivial scalar-context subquery with its value
+
+    TODO: for PS, make the whole block execute only on the first execution
+  */
+  Item_subselect *subselect;
+  if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) && // (1)
+      (subselect= parent_unit->item))                                    // (2)
+  {
+    Item_in_subselect *in_subs= NULL;
+    Item_allany_subselect *allany_subs= NULL;
+    switch (subselect->substype()) {
+    case Item_subselect::IN_SUBS:
+      in_subs= (Item_in_subselect *)subselect;
+      break;
+    case Item_subselect::ALL_SUBS:
+    case Item_subselect::ANY_SUBS:
+      allany_subs= (Item_allany_subselect *)subselect;
+      break;
+    default:
+      break;
+    }
+
+
+    /* Resolve expressions and perform semantic analysis for IN query */
+    if (in_subs != NULL)
+      /*
+        TODO: Add the condition below to this if statement when we have proper
+        support for is_correlated handling for materialized semijoins.
+        If we were to add this condition now, the fix_fields() call in
+        convert_subq_to_sj() would force the flag is_correlated to be set
+        erroneously for prepared queries.
+
+        thd->stmt_arena->state != Query_arena::PREPARED)
+      */
+    {
+      /*
+        Check if the left and right expressions have the same # of
+        columns, i.e. we don't have a case like 
+          (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
+
+        TODO why do we have this duplicated in IN->EXISTS transformers?
+        psergey-todo: fix these: grep for duplicated_subselect_card_check
+      */
+      if (select_lex->item_list.elements != in_subs->left_expr->cols())
+      {
+        my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols());
+        DBUG_RETURN(-1);
+      }
+
+      SELECT_LEX *current= thd->lex->current_select;
+      thd->lex->current_select= current->return_after_parsing();
+      char const *save_where= thd->where;
+      thd->where= "IN/ALL/ANY subquery";
+        
+      bool failure= !in_subs->left_expr->fixed &&
+                     in_subs->left_expr->fix_fields(thd, &in_subs->left_expr);
+      thd->lex->current_select= current;
+      thd->where= save_where;
+      if (failure)
+        DBUG_RETURN(-1); /* purecov: deadcode */
+    }
+
+    DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
+    /*
+      Check if we're in subquery that is a candidate for flattening into a
+      semi-join (which is done in flatten_subqueries()). The
+      requirements are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery does not have GROUP BY or ORDER BY
+        4. Subquery does not use aggregate functions or HAVING
+        5. Subquery predicate is at the AND-top-level of ON/WHERE clause
+        6. We are not in a subquery of a single table UPDATE/DELETE that 
+             doesn't have a JOIN (TODO: We should handle this at some
+             point by switching to multi-table UPDATE/DELETE)
+        7. We're not in a table-less subquery like "SELECT 1"
+        8. No execution method was already chosen (by a prepared statement)
+        9. Parent select is not a table-less select
+        10. Neither parent nor child select have STRAIGHT_JOIN option.
+    */
+    if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        in_subs &&                                                    // 1
+        !select_lex->is_part_of_union() &&                            // 2
+        !select_lex->group_list.elements && !join->order &&           // 3
+        !join->having && !select_lex->with_sum_func &&                // 4
+        in_subs->emb_on_expr_nest &&                                  // 5
+        select_lex->outer_select()->join &&                           // 6
+        parent_unit->first_select()->leaf_tables.elements &&          // 7
+        !in_subs->has_strategy() &&                                   // 8
+        select_lex->outer_select()->leaf_tables.elements &&           // 9
+        !((join->select_options |                                     // 10
+           select_lex->outer_select()->join->select_options)          // 10
+          & SELECT_STRAIGHT_JOIN))                                    // 10
+    {
+      DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
+
+      (void)subquery_types_allow_materialization(in_subs);
+
+      in_subs->is_flattenable_semijoin= TRUE;
+
+      /* Register the subquery for further processing in flatten_subqueries() */
+      if (!in_subs->is_registered_semijoin)
+      {
+        Query_arena *arena, backup;
+        arena= thd->activate_stmt_arena_if_needed(&backup);
+        select_lex->outer_select()->sj_subselects.push_back(in_subs);
+        if (arena)
+          thd->restore_active_arena(arena, &backup);
+        in_subs->is_registered_semijoin= TRUE;
+      }
+    }
+    else
+    {
+      DBUG_PRINT("info", ("Subquery can't be converted to merged semi-join"));
+      /* Test if the user has set a legal combination of optimizer switches. */
+      if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
+          !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+        my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
+
+      /*
+        If the subquery predicate is IN/=ANY, analyse and set all possible
+        subquery execution strategies based on optimizer switches and syntactic
+        properties.
+      */
+      if (in_subs && !in_subs->has_strategy())
+      {
+        if (is_materialization_applicable(thd, in_subs, select_lex))
+        {
+          in_subs->add_strategy(SUBS_MATERIALIZATION);
+
+          /*
+            If the subquery is an AND-part of WHERE register for being processed
+            with jtbm strategy
+          */
+          if (in_subs->emb_on_expr_nest == NO_JOIN_NEST &&
+              optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN))
+          {
+            in_subs->is_flattenable_semijoin= FALSE;
+            if (!in_subs->is_registered_semijoin)
+	    {
+              Query_arena *arena, backup;
+              arena= thd->activate_stmt_arena_if_needed(&backup);
+              select_lex->outer_select()->sj_subselects.push_back(in_subs);
+              if (arena)
+                thd->restore_active_arena(arena, &backup);
+              in_subs->is_registered_semijoin= TRUE;
+            }
+          }
+        }
+
+        /*
+          IN-TO-EXISTS is the only universal strategy. Choose it if the user
+          allowed it via an optimizer switch, or if materialization is not
+          possible.
+        */
+        if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) ||
+            !in_subs->has_strategy())
+          in_subs->add_strategy(SUBS_IN_TO_EXISTS);
+      }
+
+      /* Check if max/min optimization applicable */
+      if (allany_subs && !allany_subs->is_set_strategy())
+      {
+        uchar strategy= (allany_subs->is_maxmin_applicable(join) ?
+                         (SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE) :
+                         SUBS_IN_TO_EXISTS);
+        allany_subs->add_strategy(strategy);
+      }
+
+      /*
+        Transform each subquery predicate according to its overloaded
+        transformer.
+      */
+      if (subselect->select_transformer(join))
+        DBUG_RETURN(-1);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief Check if subquery's compared types allow materialization.
+
+  @param in_subs Subquery predicate, updated as follows:
+    types_allow_materialization TRUE if subquery materialization is allowed.
+    sjm_scan_allowed            If types_allow_materialization is TRUE,
+                                indicates whether it is possible to use subquery
+                                materialization and scan the materialized table.
+
+  @retval TRUE   If subquery types allow materialization.
+  @retval FALSE  Otherwise.
+
+  @details
+    This is a temporary fix for BUG#36752.
+    
+    There are two subquery materialization strategies:
+
+    1. Materialize and do index lookups in the materialized table. See 
+       BUG#36752 for description of restrictions we need to put on the
+       compared expressions.
+
+    2. Materialize and then do a full scan of the materialized table. At the
+       moment, this strategy's applicability criteria are even stricter than
+       in #1.
+
+       This is so because of the following: consider an uncorrelated subquery
+       
+       ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...)
+
+       and a join order that could be used to do sjm-materialization: 
+          
+          SJM-Scan(it1, it1), ot1, ot2
+       
+       IN-equalities will be parts of conditions attached to the outer tables:
+
+         ot1:  ot1.col1 = ie1 AND ... (C1)
+         ot2:  ot1.col2 = ie2 AND ... (C2)
+       
+       besides those there may be additional references to ie1 and ie2
+       generated by equality propagation. The problem with evaluating C1 and
+       C2 is that ie{1,2} refer to subquery tables' columns, while we only have 
+       current value of materialization temptable. Our solution is to 
+        * require that all ie{N} are table column references. This allows 
+          to copy the values of materialization temptable columns to the
+          original table's columns (see setup_sj_materialization for more
+          details)
+        * require that compared columns have exactly the same type. This is
+          a temporary measure to avoid BUG#36752-type problems.
+*/
+
+static 
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
+{
+  DBUG_ENTER("subquery_types_allow_materialization");
+
+  DBUG_ASSERT(in_subs->left_expr->fixed);
+
+  List_iterator<Item> it(in_subs->unit->first_select()->item_list);
+  uint elements= in_subs->unit->first_select()->item_list.elements;
+
+  in_subs->types_allow_materialization= FALSE;  // Assign default values
+  in_subs->sjm_scan_allowed= FALSE;
+  
+  bool all_are_fields= TRUE;
+  for (uint i= 0; i < elements; i++)
+  {
+    Item *outer= in_subs->left_expr->element_index(i);
+    Item *inner= it++;
+    all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && 
+                       inner->real_item()->type() == Item::FIELD_ITEM);
+    if (outer->cmp_type() != inner->cmp_type())
+      DBUG_RETURN(FALSE);
+    switch (outer->cmp_type()) {
+    case STRING_RESULT:
+      if (!(outer->collation.collation == inner->collation.collation))
+        DBUG_RETURN(FALSE);
+      // Materialization does not work with BLOB columns
+      if (inner->field_type() == MYSQL_TYPE_BLOB || 
+          inner->field_type() == MYSQL_TYPE_GEOMETRY)
+        DBUG_RETURN(FALSE);
+      /* 
+        Materialization also is unable to work when create_tmp_table() will
+        create a blob column because item->max_length is too big.
+        The following check is copied from Item::make_string_field():
+      */ 
+      if (inner->max_length / inner->collation.collation->mbmaxlen > 
+          CONVERT_IF_BIGGER_TO_BLOB)
+      {
+        DBUG_RETURN(FALSE);
+      }
+      break;
+    case TIME_RESULT:
+      if (mysql_type_to_time_type(outer->field_type()) !=
+          mysql_type_to_time_type(inner->field_type()))
+        DBUG_RETURN(FALSE);
+    default:
+      /* suitable for materialization */
+      break;
+    }
+  }
+
+  in_subs->types_allow_materialization= TRUE;
+  in_subs->sjm_scan_allowed= all_are_fields;
+  DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
+  DBUG_RETURN(TRUE);
+}
+
+
+/**
+  Apply max min optimization of all/any subselect
+*/
+
+bool JOIN::transform_max_min_subquery()
+{
+  DBUG_ENTER("JOIN::transform_max_min_subquery");
+  Item_subselect *subselect= unit->item;
+  if (!subselect || (subselect->substype() != Item_subselect::ALL_SUBS &&
+                     subselect->substype() != Item_subselect::ANY_SUBS))
+    DBUG_RETURN(0);
+  DBUG_RETURN(((Item_allany_subselect *) subselect)->
+              transform_into_max_min(this));
+}
+
+
+/*
+  Finalize IN->EXISTS conversion in case we couldn't use materialization.
+
+  DESCRIPTION  Invoke the IN->EXISTS converter
+    Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE.
+
+  RETURN 
+    FALSE - Ok
+    TRUE  - Fatal error
+*/
+
+bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
+{
+  DBUG_ENTER("make_in_exists_conversion");
+  JOIN *child_join= item->unit->first_select()->join;
+  bool res;
+
+  /* 
+    We're going to finalize IN->EXISTS conversion. 
+    Normally, IN->EXISTS conversion takes place inside the 
+    Item_subselect::fix_fields() call, where item_subselect->fixed==FALSE (as
+    fix_fields() haven't finished yet) and item_subselect->changed==FALSE (as 
+    the conversion haven't been finalized)
+
+    At the end of Item_subselect::fix_fields() we had to set fixed=TRUE,
+    changed=TRUE (the only other option would have been to return error).
+
+    So, now we have to set these back for the duration of select_transformer()
+    call.
+  */
+  item->changed= 0;
+  item->fixed= 0;
+
+  SELECT_LEX *save_select_lex= thd->lex->current_select;
+  thd->lex->current_select= item->unit->first_select();
+
+  res= item->select_transformer(child_join);
+
+  thd->lex->current_select= save_select_lex;
+
+  if (res)
+    DBUG_RETURN(TRUE);
+
+  item->changed= 1;
+  item->fixed= 1;
+
+  Item *substitute= item->substitution;
+  bool do_fix_fields= !item->substitution->fixed;
+  /*
+    The Item_subselect has already been wrapped with Item_in_optimizer, so we
+    should search for item->optimizer, not 'item'.
+  */
+  Item *replace_me= item->optimizer;
+  DBUG_ASSERT(replace_me==substitute);
+
+  Item **tree= (item->emb_on_expr_nest == NO_JOIN_NEST)?
+                 &join->conds : &(item->emb_on_expr_nest->on_expr);
+  if (replace_where_subcondition(join, tree, replace_me, substitute, 
+                                 do_fix_fields))
+    DBUG_RETURN(TRUE);
+  item->substitution= NULL;
+   
+    /*
+      If this is a prepared statement, repeat the above operation for
+      prep_where (or prep_on_expr). 
+    */
+  if (!thd->stmt_arena->is_conventional())
+  {
+    tree= (item->emb_on_expr_nest == (TABLE_LIST*)NO_JOIN_NEST)?
+           &join->select_lex->prep_where : 
+           &(item->emb_on_expr_nest->prep_on_expr);
+
+    if (replace_where_subcondition(join, tree, replace_me, substitute, 
+                                   FALSE))
+      DBUG_RETURN(TRUE);
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+bool check_for_outer_joins(List<TABLE_LIST> *join_list)
+{
+  TABLE_LIST *table;
+  NESTED_JOIN *nested_join;
+  List_iterator<TABLE_LIST> li(*join_list);
+  while ((table= li++))
+  {
+    if ((nested_join= table->nested_join))
+    {
+      if (check_for_outer_joins(&nested_join->join_list))
+        return TRUE;
+    }
+    
+    if (table->outer_join)
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Convert semi-join subquery predicates into semi-join join nests
+
+  SYNOPSIS
+    convert_join_subqueries_to_semijoins()
+ 
+  DESCRIPTION
+
+    Convert candidate subquery predicates into semi-join join nests. This 
+    transformation is performed once in query lifetime and is irreversible.
+    
+    Conversion of one subquery predicate
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    We start with a join that has a semi-join subquery:
+
+      SELECT ...
+      FROM ot, ...
+      WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
+
+    and convert it into a semi-join nest:
+
+      SELECT ...
+      FROM ot SEMI JOIN (it1 ... itN), ...
+      WHERE outer_where AND subq_where AND oe=ie
+
+    that is, in order to do the conversion, we need to 
+
+     * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
+       query's FROM structure.
+     * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
+       the subquery predicate was in an ON expression)
+     * Remove the subquery predicate from the parent query's WHERE
+
+    Considerations when converting many predicates
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    A join may have at most MAX_TABLES tables. This may prevent us from
+    flattening all subqueries when the total number of tables in parent and
+    child selects exceeds MAX_TABLES.
+    We deal with this problem by flattening children's subqueries first and
+    then using a heuristic rule to determine each subquery predicate's
+    "priority".
+
+  RETURN 
+    FALSE  OK
+    TRUE   Error
+*/
+
+bool convert_join_subqueries_to_semijoins(JOIN *join)
+{
+  Query_arena *arena, backup;
+  Item_in_subselect *in_subq;
+  THD *thd= join->thd;
+  List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
+  DBUG_ENTER("convert_join_subqueries_to_semijoins");
+
+  if (join->select_lex->sj_subselects.is_empty())
+    DBUG_RETURN(FALSE);
+
+  List_iterator_fast<Item_in_subselect> li(join->select_lex->sj_subselects);
+
+  while ((in_subq= li++))
+  {
+    SELECT_LEX *subq_sel= in_subq->get_select_lex();
+    if (subq_sel->handle_derived(thd->lex, DT_OPTIMIZE))
+      DBUG_RETURN(1);
+    if (subq_sel->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);
+    subq_sel->update_used_tables();
+  }
+
+  li.rewind();
+  /* First, convert child join's subqueries. We proceed bottom-up here */
+  while ((in_subq= li++)) 
+  {
+    st_select_lex *child_select= in_subq->get_select_lex();
+    JOIN *child_join= child_select->join;
+    child_join->outer_tables = child_join->table_count;
+
+    /*
+      child_select->where contains only the WHERE predicate of the
+      subquery itself here. We may be selecting from a VIEW, which has its
+      own predicate. The combined predicates are available in child_join->conds,
+      which was built by setup_conds() doing prepare_where() for all views.
+    */
+    child_select->where= child_join->conds;
+
+    if (convert_join_subqueries_to_semijoins(child_join))
+      DBUG_RETURN(TRUE);
+    in_subq->sj_convert_priority= 
+      test(in_subq->emb_on_expr_nest != NO_JOIN_NEST) * MAX_TABLES * 2 +
+      in_subq->is_correlated * MAX_TABLES + child_join->outer_tables;
+  }
+  
+  // Temporary measure: disable semi-joins when they are together with outer
+  // joins.
+#if 0  
+  if (check_for_outer_joins(join->join_list))
+  {
+    in_subq= join->select_lex->sj_subselects.head();
+    arena= thd->activate_stmt_arena_if_needed(&backup);
+    goto skip_conversion;
+  }
+#endif
+  //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
+  /* 
+    2. Pick which subqueries to convert:
+      sort the subquery array
+      - prefer correlated subqueries over uncorrelated;
+      - prefer subqueries that have greater number of outer tables;
+  */
+  bubble_sort<Item_in_subselect>(&join->select_lex->sj_subselects,
+				 subq_sj_candidate_cmp, NULL);
+  // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
+  /* Replace all subqueries to be flattened with Item_int(1) */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+ 
+  li.rewind();
+  while ((in_subq= li++))
+  {
+    bool remove_item= TRUE;
+
+    /* Stop processing if we've reached a subquery that's attached to the ON clause */
+    if (in_subq->emb_on_expr_nest != NO_JOIN_NEST)
+      break;
+
+    if (in_subq->is_flattenable_semijoin) 
+    {
+      if (join->table_count + 
+          in_subq->unit->first_select()->join->table_count >= MAX_TABLES)
+        break;
+      if (convert_subq_to_sj(join, in_subq))
+        goto restore_arena_and_fail;
+    }
+    else
+    {
+      if (join->table_count + 1 >= MAX_TABLES)
+        break;
+      if (convert_subq_to_jtbm(join, in_subq, &remove_item))
+        goto restore_arena_and_fail;
+    }
+    if (remove_item)
+    {
+      Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+                     &join->conds : &(in_subq->emb_on_expr_nest->on_expr);
+      Item *replace_me= in_subq->original_item();
+      if (replace_where_subcondition(join, tree, replace_me, new Item_int(1),
+                                     FALSE))
+        goto restore_arena_and_fail;
+    }
+  }
+//skip_conversion:
+  /* 
+    3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't
+    convert:
+  */
+  while (in_subq)
+  {
+    JOIN *child_join= in_subq->unit->first_select()->join;
+    in_subq->changed= 0;
+    in_subq->fixed= 0;
+
+    SELECT_LEX *save_select_lex= thd->lex->current_select;
+    thd->lex->current_select= in_subq->unit->first_select();
+
+    bool res= in_subq->select_transformer(child_join);
+
+    thd->lex->current_select= save_select_lex;
+
+    if (res)
+      DBUG_RETURN(TRUE);
+
+    in_subq->changed= 1;
+    in_subq->fixed= 1;
+
+    Item *substitute= in_subq->substitution;
+    bool do_fix_fields= !in_subq->substitution->fixed;
+    Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+                   &join->conds : &(in_subq->emb_on_expr_nest->on_expr);
+    Item *replace_me= in_subq->original_item();
+    if (replace_where_subcondition(join, tree, replace_me, substitute, 
+                                   do_fix_fields))
+      DBUG_RETURN(TRUE);
+    in_subq->substitution= NULL;
+    /*
+      If this is a prepared statement, repeat the above operation for
+      prep_where (or prep_on_expr). Subquery-to-semijoin conversion is 
+      done once for prepared statement.
+    */
+    if (!thd->stmt_arena->is_conventional())
+    {
+      tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
+             &join->select_lex->prep_where : 
+             &(in_subq->emb_on_expr_nest->prep_on_expr);
+      /* 
+        prep_on_expr/ prep_where may be NULL in some cases. 
+        If that is the case, do nothing - simplify_joins() will copy 
+        ON/WHERE expression into prep_on_expr/prep_where.
+      */
+      if (*tree && replace_where_subcondition(join, tree, replace_me, substitute, 
+                                     FALSE))
+        DBUG_RETURN(TRUE);
+    }
+    /*
+      Revert to the IN->EXISTS strategy in the rare case when the subquery could
+      not be flattened.
+    */
+    in_subq->reset_strategy(SUBS_IN_TO_EXISTS);
+    if (is_materialization_applicable(thd, in_subq, 
+                                      in_subq->unit->first_select()))
+    {
+      in_subq->add_strategy(SUBS_MATERIALIZATION);
+    }
+
+    in_subq= li++;
+  }
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  join->select_lex->sj_subselects.empty();
+  DBUG_RETURN(FALSE);
+
+restore_arena_and_fail:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Get #output_rows and scan_time estimates for a "delayed" table.
+
+  SYNOPSIS
+    get_delayed_table_estimates()
+      table         IN    Table to get estimates for
+      out_rows      OUT   E(#rows in the table)
+      scan_time     OUT   E(scan_time).
+      startup_cost  OUT   cost to populate the table.
+
+  DESCRIPTION
+    Get #output_rows and scan_time estimates for a "delayed" table. By
+    "delayed" here we mean that the table is filled at the start of query
+    execution. This means that the optimizer can't use table statistics to 
+    get #rows estimate for it, it has to call this function instead.
+
+    This function is expected to make different actions depending on the nature
+    of the table. At the moment there is only one kind of delayed tables,
+    non-flattenable semi-joins.
+*/
+
+void get_delayed_table_estimates(TABLE *table,
+                                 ha_rows *out_rows, 
+                                 double *scan_time,
+                                 double *startup_cost)
+{
+  Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
+
+  DBUG_ASSERT(item->engine->engine_type() ==
+              subselect_engine::HASH_SJ_ENGINE);
+
+  subselect_hash_sj_engine *hash_sj_engine=
+    ((subselect_hash_sj_engine*)item->engine);
+
+  *out_rows= (ha_rows)item->jtbm_record_count;
+  *startup_cost= item->jtbm_read_time;
+
+  /* Calculate cost of scanning the temptable */
+  double data_size= item->jtbm_record_count * 
+                    hash_sj_engine->tmp_table->s->reclength;
+  /* Do like in handler::read_time */
+  *scan_time= data_size/IO_SIZE + 2;
+} 
+
+
+/**
+   @brief Replaces an expression destructively inside the expression tree of
+   the WHERE clase.
+
+   @note Because of current requirements for semijoin flattening, we do not
+   need to recurse here, hence this function will only examine the top-level
+   AND conditions. (see JOIN::prepare, comment starting with "Check if the 
+   subquery predicate can be executed via materialization".
+   
+   @param join The top-level query.
+   @param old_cond The expression to be replaced.
+   @param new_cond The expression to be substituted.
+   @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
+   the new expression.
+   @return <code>true</code> if there was an error, <code>false</code> if
+   successful.
+*/
+
+static bool replace_where_subcondition(JOIN *join, Item **expr, 
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields)
+{
+  if (*expr == old_cond)
+  {
+    *expr= new_cond;
+    if (do_fix_fields)
+      new_cond->fix_fields(join->thd, expr);
+    return FALSE;
+  }
+  
+  if ((*expr)->type() == Item::COND_ITEM) 
+  {
+    List_iterator<Item> li(*((Item_cond*)(*expr))->argument_list());
+    Item *item;
+    while ((item= li++))
+    {
+      if (item == old_cond) 
+      {
+        li.replace(new_cond);
+        if (do_fix_fields)
+          new_cond->fix_fields(join->thd, li.ref());
+        return FALSE;
+      }
+    }
+  }
+  /* 
+    We can come to here when 
+     - we're doing replace operations on both on_expr and prep_on_expr
+     - on_expr is the same as prep_on_expr, or they share a sub-tree 
+       (so, when we do replace in on_expr, we replace in prep_on_expr, too,
+        and when we try doing a replace in prep_on_expr, the item we wanted 
+        to replace there has already been replaced)
+  */
+  return FALSE;
+}
+
+static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
+                                 void *arg)
+{
+  return (el1->sj_convert_priority > el2->sj_convert_priority) ? 1 : 
+         ( (el1->sj_convert_priority == el2->sj_convert_priority)? 0 : -1);
+}
+
+
+/*
+  Convert a subquery predicate into a TABLE_LIST semi-join nest
+
+  SYNOPSIS
+    convert_subq_to_sj()
+       parent_join  Parent join, the one that has subq_pred in its WHERE/ON 
+                    clause
+       subq_pred    Subquery predicate to be converted
+  
+  DESCRIPTION
+    Convert a subquery predicate into a TABLE_LIST semi-join nest. All the 
+    prerequisites are already checked, so the conversion is always successfull.
+
+    Prepared Statements: the transformation is permanent:
+     - Changes in TABLE_LIST structures are naturally permanent
+     - Item tree changes are performed on statement MEM_ROOT:
+        = we activate statement MEM_ROOT 
+        = this function is called before the first fix_prepare_information
+          call.
+
+    This is intended because the criteria for subquery-to-sj conversion remain
+    constant for the lifetime of the Prepared Statement.
+
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory error
+*/
+
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  TABLE_LIST *emb_tbl_nest= NULL;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  THD *thd= parent_join->thd;
+  DBUG_ENTER("convert_subq_to_sj");
+
+  /*
+    1. Find out where to put the predicate into.
+     Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
+  */
+  if ((void*)subq_pred->emb_on_expr_nest != (void*)NO_JOIN_NEST)
+  {
+    if (subq_pred->emb_on_expr_nest->nested_join)
+    {
+      /*
+        We're dealing with
+
+          ... [LEFT] JOIN  ( ... ) ON (subquery AND whatever) ...
+
+        The sj-nest will be inserted into the brackets nest.
+      */
+      emb_tbl_nest=  subq_pred->emb_on_expr_nest;
+      emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->emb_on_expr_nest->outer_join)
+    {
+      /*
+        We're dealing with
+
+          ... INNER JOIN tblX ON (subquery AND whatever) ...
+
+        The sj-nest will be tblX's "sibling", i.e. another child of its
+        parent. This is ok because tblX is joined as an inner join.
+      */
+      emb_tbl_nest= subq_pred->emb_on_expr_nest->embedding;
+      if (emb_tbl_nest)
+        emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->emb_on_expr_nest->nested_join)
+    {
+      TABLE_LIST *outer_tbl= subq_pred->emb_on_expr_nest;
+      TABLE_LIST *wrap_nest;
+      /*
+        We're dealing with
+
+          ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
+
+        we'll need to convert it into:
+
+          ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
+                        |                      |
+                        |<----- wrap_nest ---->|
+        
+        Q:  other subqueries may be pointing to this element. What to do?
+        A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest.
+            But we'll need to fix other pointers.
+        A2: Another way: have TABLE_LIST::next_ptr so the following
+            subqueries know the table has been nested.
+        A3: changes in the TABLE_LIST::outer_join will make everything work
+            automatically.
+      */
+      if (!(wrap_nest= alloc_join_nest(parent_join->thd)))
+      {
+        DBUG_RETURN(TRUE);
+      }
+      wrap_nest->embedding= outer_tbl->embedding;
+      wrap_nest->join_list= outer_tbl->join_list;
+      wrap_nest->alias= (char*) "(sj-wrap)";
+
+      wrap_nest->nested_join->join_list.empty();
+      wrap_nest->nested_join->join_list.push_back(outer_tbl);
+
+      outer_tbl->embedding= wrap_nest;
+      outer_tbl->join_list= &wrap_nest->nested_join->join_list;
+
+      /*
+        wrap_nest will take place of outer_tbl, so move the outer join flag
+        and on_expr
+      */
+      wrap_nest->outer_join= outer_tbl->outer_join;
+      outer_tbl->outer_join= 0;
+
+      wrap_nest->on_expr= outer_tbl->on_expr;
+      outer_tbl->on_expr= NULL;
+
+      List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl == outer_tbl)
+        {
+          li.replace(wrap_nest);
+          break;
+        }
+      }
+      /*
+        Ok now wrap_nest 'contains' outer_tbl and we're ready to add the 
+        semi-join nest into it
+      */
+      emb_join_list= &wrap_nest->nested_join->join_list;
+      emb_tbl_nest=  wrap_nest;
+    }
+  }
+
+  TABLE_LIST *sj_nest;
+  NESTED_JOIN *nested_join;
+  if (!(sj_nest= alloc_join_nest(parent_join->thd)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  nested_join= sj_nest->nested_join;
+
+  sj_nest->join_list= emb_join_list;
+  sj_nest->embedding= emb_tbl_nest;
+  sj_nest->alias= (char*) "(sj-nest)";
+  sj_nest->sj_subq_pred= subq_pred;
+  /* Nests do not participate in those 'chains', so: */
+  /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
+  emb_join_list->push_back(sj_nest);
+
+  /* 
+    nested_join->used_tables and nested_join->not_null_tables are
+    initialized in simplify_joins().
+  */
+  
+  /* 
+    2. Walk through subquery's top list and set 'embedding' to point to the
+       sj-nest.
+  */
+  st_select_lex *subq_lex= subq_pred->unit->first_select();
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
+  TABLE_LIST *tl;
+  while ((tl= li++))
+  {
+    tl->embedding= sj_nest;
+    tl->join_list= &nested_join->join_list;
+    nested_join->join_list.push_back(tl);
+  }
+  
+  /*
+    Reconnect the next_leaf chain.
+    TODO: Do we have to put subquery's tables at the end of the chain?
+          Inserting them at the beginning would be a bit faster.
+    NOTE: We actually insert them at the front! That's because the order is
+          reversed in this list.
+  */
+  parent_lex->leaf_tables.concat(&subq_lex->leaf_tables);
+
+  /*
+    Same as above for next_local chain
+    (a theory: a next_local chain always starts with ::leaf_tables
+     because view's tables are inserted after the view)
+  */
+  for (tl= parent_lex->leaf_tables.head(); tl->next_local; tl= tl->next_local) ;
+  tl->next_local= subq_lex->leaf_tables.head();
+
+  /* A theory: no need to re-connect the next_global chain */
+
+  /* 3. Remove the original subquery predicate from the WHERE/ON */
+
+  // The subqueries were replaced for Item_int(1) earlier
+  subq_pred->reset_strategy(SUBS_SEMI_JOIN);       // for subsequent executions
+  /*TODO: also reset the 'with_subselect' there. */
+
+  /* n. Adjust the parent_join->table_count counter */
+  uint table_no= parent_join->table_count;
+  /* n. Walk through child's tables and adjust table->map */
+  List_iterator_fast<TABLE_LIST> si(subq_lex->leaf_tables);
+  while ((tl= si++))
+  {
+    tl->set_tablenr(table_no);
+    if (tl->is_jtbm())
+      tl->jtbm_table_no= table_no;
+    SELECT_LEX *old_sl= tl->select_lex;
+    tl->select_lex= parent_join->select_lex; 
+    for (TABLE_LIST *emb= tl->embedding;
+         emb && emb->select_lex == old_sl;
+         emb= emb->embedding)
+      emb->select_lex= parent_join->select_lex;
+    table_no++;
+  }
+  parent_join->table_count += subq_lex->join->table_count;
+  //parent_join->table_count += subq_lex->leaf_tables.elements;
+
+  /* 
+    Put the subquery's WHERE into semi-join's sj_on_expr
+    Add the subquery-induced equalities too.
+  */
+  SELECT_LEX *save_lex= thd->lex->current_select;
+  thd->lex->current_select=subq_lex;
+  if (!subq_pred->left_expr->fixed &&
+       subq_pred->left_expr->fix_fields(thd, &subq_pred->left_expr))
+    DBUG_RETURN(TRUE);
+  thd->lex->current_select=save_lex;
+
+  sj_nest->nested_join->sj_corr_tables= subq_pred->used_tables();
+  sj_nest->nested_join->sj_depends_on=  subq_pred->used_tables() |
+                                        subq_pred->left_expr->used_tables();
+  sj_nest->sj_on_expr= subq_lex->join->conds;
+
+  /*
+    Create the IN-equalities and inject them into semi-join's ON expression.
+    Additionally, for LooseScan strategy
+     - Record the number of IN-equalities.
+     - Create list of pointers to (oe1, ..., ieN). We'll need the list to
+       see which of the expressions are bound and which are not (for those
+       we'll produce a distinct stream of (ie_i1,...ie_ik).
+
+       (TODO: can we just create a list of pointers and hope the expressions
+       will not substitute themselves on fix_fields()? or we need to wrap
+       them into Item_direct_view_refs and store pointers to those. The
+       pointers to Item_direct_view_refs are guaranteed to be stable as 
+       Item_direct_view_refs doesn't substitute itself with anything in 
+       Item_direct_view_ref::fix_fields.
+  */
+  sj_nest->sj_in_exprs= subq_pred->left_expr->cols();
+  sj_nest->nested_join->sj_outer_expr_list.empty();
+
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr);
+    Item_func_eq *item_eq=
+      new Item_func_eq(subq_pred->left_expr, subq_lex->ref_pointer_array[0]);
+    item_eq->in_equality_no= 0;
+    sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+  }
+  else
+  {
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr->
+                                                element_index(i));
+      Item_func_eq *item_eq= 
+        new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                         subq_lex->ref_pointer_array[i]);
+      item_eq->in_equality_no= i;
+      sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+    }
+  }
+  /* Fix the created equality and AND */
+  if (!sj_nest->sj_on_expr->fixed)
+    sj_nest->sj_on_expr->fix_fields(parent_join->thd, &sj_nest->sj_on_expr);
+
+  /*
+    Walk through sj nest's WHERE and ON expressions and call
+    item->fix_table_changes() for all items.
+  */
+  sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr);
+  fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list);
+
+
+  /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */
+  subq_lex->master_unit()->exclude_level();
+
+  DBUG_EXECUTE("where",
+               print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
+
+  /* Inject sj_on_expr into the parent's WHERE or ON */
+  if (emb_tbl_nest)
+  {
+    emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+    emb_tbl_nest->on_expr->top_level_item();
+    if (!emb_tbl_nest->on_expr->fixed)
+      emb_tbl_nest->on_expr->fix_fields(parent_join->thd,
+                                        &emb_tbl_nest->on_expr);
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
+    parent_join->conds->top_level_item();
+    /*
+      fix_fields must update the properties (e.g. st_select_lex::cond_count of
+      the correct select_lex.
+    */
+    save_lex= thd->lex->current_select;
+    thd->lex->current_select=parent_join->select_lex;
+    if (!parent_join->conds->fixed)
+      parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+    thd->lex->current_select=save_lex;
+    parent_join->select_lex->where= parent_join->conds;
+  }
+
+  if (subq_lex->ftfunc_list->elements)
+  {
+    Item_func_match *ifm;
+    List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
+    while ((ifm= li++))
+      parent_lex->ftfunc_list->push_front(ifm);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+const int SUBQERY_TEMPTABLE_NAME_MAX_LEN= 20;
+
+static void create_subquery_temptable_name(char *to, uint number)
+{
+  DBUG_ASSERT(number < 10000);       
+  to= strmov(to, "<subquery");
+  to= int10_to_str((int) number, to, 10);
+  to[0]= '>';
+  to[1]= 0;
+}
+
+
+/*
+  Convert subquery predicate into non-mergeable semi-join nest.
+
+  TODO: 
+    why does this do IN-EXISTS conversion? Can't we unify it with mergeable
+    semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless
+    fatal errors)
+
+    
+  RETURN 
+    FALSE - Ok
+    TRUE  - Fatal error
+*/
+
+static bool convert_subq_to_jtbm(JOIN *parent_join, 
+                                 Item_in_subselect *subq_pred, 
+                                 bool *remove_item)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins
+  TABLE_LIST *tl;
+  DBUG_ENTER("convert_subq_to_jtbm");
+  bool optimization_delayed= TRUE;
+  subq_pred->set_strategy(SUBS_MATERIALIZATION);
+
+  subq_pred->is_jtbm_merged= TRUE;
+
+  *remove_item= TRUE;
+
+  TABLE_LIST *jtbm;
+  char *tbl_alias;
+  if (!(tbl_alias= (char*)parent_join->thd->calloc(SUBQERY_TEMPTABLE_NAME_MAX_LEN)) ||
+      !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest!
+  {
+    DBUG_RETURN(TRUE);
+  }
+
+  jtbm->join_list= emb_join_list;
+  jtbm->embedding= emb_tbl_nest;
+  jtbm->jtbm_subselect= subq_pred;
+  jtbm->nested_join= NULL;
+
+  /* Nests do not participate in those 'chains', so: */
+  /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/
+  emb_join_list->push_back(jtbm);
+  
+  /* 
+    Inject the jtbm table into TABLE_LIST::next_leaf list, so that 
+    make_join_statistics() and co. can find it.
+  */
+  parent_lex->leaf_tables.push_back(jtbm);
+
+  /*
+    Same as above for TABLE_LIST::next_local chain
+    (a theory: a next_local chain always starts with ::leaf_tables
+     because view's tables are inserted after the view)
+  */
+  for (tl= parent_lex->leaf_tables.head(); tl->next_local; tl= tl->next_local)
+  {}
+  tl->next_local= jtbm;
+
+  /* A theory: no need to re-connect the next_global chain */
+  if (optimization_delayed)
+  {
+    DBUG_ASSERT(parent_join->table_count < MAX_TABLES);
+
+    jtbm->jtbm_table_no= parent_join->table_count;
+
+    create_subquery_temptable_name(tbl_alias, 
+                                   subq_pred->unit->first_select()->select_number);
+    jtbm->alias= tbl_alias;
+    parent_join->table_count++;
+    DBUG_RETURN(FALSE);
+  }
+  subselect_hash_sj_engine *hash_sj_engine=
+    ((subselect_hash_sj_engine*)subq_pred->engine);
+  jtbm->table= hash_sj_engine->tmp_table;
+
+  jtbm->table->tablenr= parent_join->table_count;
+  jtbm->table->map= table_map(1) << (parent_join->table_count);
+  jtbm->jtbm_table_no= jtbm->table->tablenr;
+
+  parent_join->table_count++;
+  DBUG_ASSERT(parent_join->table_count < MAX_TABLES);
+
+  Item *conds= hash_sj_engine->semi_join_conds;
+  conds->fix_after_pullout(parent_lex, &conds);
+
+  DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY););
+  
+  create_subquery_temptable_name(tbl_alias, hash_sj_engine->materialize_join->
+                                              select_lex->select_number);
+  jtbm->alias= tbl_alias;
+#if 0
+  /* Inject sj_on_expr into the parent's WHERE or ON */
+  if (emb_tbl_nest)
+  {
+    DBUG_ASSERT(0);
+    /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+    emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr);
+    */
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= and_items(parent_join->conds, conds);
+    parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+    parent_join->select_lex->where= parent_join->conds;
+  }
+#endif
+  /* Don't unlink the child subselect, as the subquery will be used. */
+
+  DBUG_RETURN(FALSE);
+}
+
+
+static TABLE_LIST *alloc_join_nest(THD *thd)
+{
+  TABLE_LIST *tbl;
+  if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+
+                                       sizeof(NESTED_JOIN))))
+    return NULL;
+  tbl->nested_join= (NESTED_JOIN*) ((uchar*)tbl + 
+                                    ALIGN_SIZE(sizeof(TABLE_LIST)));
+  return tbl;
+}
+
+
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist)
+{
+  List_iterator<TABLE_LIST> it(*tlist);
+  TABLE_LIST *table;
+  while ((table= it++))
+  {
+    if (table->on_expr)
+      table->on_expr->fix_after_pullout(new_parent, &table->on_expr);
+    if (table->nested_join)
+      fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list);
+  }
+}
+
+
+static void set_emb_join_nest(List<TABLE_LIST> *tables, TABLE_LIST *emb_sj_nest)
+{
+  List_iterator<TABLE_LIST> it(*tables);
+  TABLE_LIST *tbl;
+  while ((tbl= it++))
+  {
+    /*
+      Note: check for nested_join first. 
+       derived-merged tables have tbl->table!=NULL &&
+       tbl->table->reginfo==NULL.
+    */
+    if (tbl->nested_join)
+      set_emb_join_nest(&tbl->nested_join->join_list, emb_sj_nest);
+    else if (tbl->table)
+      tbl->table->reginfo.join_tab->emb_sj_nest= emb_sj_nest;
+
+  }
+}
+
+/*
+  Pull tables out of semi-join nests, if possible
+
+  SYNOPSIS
+    pull_out_semijoin_tables()
+      join  The join where to do the semi-join flattening
+
+  DESCRIPTION
+    Try to pull tables out of semi-join nests.
+     
+    PRECONDITIONS
+    When this function is called, the join may have several semi-join nests
+    but it is guaranteed that one semi-join nest does not contain another.
+   
+    ACTION
+    A table can be pulled out of the semi-join nest if
+     - It is a constant table, or
+     - It is accessed via eq_ref(outer_tables)
+
+    POSTCONDITIONS
+     * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL
+     * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing 
+       to semi-join nest they are in.
+     * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly
+
+    This operation is (and should be) performed at each PS execution since
+    tables may become/cease to be constant across PS reexecutions.
+    
+  NOTE
+    Table pullout may make uncorrelated subquery correlated. Consider this
+    example:
+    
+     ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) 
+    
+    here table it1 can be pulled out (we have it1.primary_key=oe which gives
+    us functional dependency). Once it1 is pulled out, all references to it1
+    from p(it1, it2) become references to outside of the subquery and thus
+    make the subquery (i.e. its semi-join nest) correlated.
+    Making the subquery (i.e. its semi-join nest) correlated prevents us from
+    using Materialization or LooseScan to execute it. 
+
+  RETURN 
+    0 - OK
+    1 - Out of memory error
+*/
+
+int pull_out_semijoin_tables(JOIN *join)
+{
+  TABLE_LIST *sj_nest;
+  DBUG_ENTER("pull_out_semijoin_tables");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+   
+  /* Try pulling out of the each of the semi-joins */
+  while ((sj_nest= sj_list_it++))
+  {
+    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
+    TABLE_LIST *tbl;
+
+    /*
+      Don't do table pull-out for nested joins (if we get nested joins here, it
+      means these are outer joins. It is theoretically possible to do pull-out
+      for some of the outer tables but we dont support this currently.
+    */
+    bool have_join_nest_children= FALSE;
+
+    set_emb_join_nest(&sj_nest->nested_join->join_list, sj_nest);
+
+    while ((tbl= child_li++))
+    {
+      if (tbl->nested_join)
+      {
+        have_join_nest_children= TRUE;
+        break;
+      }
+    }
+    
+    
+    table_map pulled_tables= 0;
+    if (have_join_nest_children)
+      goto skip;
+
+    /* Action #1: Mark the constant tables to be pulled out */
+    child_li.rewind();
+    while ((tbl= child_li++))
+    {
+      if (tbl->table)
+      {
+        tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+#if 0 
+        /* 
+          Do not pull out tables because they are constant. This operation has
+          a problem:
+          - Some constant tables may become/cease to be constant across PS
+            re-executions
+          - Contrary to our initial assumption, it turned out that table pullout 
+            operation is not easily undoable.
+
+          The solution is to leave constant tables where they are. This will
+          affect only constant tables that are 1-row or empty, tables that are
+          constant because they are accessed via eq_ref(const) access will
+          still be pulled out as functionally-dependent.
+
+          This will cause us to miss the chance to flatten some of the 
+          subqueries, but since const tables do not generate many duplicates,
+          it really doesn't matter that much whether they were pulled out or
+          not.
+
+          All of this was done as fix for BUG#43768.
+        */
+        if (tbl->table->map & join->const_table_map)
+        {
+          pulled_tables |= tbl->table->map;
+          DBUG_PRINT("info", ("Table %s pulled out (reason: constant)",
+                              tbl->table->alias));
+        }
+#endif
+      }
+    }
+    
+    /*
+      Action #2: Find which tables we can pull out based on
+      update_ref_and_keys() data. Note that pulling one table out can allow
+      us to pull out some other tables too.
+    */
+    bool pulled_a_table;
+    do 
+    {
+      pulled_a_table= FALSE;
+      child_li.rewind();
+      while ((tbl= child_li++))
+      {
+        if (tbl->table && !(pulled_tables & tbl->table->map))
+        {
+          if (find_eq_ref_candidate(tbl->table, 
+                                    sj_nest->nested_join->used_tables & 
+                                    ~pulled_tables))
+          {
+            pulled_a_table= TRUE;
+            pulled_tables |= tbl->table->map;
+            DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)",
+                                tbl->table->alias.c_ptr()));
+            /*
+              Pulling a table out of uncorrelated subquery in general makes
+              makes it correlated. See the NOTE to this funtion. 
+            */
+            sj_nest->sj_subq_pred->is_correlated= TRUE;
+            sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
+            sj_nest->nested_join->sj_depends_on|= tbl->table->map;
+          }
+        }
+      }
+    } while (pulled_a_table);
+ 
+    child_li.rewind();
+  skip:
+    /*
+      Action #3: Move the pulled out TABLE_LIST elements to the parents.
+    */
+    table_map inner_tables= sj_nest->nested_join->used_tables & 
+                            ~pulled_tables;
+    /* Record the bitmap of inner tables */
+    sj_nest->sj_inner_tables= inner_tables;
+    if (pulled_tables)
+    {
+      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)?
+                                           (&sj_nest->embedding->nested_join->join_list): 
+                                           (&join->select_lex->top_join_list);
+      Query_arena *arena, backup;
+      arena= join->thd->activate_stmt_arena_if_needed(&backup);
+      while ((tbl= child_li++))
+      {
+        if (tbl->table)
+        {
+          if (inner_tables & tbl->table->map)
+          {
+            /* This table is not pulled out */
+            tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+          }
+          else
+          {
+            /* This table has been pulled out of the semi-join nest */
+            tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
+            /*
+              Pull the table up in the same way as simplify_joins() does:
+              update join_list and embedding pointers but keep next[_local]
+              pointers.
+            */
+            child_li.remove();
+            sj_nest->nested_join->used_tables &= ~tbl->table->map;
+            upper_join_list->push_back(tbl);
+            tbl->join_list= upper_join_list;
+            tbl->embedding= sj_nest->embedding;
+          }
+        }
+      }
+
+      /* Remove the sj-nest itself if we've removed everything from it */
+      if (!inner_tables)
+      {
+        List_iterator<TABLE_LIST> li(*upper_join_list);
+        /* Find the sj_nest in the list. */
+        while (sj_nest != li++) ;
+        li.remove();
+        /* Also remove it from the list of SJ-nests: */
+        sj_list_it.remove();
+      }
+
+      if (arena)
+        join->thd->restore_active_arena(arena, &backup);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Optimize semi-join nests that could be run with sj-materialization
+
+  SYNOPSIS
+    optimize_semijoin_nests()
+      join           The join to optimize semi-join nests for
+      all_table_map  Bitmap of all tables in the join
+
+  DESCRIPTION
+    Optimize each of the semi-join nests that can be run with
+    materialization. For each of the nests, we
+     - Generate the best join order for this "sub-join" and remember it;
+     - Remember the sub-join execution cost (it's part of materialization
+       cost);
+     - Calculate other costs that will be incurred if we decide 
+       to use materialization strategy for this semi-join nest.
+
+    All obtained information is saved and will be used by the main join
+    optimization pass.
+  
+  NOTES 
+    Because of Join::reoptimize(), this function may be called multiple times.
+
+  RETURN
+    FALSE  Ok 
+    TRUE   Out of memory error
+*/
+
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
+{
+  DBUG_ENTER("optimize_semijoin_nests");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+  TABLE_LIST *sj_nest;
+  while ((sj_nest= sj_list_it++))
+  {
+    /* semi-join nests with only constant tables are not valid */
+   /// DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);
+
+    sj_nest->sj_mat_info= NULL;
+    /*
+      The statement may have been executed with 'semijoin=on' earlier.
+      We need to verify that 'semijoin=on' still holds.
+     */
+    if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+    {
+      if ((sj_nest->sj_inner_tables  & ~join->const_table_map) && /* not everything was pulled out */
+          !sj_nest->sj_subq_pred->is_correlated && 
+           sj_nest->sj_subq_pred->types_allow_materialization)
+      {
+        join->emb_sjm_nest= sj_nest;
+        if (choose_plan(join, all_table_map &~join->const_table_map))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        /*
+          The best plan to run the subquery is now in join->best_positions,
+          save it.
+        */
+        uint n_tables= my_count_bits(sj_nest->sj_inner_tables & ~join->const_table_map);
+        SJ_MATERIALIZATION_INFO* sjm;
+        if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
+            !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
+                                                          n_tables)))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        sjm->tables= n_tables;
+        sjm->is_used= FALSE;
+        double subjoin_out_rows, subjoin_read_time;
+
+        /*
+        join->get_partial_cost_and_fanout(n_tables + join->const_tables,
+                                          table_map(-1),
+                                          &subjoin_read_time, 
+                                          &subjoin_out_rows);
+        */
+        join->get_prefix_cost_and_fanout(n_tables, 
+                                         &subjoin_read_time,
+                                         &subjoin_out_rows);
+
+        sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+        sjm->rows= subjoin_out_rows;
+        
+        // Don't use the following list because it has "stale" items. use
+        // ref_pointer_array instead:
+        //
+        //List<Item> &right_expr_list= 
+        //  sj_nest->sj_subq_pred->unit->first_select()->item_list;
+        /*
+          Adjust output cardinality estimates. If the subquery has form
+
+           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
+
+           then the number of distinct output record combinations has an
+           upper bound of product of number of records matching the tables 
+           that are used by the SELECT clause.
+           TODO:
+             We can get a more precise estimate if we
+              - use rec_per_key cardinality estimates. For simple cases like 
+                "oe IN (SELECT t.key ...)" it is trivial. 
+              - Functional dependencies between the tables in the semi-join
+                nest (the payoff is probably less here?)
+          
+          See also get_post_group_estimate().
+        */
+        SELECT_LEX *subq_select= sj_nest->sj_subq_pred->unit->first_select();
+        {
+          for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
+          {
+            JOIN_TAB *tab= join->best_positions[i].table;
+            join->map2table[tab->table->tablenr]= tab;
+          }
+          //List_iterator<Item> it(right_expr_list);
+          Item **ref_array= subq_select->ref_pointer_array;
+          Item **ref_array_end= ref_array + subq_select->item_list.elements; 
+          table_map map= 0;
+          //while ((item= it++))
+          for (;ref_array < ref_array_end; ref_array++)
+            map |= (*ref_array)->used_tables();
+          map= map & ~PSEUDO_TABLE_BITS;
+          Table_map_iterator tm_it(map);
+          int tableno;
+          double rows= 1.0;
+          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+            rows *= join->map2table[tableno]->table->quick_condition_rows;
+          sjm->rows= min(sjm->rows, rows);
+        }
+        memcpy(sjm->positions, join->best_positions + join->const_tables, 
+               sizeof(POSITION) * n_tables);
+
+        /*
+          Calculate temporary table parameters and usage costs
+        */
+        uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
+                                              subq_select->item_list.elements);
+        double lookup_cost= get_tmp_table_lookup_cost(join->thd,
+                                                      subjoin_out_rows, rowlen);
+        double write_cost= get_tmp_table_write_cost(join->thd,
+                                                    subjoin_out_rows, rowlen);
+
+        /*
+          Let materialization cost include the cost to write the data into the
+          temporary table:
+        */ 
+        sjm->materialization_cost.add_io(subjoin_out_rows, write_cost);
+        
+        /*
+          Set the cost to do a full scan of the temptable (will need this to 
+          consider doing sjm-scan):
+        */ 
+        sjm->scan_cost.zero();
+        sjm->scan_cost.add_io(sjm->rows, lookup_cost);
+
+        sjm->lookup_cost.convert_from_cost(lookup_cost);
+        sj_nest->sj_mat_info= sjm;
+        DBUG_EXECUTE("opt", print_sjm(sjm););
+      }
+    }
+  }
+  join->emb_sjm_nest= NULL;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Get estimated record length for semi-join materialization temptable
+  
+  SYNOPSIS
+    get_tmp_table_rec_length()
+      items  IN subquery's select list.
+
+  DESCRIPTION
+    Calculate estimated record length for semi-join materialization
+    temptable. It's an estimate because we don't follow every bit of
+    create_tmp_table()'s logic. This isn't necessary as the return value of
+    this function is used only for cost calculations.
+
+  RETURN
+    Length of the temptable record, in bytes
+*/
+
+static uint get_tmp_table_rec_length(Item **p_items, uint elements)
+{
+  uint len= 0;
+  Item *item;
+  //List_iterator<Item> it(items);
+  Item **p_item;
+  for (p_item= p_items; p_item < p_items + elements ; p_item++)
+  {
+    item = *p_item;
+    switch (item->result_type()) {
+    case REAL_RESULT:
+      len += sizeof(double);
+      break;
+    case INT_RESULT:
+      if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
+        len += 8;
+      else
+        len += 4;
+      break;
+    case STRING_RESULT:
+      enum enum_field_types type;
+      /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
+      if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
+          type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
+          type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+        len += 8;
+      else
+        len += item->max_length;
+      break;
+    case DECIMAL_RESULT:
+      len += 10;
+      break;
+    case ROW_RESULT:
+    default:
+      DBUG_ASSERT(0); /* purecov: deadcode */
+      break;
+    }
+  }
+  return len;
+}
+
+
+/**
+  The cost of a lookup into a unique hash/btree index on a temporary table
+  with 'row_count' rows each of size 'row_size'.
+
+  @param thd  current query context
+  @param row_count  number of rows in the temp table
+  @param row_size   average size in bytes of the rows
+
+  @return  the cost of one lookup
+*/
+
+static double
+get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size)
+{
+  if (row_count * row_size > thd->variables.max_heap_table_size)
+    return (double) DISK_TEMPTABLE_LOOKUP_COST;
+  else
+    return (double) HEAP_TEMPTABLE_LOOKUP_COST;
+}
+
+/**
+  The cost of writing a row into a temporary table with 'row_count' unique
+  rows each of size 'row_size'.
+
+  @param thd  current query context
+  @param row_count  number of rows in the temp table
+  @param row_size   average size in bytes of the rows
+
+  @return  the cost of writing one row
+*/
+
+static double
+get_tmp_table_write_cost(THD *thd, double row_count, uint row_size)
+{
+  double lookup_cost= get_tmp_table_lookup_cost(thd, row_count, row_size);
+  /*
+    TODO:
+    This is an optimistic estimate. Add additional costs resulting from
+    actually writing the row to memory/disk and possible index reorganization.
+  */
+  return lookup_cost;
+}
+
+
+/*
+  Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
+
+  SYNOPSIS
+    find_eq_ref_candidate()
+      table             Table to be checked
+      sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
+                        count.
+
+  DESCRIPTION
+    Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
+
+  TODO
+    Check again if it is feasible to factor common parts with constant table
+    search
+
+    Also check if it's feasible to factor common parts with table elimination
+
+  RETURN
+    TRUE  - There exists an eq_ref(outer-tables) candidate
+    FALSE - Otherwise
+*/
+
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
+{
+  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
+
+  if (keyuse)
+  {
+    do
+    {
+      uint key= keyuse->key;
+      KEY *keyinfo;
+      key_part_map bound_parts= 0;
+      bool is_excluded_key= keyuse->is_for_hash_join(); 
+      if (!is_excluded_key)
+      {
+        keyinfo= table->key_info + key;
+        is_excluded_key= !test(keyinfo->flags & HA_NOSAME);
+      }
+      if (!is_excluded_key)
+      {
+        do  /* For all equalities on all key parts */
+        {
+          /* Check if this is "t.keypart = expr(outer_tables) */
+          if (!(keyuse->used_tables & sj_inner_tables) &&
+              !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
+          {
+            bound_parts |= 1 << keyuse->keypart;
+          }
+          keyuse++;
+        } while (keyuse->key == key && keyuse->table == table);
+
+        if (bound_parts == PREV_BITS(uint, keyinfo->key_parts))
+          return TRUE;
+      }
+      else
+      {
+        do
+        {
+          keyuse++;
+        } while (keyuse->key == key && keyuse->table == table);
+      }
+    } while (keyuse->table == table);
+  }
+  return FALSE;
+}
+
+
+/*
+  Do semi-join optimization step after we've added a new tab to join prefix
+
+  SYNOPSIS
+    advance_sj_state()
+      join                        The join we're optimizing
+      remaining_tables            Tables not in the join prefix
+      new_join_tab                Join tab we've just added to the join prefix
+      idx                         Index of this join tab (i.e. number of tables
+                                  in the prefix minus one)
+      current_record_count INOUT  Estimate of #records in join prefix's output
+      current_read_time    INOUT  Cost to execute the join prefix
+      loose_scan_pos       IN     A POSITION with LooseScan plan to access 
+                                  table new_join_tab
+                                  (produced by the last best_access_path call)
+
+  DESCRIPTION
+    Update semi-join optimization state after we've added another tab (table 
+    and access method) to the join prefix.
+    
+    The state is maintained in join->positions[#prefix_size]. Each of the
+    available strategies has its own state variables.
+    
+    for each semi-join strategy
+    {
+      update strategy's state variables;
+
+      if (join prefix has all the tables that are needed to consider
+          using this strategy for the semi-join(s))
+      {
+        calculate cost of using the strategy
+        if ((this is the first strategy to handle the semi-join nest(s)  ||
+            the cost is less than other strategies))
+        {
+          // Pick this strategy
+          pos->sj_strategy= ..
+          ..
+        }
+      }
+
+    Most of the new state is saved join->positions[idx] (and hence no undo
+    is necessary). Several members of class JOIN are updated also, these
+    changes can be rolled back with restore_prev_sj_state().
+
+    See setup_semijoin_dups_elimination() for a description of what kinds of
+    join prefixes each strategy can handle.
+*/
+
+bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map inner_tables)
+{
+  for (int i= (int)idx; i >= 0; i--)
+  {
+    TABLE_LIST *emb_sj_nest;
+    if ((emb_sj_nest= prefix[i].table->emb_sj_nest))
+    {
+      if (inner_tables & emb_sj_nest->sj_inner_tables)
+        return !test(inner_tables == (emb_sj_nest->sj_inner_tables &
+                                      ~join->const_table_map));
+    }
+  }
+  return FALSE;
+}
+
+
+void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, 
+                      double *current_record_count, double *current_read_time,
+                      POSITION *loose_scan_pos)
+{
+  POSITION *pos= join->positions + idx;
+  const JOIN_TAB *new_join_tab= pos->table; 
+  Semi_join_strategy_picker *pickers[]=
+  {
+    &pos->firstmatch_picker,
+    &pos->loosescan_picker,
+    &pos->sjmat_picker,
+    &pos->dups_weedout_picker,
+    NULL,
+  };
+
+  if (join->emb_sjm_nest)
+  {
+    /* 
+      We're performing optimization inside SJ-Materialization nest:
+       - there are no other semi-joins inside semi-join nests
+       - attempts to build semi-join strategies here will confuse
+         the optimizer, so bail out.
+    */
+    pos->sj_strategy= SJ_OPT_NONE;
+    return;
+  }
+
+  /* 
+    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
+    LooseScan detector in best_access_path)
+  */
+  remaining_tables &= ~new_join_tab->table->map;
+  pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+
+  Semi_join_strategy_picker **strategy;
+  if (idx == join->const_tables)
+  {
+    /* First table, initialize pickers */
+    for (strategy= pickers; *strategy != NULL; strategy++)
+      (*strategy)->set_empty();
+    pos->inner_tables_handled_with_other_sjs= 0;
+  }
+  else
+  {
+    for (strategy= pickers; *strategy != NULL; strategy++)
+    {
+      (*strategy)->set_from_prev(pos - 1);
+    }
+    pos->inner_tables_handled_with_other_sjs=
+       pos[-1].inner_tables_handled_with_other_sjs;
+  }
+
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+
+  {
+    pos->sj_strategy= SJ_OPT_NONE;
+
+    for (strategy= pickers; *strategy != NULL; strategy++)
+    {
+      table_map handled_fanout;
+      sj_strategy_enum sj_strategy;
+      double rec_count= *current_record_count;
+      double read_time= *current_read_time;
+      if ((*strategy)->check_qep(join, idx, remaining_tables, 
+                                 new_join_tab,
+                                 &rec_count,
+                                 &read_time,
+                                 &handled_fanout,
+                                 &sj_strategy,
+                                 loose_scan_pos))
+      {
+        /*
+          It's possible to use the strategy. Use it, if
+           - it removes semi-join fanout that was not removed before
+           - using it is cheaper than using something else,
+               and {if some other strategy has removed fanout
+               that this strategy is trying to remove, then it
+               did remove the fanout only for one semi-join}
+               This is to avoid a situation when
+                1. strategy X removes fanout for semijoin X,Y
+                2. using strategy Z is cheaper, but it only removes
+                   fanout from semijoin X.
+                3. We have no clue what to do about fanount of semi-join Y.
+        */
+        if ((join->cur_dups_producing_tables & handled_fanout) ||
+            (read_time < *current_read_time && 
+             !(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
+        {
+          /* Mark strategy as used */ 
+          (*strategy)->mark_used();
+          pos->sj_strategy= sj_strategy;
+          *current_read_time= read_time;
+          *current_record_count= rec_count;
+          join->cur_dups_producing_tables &= ~handled_fanout;
+          //TODO: update bitmap of semi-joins that were handled together with
+          // others.
+          if (is_multiple_semi_joins(join, join->positions, idx, handled_fanout))
+            pos->inner_tables_handled_with_other_sjs |= handled_fanout;
+        }
+        else
+        {
+          /* We decided not to apply the strategy. */
+          (*strategy)->set_empty();
+        }
+      }
+    }
+  }
+
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+  {
+    join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+
+    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+    if (!(remaining_tables &
+          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+      join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+  }
+
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+}
+
+
+void Sj_materialization_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->sjmat_picker.is_used)
+    set_empty();
+  else
+  {
+    sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables; 
+    sjm_scan_last_inner=  prev->sjmat_picker.sjm_scan_last_inner;
+  }
+  is_used= FALSE;
+}
+
+
+bool Sj_materialization_picker::check_qep(JOIN *join,
+                                          uint idx,
+                                          table_map remaining_tables, 
+                                          const JOIN_TAB *new_join_tab,
+                                          double *record_count,
+                                          double *read_time,
+                                          table_map *handled_fanout,
+                                          sj_strategy_enum *strategy,
+                                          POSITION *loose_scan_pos)
+{
+  bool sjm_scan;
+  SJ_MATERIALIZATION_INFO *mat_info;
+  if ((mat_info= at_sjmat_pos(join, remaining_tables,
+                              new_join_tab, idx, &sjm_scan)))
+  {
+    if (sjm_scan)
+    {
+      /*
+        We can't yet evaluate this option yet. This is because we can't
+        accout for fanout of sj-inner tables yet:
+
+          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
+                                     ^(1)           ^(2)
+
+        we're now at position (1). SJM temptable in general has multiple
+        records, so at point (1) we'll get the fanout from sj-inner tables (ie
+        there will be multiple record combinations).
+
+        The final join result will not contain any semi-join produced
+        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
+        the cardinality of the join output.  Extra fanout produced by 
+        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ...  otN.
+
+        The simple way to model this is to remove SJM-SCAN(...) fanout once
+        we reach the point #2.
+      */
+      sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+      sjm_scan_last_inner= idx;
+    }
+    else
+    {
+      /* This is SJ-Materialization with lookups */
+      COST_VECT prefix_cost; 
+      signed int first_tab= (int)idx - mat_info->tables;
+      double prefix_rec_count;
+      if (first_tab < (int)join->const_tables)
+      {
+        prefix_cost.zero();
+        prefix_rec_count= 1.0;
+      }
+      else
+      {
+        prefix_cost= join->positions[first_tab].prefix_cost;
+        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      }
+
+      double mat_read_time= prefix_cost.total_cost();
+      mat_read_time += mat_info->materialization_cost.total_cost() +
+                       prefix_rec_count * mat_info->lookup_cost.total_cost();
+
+      /*
+        NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+        elements to join->positions as that makes it hard to return things
+        back when making one step back in join optimization. That's done 
+        after the QEP has been chosen.
+      */
+      *read_time=    mat_read_time;
+      *record_count= prefix_rec_count;
+      *handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables;
+      *strategy= SJ_OPT_MATERIALIZE;
+      return TRUE;
+    }
+  }
+  
+  /* 4.A SJM-Scan second phase check */
+  if (sjm_scan_need_tables && /* Have SJM-Scan prefix */
+      !(sjm_scan_need_tables & remaining_tables))
+  {
+    TABLE_LIST *mat_nest= 
+      join->positions[sjm_scan_last_inner].table->emb_sj_nest;
+    SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
+
+    double prefix_cost;
+    double prefix_rec_count;
+    int first_tab= sjm_scan_last_inner + 1 - mat_info->tables;
+    /* Get the prefix cost */
+    if (first_tab == (int)join->const_tables)
+    {
+      prefix_rec_count= 1.0;
+      prefix_cost= 0.0;
+    }
+    else
+    {
+      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+    }
+
+    /* Add materialization cost */
+    prefix_cost += mat_info->materialization_cost.total_cost() +
+                   prefix_rec_count * mat_info->scan_cost.total_cost();
+    prefix_rec_count *= mat_info->rows;
+    
+    uint i;
+    table_map rem_tables= remaining_tables;
+    for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
+      rem_tables |= join->positions[i].table->table->map;
+
+    POSITION curpos, dummy;
+    /* Need to re-run best-access-path as we prefix_rec_count has changed */
+    bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
+    for (i= first_tab + mat_info->tables; i <= idx; i++)
+    {
+      best_access_path(join, join->positions[i].table, rem_tables, i,
+                       disable_jbuf, prefix_rec_count, &curpos, &dummy);
+      prefix_rec_count *= curpos.records_read;
+      prefix_cost += curpos.read_time;
+    }
+
+    *strategy= SJ_OPT_MATERIALIZE_SCAN;
+    *read_time=    prefix_cost;
+    *record_count= prefix_rec_count;
+    *handled_fanout= mat_nest->sj_inner_tables;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+void LooseScan_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->loosescan_picker.is_used)
+    set_empty();
+  else
+  {
+    first_loosescan_table= prev->loosescan_picker.first_loosescan_table;
+    loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables;
+  }
+  is_used= FALSE;
+}
+
+
+bool LooseScan_picker::check_qep(JOIN *join,
+                                 uint idx,
+                                 table_map remaining_tables, 
+                                 const JOIN_TAB *new_join_tab,
+                                 double *record_count, 
+                                 double *read_time,
+                                 table_map *handled_fanout,
+                                 sj_strategy_enum *strategy,
+                                 struct st_position *loose_scan_pos)
+{
+  POSITION *first= join->positions + first_loosescan_table; 
+  /* 
+    LooseScan strategy can't handle interleaving between tables from the 
+    semi-join that LooseScan is handling and any other tables.
+
+    If we were considering LooseScan for the join prefix (1)
+       and the table we're adding creates an interleaving (2)
+    then 
+       stop considering loose scan
+  */
+  if ((first_loosescan_table != MAX_TABLES) &&   // (1)
+      (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+      new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
+  {
+    first_loosescan_table= MAX_TABLES;
+  }
+
+  /*
+    If we got an option to use LooseScan for the current table, start
+    considering using LooseScan strategy
+  */
+  if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
+  {
+    first_loosescan_table= idx;
+    loosescan_need_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables | 
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+  }
+  
+  if ((first_loosescan_table != MAX_TABLES) && 
+      !(remaining_tables & loosescan_need_tables) &&
+      (new_join_tab->table->map & loosescan_need_tables))
+  {
+    /* 
+      Ok we have LooseScan plan and also have all LooseScan sj-nest's
+      inner tables and outer correlated tables into the prefix.
+    */
+
+    first= join->positions + first_loosescan_table; 
+    uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
+    /* Got a complete LooseScan range. Calculate its cost */
+    /*
+      The same problem as with FirstMatch - we need to save POSITIONs
+      somewhere but reserving space for all cases would require too
+      much space. We will re-calculate POSITION structures later on. 
+    */
+    bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
+    optimize_wo_join_buffering(join, first_loosescan_table, idx,
+                               remaining_tables, 
+                               TRUE,  //first_alt
+                               disable_jbuf ? join->table_count :
+                                 first_loosescan_table + n_tables,
+                               record_count,
+                               read_time);
+    /*
+      We don't yet have any other strategies that could handle this
+      semi-join nest (the other options are Duplicate Elimination or
+      Materialization, which need at least the same set of tables in 
+      the join prefix to be considered) so unconditionally pick the 
+      LooseScan.
+    */
+    *strategy= SJ_OPT_LOOSE_SCAN;
+    *handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+void Firstmatch_picker::set_from_prev(struct st_position *prev)
+{
+  if (prev->firstmatch_picker.is_used)
+    invalidate_firstmatch_prefix();
+  else
+  {
+    first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table;
+    first_firstmatch_rtbl=  prev->firstmatch_picker.first_firstmatch_rtbl;
+    firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables;
+  }
+  is_used= FALSE;
+}
+
+bool Firstmatch_picker::check_qep(JOIN *join,
+                                  uint idx,
+                                  table_map remaining_tables, 
+                                  const JOIN_TAB *new_join_tab,
+                                  double *record_count,
+                                  double *read_time,
+                                  table_map *handled_fanout,
+                                  sj_strategy_enum *strategy,
+                                  POSITION *loose_scan_pos)
+{
+  if (new_join_tab->emb_sj_nest &&
+      optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
+      !join->outer_join)
+  {
+    const table_map outer_corr_tables=
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+    const table_map sj_inner_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
+
+    /* 
+      Enter condition:
+       1. The next join tab belongs to semi-join nest
+          (verified for the encompassing code block above).
+       2. We're not in a duplicate producer range yet
+       3. All outer tables that
+           - the subquery is correlated with, or
+           - referred to from the outer_expr 
+          are in the join prefix
+       4. All inner tables are still part of remaining_tables.
+    */
+    if (!join->cur_sj_inner_tables &&              // (2)
+        !(remaining_tables & outer_corr_tables) && // (3)
+        (sj_inner_tables ==                        // (4)
+         ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
+    {
+      /* Start tracking potential FirstMatch range */
+      first_firstmatch_table= idx;
+      firstmatch_need_tables= sj_inner_tables;
+      first_firstmatch_rtbl= remaining_tables;
+    }
+
+    if (in_firstmatch_prefix())
+    {
+      if (outer_corr_tables & first_firstmatch_rtbl)
+      {
+        /*
+          Trying to add an sj-inner table whose sj-nest has an outer correlated 
+          table that was not in the prefix. This means FirstMatch can't be used.
+        */
+        invalidate_firstmatch_prefix();
+      }
+      else
+      {
+        /* Record that we need all of this semi-join's inner tables, too */
+        firstmatch_need_tables|= sj_inner_tables;
+      }
+    
+      if (in_firstmatch_prefix() && 
+          !(firstmatch_need_tables & remaining_tables))
+      {
+        /*
+          Got a complete FirstMatch range. Calculate correct costs and fanout
+        */
+
+        if (idx == first_firstmatch_table && 
+            optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE))
+        {
+          /* 
+            An important special case: only one inner table, and @@optimizer_switch
+            allows join buffering.
+             - read_time is the same (i.e. FirstMatch doesn't add any cost
+             - remove fanout added by the last table
+          */
+          if (*record_count)
+            *record_count /= join->positions[idx].records_read;
+        }
+        else
+        {
+          optimize_wo_join_buffering(join, first_firstmatch_table, idx,
+                                     remaining_tables, FALSE, idx,
+                                     record_count, 
+                                     read_time);
+        }
+        /*
+          We ought to save the alternate POSITIONs produced by
+          optimize_wo_join_buffering but the problem is that providing save
+          space uses too much space. Instead, we will re-calculate the
+          alternate POSITIONs after we've picked the best QEP.
+        */
+        *handled_fanout= firstmatch_need_tables;
+        /* *record_count and *read_time were set by the above call */
+        *strategy= SJ_OPT_FIRST_MATCH;
+        return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
+{
+  if (prev->dups_weedout_picker.is_used)
+    set_empty();
+  else
+  {
+    dupsweedout_tables=      prev->dups_weedout_picker.dupsweedout_tables;
+    first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table;
+  }
+  is_used= FALSE;
+}
+
+
+bool Duplicate_weedout_picker::check_qep(JOIN *join,
+                                         uint idx,
+                                         table_map remaining_tables, 
+                                         const JOIN_TAB *new_join_tab,
+                                         double *record_count,
+                                         double *read_time,
+                                         table_map *handled_fanout,
+                                         sj_strategy_enum *strategy,
+                                         POSITION *loose_scan_pos
+                                         )
+{
+  TABLE_LIST *nest;
+  if ((nest= new_join_tab->emb_sj_nest))
+  {
+    if (!dupsweedout_tables)
+      first_dupsweedout_table= idx;
+
+    dupsweedout_tables |= nest->sj_inner_tables |
+                          nest->nested_join->sj_depends_on |
+                          nest->nested_join->sj_corr_tables;
+  }
+  
+  if (dupsweedout_tables)
+  {
+    /* we're in the process of constructing a DuplicateWeedout range */
+    TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
+    /* and we've entered an inner side of an outer join*/
+    if (emb && emb->on_expr)
+      dupsweedout_tables |= emb->nested_join->used_tables;
+  }
+  
+  /* If this is the last table that we need for DuplicateWeedout range */
+  if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map &
+                              dupsweedout_tables))
+  {
+    /*
+      Ok, reached a state where we could put a dups weedout point.
+      Walk back and calculate
+        - the join cost (this is needed as the accumulated cost may assume 
+          some other duplicate elimination method)
+        - extra fanout that will be removed by duplicate elimination
+        - duplicate elimination cost
+      There are two cases:
+        1. We have other strategy/ies to remove all of the duplicates.
+        2. We don't.
+      
+      We need to calculate the cost in case #2 also because we need to make
+      choice between this join order and others.
+    */
+    uint first_tab= first_dupsweedout_table;
+    double dups_cost;
+    double prefix_rec_count;
+    double sj_inner_fanout= 1.0;
+    double sj_outer_fanout= 1.0;
+    uint temptable_rec_size;
+    if (first_tab == join->const_tables)
+    {
+      prefix_rec_count= 1.0;
+      temptable_rec_size= 0;
+      dups_cost= 0.0;
+    }
+    else
+    {
+      dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+      temptable_rec_size= 8; /* This is not true but we'll make it so */
+    }
+    
+    table_map dups_removed_fanout= 0;
+    double current_fanout= prefix_rec_count;
+    for (uint j= first_dupsweedout_table; j <= idx; j++)
+    {
+      POSITION *p= join->positions + j;
+      current_fanout *= p->records_read;
+      dups_cost += p->read_time + current_fanout / TIME_FOR_COMPARE;
+      if (p->table->emb_sj_nest)
+      {
+        sj_inner_fanout *= p->records_read;
+        dups_removed_fanout |= p->table->table->map;
+      }
+      else
+      {
+        sj_outer_fanout *= p->records_read;
+        temptable_rec_size += p->table->table->file->ref_length;
+      }
+    }
+
+    /*
+      Add the cost of temptable use. The table will have sj_outer_fanout
+      records, and we will make 
+      - sj_outer_fanout table writes
+      - sj_inner_fanout*sj_outer_fanout  lookups.
+
+    */
+    double one_lookup_cost= get_tmp_table_lookup_cost(join->thd,
+                                                      sj_outer_fanout,
+                                                      temptable_rec_size);
+    double one_write_cost= get_tmp_table_write_cost(join->thd,
+                                                    sj_outer_fanout,
+                                                    temptable_rec_size);
+
+    double write_cost= join->positions[first_tab].prefix_record_count* 
+                       sj_outer_fanout * one_write_cost;
+    double full_lookup_cost= join->positions[first_tab].prefix_record_count* 
+                             sj_outer_fanout* sj_inner_fanout * 
+                             one_lookup_cost;
+    dups_cost += write_cost + full_lookup_cost;
+    
+    *read_time= dups_cost;
+    *record_count= prefix_rec_count * sj_outer_fanout;
+    *handled_fanout= dups_removed_fanout;
+    *strategy= SJ_OPT_DUPS_WEEDOUT;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Remove the last join tab from from join->cur_sj_inner_tables bitmap
+  we assume remaining_tables doesnt contain @tab.
+*/
+
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx)
+{
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= tab->emb_sj_nest))
+  {
+    /* If we're removing the last SJ-inner table, remove the sj-nest */
+    if ((remaining_tables & emb_sj_nest->sj_inner_tables) == 
+        (emb_sj_nest->sj_inner_tables & ~tab->table->map))
+    {
+      tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+    }
+  }
+  POSITION *pos= tab->join->positions + idx;
+  tab->join->cur_dups_producing_tables= pos->prefix_dups_producing_tables;
+}
+
+
+/*
+  Given a semi-join nest, find out which of the IN-equalities are bound
+
+  SYNOPSIS
+    get_bound_sj_equalities()
+      sj_nest           Semi-join nest
+      remaining_tables  Tables that are not yet bound
+
+  DESCRIPTION
+    Given a semi-join nest, find out which of the IN-equalities have their
+    left part expression bound (i.e. the said expression doesn't refer to
+    any of remaining_tables and can be evaluated).
+
+  RETURN
+    Bitmap of bound IN-equalities.
+*/
+
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables)
+{
+  List_iterator<Item> li(sj_nest->nested_join->sj_outer_expr_list);
+  Item *item;
+  uint i= 0;
+  ulonglong res= 0;
+  while ((item= li++))
+  {
+    /*
+      Q: should this take into account equality propagation and how?
+      A: If e->outer_side is an Item_field, walk over the equality
+         class and see if there is an element that is bound?
+      (this is an optional feature)
+    */
+    if (!(item->used_tables() & remaining_tables))
+    {
+      res |= 1ULL << i;
+    }
+    i++;
+  }
+  return res;
+}
+
+
+/*
+  Check if the last tables of the partial join order allow to use
+  sj-materialization strategy for them
+
+  SYNOPSIS
+    at_sjmat_pos()
+      join              
+      remaining_tables
+      tab                the last table's join tab
+      idx                last table's index
+      loose_scan    OUT  TRUE <=> use LooseScan
+
+  RETURN
+    TRUE   Yes, can apply sj-materialization
+    FALSE  No, some of the requirements are not met
+*/
+
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan)
+{
+  /*
+   Check if 
+    1. We're in a semi-join nest that can be run with SJ-materialization
+    2. All the tables correlated through the IN subquery are in the prefix
+  */
+  TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
+  table_map suffix= remaining_tables & ~tab->table->map;
+  if (emb_sj_nest && emb_sj_nest->sj_mat_info &&
+      !(suffix & emb_sj_nest->sj_inner_tables))
+  {
+    /* 
+      Walk back and check if all immediately preceding tables are from
+      this semi-join.
+    */
+    uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
+    for (uint i= 1; i < n_tables ; i++)
+    {
+      if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
+        return NULL;
+    }
+    *loose_scan= test(remaining_tables & ~tab->table->map &
+                             (emb_sj_nest->sj_inner_tables |
+                              emb_sj_nest->nested_join->sj_depends_on));
+    if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed)
+      return NULL;
+    else
+      return emb_sj_nest->sj_mat_info;
+  }
+  return NULL;
+}
+
+
+
+/*
+  Fix semi-join strategies for the picked join order
+
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
+
+  DESCRIPTION
+    Fix semi-join strategies for the picked join order. This is a step that
+    needs to be done right after we have fixed the join order. What we do
+    here is switch join's semi-join strategy description from backward-based
+    to forwards based.
+    
+    When join optimization is in progress, we re-consider semi-join
+    strategies after we've added another table. Here's an illustration.
+    Suppose the join optimization is underway:
+
+    1) ot1  it1  it2 
+                 sjX  -- looking at (ot1, it1, it2) join prefix, we decide
+                         to use semi-join strategy sjX.
+
+    2) ot1  it1  it2  ot2 
+                 sjX  sjY -- Having added table ot2, we now may consider
+                             another semi-join strategy and decide to use a 
+                             different strategy sjY. Note that the record
+                             of sjX has remained under it2. That is
+                             necessary because we need to be able to get
+                             back to (ot1, it1, it2) join prefix.
+      what makes things even worse is that there are cases where the choice
+      of sjY changes the way we should access it2. 
+
+    3) [ot1  it1  it2  ot2  ot3]
+                  sjX  sjY  -- This means that after join optimization is
+                               finished, semi-join info should be read
+                               right-to-left (while nearly all plan refinement
+                               functions, EXPLAIN, etc proceed from left to 
+                               right)
+
+    This function does the needed reversal, making it possible to read the
+    join and semi-join order from left to right.
+*/    
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
+{
+  uint table_count=join->table_count;
+  uint tablenr;
+  table_map remaining_tables= 0;
+  table_map handled_tabs= 0;
+  for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
+  {
+    POSITION *pos= join->best_positions + tablenr;
+    JOIN_TAB *s= pos->table;
+    uint first;
+    LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it
+
+    if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE)
+    {
+      remaining_tables |= s->table->map;
+      continue;
+    }
+    
+    if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
+    {
+      SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= FALSE;
+      memcpy(pos - sjm->tables + 1, sjm->positions, 
+             sizeof(POSITION) * sjm->tables);
+      first= tablenr - sjm->tables + 1;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE;
+    }
+    else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner;
+      SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= TRUE;
+      first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1;
+      memcpy(join->best_positions + first, 
+             sjm->positions, sizeof(POSITION) * sjm->tables);
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      /* 
+        Do what advance_sj_state did: re-run best_access_path for every table
+        in the [last_inner_table + 1; pos..) range
+      */
+      double prefix_rec_count;
+      /* Get the prefix record count */
+      if (first == join->const_tables)
+        prefix_rec_count= 1.0;
+      else
+        prefix_rec_count= join->best_positions[first-1].prefix_record_count;
+      
+      /* Add materialization record count*/
+      prefix_rec_count *= sjm->rows;
+      
+      uint i;
+      table_map rem_tables= remaining_tables;
+      for (i= tablenr; i != (first + sjm->tables - 1); i--)
+        rem_tables |= join->best_positions[i].table->table->map;
+
+      POSITION dummy;
+      join->cur_sj_inner_tables= 0;
+      for (i= first + sjm->tables; i <= tablenr; i++)
+      {
+        best_access_path(join, join->best_positions[i].table, rem_tables, i, 
+                         FALSE, prefix_rec_count,
+                         join->best_positions + i, &dummy);
+        prefix_rec_count *= join->best_positions[i].records_read;
+        rem_tables &= ~join->best_positions[i].table->table->map;
+      }
+    }
+ 
+    if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
+    {
+      first= pos->firstmatch_picker.first_firstmatch_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+      POSITION dummy; // For loose scan paths
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[tablenr - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        rem_tables |= join->best_positions[idx].table->table->map;
+      }
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer)
+        {
+           best_access_path(join, join->best_positions[idx].table, 
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx, &dummy);
+        }
+        record_count *= join->best_positions[idx].records_read;
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+      }
+    }
+
+    if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) 
+    {
+      first= pos->loosescan_picker.first_loosescan_table;
+      POSITION *first_pos= join->best_positions + first;
+      POSITION loose_scan_pos; // For loose scan paths
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[tablenr - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+        rem_tables |= join->best_positions[idx].table->table->map;
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer || (idx == first))
+        {
+           best_access_path(join, join->best_positions[idx].table,
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx,
+                            &loose_scan_pos);
+           if (idx==first)
+             join->best_positions[idx]= loose_scan_pos;
+        }
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+        record_count *= join->best_positions[idx].records_read;
+      }
+      first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
+    }
+
+    if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
+    {
+      /* 
+        Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
+        this table.
+      */
+      first= pos->dups_weedout_picker.first_dupsweedout_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+    }
+    
+    uint i_end= first + join->best_positions[first].n_sj_tables;
+    for (uint i= first; i < i_end; i++)
+    {
+      if (i != first)
+        join->best_positions[i].sj_strategy= SJ_OPT_NONE;
+      handled_tabs |= join->best_positions[i].table->table->map;
+    }
+
+    if (tablenr != first)
+      pos->sj_strategy= SJ_OPT_NONE;
+    remaining_tables |= s->table->map;
+    join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
+    join->join_tab[first].n_sj_tables= join->best_positions[first].n_sj_tables;
+  }
+}
+
+
+/*
+  Setup semi-join materialization strategy for one semi-join nest
+  
+  SYNOPSIS
+
+  setup_sj_materialization()
+    tab  The first tab in the semi-join
+
+  DESCRIPTION
+    Setup execution structures for one semi-join materialization nest:
+    - Create the materialization temporary table
+    - If we're going to do index lookups
+        create TABLE_REF structure to make the lookus
+    - else (if we're going to do a full scan of the temptable)
+        create Copy_field structures to do copying.
+
+  RETURN
+    FALSE  Ok
+    TRUE   Error
+*/
+
+bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
+{
+  DBUG_ENTER("setup_sj_materialization");
+  JOIN_TAB *tab= sjm_tab->bush_children->start;
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  
+  /* Walk out of outer join nests until we reach the semi-join nest we're in */
+  while (!emb_sj_nest->sj_mat_info)
+    emb_sj_nest= emb_sj_nest->embedding;
+
+  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
+  THD *thd= tab->join->thd;
+  /* First the calls come to the materialization function */
+  //List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
+  
+  DBUG_ASSERT(sjm->is_used);
+  /* 
+    Set up the table to write to, do as select_union::create_result_table does
+  */
+  sjm->sjm_table_param.init();
+  sjm->sjm_table_param.bit_fields_as_long= TRUE;
+  //List_iterator<Item> it(item_list);
+  SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select();
+  Item **p_item= subq_select->ref_pointer_array;
+  Item **p_end= p_item + subq_select->item_list.elements;
+  //while((right_expr= it++))
+  for(;p_item != p_end; p_item++)
+    sjm->sjm_table_cols.push_back(*p_item);
+
+  sjm->sjm_table_param.field_count= subq_select->item_list.elements;
+  sjm->sjm_table_param.force_not_null_cols= TRUE;
+
+  if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, 
+                                     sjm->sjm_table_cols, (ORDER*) 0, 
+                                     TRUE /* distinct */, 
+                                     1, /*save_sum_fields*/
+                                     thd->options | TMP_TABLE_ALL_COLUMNS, 
+                                     HA_POS_ERROR /*rows_limit */, 
+                                     (char*)"sj-materialize")))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  sjm->table->map=  emb_sj_nest->nested_join->used_tables;
+  sjm->table->file->extra(HA_EXTRA_WRITE_CACHE);
+  sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+
+  tab->join->sj_tmp_tables.push_back(sjm->table);
+  tab->join->sjm_info_list.push_back(sjm);
+  
+  sjm->materialized= FALSE;
+  sjm_tab->table= sjm->table;
+  sjm->table->pos_in_table_list= emb_sj_nest;
+ 
+  DBUG_RETURN(FALSE);
+}
+
+
+bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
+{
+  DBUG_ENTER("setup_sj_materialization_part2");
+  JOIN_TAB *tab= sjm_tab->bush_children->start;
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  /* Walk out of outer join nests until we reach the semi-join nest we're in */
+  while (!emb_sj_nest->sj_mat_info)
+    emb_sj_nest= emb_sj_nest->embedding;
+  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
+  THD *thd= tab->join->thd;
+  uint i;
+  //List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
+  //List_iterator<Item> it(item_list);
+
+  if (!sjm->is_sj_scan)
+  {
+    KEY           *tmp_key; /* The only index on the temporary table. */
+    uint          tmp_key_parts; /* Number of keyparts in tmp_key. */
+    tmp_key= sjm->table->key_info;
+    tmp_key_parts= tmp_key->key_parts;
+    
+    /*
+      Create/initialize everything we will need to index lookups into the
+      temptable.
+    */
+    TABLE_REF *tab_ref;
+    tab_ref= &sjm_tab->ref;
+    tab_ref->key= 0; /* The only temp table index. */
+    tab_ref->key_length= tmp_key->key_length;
+    if (!(tab_ref->key_buff=
+          (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+        !(tab_ref->key_copy=
+          (store_key**) thd->alloc((sizeof(store_key*) *
+                                    (tmp_key_parts + 1)))) ||
+        !(tab_ref->items=
+          (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+
+    tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length);
+    tab_ref->key_err=1;
+    tab_ref->null_rejecting= 1;
+    tab_ref->disable_cache= FALSE;
+
+    KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+    store_key **ref_key= tab_ref->key_copy;
+    uchar *cur_ref_buff= tab_ref->key_buff;
+    
+    for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+    {
+      tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i);
+      int null_count= test(cur_key_part->field->real_maybe_null());
+      *ref_key= new store_key_item(thd, cur_key_part->field,
+                                   /* TODO:
+                                      the NULL byte is taken into account in
+                                      cur_key_part->store_length, so instead of
+                                      cur_ref_buff + test(maybe_null), we could
+                                      use that information instead.
+                                   */
+                                   cur_ref_buff + null_count,
+                                   null_count ? cur_ref_buff : 0,
+                                   cur_key_part->length, tab_ref->items[i],
+                                   FALSE);
+      cur_ref_buff+= cur_key_part->store_length;
+    }
+    *ref_key= NULL; /* End marker. */
+      
+    /*
+      We don't ever have guarded conditions for SJM tables, but code at SQL
+      layer depends on cond_guards array being alloced.
+    */
+    if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts)))
+    {
+      DBUG_RETURN(TRUE);
+    }
+
+    tab_ref->key_err= 1;
+    tab_ref->key_parts= tmp_key_parts;
+    sjm->tab_ref= tab_ref;
+
+    /*
+      Remove the injected semi-join IN-equalities from join_tab conds. This
+      needs to be done because the IN-equalities refer to columns of
+      sj-inner tables which are not available after the materialization
+      has been finished.
+    */
+    for (i= 0; i < sjm->tables; i++)
+    {
+      remove_sj_conds(&tab[i].select_cond);
+      if (tab[i].select)
+        remove_sj_conds(&tab[i].select->cond);
+    }
+    if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm,
+                                                      emb_sj_nest->sj_subq_pred)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    sjm_tab->type= JT_EQ_REF;
+    sjm_tab->select_cond= sjm->in_equality;
+  }
+  else
+  {
+    /*
+      We'll be doing full scan of the temptable.  
+      Setup copying of temptable columns back to the record buffers
+      for their source tables. We need this because IN-equalities
+      refer to the original tables.
+
+      EXAMPLE
+
+      Consider the query:
+        SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it)
+      
+      Suppose it's executed with SJ-Materialization-scan. We choose to do scan
+      if we can't do the lookup, i.e. the join order is (it, ot). The plan
+      would look as follows:
+
+        table    access method      condition
+         it      materialize+scan    -
+         ot      (whatever)          ot1.col1=it.col2 (C2)
+
+      The condition C2 refers to current row of table it. The problem is
+      that by the time we evaluate C2, we would have finished with scanning
+      it itself and will be scanning the temptable. 
+
+      At the moment, our solution is to copy back: when we get the next
+      temptable record, we copy its columns to their corresponding columns
+      in the record buffers for the source tables. 
+    */
+    sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
+    //it.rewind();
+    Item **p_item= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array;
+    for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
+    {
+      bool dummy;
+      Item_equal *item_eq;
+      //Item *item= (it++)->real_item();
+      Item *item= (*(p_item++))->real_item();
+      DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+      Field *copy_to= ((Item_field*)item)->field;
+      /*
+        Tricks with Item_equal are due to the following: suppose we have a
+        query:
+        
+        ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2
+                                               WHERE it1.col= it2.col)
+         then equality propagation will create an 
+         
+           Item_equal(it1.col, it2.col, ot.col) 
+         
+         then substitute_for_best_equal_field() will change the conditions
+         according to the join order:
+
+         table | attached condition
+         ------+--------------------
+          it1  |
+          it2  | it1.col=it2.col
+          ot   | cond(it1.col)
+
+         although we've originally had "SELECT it2.col", conditions attached 
+         to subsequent outer tables will refer to it1.col, so SJM-Scan will
+         need to unpack data to there. 
+         That is, if an element from subquery's select list participates in 
+         equality propagation, then we need to unpack it to the first
+         element equality propagation member that refers to table that is
+         within the subquery.
+      */
+      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);
+
+      if (item_eq)
+      {
+        List_iterator<Item> it(item_eq->equal_items);
+        /* We're interested in field items only */
+        if (item_eq->get_const())
+          it++;
+        Item *item;
+        while ((item= it++))
+        {
+          if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables))
+          {
+            DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
+            copy_to= ((Item_field *) (item->real_item()))->field;
+            break;
+          }
+        }
+      }
+      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
+      /* The write_set for source tables must be set up to allow the copying */
+      bitmap_set_bit(copy_to->table->write_set, copy_to->field_index);
+    }
+    sjm_tab->type= JT_ALL;
+
+    /* Initialize full scan */
+    sjm_tab->read_first_record= join_read_record_no_init;
+    sjm_tab->read_record.copy_field= sjm->copy_field;
+    sjm_tab->read_record.copy_field_end= sjm->copy_field +
+                                         sjm->sjm_table_cols.elements;
+    sjm_tab->read_record.read_record= rr_sequential_and_unpack;
+  }
+
+  sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
+
+  DBUG_RETURN(FALSE);
+}
+
+
+
+/*
+  Create subquery IN-equalities assuming use of materialization strategy
+  
+  SYNOPSIS
+    create_subq_in_equalities()
+      thd        Thread handle
+      sjm        Semi-join materialization structure
+      subq_pred  The subquery predicate
+
+  DESCRIPTION
+    Create subquery IN-equality predicates. That is, for a subquery
+    
+      (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...)
+    
+    create "oe1=ie1 AND ie1=ie2 AND ..." expression, such that ie1, ie2, ..
+    refer to the columns of the table that's used to materialize the
+    subquery.
+
+  RETURN 
+    Created condition
+*/
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred)
+{
+  Item *res= NULL;
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    if (!(res= new Item_func_eq(subq_pred->left_expr,
+                                new Item_field(sjm->table->field[0]))))
+      return NULL; /* purecov: inspected */
+  }
+  else
+  {
+    Item *conj;
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      if (!(conj= new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                                   new Item_field(sjm->table->field[i]))) ||
+          !(res= and_items(res, conj)))
+        return NULL; /* purecov: inspected */
+    }
+  }
+  if (res->fix_fields(thd, &res))
+    return NULL; /* purecov: inspected */
+  return res;
+}
+
+
+
+
+static void remove_sj_conds(Item **tree)
+{
+  if (*tree)
+  {
+    if (is_cond_sj_in_equality(*tree))
+    {
+      *tree= NULL;
+      return;
+    }
+    else if ((*tree)->type() == Item::COND_ITEM) 
+    {
+      Item *item;
+      List_iterator<Item> li(*(((Item_cond*)*tree)->argument_list()));
+      while ((item= li++))
+      {
+        if (is_cond_sj_in_equality(item))
+          li.replace(new Item_int(1));
+      }
+    }
+  }
+}
+
+/* Check if given Item was injected by semi-join equality */
+static bool is_cond_sj_in_equality(Item *item)
+{
+  if (item->type() == Item::FUNC_ITEM &&
+      ((Item_func*)item)->functype()== Item_func::EQ_FUNC)
+  {
+    Item_func_eq *item_eq= (Item_func_eq*)item;
+    return test(item_eq->in_equality_no != UINT_MAX);
+  }
+  return FALSE;
+}
+
+
+/*
+  Create a temporary table to weed out duplicate rowid combinations
+
+  SYNOPSIS
+
+    create_sj_weedout_tmp_table()
+      thd                    Thread handle
+
+  DESCRIPTION
+    Create a temporary table to weed out duplicate rowid combinations. The
+    table has a single column that is a concatenation of all rowids in the
+    combination. 
+
+    Depending on the needed length, there are two cases:
+
+    1. When the length of the column < max_key_length:
+
+      CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
+
+    2. Otherwise (not a valid SQL syntax but internally supported):
+
+      CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
+
+    The code in this function was produced by extraction of relevant parts
+    from create_tmp_table().
+
+  RETURN
+    created table
+    NULL on error
+*/
+
+bool
+SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
+{
+  MEM_ROOT *mem_root_save, own_root;
+  TABLE *table;
+  TABLE_SHARE *share;
+  uint  temp_pool_slot=MY_BIT_NONE;
+  char	*tmpname,path[FN_REFLEN];
+  Field **reg_field;
+  KEY_PART_INFO *key_part_info;
+  KEY *keyinfo;
+  uchar *group_buff;
+  uchar *bitmaps;
+  uint *blob_field;
+  bool using_unique_constraint=FALSE;
+  bool use_packed_rows= FALSE;
+  Field *field, *key_field;
+  uint null_pack_length, null_count;
+  uchar *null_flags;
+  uchar *pos;
+  DBUG_ENTER("create_sj_weedout_tmp_table");
+  DBUG_ASSERT(!is_degenerate);
+
+  tmp_table= NULL;
+  uint uniq_tuple_length_arg= rowid_len + null_bytes;
+  /*
+    STEP 1: Get temporary table name
+  */
+  statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
+  if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+    temp_pool_slot = bitmap_lock_set_next(&temp_pool);
+
+  if (temp_pool_slot != MY_BIT_NONE) // we got a slot
+    sprintf(path, "%s_%lx_%i", tmp_file_prefix,
+	    current_pid, temp_pool_slot);
+  else
+  {
+    /* if we run out of slots or we are not using tempool */
+    sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+            thd->thread_id, thd->tmp_table++);
+  }
+  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
+
+  /* STEP 2: Figure if we'll be using a key or blob+constraint */
+  if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB)
+    using_unique_constraint= TRUE;
+
+  /* STEP 3: Allocate memory for temptable description */
+  init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
+  if (!multi_alloc_root(&own_root,
+                        &table, sizeof(*table),
+                        &share, sizeof(*share),
+                        &reg_field, sizeof(Field*) * (1+1),
+                        &blob_field, sizeof(uint)*2,
+                        &keyinfo, sizeof(*keyinfo),
+                        &key_part_info, sizeof(*key_part_info) * 2,
+                        &start_recinfo,
+                        sizeof(*recinfo)*(1*2+4),
+                        &tmpname, (uint) strlen(path)+1,
+                        &group_buff, (!using_unique_constraint ?
+                                      uniq_tuple_length_arg : 0),
+                        &bitmaps, bitmap_buffer_size(1)*3,
+                        NullS))
+  {
+    if (temp_pool_slot != MY_BIT_NONE)
+      bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+    DBUG_RETURN(TRUE);
+  }
+  strmov(tmpname,path);
+  
+
+  /* STEP 4: Create TABLE description */
+  bzero((char*) table,sizeof(*table));
+  bzero((char*) reg_field,sizeof(Field*)*2);
+
+  table->mem_root= own_root;
+  mem_root_save= thd->mem_root;
+  thd->mem_root= &table->mem_root;
+
+  table->field=reg_field;
+  table->alias.set("weedout-tmp", sizeof("weedout-tmp")-1,
+                   table_alias_charset);
+  table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
+  table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
+  table->map=1;
+  table->temp_pool_slot = temp_pool_slot;
+  table->copy_blobs= 1;
+  table->in_use= thd;
+  table->quick_keys.init();
+  table->covering_keys.init();
+  table->keys_in_use_for_query.init();
+
+  table->s= share;
+  init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
+  share->blob_field= blob_field;
+  share->blob_ptr_size= portable_sizeof_char_ptr;
+  share->table_charset= NULL;
+  share->primary_key= MAX_KEY;               // Indicate no primary key
+  share->keys_for_keyread.init();
+  share->keys_in_use.init();
+
+  /* Create the field */
+  {
+    /*
+      For the sake of uniformity, always use Field_varstring (altough we could
+      use Field_string for shorter keys)
+    */
+    field= new Field_varstring(uniq_tuple_length_arg, FALSE, "rowids", share,
+                               &my_charset_bin);
+    if (!field)
+      DBUG_RETURN(0);
+    field->table= table;
+    field->key_start.init(0);
+    field->part_of_key.init(0);
+    field->part_of_sortkey.init(0);
+    field->unireg_check= Field::NONE;
+    field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG);
+    field->reset_fields();
+    field->init(table);
+    field->orig_table= NULL;
+     
+    field->field_index= 0;
+    
+    *(reg_field++)= field;
+    *blob_field= 0;
+    *reg_field= 0;
+
+    share->fields= 1;
+    share->blob_fields= 0;
+  }
+
+  uint reclength= field->pack_length();
+  if (using_unique_constraint)
+  { 
+    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+    DBUG_ASSERT(uniq_tuple_length_arg <= table->file->max_key_length());
+  }
+  else
+  {
+    share->db_plugin= ha_lock_engine(0, heap_hton);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+  }
+  if (!table->file)
+    goto err;
+
+  null_count=1;
+  
+  null_pack_length= 1;
+  reclength += null_pack_length;
+
+  share->reclength= reclength;
+  {
+    uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
+    share->rec_buff_length= alloc_length;
+    if (!(table->record[0]= (uchar*)
+                            alloc_root(&table->mem_root, alloc_length*3)))
+      goto err;
+    table->record[1]= table->record[0]+alloc_length;
+    share->default_values= table->record[1]+alloc_length;
+  }
+  setup_tmp_table_column_bitmaps(table, bitmaps);
+
+  recinfo= start_recinfo;
+  null_flags=(uchar*) table->record[0];
+  pos=table->record[0]+ null_pack_length;
+  if (null_pack_length)
+  {
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    recinfo->type=FIELD_NORMAL;
+    recinfo->length=null_pack_length;
+    recinfo++;
+    bfill(null_flags,null_pack_length,255);	// Set null fields
+
+    table->null_flags= (uchar*) table->record[0];
+    share->null_fields= null_count;
+    share->null_bytes= null_pack_length;
+  }
+  null_count=1;
+
+  {
+    //Field *field= *reg_field;
+    uint length;
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    field->move_field(pos,(uchar*) 0,0);
+
+    field->reset();
+    /*
+      Test if there is a default field value. The test for ->ptr is to skip
+      'offset' fields generated by initalize_tables
+    */
+    // Initialize the table field:
+    bzero(field->ptr, field->pack_length());
+
+    length=field->pack_length();
+    pos+= length;
+
+    /* Make entry for create table */
+    recinfo->length=length;
+    if (field->flags & BLOB_FLAG)
+      recinfo->type= FIELD_BLOB;
+    else if (use_packed_rows &&
+             field->real_type() == MYSQL_TYPE_STRING &&
+	     length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
+      recinfo->type=FIELD_SKIP_ENDSPACE;
+    else
+      recinfo->type=FIELD_NORMAL;
+
+    field->set_table_name(&table->alias);
+  }
+
+  if (thd->variables.tmp_table_size == ~ (ulonglong) 0)		// No limit
+    share->max_rows= ~(ha_rows) 0;
+  else
+    share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
+                                 min(thd->variables.tmp_table_size,
+                                     thd->variables.max_heap_table_size) :
+                                 thd->variables.tmp_table_size) /
+			         share->reclength);
+  set_if_bigger(share->max_rows,1);		// For dummy start options
+
+
+  //// keyinfo= param->keyinfo;
+  if (TRUE)
+  {
+    DBUG_PRINT("info",("Creating group key in temporary table"));
+    share->keys=1;
+    share->uniques= test(using_unique_constraint);
+    table->key_info=keyinfo;
+    keyinfo->key_part=key_part_info;
+    keyinfo->flags=HA_NOSAME;
+    keyinfo->usable_key_parts= keyinfo->key_parts= 1;
+    keyinfo->key_length=0;
+    keyinfo->rec_per_key=0;
+    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->name= (char*) "weedout_key";
+    {
+      key_part_info->null_bit=0;
+      key_part_info->field=  field;
+      key_part_info->offset= field->offset(table->record[0]);
+      key_part_info->length= (uint16) field->key_length();
+      key_part_info->type=   (uint8) field->key_type();
+      key_part_info->key_type = FIELDFLAG_BINARY;
+      if (!using_unique_constraint)
+      {
+	if (!(key_field= field->new_key_field(thd->mem_root, table,
+                                              group_buff,
+                                              field->null_ptr,
+                                              field->null_bit)))
+	  goto err;
+        key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; //todo need this?
+      }
+      keyinfo->key_length+=  key_part_info->length;
+    }
+  }
+
+  if (thd->is_fatal_error)			// If end of memory
+    goto err;
+  share->db_record_offset= 1;
+  table->no_rows= 1;              		// We don't need the data
+
+  // recinfo must point after last field
+  recinfo++;
+  if (share->db_type() == TMP_ENGINE_HTON)
+  {
+    if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0))
+      goto err;
+  }
+  if (open_tmp_table(table))
+    goto err;
+
+  thd->mem_root= mem_root_save;
+  tmp_table= table;
+  DBUG_RETURN(FALSE);
+
+err:
+  thd->mem_root= mem_root_save;
+  free_tmp_table(thd,table);                    /* purecov: inspected */
+  if (temp_pool_slot != MY_BIT_NONE)
+    bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+  DBUG_RETURN(TRUE);				/* purecov: inspected */
+}
+
+
+/*
+  SemiJoinDuplicateElimination: Reset the temporary table
+*/
+
+int SJ_TMP_TABLE::sj_weedout_delete_rows()
+{
+  DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_delete_rows");
+  if (tmp_table)
+  {
+    int rc= tmp_table->file->ha_delete_all_rows();
+    DBUG_RETURN(rc);
+  }
+  have_degenerate_row= FALSE;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  SemiJoinDuplicateElimination: Weed out duplicate row combinations
+
+  SYNPOSIS
+    sj_weedout_check_row()
+      thd    Thread handle
+
+  DESCRIPTION
+    Try storing current record combination of outer tables (i.e. their
+    rowids) in the temporary table. This records the fact that we've seen 
+    this record combination and also tells us if we've seen it before.
+
+  RETURN
+    -1  Error
+    1   The row combination is a duplicate (discard it)
+    0   The row combination is not a duplicate (continue)
+*/
+
+int SJ_TMP_TABLE::sj_weedout_check_row(THD *thd)
+{
+  int error;
+  SJ_TMP_TABLE::TAB *tab= tabs;
+  SJ_TMP_TABLE::TAB *tab_end= tabs_end;
+  uchar *ptr;
+  uchar *nulls_ptr;
+
+  DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_check_row");
+
+  if (is_degenerate)
+  {
+    if (have_degenerate_row) 
+      DBUG_RETURN(1);
+
+    have_degenerate_row= TRUE;
+    DBUG_RETURN(0);
+  }
+
+  ptr= tmp_table->record[0] + 1;
+
+  /* Put the the rowids tuple into table->record[0]: */
+
+  // 1. Store the length 
+  if (((Field_varstring*)(tmp_table->field[0]))->length_bytes == 1)
+  {
+    *ptr= (uchar)(rowid_len + null_bytes);
+    ptr++;
+  }
+  else
+  {
+    int2store(ptr, rowid_len + null_bytes);
+    ptr += 2;
+  }
+
+  nulls_ptr= ptr;
+  // 2. Zero the null bytes 
+  if (null_bytes)
+  {
+    bzero(ptr, null_bytes);
+    ptr += null_bytes; 
+  }
+
+  // 3. Put the rowids
+  for (uint i=0; tab != tab_end; tab++, i++)
+  {
+    handler *h= tab->join_tab->table->file;
+    if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row)
+    {
+      /* It's a NULL-complemented row */
+      *(nulls_ptr + tab->null_byte) |= tab->null_bit;
+      bzero(ptr + tab->rowid_offset, h->ref_length);
+    }
+    else
+    {
+      /* Copy the rowid value */
+      memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length);
+    }
+  }
+
+  error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]);
+  if (error)
+  {
+    /* create_internal_tmp_table_from_heap will generate error if needed */
+    if (!tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
+      DBUG_RETURN(1); /* Duplicate */
+    if (create_internal_tmp_table_from_heap(thd, tmp_table, start_recinfo,
+                                            &recinfo, error, 1))
+      DBUG_RETURN(-1);
+  }
+  DBUG_RETURN(0);
+}
+
+
+int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint n_tables)
+{
+  THD *thd= join->thd;
+  DBUG_ENTER("init_dups_weedout");
+  SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
+  SJ_TMP_TABLE::TAB *last_tab= sjtabs;
+  uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
+  uint jt_null_bits= 0;    // # null bits in tuple bytes
+  /*
+    Walk through the range and remember
+     - tables that need their rowids to be put into temptable
+     - the last outer table
+  */
+  for (JOIN_TAB *j=join->join_tab + first_table; 
+       j < join->join_tab + first_table + n_tables; j++)
+  {
+    if (sj_table_is_included(join, j))
+    {
+      last_tab->join_tab= j;
+      last_tab->rowid_offset= jt_rowid_offset;
+      jt_rowid_offset += j->table->file->ref_length;
+      if (j->table->maybe_null)
+      {
+        last_tab->null_byte= jt_null_bits / 8;
+        last_tab->null_bit= jt_null_bits++;
+      }
+      last_tab++;
+      j->table->prepare_for_position();
+      j->keep_current_rowid= TRUE;
+    }
+  }
+
+  SJ_TMP_TABLE *sjtbl;
+  if (jt_rowid_offset) /* Temptable has at least one rowid */
+  {
+    size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
+    if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) ||
+        !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    memcpy(sjtbl->tabs, sjtabs, tabs_size);
+    sjtbl->is_degenerate= FALSE;
+    sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
+    sjtbl->rowid_len= jt_rowid_offset;
+    sjtbl->null_bits= jt_null_bits;
+    sjtbl->null_bytes= (jt_null_bits + 7)/8;
+    if (sjtbl->create_sj_weedout_tmp_table(thd))
+      DBUG_RETURN(TRUE);
+    join->sj_tmp_tables.push_back(sjtbl->tmp_table);
+  }
+  else
+  {
+    /* 
+      This is a special case where the entire subquery predicate does 
+      not depend on anything at all, ie this is 
+        WHERE const IN (uncorrelated select)
+    */
+    if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    sjtbl->tmp_table= NULL;
+    sjtbl->is_degenerate= TRUE;
+    sjtbl->have_degenerate_row= FALSE;
+  }
+
+  sjtbl->next_flush_table= join->join_tab[first_table].flush_weedout_table;
+  join->join_tab[first_table].flush_weedout_table= sjtbl;
+  join->join_tab[first_fanout_table].first_weedout_table= sjtbl;
+  join->join_tab[first_table + n_tables - 1].check_weed_out_table= sjtbl;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Setup the strategies to eliminate semi-join duplicates.
+  
+  SYNOPSIS
+    setup_semijoin_dups_elimination()
+      join           Join to process
+      options        Join options (needed to see if join buffering will be 
+                     used or not)
+      no_jbuf_after  Another bit of information re where join buffering will
+                     be used.
+
+  DESCRIPTION
+    Setup the strategies to eliminate semi-join duplicates. ATM there are 4
+    strategies:
+
+    1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
+                         of row combinations)
+    2. FirstMatch (pick only the 1st matching row combination of inner tables)
+    3. LooseScan (scanning the sj-inner table in a way that groups duplicates
+                  together and picking the 1st one)
+    4. SJ-Materialization.
+    
+    The join order has "duplicate-generating ranges", and every range is
+    served by one strategy or a combination of FirstMatch with with some
+    other strategy.
+    
+    "Duplicate-generating range" is defined as a range within the join order
+    that contains all of the inner tables of a semi-join. All ranges must be
+    disjoint, if tables of several semi-joins are interleaved, then the ranges
+    are joined together, which is equivalent to converting
+      SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
+    to
+      SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
+    .
+
+    Applicability conditions are as follows:
+
+    DuplicateWeedout strategy
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
+      +------+  +=========================+  +---+
+        (1)                 (2)               (3)
+
+       (1) - Prefix of OuterTables (those that participate in 
+             IN-equality and/or are correlated with subquery) and outer 
+             Non-correlated tables.
+       (2) - The handled range. The range starts with the first sj-inner
+             table, and covers all sj-inner and outer tables 
+             Within the range,  Inner, Outer, outer non-correlated tables
+             may follow in any order.
+       (3) - The suffix of outer non-correlated tables.
+    
+    FirstMatch strategy
+    ~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|nt)* it) ]  (nt)*
+      +------+  +==================+  +---+
+        (1)             (2)          (3)
+
+      (1) - Prefix of outer and non-correlated tables
+      (2) - The handled range, which may contain only inner and
+            non-correlated tables.
+      (3) - The suffix of outer non-correlated tables.
+
+    LooseScan strategy 
+    ~~~~~~~~~~~~~~~~~~
+
+     (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
+     +--------+   +===========+ +=============+   +------+
+        (1)           (2)          (3)              (4)
+     
+      (1) - Prefix that may contain any outer tables. The prefix must contain
+            all the non-trivially correlated outer tables. (non-trivially means
+            that the correlation is not just through the IN-equality).
+      
+      (2) - Inner table for which the LooseScan scan is performed.
+
+      (3) - The remainder of the duplicate-generating range. It is served by 
+            application of FirstMatch strategy, with the exception that
+            outer IN-correlated tables are considered to be non-correlated.
+
+      (4) - THe suffix of outer and outer non-correlated tables.
+
+  
+  The choice between the strategies is made by the join optimizer (see
+  advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()).
+  This function sets up all fields/structures/etc needed for execution except
+  for setup/initialization of semi-join materialization which is done in 
+  setup_sj_materialization() (todo: can't we move that to here also?)
+
+  RETURN
+    FALSE  OK 
+    TRUE   Out of memory error
+*/
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after)
+{
+  uint i;
+  DBUG_ENTER("setup_semijoin_dups_elimination");
+  
+  join->complex_firstmatch_tables= table_map(0);
+
+  POSITION *pos= join->best_positions + join->const_tables;
+  for (i= join->const_tables ; i < join->top_join_tab_count; )
+  {
+    JOIN_TAB *tab=join->join_tab + i;
+    //POSITION *pos= join->best_positions + i;
+    uint keylen, keyno;
+    switch (pos->sj_strategy) {
+      case SJ_OPT_MATERIALIZE:
+      case SJ_OPT_MATERIALIZE_SCAN:
+        /* Do nothing */
+        i+= 1;// It used to be pos->n_sj_tables, but now they are embedded in a nest
+        pos += pos->n_sj_tables;
+        break;
+      case SJ_OPT_LOOSE_SCAN:
+      {
+        /* We jump from the last table to the first one */
+        tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
+        
+        /* LooseScan requires records to be produced in order */
+        if (tab->select && tab->select->quick)
+          tab->select->quick->need_sorted_output();
+
+        for (uint j= i; j < i + pos->n_sj_tables; j++)
+          join->join_tab[j].inside_loosescan_range= TRUE;
+
+        /* Calculate key length */
+        keylen= 0;
+        keyno= pos->loosescan_picker.loosescan_key;
+        for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
+          keylen += tab->table->key_info[keyno].key_part[kp].store_length;
+
+        tab->loosescan_key= keyno;
+        tab->loosescan_key_len= keylen;
+        if (pos->n_sj_tables > 1) 
+          tab[pos->n_sj_tables - 1].do_firstmatch= tab;
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_DUPS_WEEDOUT:
+      {
+        /*
+          Check for join buffering. If there is one, move the first table
+          forwards, but do not destroy other duplicate elimination methods.
+        */
+        uint first_table= i;
+
+        uint join_cache_level= join->thd->variables.join_cache_level;
+        for (uint j= i; j < i + pos->n_sj_tables; j++)
+        {
+          /*
+            When we'll properly take join buffering into account during
+            join optimization, the below check should be changed to 
+            "if (join->best_positions[j].use_join_buffer && 
+                 j <= no_jbuf_after)".
+            For now, use a rough criteria:
+          */
+          JOIN_TAB *js_tab=join->join_tab + j; 
+          if (j != join->const_tables && js_tab->use_quick != 2 &&
+              j <= no_jbuf_after &&
+              ((js_tab->type == JT_ALL && join_cache_level != 0) ||
+               (join_cache_level > 2 && (js_tab->type == JT_REF || 
+                                         js_tab->type == JT_EQ_REF))))
+          {
+            /* Looks like we'll be using join buffer */
+            first_table= join->const_tables;
+            /* 
+              Make sure that possible sorting of rows from the head table 
+              is not to be employed.
+            */
+            if (join->get_sort_by_join_tab())
+	    {
+              join->simple_order= 0;
+              join->simple_group= 0;
+              join->need_tmp= join->test_if_need_tmp_table();
+            }
+            break;
+          }
+        }
+
+        init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table);
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_FIRST_MATCH:
+      {
+        JOIN_TAB *j;
+        JOIN_TAB *jump_to= tab-1;
+
+        bool complex_range= FALSE;
+        table_map tables_in_range= table_map(0);
+
+        for (j= tab; j != tab + pos->n_sj_tables; j++)
+        {
+          tables_in_range |= j->table->map;
+          if (!j->emb_sj_nest)
+          {
+            /* 
+              Got a table that's not within any semi-join nest. This is a case
+              like this:
+
+              SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2)
+
+              with a join order of 
+
+                   +----- FirstMatch range ----+
+                   |                           |
+              ot1 it1 nt1 nt2 it2 it3 ...
+                   |   ^
+                   |   +-------- 'j' points here
+                   +------------- SJ_OPT_FIRST_MATCH was set for this table as
+                                  it's the first one that produces duplicates
+              
+            */
+            DBUG_ASSERT(j != tab);  /* table ntX must have an itX before it */
+
+            /* 
+              If the table right before us is an inner table (like it1 in the
+              picture), it should be set to jump back to previous outer-table
+            */
+            if (j[-1].emb_sj_nest)
+              j[-1].do_firstmatch= jump_to;
+
+            jump_to= j; /* Jump back to us */
+            complex_range= TRUE;
+          }
+          else
+          {
+            j->first_sj_inner_tab= tab;
+            j->last_sj_inner_tab= tab + pos->n_sj_tables - 1;
+          }
+        }
+        j[-1].do_firstmatch= jump_to;
+        i+= pos->n_sj_tables;
+        pos+= pos->n_sj_tables;
+
+        if (complex_range)
+          join->complex_firstmatch_tables|= tables_in_range;
+        break;
+      }
+      case SJ_OPT_NONE:
+        i++;
+        pos++;
+        break;
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Destroy all temporary tables created by NL-semijoin runtime
+*/
+
+void destroy_sj_tmp_tables(JOIN *join)
+{
+  List_iterator<TABLE> it(join->sj_tmp_tables);
+  TABLE *table;
+  while ((table= it++))
+  {
+    /* 
+      SJ-Materialization tables are initialized for either sequential reading 
+      or index lookup, DuplicateWeedout tables are not initialized for read 
+      (we only write to them), so need to call ha_index_or_rnd_end.
+    */
+    table->file->ha_index_or_rnd_end();
+    free_tmp_table(join->thd, table);
+  }
+  join->sj_tmp_tables.empty();
+  join->sjm_info_list.empty();
+}
+
+
+/*
+  Remove all records from all temp tables used by NL-semijoin runtime
+
+  SYNOPSIS
+    clear_sj_tmp_tables()
+      join  The join to remove tables for
+
+  DESCRIPTION
+    Remove all records from all temp tables used by NL-semijoin runtime. This 
+    must be done before every join re-execution.
+*/
+
+int clear_sj_tmp_tables(JOIN *join)
+{
+  int res;
+  List_iterator<TABLE> it(join->sj_tmp_tables);
+  TABLE *table;
+  while ((table= it++))
+  {
+    if ((res= table->file->ha_delete_all_rows()))
+      return res; /* purecov: inspected */
+   free_io_cache(table);
+   filesort_free_buffers(table,0);
+  }
+
+  SJ_MATERIALIZATION_INFO *sjm;
+  List_iterator<SJ_MATERIALIZATION_INFO> it2(join->sjm_info_list);
+  while ((sjm= it2++))
+  {
+    sjm->materialized= FALSE;
+  }
+  return 0;
+}
+
+
+/*
+  Check if the table's rowid is included in the temptable
+
+  SYNOPSIS
+    sj_table_is_included()
+      join      The join
+      join_tab  The table to be checked
+
+  DESCRIPTION
+    SemiJoinDuplicateElimination: check the table's rowid should be included
+    in the temptable. This is so if
+
+    1. The table is not embedded within some semi-join nest
+    2. The has been pulled out of a semi-join nest, or
+
+    3. The table is functionally dependent on some previous table
+
+    [4. This is also true for constant tables that can't be
+        NULL-complemented but this function is not called for such tables]
+
+  RETURN
+    TRUE  - Include table's rowid
+    FALSE - Don't
+*/
+
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
+{
+  if (join_tab->emb_sj_nest)
+    return FALSE;
+  
+  /* Check if this table is functionally dependent on the tables that
+     are within the same outer join nest
+  */
+  TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding;
+  if (join_tab->type == JT_EQ_REF)
+  {
+    table_map depends_on= 0;
+    uint idx;
+
+    for (uint kp= 0; kp < join_tab->ref.key_parts; kp++)
+      depends_on |= join_tab->ref.items[kp]->used_tables();
+
+    Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
+    while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
+    {
+      JOIN_TAB *ref_tab= join->map2table[idx];
+      if (embedding != ref_tab->table->pos_in_table_list->embedding)
+        return TRUE;
+    }
+    /* Ok, functionally dependent */
+    return FALSE;
+  }
+  /* Not functionally dependent => need to include*/
+  return TRUE;
+}
+
+
+/*
+  Index lookup-based subquery: save some flags for EXPLAIN output
+
+  SYNOPSIS
+    save_index_subquery_explain_info()
+      join_tab  Subquery's join tab (there is only one as index lookup is
+                only used for subqueries that are single-table SELECTs)
+      where     Subquery's WHERE clause
+
+  DESCRIPTION
+    For index lookup-based subquery (i.e. one executed with
+    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
+    check its EXPLAIN output row should contain 
+      "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) 
+      "Using Where" (TAB_INFO_USING_WHERE)
+      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
+    and set appropriate flags in join_tab->packed_info.
+*/
+
+static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
+{
+  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
+  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
+    join_tab->packed_info |= TAB_INFO_USING_INDEX;
+  if (where)
+    join_tab->packed_info |= TAB_INFO_USING_WHERE;
+  for (uint i = 0; i < join_tab->ref.key_parts; i++)
+  {
+    if (join_tab->ref.cond_guards[i])
+    {
+      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
+      break;
+    }
+  }
+}
+
+
+/*
+  Check if the join can be rewritten to [unique_]indexsubquery_engine
+
+  DESCRIPTION
+    Check if the join can be changed into [unique_]indexsubquery_engine.
+
+    The check is done after join optimization, the idea is that if the join
+    has only one table and uses a [eq_]ref access generated from subselect's
+    IN-equality then we replace it with a subselect_indexsubquery_engine or a
+    subselect_uniquesubquery_engine.
+
+  RETURN 
+    0 - Ok, rewrite done (stop join optimization and return)
+    1 - Fatal error (stop join optimization and return)
+   -1 - No rewrite performed, continue with join optimization
+*/
+
+int rewrite_to_index_subquery_engine(JOIN *join)
+{
+  THD *thd= join->thd;
+  JOIN_TAB* join_tab=join->join_tab;
+  SELECT_LEX_UNIT *unit= join->unit;
+  DBUG_ENTER("rewrite_to_index_subquery_engine");
+
+  /*
+    is this simple IN subquery?
+  */
+  /* TODO: In order to use these more efficient subquery engines in more cases,
+     the following problems need to be solved:
+     - the code that removes GROUP BY (group_list), also adds an ORDER BY
+       (order), thus GROUP BY queries (almost?) never pass through this branch.
+       Solution: remove the test below '!join->order', because we remove the
+       ORDER clase for subqueries anyway.
+     - in order to set a more efficient engine, the optimizer needs to both
+       decide to remove GROUP BY, *and* select one of the JT_[EQ_]REF[_OR_NULL]
+       access methods, *and* loose scan should be more expensive or
+       inapliccable. When is that possible?
+     - Consider expanding the applicability of this rewrite for loose scan
+       for group by queries.
+  */
+  if (!join->group_list && !join->order &&
+      join->unit->item && 
+      join->unit->item->substype() == Item_subselect::IN_SUBS &&
+      join->table_count == 1 && join->conds &&
+      !join->unit->is_union())
+  {
+    if (!join->having)
+    {
+      Item *where= join->conds;
+      if (join_tab[0].type == JT_EQ_REF &&
+	  join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+        remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_UNIQUE_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_uniquesubquery_engine(thd,
+                                                                  join_tab,
+                                                                  unit->item,
+                                                                  where)));
+      }
+      else if (join_tab[0].type == JT_REF &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+	remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_INDEX_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_indexsubquery_engine(thd,
+                                                                 join_tab,
+                                                                 unit->item,
+                                                                 where,
+                                                                 NULL,
+                                                                 0)));
+      }
+    } else if (join_tab[0].type == JT_REF_OR_NULL &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
+               join->having->name == in_having_cond)
+    {
+      join_tab[0].type= JT_INDEX_SUBQUERY;
+      join->error= 0;
+      join->conds= remove_additional_cond(join->conds);
+      save_index_subquery_explain_info(join_tab, join->conds);
+      DBUG_RETURN(unit->item->
+		  change_engine(new subselect_indexsubquery_engine(thd,
+								   join_tab,
+								   unit->item,
+								   join->conds,
+                                                                   join->having,
+								   1)));
+    }
+  }
+
+  DBUG_RETURN(-1); /* Haven't done the rewrite */
+}
+
+
+/**
+  Remove additional condition inserted by IN/ALL/ANY transformation.
+
+  @param conds   condition for processing
+
+  @return
+    new conditions
+*/
+
+static Item *remove_additional_cond(Item* conds)
+{
+  if (conds->name == in_additional_cond)
+    return 0;
+  if (conds->type() == Item::COND_ITEM)
+  {
+    Item_cond *cnd= (Item_cond*) conds;
+    List_iterator<Item> li(*(cnd->argument_list()));
+    Item *item;
+    while ((item= li++))
+    {
+      if (item->name == in_additional_cond)
+      {
+	li.remove();
+	if (cnd->argument_list()->elements == 1)
+	  return cnd->argument_list()->head();
+	return conds;
+      }
+    }
+  }
+  return conds;
+}
+
+
+/*
+  Remove the predicates pushed down into the subquery
+
+  SYNOPSIS
+    remove_subq_pushed_predicates()
+      where   IN  Must be NULL
+              OUT The remaining WHERE condition, or NULL
+
+  DESCRIPTION
+    Given that this join will be executed using (unique|index)_subquery,
+    without "checking NULL", remove the predicates that were pushed down
+    into the subquery.
+
+    If the subquery compares scalar values, we can remove the condition that
+    was wrapped into trig_cond (it will be checked when needed by the subquery
+    engine)
+
+    If the subquery compares row values, we need to keep the wrapped
+    equalities in the WHERE clause: when the left (outer) tuple has both NULL
+    and non-NULL values, we'll do a full table scan and will rely on the
+    equalities corresponding to non-NULL parts of left tuple to filter out
+    non-matching records.
+
+    TODO: We can remove the equalities that will be guaranteed to be true by the
+    fact that subquery engine will be using index lookup. This must be done only
+    for cases where there are no conversion errors of significance, e.g. 257
+    that is searched in a byte. But this requires homogenization of the return 
+    codes of all Field*::store() methods.
+*/
+
+static void remove_subq_pushed_predicates(JOIN *join, Item **where)
+{
+  if (join->conds->type() == Item::FUNC_ITEM &&
+      ((Item_func *)join->conds)->functype() == Item_func::EQ_FUNC &&
+      ((Item_func *)join->conds)->arguments()[0]->type() == Item::REF_ITEM &&
+      ((Item_func *)join->conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
+      test_if_ref (join->conds,
+                   (Item_field *)((Item_func *)join->conds)->arguments()[1],
+                   ((Item_func *)join->conds)->arguments()[0]))
+  {
+    *where= 0;
+    return;
+  }
+}
+
+
+
+
+/**
+  Optimize all subqueries of a query that were not flattened into a semijoin.
+
+  @details
+  Optimize all immediate children subqueries of a query.
+
+  This phase must be called after substitute_for_best_equal_field() because
+  that function may replace items with other items from a multiple equality,
+  and we need to reference the correct items in the index access method of the
+  IN predicate.
+
+  @return Operation status
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::optimize_unflattened_subqueries()
+{
+  return select_lex->optimize_unflattened_subqueries();
+}
+
+
+/*
+  Join tab execution startup function.
+
+  SYNOPSIS
+    join_tab_execution_startup()
+      tab  Join tab to perform startup actions for
+
+  DESCRIPTION
+    Join tab execution startup function. This is different from
+    tab->read_first_record in the regard that this has actions that are to be
+    done once per join execution.
+
+    Currently there are only two possible startup functions, so we have them
+    both here inside if (...) branches. In future we could switch to function
+    pointers.
+
+  TODO: consider moving this together with JOIN_TAB::preread_init
+  
+  RETURN 
+    NESTED_LOOP_OK - OK
+    NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution
+*/
+
+enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab)
+{
+  Item_in_subselect *in_subs;
+  DBUG_ENTER("join_tab_execution_startup");
+  
+  if (tab->table->pos_in_table_list && 
+      (in_subs= tab->table->pos_in_table_list->jtbm_subselect))
+  {
+    /* It's a non-merged SJM nest */
+    DBUG_ASSERT(in_subs->engine->engine_type() ==
+                subselect_engine::HASH_SJ_ENGINE);
+    subselect_hash_sj_engine *hash_sj_engine=
+      ((subselect_hash_sj_engine*)in_subs->engine);
+    if (!hash_sj_engine->is_materialized)
+    {
+      hash_sj_engine->materialize_join->exec();
+      hash_sj_engine->is_materialized= TRUE; 
+
+      if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error)
+        DBUG_RETURN(NESTED_LOOP_ERROR);
+    }
+  }
+  else if (tab->bush_children)
+  {
+    /* It's a merged SJM nest */
+    enum_nested_loop_state rc;
+    SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info;
+
+    if (!sjm->materialized)
+    {
+      JOIN *join= tab->join;
+      JOIN_TAB *join_tab= tab->bush_children->start;
+      JOIN_TAB *save_return_tab= join->return_tab;
+      /*
+        Now run the join for the inner tables. The first call is to run the
+        join, the second one is to signal EOF (this is essential for some
+        join strategies, e.g. it will make join buffering flush the records)
+      */
+      if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 ||
+          (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0)
+      {
+        join->return_tab= save_return_tab;
+        DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/
+      }
+      join->return_tab= save_return_tab;
+      sjm->materialized= TRUE;
+    }
+  }
+
+  DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/*
+  Create a dummy temporary table, useful only for the sake of having a 
+  TABLE* object with map,tablenr and maybe_null properties.
+  
+  This is used by non-mergeable semi-join materilization code to handle
+  degenerate cases where materialized subquery produced "Impossible WHERE" 
+  and thus wasn't materialized.
+*/
+
+TABLE *create_dummy_tmp_table(THD *thd)
+{
+  DBUG_ENTER("create_dummy_tmp_table");
+  TABLE *table;
+  TMP_TABLE_PARAM sjm_table_param;
+  sjm_table_param.init();
+  sjm_table_param.field_count= 1;
+  List<Item> sjm_table_cols;
+  Item *column_item= new Item_int(1);
+  sjm_table_cols.push_back(column_item);
+  if (!(table= create_tmp_table(thd, &sjm_table_param, 
+                                sjm_table_cols, (ORDER*) 0, 
+                                TRUE /* distinct */, 
+                                1, /*save_sum_fields*/
+                                thd->options | TMP_TABLE_ALL_COLUMNS, 
+                                HA_POS_ERROR /*rows_limit */, 
+                                (char*)"dummy", TRUE /* Do not open */)))
+  {
+    DBUG_RETURN(NULL);
+  }
+  DBUG_RETURN(table);
+}
+
+
+/*
+  A class that is used to catch one single tuple that is sent to the join
+  output, and save it in Item_cache element(s).
+
+  It is very similar to select_singlerow_subselect but doesn't require a 
+  Item_singlerow_subselect item.
+*/
+
+class select_value_catcher :public select_subselect
+{
+public:
+  select_value_catcher(Item_subselect *item_arg)
+    :select_subselect(item_arg)
+  {}
+  int send_data(List<Item> &items);
+  int setup(List<Item> *items);
+  bool assigned;  /* TRUE <=> we've caught a value */
+  uint n_elements; /* How many elements we get */
+  Item_cache **row; /* Array of cache elements */
+};
+
+
+int select_value_catcher::setup(List<Item> *items)
+{
+  assigned= FALSE;
+  n_elements= items->elements;
+ 
+  if (!(row= (Item_cache**) sql_alloc(sizeof(Item_cache*)*n_elements)))
+    return TRUE;
+  
+  Item *sel_item;
+  List_iterator<Item> li(*items);
+  for (uint i= 0; (sel_item= li++); i++)
+  {
+    if (!(row[i]= Item_cache::get_cache(sel_item)))
+      return TRUE;
+    row[i]->setup(sel_item);
+  }
+  return FALSE;
+}
+
+
+int select_value_catcher::send_data(List<Item> &items)
+{
+  DBUG_ENTER("select_value_catcher::send_data");
+  DBUG_ASSERT(!assigned);
+  DBUG_ASSERT(items.elements == n_elements);
+
+  if (unit->offset_limit_cnt)
+  {				          // Using limit offset,count
+    unit->offset_limit_cnt--;
+    DBUG_RETURN(0);
+  }
+
+  Item *val_item;
+  List_iterator_fast<Item> li(items);
+  for (uint i= 0; (val_item= li++); i++)
+  {
+    row[i]->store(val_item);
+    row[i]->cache_value();
+  }
+  assigned= TRUE;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Setup JTBM join tabs for execution
+*/
+
+bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list, 
+                           Item **join_where)
+{
+  TABLE_LIST *table;
+  NESTED_JOIN *nested_join;
+  List_iterator<TABLE_LIST> li(*join_list);
+  DBUG_ENTER("setup_jtbm_semi_joins");
+  
+  while ((table= li++))
+  {
+    Item_in_subselect *item;
+    
+    if ((item= table->jtbm_subselect))
+    {
+      Item_in_subselect *subq_pred= item;
+      double rows;
+      double read_time;
+
+      /*
+        Perform optimization of the subquery, so that we know estmated
+        - cost of materialization process 
+        - how many records will be in the materialized temp.table
+      */
+      if (subq_pred->optimize(&rows, &read_time))
+        DBUG_RETURN(TRUE);
+
+      subq_pred->jtbm_read_time= read_time;
+      subq_pred->jtbm_record_count=rows;
+      JOIN *subq_join= subq_pred->unit->first_select()->join;
+
+      if (!subq_join->tables_list || !subq_join->table_count)
+      {
+        /*
+          A special case; subquery's join is degenerate, and it either produces
+          0 or 1 record. Examples of both cases:
+
+            select * from ot where col in (select ... from it where 2>3) 
+            select * from ot where col in (select min(it.key) from it)
+          
+          in this case, the subquery predicate has not been setup for
+          materialization. In particular, there is no materialized temp.table.
+          We'll now need to
+          1. Check whether 1 or 0 records are produced, setup this as a
+             constant join tab.
+          2. Create a dummy temporary table, because all of the join
+             optimization code relies on TABLE object being present (here we
+             follow a bad tradition started by derived tables)
+        */
+        DBUG_ASSERT(subq_pred->engine->engine_type() == 
+                    subselect_engine::SINGLE_SELECT_ENGINE);
+        subselect_single_select_engine *engine=
+          (subselect_single_select_engine*)subq_pred->engine;
+        select_value_catcher *new_sink;
+        if (!(new_sink= new select_value_catcher(subq_pred)))
+          DBUG_RETURN(TRUE);
+        if (new_sink->setup(&engine->select_lex->join->fields_list) ||
+            engine->select_lex->join->change_result(new_sink) ||
+            engine->exec())
+        {
+          DBUG_RETURN(TRUE);
+        }
+        subq_pred->is_jtbm_const_tab= TRUE;
+
+        if (new_sink->assigned)
+        {
+          subq_pred->jtbm_const_row_found= TRUE;
+          /* 
+            Subselect produced one row, which is saved in new_sink->row. 
+            Inject "left_expr[i] == row[i] equalities into parent's WHERE.
+          */
+          Item *eq_cond;
+          for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+          {
+            eq_cond= new Item_func_eq(subq_pred->left_expr->element_index(i),
+                                      new_sink->row[i]);
+            if (!eq_cond || eq_cond->fix_fields(join->thd, &eq_cond))
+              DBUG_RETURN(1);
+
+            (*join_where)= and_items(*join_where, eq_cond);
+          }
+        }
+        else
+        {
+          /* Subselect produced no rows. Just set the flag, */
+          subq_pred->jtbm_const_row_found= FALSE;
+        }
+
+        /* Set up a dummy TABLE*, optimizer code needs JOIN_TABs to have TABLE */
+        TABLE *dummy_table;
+        if (!(dummy_table= create_dummy_tmp_table(join->thd)))
+          DBUG_RETURN(1);
+        table->table= dummy_table;
+        table->table->pos_in_table_list= table;
+        setup_table_map(table->table, table, table->jtbm_table_no);
+      }
+      else
+      {
+        DBUG_ASSERT(subq_pred->test_set_strategy(SUBS_MATERIALIZATION));
+        subq_pred->is_jtbm_const_tab= FALSE;
+        subselect_hash_sj_engine *hash_sj_engine=
+          ((subselect_hash_sj_engine*)item->engine);
+        
+        table->table= hash_sj_engine->tmp_table;
+        table->table->pos_in_table_list= table;
+
+        setup_table_map(table->table, table, table->jtbm_table_no);
+
+        Item *sj_conds= hash_sj_engine->semi_join_conds;
+
+        (*join_where)= and_items(*join_where, sj_conds);
+        if (!(*join_where)->fixed)
+          (*join_where)->fix_fields(join->thd, join_where);
+      }
+    }
+
+    if ((nested_join= table->nested_join))
+    {
+      if (setup_jtbm_semi_joins(join, &nested_join->join_list, join_where))
+        DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
+  based on cost.
+
+  @param join_tables  the set of tables joined in the subquery
+
+  @notes
+  The method chooses between the materialization and IN=>EXISTS rewrite
+  strategies for the execution of a non-flattened subquery IN predicate.
+  The cost-based decision is made as follows:
+
+  1. compute materialize_strategy_cost based on the unmodified subquery
+  2. reoptimize the subquery taking into account the IN-EXISTS predicates
+  3. compute in_exists_strategy_cost based on the reoptimized plan
+  4. compare and set the cheaper strategy
+     if (materialize_strategy_cost >= in_exists_strategy_cost)
+       in_strategy = MATERIALIZATION
+     else
+       in_strategy = IN_TO_EXISTS
+  5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
+       revert to IN_TO_EXISTS
+  6. if (in_strategy == MATERIALIZATION)
+       revert the subquery plan to the original one before reoptimizing
+     else
+       inject the IN=>EXISTS predicates into the new EXISTS subquery plan
+
+  The implementation itself is a bit more complicated because it takes into
+  account two more factors:
+  - whether the user allowed both strategies through an optimizer_switch, and
+  - if materialization was the cheaper strategy, whether it can be executed
+    or not.
+
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::choose_subquery_plan(table_map join_tables)
+{
+  Join_plan_state save_qep; /* The original QEP of the subquery. */
+  enum_reopt_result reopt_result= REOPT_NONE;
+  Item_in_subselect *in_subs;
+
+  /*
+    IN/ALL/ANY optimizations are not applicable for so called fake select
+    (this select exists only to filter results of union if it is needed).
+  */
+  if (select_lex == select_lex->master_unit()->fake_select_lex)
+    return 0;
+
+  if (is_in_subquery())
+  {
+    in_subs= (Item_in_subselect*) unit->item;
+    if (in_subs->create_in_to_exists_cond(this))
+      return true;
+  }
+  else
+    return false;
+  /* A strategy must be chosen earlier. */
+  DBUG_ASSERT(in_subs->has_strategy());
+  DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
+  DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
+  DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
+
+  /*
+    Compute and compare the costs of materialization and in-exists if both
+    strategies are possible and allowed by the user (checked during the prepare
+    phase.
+  */
+  if (in_subs->test_strategy(SUBS_MATERIALIZATION) &&
+      in_subs->test_strategy(SUBS_IN_TO_EXISTS))
+  {
+    JOIN *outer_join;
+    JOIN *inner_join= this;
+    /* Number of unique value combinations filtered by the IN predicate. */
+    double outer_lookup_keys;
+    /* Cost and row count of the unmodified subquery. */
+    double inner_read_time_1, inner_record_count_1;
+    /* Cost of the subquery with injected IN-EXISTS predicates. */
+    double inner_read_time_2;
+    /* The cost to compute IN via materialization. */
+    double materialize_strategy_cost;
+    /* The cost of the IN->EXISTS strategy. */
+    double in_exists_strategy_cost;
+    double dummy;
+
+    /*
+      A. Estimate the number of rows of the outer table that will be filtered
+      by the IN predicate.
+    */
+    outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
+    if (outer_join && outer_join->table_count > 0)
+    {
+      /*
+        TODO:
+        Currently outer_lookup_keys is computed as the number of rows in
+        the partial join including the JOIN_TAB where the IN predicate is
+        pushed to. In the general case this is a gross overestimate because
+        due to caching we are interested only in the number of unique keys.
+        The search key may be formed by columns from much fewer than all
+        tables in the partial join. Example:
+        select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
+        If the join order: t1, t2, the number of unique lookup keys is ~ to
+        the number of unique values t2.c2 in the partial join t1 join t2.
+      */
+      outer_join->get_partial_cost_and_fanout(in_subs->get_join_tab_idx(),
+                                              table_map(-1),
+                                              &dummy,
+                                              &outer_lookup_keys);
+    }
+    else
+    {
+      /*
+        TODO: outer_join can be NULL for DELETE statements.
+        How to compute its cost?
+      */
+      outer_lookup_keys= 1;
+    }
+
+    /*
+      B. Estimate the cost and number of records of the subquery both
+      unmodified, and with injected IN->EXISTS predicates.
+    */
+    inner_read_time_1= inner_join->best_read;
+    inner_record_count_1= inner_join->record_count;
+
+    if (in_to_exists_where && const_tables != table_count)
+    {
+      /*
+        Re-optimize and cost the subquery taking into account the IN-EXISTS
+        conditions.
+      */
+      reopt_result= reoptimize(in_to_exists_where, join_tables, &save_qep);
+      if (reopt_result == REOPT_ERROR)
+        return TRUE;
+
+      /* Get the cost of the modified IN-EXISTS plan. */
+      inner_read_time_2= inner_join->best_read;
+
+    }
+    else
+    {
+      /* Reoptimization would not produce any better plan. */
+      inner_read_time_2= inner_read_time_1;
+    }
+
+    /*
+      C. Compute execution costs.
+    */
+    /* C.1 Compute the cost of the materialization strategy. */
+    //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
+    uint rowlen= get_tmp_table_rec_length(ref_pointer_array, 
+                                          select_lex->item_list.elements);
+    /* The cost of writing one row into the temporary table. */
+    double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1,
+                                                rowlen);
+    /* The cost of a lookup into the unique index of the materialized table. */
+    double lookup_cost= get_tmp_table_lookup_cost(thd, inner_record_count_1,
+                                                  rowlen);
+    /*
+      The cost of executing the subquery and storing its result in an indexed
+      temporary table.
+    */
+    double materialization_cost= inner_read_time_1 +
+                                 write_cost * inner_record_count_1;
+
+    materialize_strategy_cost= materialization_cost +
+                               outer_lookup_keys * lookup_cost;
+
+    /* C.2 Compute the cost of the IN=>EXISTS strategy. */
+    in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
+
+    /* C.3 Compare the costs and choose the cheaper strategy. */
+    if (materialize_strategy_cost >= in_exists_strategy_cost)
+      in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+    else
+      in_subs->set_strategy(SUBS_MATERIALIZATION);
+
+    DBUG_PRINT("info",
+               ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
+                materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+    DBUG_PRINT("info",
+               ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
+                in_exists_strategy_cost, inner_read_time_2));
+    DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
+  }
+
+  /*
+    If (1) materialization is a possible strategy based on semantic analysis
+    during the prepare phase, then if
+      (2) it is more expensive than the IN->EXISTS transformation, and
+      (3) it is not possible to create usable indexes for the materialization
+          strategy,
+      fall back to IN->EXISTS.
+    otherwise
+      use materialization.
+  */
+  if (in_subs->test_strategy(SUBS_MATERIALIZATION) &&
+      in_subs->setup_mat_engine())
+  {
+    /*
+      If materialization was the cheaper or the only user-selected strategy,
+      but it is not possible to execute it due to limitations in the
+      implementation, fall back to IN-TO-EXISTS.
+    */
+    in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+  }
+
+  if (in_subs->test_strategy(SUBS_MATERIALIZATION))
+  {
+    /* Restore the original query plan used for materialization. */
+    if (reopt_result == REOPT_NEW_PLAN)
+      restore_query_plan(&save_qep);
+
+    in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+    select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
+
+    /*
+      Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
+      TODO:
+      Currently we set the subquery LIMIT to infinity, and this is correct
+      because we forbid at parse time LIMIT inside IN subqueries (see
+      Item_in_subselect::test_limit). However, once we allow this, here
+      we should set the correct limit if given in the query.
+    */
+    in_subs->unit->global_parameters->select_limit= NULL;
+    in_subs->unit->set_limit(unit->global_parameters);
+    /*
+      Set the limit of this JOIN object as well, because normally its being
+      set in the beginning of JOIN::optimize, which was already done.
+    */
+    select_limit= in_subs->unit->select_limit_cnt;
+  }
+  else if (in_subs->test_strategy(SUBS_IN_TO_EXISTS))
+  {
+    if (reopt_result == REOPT_NONE && in_to_exists_where &&
+        const_tables != table_count)
+    {
+      /*
+        The subquery was not reoptimized with the newly injected IN-EXISTS
+        conditions either because the user allowed only the IN-EXISTS strategy,
+        or because materialization was not possible based on semantic analysis.
+      */
+      reopt_result= reoptimize(in_to_exists_where, join_tables, NULL);
+      if (reopt_result == REOPT_ERROR)
+        return TRUE;
+    }
+
+    if (in_subs->inject_in_to_exists_cond(this))
+      return TRUE;
+    /*
+      It is IN->EXISTS transformation so we should mark subquery as
+      dependent
+    */
+    in_subs->unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+    select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
+    select_limit= 1;
+  }
+  else
+    DBUG_ASSERT(FALSE);
+
+  return FALSE;
+}
+
+
+/**
+  Choose a query plan for a table-less subquery.
+
+  @notes
+
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::choose_tableless_subquery_plan()
+{
+  DBUG_ASSERT(!tables_list || !table_count);
+  if (unit->item)
+  {
+    DBUG_ASSERT(unit->item->type() == Item::SUBSELECT_ITEM);
+    Item_subselect *subs_predicate= unit->item;
+
+    /*
+      If the optimizer determined that his query has an empty result,
+      in most cases the subquery predicate is a known constant value -
+      either FALSE or NULL. The implementation of Item_subselect::reset()
+      determines which one.
+    */
+    if (zero_result_cause)
+    {
+      if (!implicit_grouping)
+      {
+        /*
+          Both group by queries and non-group by queries without aggregate
+          functions produce empty subquery result.
+        */
+        subs_predicate->reset();
+        subs_predicate->make_const();
+        return FALSE;
+      }
+
+      /* TODO:
+         A further optimization is possible when a non-group query with
+         MIN/MAX/COUNT is optimized by opt_sum_query. Then, if there are
+         only MIN/MAX functions over an empty result set, the subquery
+         result is a NULL value/row, thus the value of subs_predicate is
+         NULL.
+      */
+    }
+    
+    /*
+      For IN subqueries, use IN->EXISTS transfomation, unless the subquery 
+      has been converted to a JTBM semi-join. In that case, just leave
+      everything as-is, setup_jtbm_semi_joins() has special handling for cases
+      like this.
+    */
+    if (subs_predicate->is_in_predicate() && 
+        !(subs_predicate->substype() == Item_subselect::IN_SUBS && 
+          ((Item_in_subselect*)subs_predicate)->is_jtbm_merged))
+    {
+      Item_in_subselect *in_subs;
+      in_subs= (Item_in_subselect*) subs_predicate;
+      in_subs->set_strategy(SUBS_IN_TO_EXISTS);
+      if (in_subs->create_in_to_exists_cond(this) ||
+          in_subs->inject_in_to_exists_cond(this))
+        return TRUE;
+      tmp_having= having;
+    }
+  }
+  return FALSE;
+}
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
new file mode 100644
index 00000000000..07f1fc77a20
--- /dev/null
+++ b/sql/opt_subselect.h
@@ -0,0 +1,379 @@
+/*
+  Semi-join subquery optimization code definitions
+*/
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+int check_and_do_in_subquery_rewrites(JOIN *join);
+bool convert_join_subqueries_to_semijoins(JOIN *join);
+int pull_out_semijoin_tables(JOIN *join);
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
+bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,  
+                           Item **join_where);
+
+// used by Loose_scan_opt
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables);
+
+/*
+  This is a class for considering possible loose index scan optimizations.
+  It's usage pattern is as follows:
+    best_access_path()
+    {
+       Loose_scan_opt opt;
+
+       opt.init()
+       for each index we can do ref access with
+       {
+         opt.next_ref_key();
+         for each keyuse 
+           opt.add_keyuse();
+         opt.check_ref_access();
+       }
+
+       if (some criteria for range scans)
+         opt.check_range_access();
+       
+       opt.get_best_option();
+    }
+*/
+
+class Loose_scan_opt
+{
+  /* All methods must check this before doing anything else */
+  bool try_loosescan;
+
+  /*
+    If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
+    called sj-equality. If oeK depends only on preceding tables then such
+    equality is called 'bound'.
+  */
+  ulonglong bound_sj_equalities;
+ 
+  /* Accumulated properties of ref access we're now considering: */
+  ulonglong handled_sj_equalities;
+  key_part_map loose_scan_keyparts;
+  uint max_loose_keypart;
+  bool part1_conds_met;
+
+  /*
+    Use of quick select is a special case. Some of its properties:
+  */
+  uint quick_uses_applicable_index;
+  uint quick_max_loose_keypart;
+  
+  /* Best loose scan method so far */
+  uint   best_loose_scan_key;
+  double best_loose_scan_cost;
+  double best_loose_scan_records;
+  KEYUSE *best_loose_scan_start_key;
+
+  uint best_max_loose_keypart;
+
+public:
+  Loose_scan_opt():
+    try_loosescan(FALSE),
+    bound_sj_equalities(0),
+    quick_uses_applicable_index(FALSE)
+  {
+    UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */
+    /* The following are protected by best_loose_scan_cost!= DBL_MAX */
+    UNINIT_VAR(best_loose_scan_key);
+    UNINIT_VAR(best_loose_scan_records);
+    UNINIT_VAR(best_max_loose_keypart);
+    UNINIT_VAR(best_loose_scan_start_key);
+  }
+  
+  void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables)
+  {
+    /*
+      Discover the bound equalities. We need to do this if
+        1. The next table is an SJ-inner table, and
+        2. It is the first table from that semijoin, and
+        3. We're not within a semi-join range (i.e. all semi-joins either have
+           all or none of their tables in join_table_map), except
+           s->emb_sj_nest (which we've just entered, see #2).
+        4. All non-IN-equality correlation references from this sj-nest are 
+           bound
+        5. But some of the IN-equalities aren't (so this can't be handled by 
+           FirstMatch strategy)
+    */
+    best_loose_scan_cost= DBL_MAX;
+    if (!join->emb_sjm_nest && s->emb_sj_nest &&                        // (1)
+        s->emb_sj_nest->sj_in_exprs < 64 && 
+        ((remaining_tables & s->emb_sj_nest->sj_inner_tables) ==        // (2)
+         s->emb_sj_nest->sj_inner_tables) &&                            // (2)
+        join->cur_sj_inner_tables == 0 &&                                  // (3)
+        !(remaining_tables & 
+          s->emb_sj_nest->nested_join->sj_corr_tables) &&               // (4)
+        remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
+    {
+      /* This table is an LooseScan scan candidate */
+      bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, 
+                                                   remaining_tables);
+      try_loosescan= TRUE;
+      DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
+                          (longlong)bound_sj_equalities));
+    }
+  }
+
+  void next_ref_key()
+  {
+    handled_sj_equalities=0;
+    loose_scan_keyparts= 0;
+    max_loose_keypart= 0;
+    part1_conds_met= FALSE;
+  }
+  
+  void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
+  {
+    if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
+    {
+      if (!(remaining_tables & keyuse->used_tables))
+      {
+        /* 
+          This allows to use equality propagation to infer that some 
+          sj-equalities are bound.
+        */
+        bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+      }
+      else
+      {
+        handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+        loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
+        set_if_bigger(max_loose_keypart, keyuse->keypart);
+      }
+    }
+  }
+
+  bool have_a_case() { return test(handled_sj_equalities); }
+
+  void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, 
+                              table_map found_part)
+  {
+    /*
+      Check if we can use LooseScan semi-join strategy. We can if
+      1. This is the right table at right location
+      2. All IN-equalities are either
+         - "bound", ie. the outer_expr part refers to the preceding tables
+         - "handled", ie. covered by the index we're considering
+      3. Index order allows to enumerate subquery's duplicate groups in
+         order. This happens when the index definition matches this
+         pattern:
+
+           (handled_col|bound_col)* (other_col|bound_col)
+
+    */
+    if (try_loosescan &&                                       // (1)
+        (handled_sj_equalities | bound_sj_equalities) ==         // (2)
+        PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) &&     // (2)
+        (PREV_BITS(key_part_map, max_loose_keypart+1) &        // (3)
+         (found_part | loose_scan_keyparts)) ==                // (3)
+        PREV_BITS(key_part_map, max_loose_keypart+1) &&        // (3)
+        !key_uses_partial_cols(s->table, key))
+    {
+      /* Ok, can use the strategy */
+      part1_conds_met= TRUE;
+      if (s->quick && s->quick->index == key && 
+          s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
+      {
+        quick_uses_applicable_index= TRUE;
+        quick_max_loose_keypart= max_loose_keypart;
+      }
+      DBUG_PRINT("info", ("Can use LooseScan scan"));
+
+      /* 
+        Check if this is a special case where there are no usable bound
+        IN-equalities, i.e. we have
+
+          outer_expr IN (SELECT innertbl.key FROM ...) 
+        
+        and outer_expr cannot be evaluated yet, so it's actually full
+        index scan and not a ref access
+      */
+      if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
+          s->table->covering_keys.is_set(key))
+      {
+        DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
+        
+        /* Calculate the cost of complete loose index scan.  */
+        double records= rows2double(s->table->file->stats.records);
+
+        /* The cost is entire index scan cost (divided by 2) */
+        double read_time= s->table->file->keyread_time(key, 1,
+                                                       (ha_rows) records);
+
+        /*
+          Now find out how many different keys we will get (for now we
+          ignore the fact that we have "keypart_i=const" restriction for
+          some key components, that may make us think think that loose
+          scan will produce more distinct records than it actually will)
+        */
+        ulong rpc;
+        if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
+          records= records / rpc;
+
+        // TODO: previous version also did /2
+        if (read_time < best_loose_scan_cost)
+        {
+          best_loose_scan_key= key;
+          best_loose_scan_cost= read_time;
+          best_loose_scan_records= records;
+          best_max_loose_keypart= max_loose_keypart;
+          best_loose_scan_start_key= start_key;
+        }
+      }
+    }
+  }
+  
+  void check_ref_access_part2(uint key, KEYUSE *start_key, double records, 
+                              double read_time)
+  {
+    if (part1_conds_met && read_time < best_loose_scan_cost)
+    {
+      /* TODO use rec-per-key-based fanout calculations */
+      best_loose_scan_key= key;
+      best_loose_scan_cost= read_time;
+      best_loose_scan_records= records;
+      best_max_loose_keypart= max_loose_keypart;
+      best_loose_scan_start_key= start_key;
+    }
+  }
+
+  void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
+  {
+    /* TODO: this the right part restriction: */
+    if (quick_uses_applicable_index && idx == join->const_tables && 
+        quick->read_time < best_loose_scan_cost)
+    {
+      best_loose_scan_key= quick->index;
+      best_loose_scan_cost= quick->read_time;
+      /* this is ok because idx == join->const_tables */
+      best_loose_scan_records= rows2double(quick->records);
+      best_max_loose_keypart= quick_max_loose_keypart;
+      best_loose_scan_start_key= NULL;
+    }
+  }
+
+  void save_to_position(JOIN_TAB *tab, POSITION *pos)
+  {
+    pos->read_time=       best_loose_scan_cost;
+    if (best_loose_scan_cost != DBL_MAX)
+    {
+      pos->records_read=    best_loose_scan_records;
+      pos->key=             best_loose_scan_start_key;
+      pos->loosescan_picker.loosescan_key=   best_loose_scan_key;
+      pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
+      pos->use_join_buffer= FALSE;
+      pos->table=           tab;
+      // todo need ref_depend_map ?
+      DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
+                          tab->table->key_info[best_loose_scan_key].name,
+                          best_loose_scan_start_key? "(ref access)":
+                                                     "(range/index access)"));
+    }
+  }
+};
+
+
+void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx, 
+                      double *current_record_count, double *current_read_time,
+                      POSITION *loose_scan_pos);
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx);
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+
+bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab);
+bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab);
+
+
+/*
+  Temporary table used by semi-join DuplicateElimination strategy
+
+  This consists of the temptable itself and data needed to put records
+  into it. The table's DDL is as follows:
+
+    CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
+
+  where the primary key can be replaced with unique constraint if n exceeds
+  the limit (as it is always done for query execution-time temptables).
+
+  The record value is a concatenation of rowids of tables from the join we're
+  executing. If a join table is on the inner side of the outer join, we
+  assume that its rowid can be NULL and provide means to store this rowid in
+  the tuple.
+*/
+
+class SJ_TMP_TABLE : public Sql_alloc
+{
+public:
+  /*
+    Array of pointers to tables whose rowids compose the temporary table
+    record.
+  */
+  class TAB
+  {
+  public:
+    JOIN_TAB *join_tab;
+    uint rowid_offset;
+    ushort null_byte;
+    uchar null_bit;
+  };
+  TAB *tabs;
+  TAB *tabs_end;
+  
+  /* 
+    is_degenerate==TRUE means this is a special case where the temptable record
+    has zero length (and presence of a unique key means that the temptable can
+    have either 0 or 1 records). 
+    In this case we don't create the physical temptable but instead record
+    its state in SJ_TMP_TABLE::have_degenerate_row.
+  */
+  bool is_degenerate;
+
+  /* 
+    When is_degenerate==TRUE: the contents of the table (whether it has the
+    record or not).
+  */
+  bool have_degenerate_row;
+  
+  /* table record parameters */
+  uint null_bits;
+  uint null_bytes;
+  uint rowid_len;
+
+  /* The temporary table itself (NULL means not created yet) */
+  TABLE *tmp_table;
+  
+  /*
+    These are the members we got from temptable creation code. We'll need
+    them if we'll need to convert table from HEAP to MyISAM/Maria.
+  */
+  ENGINE_COLUMNDEF *start_recinfo;
+  ENGINE_COLUMNDEF *recinfo;
+
+  SJ_TMP_TABLE *next_flush_table; 
+
+  int sj_weedout_delete_rows();
+  int sj_weedout_check_row(THD *thd);
+  bool create_sj_weedout_tmp_table(THD *thd);
+};
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after);
+void destroy_sj_tmp_tables(JOIN *join);
+int clear_sj_tmp_tables(JOIN *join);
+int rewrite_to_index_subquery_engine(JOIN *join);
+
+
+void get_delayed_table_estimates(TABLE *table,
+                                 ha_rows *out_rows, 
+                                 double *scan_time,
+                                 double *startup_cost);
+
+enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab);
+
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index 92a286a3b1c..9e097af1e6e 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -76,10 +76,12 @@ static int maxmin_in_range(bool max_fl, Field* field, COND *cond);
     #			Multiplication of number of rows in all tables
 */
 
-static ulonglong get_exact_record_count(TABLE_LIST *tables)
+static ulonglong get_exact_record_count(List<TABLE_LIST> &tables)
 {
   ulonglong count= 1;
-  for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(tables);
+  while ((tl= ti++))
   {
     ha_rows tmp= tl->table->file->records();
     if ((tmp == HA_POS_ERROR))
@@ -236,9 +238,11 @@ static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
 */
 
 int opt_sum_query(THD *thd,
-                  TABLE_LIST *tables, List<Item> &all_fields, COND *conds)
+                  List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds)
 {
   List_iterator_fast<Item> it(all_fields);
+  List_iterator<TABLE_LIST> ti(tables);
+  TABLE_LIST *tl;
   int const_result= 1;
   bool recalc_const_item= 0;
   ulonglong count= 1;
@@ -246,7 +250,7 @@ int opt_sum_query(THD *thd,
   table_map removed_tables= 0, outer_tables= 0, used_tables= 0;
   table_map where_tables= 0;
   Item *item;
-  int error;
+  int error= 0;
   DBUG_ENTER("opt_sum_query");
 
   if (conds)
@@ -256,7 +260,7 @@ int opt_sum_query(THD *thd,
     Analyze outer join dependencies, and, if possible, compute the number
     of returned rows.
   */
-  for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+  while ((tl= ti++))
   {
     TABLE_LIST *embedded;
     for (embedded= tl ; embedded; embedded= embedded->embedding)
@@ -297,6 +301,15 @@ int opt_sum_query(THD *thd,
       is_exact_count= FALSE;
       count= 1;                                 // ensure count != 0
     }
+    else if (tl->is_materialized_derived() || 
+             tl->jtbm_subselect)
+    {
+      /*
+        Can't remove a derived table as it's number of rows is just an
+        estimate.
+      */
+      DBUG_RETURN(0);
+    }
     else
     {
       error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
@@ -484,30 +497,32 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     /* MULT_EQUAL_FUNC */
     {
       Item_equal *item_equal= (Item_equal *) func_item;
-      Item_equal_iterator it(*item_equal);
-      args[0]= it++;
-      if (it++)
-        return 0;
       if (!(args[1]= item_equal->get_const()))
         return 0;
+      Item_equal_fields_iterator it(*item_equal);
+      if (!(item= it++))
+        return 0;
+      args[0]= item->real_item();
       if (args[0]->max_length < args[1]->max_length)
         return 0;
+      if (it++)
+        return 0;
     }
     break;
   case 1:
     /* field IS NULL */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() != Item::FIELD_ITEM)
       return 0;
     args[0]= item;
     break;
   case 2:
     /* 'field op const' or 'const op field' */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() == Item::FIELD_ITEM)
     {
       args[0]= item;
-      item= func_item->arguments()[1];
+      item= func_item->arguments()[1]->real_item();
       if (!item->const_item())
         return 0;
       args[1]= item;
@@ -515,7 +530,7 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     else if (item->const_item())
     {
       args[1]= item;
-      item= func_item->arguments()[1];
+      item= func_item->arguments()[1]->real_item();
       if (item->type() != Item::FIELD_ITEM)
         return 0;
       args[0]= item;
@@ -528,13 +543,13 @@ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order)
     break;
   case 3:
     /* field BETWEEN const AND const */
-    item= func_item->arguments()[0];
+    item= func_item->arguments()[0]->real_item();
     if (item->type() == Item::FIELD_ITEM)
     {
       args[0]= item;
       for (int i= 1 ; i <= 2; i++)
       {
-        item= func_item->arguments()[i];
+        item= func_item->arguments()[i]->real_item();
         if (!item->const_item())
           return 0;
         args[i]= item;
@@ -622,8 +637,10 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo,
       test(cond->used_tables() & ~PSEUDO_TABLE_BITS))
   {
     /* Condition doesn't restrict the used table */
-    DBUG_RETURN(TRUE);
+    DBUG_RETURN(!cond->const_item());
   }
+  else if (cond->is_expensive())
+    DBUG_RETURN(FALSE);
   if (cond->type() == Item::COND_ITEM)
   {
     if (((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC)
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index 65a7d662425..545001c9df1 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -588,10 +588,8 @@ void eliminate_tables(JOIN *join)
   if (!join->outer_join)
     DBUG_VOID_RETURN;
 
-#ifndef DBUG_OFF
   if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION))
     DBUG_VOID_RETURN; /* purecov: inspected */
-#endif
 
   /* Find the tables that are referred to from WHERE/HAVING */
   used_tables= (join->conds?  join->conds->used_tables() : 0) | 
@@ -727,7 +725,11 @@ eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
     }
     else
     {
-      DBUG_ASSERT(!tbl->nested_join);
+      DBUG_ASSERT(!tbl->nested_join || tbl->sj_on_expr);
+      //psergey-todo: is the following really correct or we'll need to descend
+      //down all ON clauses: ? 
+      if (tbl->sj_on_expr)
+        tables_used_on_left |= tbl->sj_on_expr->used_tables();
     }
   }
 
@@ -920,8 +922,9 @@ public:
   Field_dependency_recorder(Dep_analysis_context *ctx_arg): ctx(ctx_arg)
   {}
   
-  void visit_field(Field *field)
+  void visit_field(Item_field *item)
   {
+    Field *field= item->field;
     Dep_value_table *tbl_dep;
     if ((tbl_dep= ctx->table_deps[field->table->tablenr]))
     {
@@ -1205,15 +1208,16 @@ void build_eq_mods_for_cond(Dep_analysis_context *ctx,
     if (!(fvl= new List<Dep_value_field>))
       break; /* purecov: inspected */
 
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
+    Item_equal_fields_iterator it(*item_equal);
+    Item *item;
     Item *bound_item= item_equal->get_const();
     while ((item= it++))
     {
+      Field *equal_field= it.get_curr_field();
       if ((item->used_tables() & ctx->usable_tables))
       {
         Dep_value_field *field_val;
-        if ((field_val= ctx->get_field_value(item->field)))
+        if ((field_val= ctx->get_field_value(equal_field)))
           fvl->push_back(field_val);
       }
       else
@@ -1229,7 +1233,7 @@ void build_eq_mods_for_cond(Dep_analysis_context *ctx,
     if (fvl->elements)
     {
       
-      exchange_sort<Dep_value_field>(fvl, compare_field_values, NULL);
+      bubble_sort<Dep_value_field>(fvl, compare_field_values, NULL);
       add_module_expr(ctx, eq_mod, *and_level, NULL, bound_item, fvl);
     }
     break;
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index e5b2953e969..3e1f254f610 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -384,7 +384,7 @@ sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root,
     DBUG_RETURN(0);
   }
 
-  if (!(parser->buff= (char*) alloc_root(mem_root, stat_info.st_size+1)))
+  if (!(parser->buff= (char*) alloc_root(mem_root, (size_t)(stat_info.st_size+1))))
   {
     DBUG_RETURN(0);
   }
@@ -395,7 +395,7 @@ sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root,
   }
   
   if ((len= my_read(file, (uchar *)parser->buff,
-                    stat_info.st_size, MYF(MY_WME))) ==
+                    (size_t) stat_info.st_size, MYF(MY_WME))) ==
       MY_FILE_ERROR)
   {
     my_close(file, MYF(MY_WME));
diff --git a/sql/plistsort.c b/sql/plistsort.c
new file mode 100644
index 00000000000..71d287e7b45
--- /dev/null
+++ b/sql/plistsort.c
@@ -0,0 +1,166 @@
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+/*
+things to define before including the file:
+
+#define LS_LIST_ITEM ListItem
+#define LS_COMPARE_FUNC_DECL compare_func var_name,
+#define LS_COMPARE_FUNC_CALL(list_el1, list_el2) (*var_name)(list_el1, list_el2)
+#define LS_NEXT(A) (A)->next
+#define LS_SET_NEXT(A,val) (A)->next= val
+#define LS_P_NEXT(A) &(A)->next
+#define LS_NAME plistsort
+#define LS_SCOPE static
+#define LS_STRUCT_NAME ls_struct_name
+*/
+
+typedef struct LS_STRUCT_NAME
+{
+  LS_LIST_ITEM *list1;
+  int list_len;
+  int return_point;
+} LS_STRUCT_NAME;
+
+LS_SCOPE LS_LIST_ITEM* LS_NAME(LS_COMPARE_FUNC_DECL LS_LIST_ITEM *list, int list_len)
+{
+  LS_LIST_ITEM *list_end;
+  LS_LIST_ITEM *sorted_list;
+
+  struct LS_STRUCT_NAME stack[63], *sp= stack;
+
+  if (list_len < 2)
+    return list;
+
+  sp->list_len= list_len;
+  sp->return_point= 2;
+
+recursion_point:
+
+  if (sp->list_len < 4)
+  {
+    LS_LIST_ITEM *e1, *e2;
+    sorted_list= list;
+    e1= LS_NEXT(sorted_list);
+    list_end= LS_NEXT(e1);
+    if (LS_COMPARE_FUNC_CALL(sorted_list, e1))
+    {
+      sorted_list= e1;
+      e1= list;
+    }
+    if (sp->list_len == 2)
+    {
+      LS_SET_NEXT(sorted_list, e1);
+      LS_SET_NEXT(e1, NULL);
+      goto exit_point;
+    }
+    e2= list_end;
+    list_end= LS_NEXT(e2);
+    if (LS_COMPARE_FUNC_CALL(e1, e2))
+    {
+      {
+        LS_LIST_ITEM *tmp_e= e1;
+        e1= e2;
+        e2= tmp_e;
+      }
+      if (LS_COMPARE_FUNC_CALL(sorted_list, e1))
+      {
+        LS_LIST_ITEM *tmp_e= sorted_list;
+        sorted_list= e1;
+        e1= tmp_e;
+      }
+    }
+
+    LS_SET_NEXT(sorted_list, e1);
+    LS_SET_NEXT(e1, e2);
+    LS_SET_NEXT(e2, NULL);
+    goto exit_point;
+  }
+
+  {
+    register struct LS_STRUCT_NAME *sp0= sp++;
+    sp->list_len= sp0->list_len >> 1;
+    sp0->list_len-= sp->list_len;
+    sp->return_point= 0;
+  }
+  goto recursion_point;
+return_point0:
+  sp->list1= sorted_list;
+  {
+    register struct LS_STRUCT_NAME *sp0= sp++;
+    list= list_end;
+    sp->list_len= sp0->list_len;
+    sp->return_point= 1;
+  }
+  goto recursion_point;
+return_point1:
+  {
+    register LS_LIST_ITEM **hook= &sorted_list;
+    register LS_LIST_ITEM *list1= sp->list1;
+    register LS_LIST_ITEM *list2= sorted_list;
+
+    if (LS_COMPARE_FUNC_CALL(list1, list2))
+    {
+      LS_LIST_ITEM *tmp_e= list2;
+      list2= list1;
+      list1= tmp_e;
+    }
+    for (;;)
+    {
+      *hook= list1;
+      do
+      {
+        if (!(list1= *(hook= LS_P_NEXT(list1))))
+        {
+          *hook= list2;
+          goto exit_point;
+        }
+      } while (LS_COMPARE_FUNC_CALL(list2, list1));
+
+      *hook= list2;
+      do
+      {
+        if (!(list2= *(hook= LS_P_NEXT(list2))))
+        {
+          *hook= list1;
+          goto exit_point;
+        }
+      } while (LS_COMPARE_FUNC_CALL(list1, list2));
+    }
+  }
+
+exit_point:
+  switch ((sp--)->return_point)
+  {
+    case 0: goto return_point0;
+    case 1: goto return_point1;
+    default:;
+  }
+
+  return sorted_list;
+}
+
+
+#undef LS_LIST_ITEM
+#undef LS_NEXT
+#undef LS_SET_NEXT
+#undef LS_P_NEXT
+#undef LS_NAME
+#undef LS_STRUCT_NAME
+#undef LS_SCOPE
+#undef LS_COMPARE_FUNC_DECL
+#undef LS_COMPARE_FUNC_CALL
+
diff --git a/sql/procedure.h b/sql/procedure.h
index 30a8a0efccb..488d461905e 100644
--- a/sql/procedure.h
+++ b/sql/procedure.h
@@ -13,6 +13,8 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+#ifndef PROCEDURE_INCLUDED
+#define PROCEDURE_INCLUDED
 
 /* When using sql procedures */
 
@@ -153,3 +155,5 @@ public:
 
 Procedure *setup_procedure(THD *thd,ORDER *proc_param,select_result *result,
 			   List<Item> &field_list,int *error);
+
+#endif /* PROCEDURE_INCLUDED */
diff --git a/sql/protocol.cc b/sql/protocol.cc
index 3381cf2f1bb..b75ff1ad18b 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -517,6 +517,56 @@ void net_end_statement(THD *thd)
 }
 
 
+/**
+   Send a progress report to the client
+
+   What we send is:
+   header (255,255,255,1)
+   stage, max_stage as on byte integers
+   percentage withing the stage as percentage*1000
+   (that is, ratio*100000) as a 3 byte integer
+   proc_info as a string
+*/
+
+const uchar progress_header[2]= {(uchar) 255, (uchar) 255 };
+
+void net_send_progress_packet(THD *thd)
+{
+  uchar buff[200], *pos;
+  const char *proc_info= thd->proc_info ? thd->proc_info : "";
+  uint length= strlen(proc_info);
+  ulonglong progress;
+  DBUG_ENTER("net_send_progress_packet");
+
+  if (unlikely(!thd->net.vio))
+    DBUG_VOID_RETURN;                           // Socket is closed
+
+  pos= buff;
+  /*
+    Store number of strings first. This allows us to later expand the
+    progress indicator if needed.
+  */
+  *pos++= (uchar) 1;                            // Number of strings
+  *pos++= (uchar) thd->progress.stage + 1;
+  /*
+    We have the max() here to avoid problems if max_stage is not set,
+    which may happen during automatic repair of table
+  */
+  *pos++= (uchar) max(thd->progress.max_stage, thd->progress.stage + 1);
+  progress= 0;
+  if (thd->progress.max_counter)
+    progress= 100000ULL * thd->progress.counter / thd->progress.max_counter;
+  int3store(pos, progress);                          // Between 0 & 100000
+  pos+= 3;
+  pos= net_store_data(pos, (const uchar*) proc_info,
+                      min(length, sizeof(buff)-7));
+  net_write_command(&thd->net, (uchar) 255, progress_header,
+                    sizeof(progress_header), (uchar*) buff,
+                    (uint) (pos - buff));
+  DBUG_VOID_RETURN;
+}
+
+  
 /****************************************************************************
   Functions used by the protocol functions (like net_send_ok) to store
   strings and numbers in the header result packet.
@@ -619,7 +669,7 @@ bool Protocol::send_fields(List<Item> *list, uint flags)
   Protocol_text prot(thd);
   String *local_packet= prot.storage_packet();
   CHARSET_INFO *thd_charset= thd->variables.character_set_results;
-  DBUG_ENTER("send_fields");
+  DBUG_ENTER("Protocol::send_fields");
 
   if (flags & SEND_NUM_ROWS)
   {				// Packet with number of elements
@@ -1043,13 +1093,7 @@ bool Protocol_text::store(Field *field)
 }
 
 
-/**
-  @todo
-    Second_part format ("%06") needs to change when 
-    we support 0-6 decimals for time.
-*/
-
-bool Protocol_text::store(MYSQL_TIME *tm)
+bool Protocol_text::store(MYSQL_TIME *tm, int decimals)
 {
 #ifndef DBUG_OFF
   DBUG_ASSERT(field_types == 0 ||
@@ -1057,18 +1101,8 @@ bool Protocol_text::store(MYSQL_TIME *tm)
 	      field_types[field_pos] == MYSQL_TYPE_TIMESTAMP);
   field_pos++;
 #endif
-  char buff[40];
-  uint length;
-  length= my_sprintf(buff,(buff, "%04d-%02d-%02d %02d:%02d:%02d",
-			   (int) tm->year,
-			   (int) tm->month,
-			   (int) tm->day,
-			   (int) tm->hour,
-			   (int) tm->minute,
-			   (int) tm->second));
-  if (tm->second_part)
-    length+= my_sprintf(buff+length,(buff+length, ".%06d",
-                                     (int)tm->second_part));
+  char buff[MAX_DATE_STRING_REP_LENGTH];
+  uint length= my_datetime_to_str(tm, buff, decimals);
   return net_store_data((uchar*) buff, length);
 }
 
@@ -1086,29 +1120,15 @@ bool Protocol_text::store_date(MYSQL_TIME *tm)
 }
 
 
-/**
-  @todo 
-    Second_part format ("%06") needs to change when 
-    we support 0-6 decimals for time.
-*/
-
-bool Protocol_text::store_time(MYSQL_TIME *tm)
+bool Protocol_text::store_time(MYSQL_TIME *tm, int decimals)
 {
 #ifndef DBUG_OFF
   DBUG_ASSERT(field_types == 0 ||
 	      field_types[field_pos] == MYSQL_TYPE_TIME);
   field_pos++;
 #endif
-  char buff[40];
-  uint length;
-  uint day= (tm->year || tm->month) ? 0 : tm->day;
-  length= my_sprintf(buff,(buff, "%s%02ld:%02d:%02d",
-			   tm->neg ? "-" : "",
-			   (long) day*24L+(long) tm->hour,
-			   (int) tm->minute,
-			   (int) tm->second));
-  if (tm->second_part)
-    length+= my_sprintf(buff+length,(buff+length, ".%06d", (int)tm->second_part));
+  char buff[MAX_DATE_STRING_REP_LENGTH];
+  uint length= my_time_to_str(tm, buff, decimals);
   return net_store_data((uchar*) buff, length);
 }
 
@@ -1265,7 +1285,7 @@ bool Protocol_binary::store(Field *field)
 }
 
 
-bool Protocol_binary::store(MYSQL_TIME *tm)
+bool Protocol_binary::store(MYSQL_TIME *tm, int decimals)
 {
   char buff[12],*pos;
   uint length;
@@ -1278,6 +1298,10 @@ bool Protocol_binary::store(MYSQL_TIME *tm)
   pos[4]= (uchar) tm->hour;
   pos[5]= (uchar) tm->minute;
   pos[6]= (uchar) tm->second;
+  DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS ||
+              (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS));
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    tm->second_part= sec_part_truncate(tm->second_part, decimals);
   int4store(pos+7, tm->second_part);
   if (tm->second_part)
     length=11;
@@ -1295,11 +1319,11 @@ bool Protocol_binary::store_date(MYSQL_TIME *tm)
 {
   tm->hour= tm->minute= tm->second=0;
   tm->second_part= 0;
-  return Protocol_binary::store(tm);
+  return Protocol_binary::store(tm, 0);
 }
 
 
-bool Protocol_binary::store_time(MYSQL_TIME *tm)
+bool Protocol_binary::store_time(MYSQL_TIME *tm, int decimals)
 {
   char buff[13], *pos;
   uint length;
@@ -1308,7 +1332,6 @@ bool Protocol_binary::store_time(MYSQL_TIME *tm)
   pos[0]= tm->neg ? 1 : 0;
   if (tm->hour >= 24)
   {
-    /* Fix if we come from Item::send */
     uint days= tm->hour/24;
     tm->hour-= days*24;
     tm->day+= days;
@@ -1317,6 +1340,10 @@ bool Protocol_binary::store_time(MYSQL_TIME *tm)
   pos[5]= (uchar) tm->hour;
   pos[6]= (uchar) tm->minute;
   pos[7]= (uchar) tm->second;
+  DBUG_ASSERT(decimals == AUTO_SEC_PART_DIGITS ||
+              (decimals >= 0 && decimals <= TIME_SECOND_PART_DIGITS));
+  if (decimals != AUTO_SEC_PART_DIGITS)
+    tm->second_part= sec_part_truncate(tm->second_part, decimals);
   int4store(pos+8, tm->second_part);
   if (tm->second_part)
     length=12;
diff --git a/sql/protocol.h b/sql/protocol.h
index 1560f7272b5..bd7462a1280 100644
--- a/sql/protocol.h
+++ b/sql/protocol.h
@@ -94,9 +94,9 @@ public:
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs)=0;
   virtual bool store(float from, uint32 decimals, String *buffer)=0;
   virtual bool store(double from, uint32 decimals, String *buffer)=0;
-  virtual bool store(MYSQL_TIME *time)=0;
+  virtual bool store(MYSQL_TIME *time, int decimals)=0;
   virtual bool store_date(MYSQL_TIME *time)=0;
-  virtual bool store_time(MYSQL_TIME *time)=0;
+  virtual bool store_time(MYSQL_TIME *time, int decimals)=0;
   virtual bool store(Field *field)=0;
 #ifdef EMBEDDED_LIBRARY
   int begin_dataset();
@@ -133,9 +133,9 @@ public:
   virtual bool store(const char *from, size_t length, CHARSET_INFO *cs);
   virtual bool store(const char *from, size_t length,
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
-  virtual bool store(MYSQL_TIME *time);
+  virtual bool store(MYSQL_TIME *time, int decimals);
   virtual bool store_date(MYSQL_TIME *time);
-  virtual bool store_time(MYSQL_TIME *time);
+  virtual bool store_time(MYSQL_TIME *time, int decimals);
   virtual bool store(float nr, uint32 decimals, String *buffer);
   virtual bool store(double from, uint32 decimals, String *buffer);
   virtual bool store(Field *field);
@@ -168,9 +168,9 @@ public:
   virtual bool store(const char *from, size_t length, CHARSET_INFO *cs);
   virtual bool store(const char *from, size_t length,
   		     CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
-  virtual bool store(MYSQL_TIME *time);
+  virtual bool store(MYSQL_TIME *time, int decimals);
   virtual bool store_date(MYSQL_TIME *time);
-  virtual bool store_time(MYSQL_TIME *time);
+  virtual bool store_time(MYSQL_TIME *time, int decimals);
   virtual bool store(float nr, uint32 decimals, String *buffer);
   virtual bool store(double from, uint32 decimals, String *buffer);
   virtual bool store(Field *field);
@@ -180,6 +180,7 @@ public:
 void send_warning(THD *thd, uint sql_errno, const char *err=0);
 bool net_send_error(THD *thd, uint sql_errno=0, const char *err=0);
 void net_end_statement(THD *thd);
+void net_send_progress_packet(THD *thd);
 uchar *net_store_data(uchar *to,const uchar *from, size_t length);
 uchar *net_store_data(uchar *to,int32 from);
 uchar *net_store_data(uchar *to,longlong from);
diff --git a/sql/records.cc b/sql/records.cc
index 208405b0657..10817dd8e51 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -61,7 +61,6 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
   bzero((char*) info,sizeof(*info));
   info->thd= thd;
   info->table= table;
-  info->file=  table->file;
   info->record= table->record[0];
   info->print_error= print_error;
   info->unlock_row= rr_unlock_row;
@@ -171,7 +170,6 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   bzero((char*) info,sizeof(*info));
   info->thd=thd;
   info->table=table;
-  info->file= table->file;
   info->forms= &info->table;		/* Only one table */
   
   if (table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE &&
@@ -292,9 +290,10 @@ void end_read_record(READ_RECORD *info)
   if (info->table)
   {
     filesort_free_buffers(info->table,0);
-    (void) info->file->extra(HA_EXTRA_NO_CACHE);
+    if (info->table->created)
+      (void) info->table->file->extra(HA_EXTRA_NO_CACHE);
     if (info->read_record != rr_quick) // otherwise quick_range does it
-      (void) info->file->ha_index_or_rnd_end();
+      (void) info->table->file->ha_index_or_rnd_end();
     info->table=0;
   }
 }
@@ -353,7 +352,7 @@ static int rr_quick(READ_RECORD *info)
 
 static int rr_index_first(READ_RECORD *info)
 {
-  int tmp= info->file->ha_index_first(info->record);
+  int tmp= info->table->file->ha_index_first(info->record);
   info->read_record= rr_index;
   if (tmp)
     tmp= rr_handle_error(info, tmp);
@@ -379,7 +378,7 @@ static int rr_index_first(READ_RECORD *info)
 
 static int rr_index(READ_RECORD *info)
 {
-  int tmp= info->file->ha_index_next(info->record);
+  int tmp= info->table->file->ha_index_next(info->record);
   if (tmp)
     tmp= rr_handle_error(info, tmp);
   return tmp;
@@ -389,7 +388,7 @@ static int rr_index(READ_RECORD *info)
 int rr_sequential(READ_RECORD *info)
 {
   int tmp;
-  while ((tmp= info->file->ha_rnd_next(info->record)))
+  while ((tmp= info->table->file->ha_rnd_next(info->record)))
   {
     /*
       rnd_next can return RECORD_DELETED for MyISAM when one thread is
@@ -414,7 +413,7 @@ static int rr_from_tempfile(READ_RECORD *info)
   {
     if (my_b_read(info->io_cache,info->ref_pos,info->ref_length))
       return -1;					/* End of file */
-    if (!(tmp= info->file->ha_rnd_pos(info->record,info->ref_pos)))
+    if (!(tmp= info->table->file->ha_rnd_pos(info->record,info->ref_pos)))
       break;
     /* The following is extremely unlikely to happen */
     if (tmp == HA_ERR_RECORD_DELETED ||
@@ -448,7 +447,8 @@ static int rr_unpack_from_tempfile(READ_RECORD *info)
   if (my_b_read(info->io_cache, info->rec_buf, info->ref_length))
     return -1;
   TABLE *table= info->table;
-  (*table->sort.unpack)(table->sort.addon_field, info->rec_buf);
+  (*table->sort.unpack)(table->sort.addon_field, info->rec_buf,
+                        info->rec_buf + info->ref_length);
 
   return 0;
 }
@@ -465,7 +465,7 @@ static int rr_from_pointers(READ_RECORD *info)
     cache_pos= info->cache_pos;
     info->cache_pos+= info->ref_length;
 
-    if (!(tmp= info->file->ha_rnd_pos(info->record,cache_pos)))
+    if (!(tmp= info->table->file->ha_rnd_pos(info->record,cache_pos)))
       break;
 
     /* The following is extremely unlikely to happen */
@@ -499,7 +499,8 @@ static int rr_unpack_from_buffer(READ_RECORD *info)
   if (info->cache_pos == info->cache_end)
     return -1;                      /* End of buffer */
   TABLE *table= info->table;
-  (*table->sort.unpack)(table->sort.addon_field, info->cache_pos);
+  (*table->sort.unpack)(table->sort.addon_field, info->cache_pos,
+                        info->cache_end);
   info->cache_pos+= info->ref_length;
 
   return 0;
@@ -598,7 +599,7 @@ static int rr_from_cache(READ_RECORD *info)
       record=uint3korr(position);
       position+=3;
       record_pos=info->cache+record*info->reclength;
-      if ((error=(int16) info->file->ha_rnd_pos(record_pos,info->ref_pos)))
+      if ((error=(int16) info->table->file->ha_rnd_pos(record_pos,info->ref_pos)))
       {
 	record_pos[info->error_offset]=1;
 	shortstore(record_pos,error);
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc
index 374ef77c297..fee8a538fa9 100644
--- a/sql/repl_failsafe.cc
+++ b/sql/repl_failsafe.cc
@@ -246,7 +246,8 @@ static int find_target_pos(LEX_MASTER_INFO *mi, IO_CACHE *log, char *errmsg)
   for (;;)
   {
     Log_event* ev;
-    if (!(ev = Log_event::read_log_event(log, (pthread_mutex_t*) 0, 0)))
+    if (!(ev = Log_event::read_log_event(log, (pthread_mutex_t*) 0, 0,
+                                         opt_slave_sql_verify_checksum)))
     {
       if (log->error > 0)
 	strmov(errmsg, "Binary log truncated in the middle of event");
@@ -420,7 +421,8 @@ static Slave_log_event* find_slave_event(IO_CACHE* log,
 
   for (i = 0; i < 2; i++)
   {
-    if (!(ev = Log_event::read_log_event(log, (pthread_mutex_t*)0, 0)))
+    if (!(ev = Log_event::read_log_event(log, (pthread_mutex_t*)0, 0,
+                                         opt_slave_sql_verify_checksum)))
     {
       my_snprintf(errmsg, SLAVE_ERRMSG_SIZE,
 		  "Error reading event in log '%s'",
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 7011f59adf0..3239c64d6c8 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -26,7 +26,8 @@
 
 Master_info::Master_info()
   :Slave_reporting_capability("I/O"),
-   ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0), inited(0),
+   ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0),
+   checksum_alg_before_fd(BINLOG_CHECKSUM_ALG_UNDEF), inited(0),
    abort_slave(0),slave_running(0),
    slave_run_id(0)
 {
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index 8bb1757d917..05d3fc412d9 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -85,6 +85,12 @@ class Master_info : public Slave_reporting_capability
   Relay_log_info rli;
   uint port;
   uint connect_retry;
+  /*
+    to hold checksum alg in use until IO thread has received FD.
+    Initialized to novalue, then set to the queried from master
+    @@global.binlog_checksum and deactivated once FD has been received.
+  */
+  uint8 checksum_alg_before_fd;
 #ifndef DBUG_OFF
   int events_till_disconnect;
 #endif
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index 5a33b62538b..ea8324a1fc7 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -106,7 +106,7 @@ pack_row(TABLE *table, MY_BITMAP const* cols,
         const uchar *old_pack_ptr= pack_ptr;
 #endif
         pack_ptr= field->pack(pack_ptr, field->ptr + offset,
-                              field->max_data_length(), TRUE);
+                              field->max_data_length());
         DBUG_PRINT("debug", ("field: %s; pack_ptr: 0x%lx;"
                              " pack_ptr':0x%lx; bytes: %d",
                              field->field_name, (ulong) old_pack_ptr,
@@ -176,13 +176,15 @@ pack_row(TABLE *table, MY_BITMAP const* cols,
    @retval ER_NO_DEFAULT_FOR_FIELD
    Returned if one of the fields existing on the slave but not on the
    master does not have a default value (and isn't nullable)
-
+   @retval ER_SLAVE_CORRUPT_EVENT
+   Found error when trying to unpack fields.
  */
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 int
 unpack_row(Relay_log_info const *rli,
            TABLE *table, uint const colcnt,
-           uchar const *const row_data, MY_BITMAP const *cols,
+           uchar const *const row_data, uchar const *const row_buffer_end,
+           MY_BITMAP const *cols,
            uchar const **const row_end, ulong *const master_reclength)
 {
   DBUG_ENTER("unpack_row");
@@ -224,9 +226,6 @@ unpack_row(Relay_log_info const *rli,
 
       DBUG_ASSERT(null_mask & 0xFF); // One of the 8 LSB should be set
 
-      /* Field...::unpack() cannot return 0 */
-      DBUG_ASSERT(pack_ptr != NULL);
-
       if (null_bits & null_mask)
       {
         if (f->maybe_null())
@@ -272,14 +271,21 @@ unpack_row(Relay_log_info const *rli,
 #ifndef DBUG_OFF
         uchar const *const old_pack_ptr= pack_ptr;
 #endif
-        pack_ptr= f->unpack(f->ptr, pack_ptr, metadata, TRUE);
+        pack_ptr= f->unpack(f->ptr, pack_ptr, row_buffer_end, metadata);
 	DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;"
                              " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d",
                              f->field_name, metadata,
                              (ulong) old_pack_ptr, (ulong) pack_ptr,
                              (int) (pack_ptr - old_pack_ptr)));
+        if (!pack_ptr)
+        {
+          rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT,
+                      "Could not read field `%s` of table `%s`.`%s`",
+                      f->field_name, table->s->db.str,
+                      table->s->table_name.str);
+          DBUG_RETURN(ER_SLAVE_CORRUPT_EVENT);
+        }
       }
-
       null_mask <<= 1;
     }
     i++;
diff --git a/sql/rpl_record.h b/sql/rpl_record.h
index 6005d57daf3..eefcdebc114 100644
--- a/sql/rpl_record.h
+++ b/sql/rpl_record.h
@@ -29,7 +29,8 @@ size_t pack_row(TABLE* table, MY_BITMAP const* cols,
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 int unpack_row(Relay_log_info const *rli,
                TABLE *table, uint const colcnt,
-               uchar const *const row_data, MY_BITMAP const *cols,
+               uchar const *const row_data, uchar const *row_buffer_end,
+               MY_BITMAP const *cols,
                uchar const **const row_end, ulong *const master_reclength);
 
 // Fill table's record[0] with default values.
diff --git a/sql/rpl_record_old.cc b/sql/rpl_record_old.cc
index f861ffb10f7..6709c4f6e1a 100644
--- a/sql/rpl_record_old.cc
+++ b/sql/rpl_record_old.cc
@@ -82,12 +82,15 @@ pack_row_old(TABLE *table, MY_BITMAP const* cols,
       ER_NO_DEFAULT_FOR_FIELD
         Returned if one of the fields existing on the slave but not on
         the master does not have a default value (and isn't nullable)
+      ER_SLAVE_CORRUPT_EVENT
+        Wrong data for field found.
  */
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 int
 unpack_row_old(Relay_log_info *rli,
                TABLE *table, uint const colcnt, uchar *record,
-               uchar const *row, MY_BITMAP const *cols,
+               uchar const *row, const uchar *row_buffer_end,
+               MY_BITMAP const *cols,
                uchar const **row_end, ulong *master_reclength,
                MY_BITMAP* const rw_set, Log_event_type const event_type)
 {
@@ -133,10 +136,16 @@ unpack_row_old(Relay_log_info *rli,
     if (bitmap_is_set(cols, field_ptr -  begin_ptr))
     {
       f->move_field_offset(offset);
-      ptr= f->unpack(f->ptr, ptr);
+      ptr= f->unpack(f->ptr, ptr, row_buffer_end, 0);
       f->move_field_offset(-offset);
-      /* Field...::unpack() cannot return 0 */
-      DBUG_ASSERT(ptr != NULL);
+      if (!ptr)
+      {
+        rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT,
+                    "Could not read field `%s` of table `%s`.`%s`",
+                    f->field_name, table->s->db.str,
+                    table->s->table_name.str);
+        return(ER_SLAVE_CORRUPT_EVENT);
+      }
     }
     else
       bitmap_clear_bit(rw_set, field_ptr - begin_ptr);
diff --git a/sql/rpl_record_old.h b/sql/rpl_record_old.h
index 300c9d9a1a6..c9c2f5739cb 100644
--- a/sql/rpl_record_old.h
+++ b/sql/rpl_record_old.h
@@ -23,7 +23,8 @@ size_t pack_row_old(TABLE *table, MY_BITMAP const* cols,
 #ifdef HAVE_REPLICATION
 int unpack_row_old(Relay_log_info *rli,
                    TABLE *table, uint const colcnt, uchar *record,
-                   uchar const *row, MY_BITMAP const *cols,
+                   uchar const *row, uchar const *row_buffer_end,
+                   MY_BITMAP const *cols,
                    uchar const **row_end, ulong *master_reclength,
                    MY_BITMAP* const rw_set,
                    Log_event_type const event_type);
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 3a9990bb20f..962be1c5153 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -40,7 +40,8 @@ Relay_log_info::Relay_log_info()
    inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
    until_log_pos(0), retried_trans(0),
    tables_to_lock(0), tables_to_lock_count(0),
-   last_event_start_time(0), m_flags(0)
+   last_event_start_time(0), m_flags(0),
+   m_annotate_event(0)
 {
   DBUG_ENTER("Relay_log_info::Relay_log_info");
 
@@ -74,6 +75,7 @@ Relay_log_info::~Relay_log_info()
   pthread_cond_destroy(&stop_cond);
   pthread_cond_destroy(&log_space_cond);
   relay_log.cleanup();
+  free_annotate_event();
   DBUG_VOID_RETURN;
 }
 
@@ -176,6 +178,9 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
                         "this problem.", ln);
       name_warning_sent= 1;
     }
+
+    rli->relay_log.is_relay_log= TRUE;
+
     /*
       note, that if open() fails, we'll still have index file open
       but a destructor will take care of that
@@ -189,7 +194,6 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
       sql_print_error("Failed in open_log() called from init_relay_log_info()");
       DBUG_RETURN(1);
     }
-    rli->relay_log.is_relay_log= TRUE;
   }
 
   /* if file does not exist */
@@ -535,8 +539,9 @@ int init_relay_log_pos(Relay_log_info* rli,const char* log,
         Because of we have rli->data_lock and log_lock, we can safely read an
         event
       */
-      if (!(ev=Log_event::read_log_event(rli->cur_log,0,
-                                         rli->relay_log.description_event_for_exec)))
+      if (!(ev= Log_event::read_log_event(rli->cur_log, 0,
+                                          rli->relay_log.description_event_for_exec,
+                                          opt_slave_sql_verify_checksum)))
       {
         DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d",
                            rli->cur_log->error));
@@ -1172,11 +1177,7 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos,
       is that value may take some time to display in
       Seconds_Behind_Master - not critical).
     */
-#ifndef DBUG_OFF
-    if (!(event_creation_time == 0 && debug_not_change_ts_if_art_event > 0))
-#else
-      if (event_creation_time != 0)
-#endif
+    if (!(event_creation_time == 0 IF_DBUG(&& debug_not_change_ts_if_art_event > 0)))
         last_master_timestamp= event_creation_time;
   }
 }
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index 0fd07901fdf..7fcc8f3b490 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -431,8 +431,46 @@ public:
       (m_flags & (1UL << IN_STMT));
   }
 
+  /**
+    Save pointer to Annotate_rows event and switch on the
+    binlog_annotate_row_events for this sql thread.
+    To be called when sql thread recieves an Annotate_rows event.
+  */
+  inline void set_annotate_event(Annotate_rows_log_event *event)
+  {
+    free_annotate_event();
+    m_annotate_event= event;
+    sql_thd->variables.binlog_annotate_row_events= 1;
+  }
+
+  /**
+    Returns pointer to the saved Annotate_rows event or NULL if there is
+    no saved event.
+  */
+  inline Annotate_rows_log_event* get_annotate_event()
+  {
+    return m_annotate_event;
+  }
+
+  /**
+    Delete saved Annotate_rows event (if any) and switch off the
+    binlog_annotate_row_events for this sql thread.
+    To be called when sql thread has applied the last (i.e. with
+    STMT_END_F flag) rbr event.
+  */
+  inline void free_annotate_event()
+  {
+    if (m_annotate_event)
+    {
+      sql_thd->variables.binlog_annotate_row_events= 0;
+      delete m_annotate_event;
+      m_annotate_event= 0;
+    }
+  }
+
 private:
   uint32 m_flags;
+  Annotate_rows_log_event *m_annotate_event;
 };
 
 
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc
index d304d5fbd6a..bba38798ca8 100644
--- a/sql/rpl_utility.cc
+++ b/sql/rpl_utility.cc
@@ -16,6 +16,8 @@
 */
 
 #include "rpl_utility.h"
+
+#ifndef MYSQL_CLIENT
 #include "rpl_rli.h"
 
 /*********************************************************************
@@ -135,7 +137,7 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const
       always read the length in little-endian order.
     */
     Field_blob fb(m_field_metadata[col]);
-    length= fb.get_packed_size(master_data, TRUE);
+    length= fb.get_packed_size(master_data);
 #else
     /*
       Compute the length of the data. We cannot use get_length() here
@@ -226,3 +228,62 @@ table_def::compatible_with(Relay_log_info const *rli_arg, TABLE *table)
 
   return error;
 }
+
+#endif /* MYSQL_CLIENT */
+
+
+/**
+   @param   even_buf    point to the buffer containing serialized event
+   @param   event_len   length of the event accounting possible checksum alg
+
+   @return  TRUE        if test fails
+            FALSE       as success
+*/
+bool event_checksum_test(uchar *event_buf, ulong event_len, uint8 alg)
+{
+  bool res= FALSE;
+  uint16 flags= 0; // to store in FD's buffer flags orig value
+
+  if (alg != BINLOG_CHECKSUM_ALG_OFF && alg != BINLOG_CHECKSUM_ALG_UNDEF)
+  {
+    ha_checksum incoming;
+    ha_checksum computed;
+
+    if (event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
+    {
+#ifndef DBUG_OFF
+      int8 fd_alg= event_buf[event_len - BINLOG_CHECKSUM_LEN - 
+                             BINLOG_CHECKSUM_ALG_DESC_LEN];
+#endif
+      /*
+        FD event is checksummed and therefore verified w/o the binlog-in-use flag
+      */
+      flags= uint2korr(event_buf + FLAGS_OFFSET);
+      if (flags & LOG_EVENT_BINLOG_IN_USE_F)
+        event_buf[FLAGS_OFFSET] &= ~LOG_EVENT_BINLOG_IN_USE_F;
+      /* 
+         The only algorithm currently is CRC32. Zero indicates 
+         the binlog file is checksum-free *except* the FD-event.
+      */
+      DBUG_ASSERT(fd_alg == BINLOG_CHECKSUM_ALG_CRC32 || fd_alg == 0);
+      DBUG_ASSERT(alg == BINLOG_CHECKSUM_ALG_CRC32);
+      /*
+        Complile time guard to watch over  the max number of alg
+      */
+      compile_time_assert(BINLOG_CHECKSUM_ALG_ENUM_END <= 0x80);
+    }
+    incoming= uint4korr(event_buf + event_len - BINLOG_CHECKSUM_LEN);
+    computed= my_checksum(0L, NULL, 0);
+    /* checksum the event content but the checksum part itself */
+    computed= my_checksum(computed, (const uchar*) event_buf, 
+                          event_len - BINLOG_CHECKSUM_LEN);
+    if (flags != 0)
+    {
+      /* restoring the orig value of flags of FD */
+      DBUG_ASSERT(event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT);
+      event_buf[FLAGS_OFFSET]= (uchar) flags;
+    }
+    res= !(computed == incoming);
+  }
+  return DBUG_EVALUATE_IF("simulate_checksum_test_failure", TRUE, res);
+}
diff --git a/sql/scheduler.cc b/sql/scheduler.cc
index d301b205b58..2f8aa2bef11 100644
--- a/sql/scheduler.cc
+++ b/sql/scheduler.cc
@@ -376,9 +376,7 @@ void libevent_kill_thd_callback(int Fd, short, void*)
   {
     THD *thd= (THD*)list->data;
     list= list_rest(list);
-    if (thd->killed == THD::KILL_CONNECTION ||
-        thd->killed == THD::KILL_SYSTEM_THREAD ||
-        thd->killed == THD::KILL_SERVER)
+    if ((int) thd->killed >= (int) KILL_CONNECTION)
     {
       /*
         Delete from libevent and add to the processing queue.
@@ -512,7 +510,7 @@ static void libevent_connection_close(THD *thd)
   DBUG_ENTER("libevent_connection_close");
   DBUG_PRINT("enter", ("thd: %p", thd));
 
-  thd->killed= THD::KILL_CONNECTION;          // Avoid error messages
+  thd->killed= KILL_CONNECTION;                // Avoid error messages
 
   if (thd->net.vio->type != VIO_CLOSED) // not already closed
   {
@@ -535,8 +533,7 @@ static bool libevent_should_close_connection(THD* thd)
 {
   return (thd->net.error ||
           thd->net.vio == 0 ||
-          thd->killed == THD::KILL_CONNECTION ||
-          thd->killed == THD::KILL_SERVER);
+          (int) thd->killed >= (int) KILL_CONNECTION);
 }
 
 
diff --git a/sql/set_var.cc b/sql/set_var.cc
index b5b19f52c9e..e6c1bf94135 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -130,9 +130,11 @@ static void fix_net_read_timeout(THD *thd, enum_var_type type);
 static void fix_net_write_timeout(THD *thd, enum_var_type type);
 static void fix_net_retry_count(THD *thd, enum_var_type type);
 static void fix_max_join_size(THD *thd, enum_var_type type);
-static void fix_query_cache_size(THD *thd, enum_var_type type);
 #ifdef HAVE_QUERY_CACHE
+static void fix_query_cache_size(THD *thd, enum_var_type type);
 static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type);
+static int check_query_cache_type(THD *thd, set_var *var);
+static void fix_query_cache_type(THD *thd, enum_var_type type);
 #endif
 static void fix_myisam_max_sort_file_size(THD *thd, enum_var_type type);
 static void fix_max_binlog_size(THD *thd, enum_var_type type);
@@ -156,6 +158,7 @@ static bool sys_update_slow_log_path(THD *thd, set_var * var);
 static void sys_default_slow_log_path(THD *thd, enum_var_type type);
 static void fix_sys_log_slow_filter(THD *thd, enum_var_type);
 static uchar *get_myisam_mmap_size(THD *thd);
+static uchar *in_transaction(THD *thd);
 static int check_max_allowed_packet(THD *thd,  set_var *var);
 static int check_net_buffer_length(THD *thd,  set_var *var);
 
@@ -187,6 +190,9 @@ static sys_var_const            sys_back_log(&vars, "back_log",
                                              OPT_GLOBAL, SHOW_LONG,
                                              (uchar*) &back_log);
 static sys_var_const_os_str       sys_basedir(&vars, "basedir", mysql_home);
+static sys_var_thd_bool
+sys_binlog_annotate_row_events(&vars, "binlog_annotate_row_events",
+                            &SV::binlog_annotate_row_events);
 static sys_var_long_ptr	sys_binlog_cache_size(&vars, "binlog_cache_size",
 					      &binlog_cache_size);
 static sys_var_thd_binlog_format sys_binlog_format(&vars, "binlog_format",
@@ -261,6 +267,9 @@ static sys_var_thd_ulong sys_deadlock_timeout_long(&vars,
                                  &SV::wt_timeout_long);
 #ifndef DBUG_OFF
 static sys_var_thd_dbug        sys_dbug(&vars, "debug");
+static sys_var_long_ptr        sys_var_debug_binlog_fsync_sleep(&vars,
+                                        "debug_binlog_fsync_sleep",
+                                        &opt_binlog_dbug_fsync_sleep);
 #endif
 static sys_var_enum		sys_delay_key_write(&vars, "delay_key_write",
 					    &delay_key_write_options,
@@ -325,6 +334,11 @@ static sys_var_thd_ulong	sys_interactive_timeout(&vars, "interactive_timeout",
 						&SV::net_interactive_timeout);
 static sys_var_thd_ulong	sys_join_buffer_size(&vars, "join_buffer_size",
 					     &SV::join_buff_size);
+static sys_var_thd_ulonglong    sys_join_buffer_space_limit(&vars,
+                                                "join_buffer_space_limit",
+					        &SV::join_buff_space_limit);
+static sys_var_thd_ulong	sys_join_cache_level(&vars, "join_cache_level",
+					             &SV::join_cache_level);
 static sys_var_key_buffer_size	sys_key_buffer_size(&vars, "key_buffer_size");
 static sys_var_key_cache_long  sys_key_cache_block_size(&vars,
                                    "key_cache_block_size",
@@ -461,8 +475,6 @@ static sys_var_long_ptr	sys_max_write_lock_count(&vars, "max_write_lock_count",
 						 &max_write_lock_count);
 static sys_var_thd_ulong       sys_min_examined_row_limit(&vars, "min_examined_row_limit",
                                                           &SV::min_examined_row_limit);
-static sys_var_thd_ulong       sys_multi_range_count(&vars, "multi_range_count",
-                                              &SV::multi_range_count);
 static sys_var_long_ptr	sys_myisam_data_pointer_size(&vars, "myisam_data_pointer_size",
                                                     &myisam_data_pointer_size);
 static sys_var_thd_ulonglong	sys_myisam_max_sort_file_size(&vars, "myisam_max_sort_file_size", &SV::myisam_max_sort_file_size, fix_myisam_max_sort_file_size, 1);
@@ -520,6 +532,11 @@ static sys_var_thd_ulong        sys_optimizer_search_depth(&vars, "optimizer_sea
                                                    &SV::optimizer_search_depth);
 static sys_var_thd_optimizer_switch   sys_optimizer_switch(&vars, "optimizer_switch",
                                      &SV::optimizer_switch);
+
+static sys_var_thd_ulong        sys_progress_report_time(&vars,
+                                                         "progress_report_time",
+                                                         &SV::progress_report_time);
+
 static sys_var_const            sys_pid_file(&vars, "pid_file",
                                              OPT_GLOBAL, SHOW_CHAR,
                                              (uchar*) pidfile_name);
@@ -543,16 +560,18 @@ static sys_var_bool_ptr	        sys_userstat(&vars, "userstat",
 
 static sys_var_thd_ulong	sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
 					       &SV::read_rnd_buff_size);
+static sys_var_thd_ulong	sys_mrr_buff_size(&vars, "mrr_buffer_size",
+					          &SV::mrr_buff_size);
 static sys_var_thd_ulong	sys_div_precincrement(&vars, "div_precision_increment",
                                               &SV::div_precincrement);
 static sys_var_long_ptr	sys_rpl_recovery_rank(&vars, "rpl_recovery_rank",
 					      &rpl_recovery_rank);
-static sys_var_long_ptr	sys_query_cache_size(&vars, "query_cache_size",
-					     &query_cache_size,
-					     fix_query_cache_size);
 
 static sys_var_thd_ulong	sys_range_alloc_block_size(&vars, "range_alloc_block_size",
 						   &SV::range_alloc_block_size);
+static sys_var_thd_ulong	sys_rowid_merge_buff_size(&vars, "rowid_merge_buff_size",
+					   &SV::rowid_merge_buff_size);
+
 static sys_var_thd_ulong	sys_query_alloc_block_size(&vars, "query_alloc_block_size",
 						   &SV::query_alloc_block_size,
 						   0, fix_thd_mem_root);
@@ -614,17 +633,25 @@ sys_var_enum_const        sys_thread_handling(&vars, "thread_handling",
                                               &thread_handling_typelib);
 
 #ifdef HAVE_QUERY_CACHE
+static sys_var_long_ptr	sys_query_cache_size(&vars, "query_cache_size",
+                                             &query_cache_size,
+                                             fix_query_cache_size);
 static sys_var_long_ptr	sys_query_cache_limit(&vars, "query_cache_limit",
-					      &query_cache.query_cache_limit);
-static sys_var_long_ptr        sys_query_cache_min_res_unit(&vars, "query_cache_min_res_unit",
-						     &query_cache_min_res_unit,
-						     fix_query_cache_min_res_unit);
+                                              &query_cache.query_cache_limit);
+static sys_var_long_ptr
+  sys_query_cache_min_res_unit(&vars, "query_cache_min_res_unit",
+                               &query_cache_min_res_unit,
+                               fix_query_cache_min_res_unit);
 static sys_var_thd_enum	sys_query_cache_type(&vars, "query_cache_type",
 					     &SV::query_cache_type,
-					     &query_cache_type_typelib);
+					     &query_cache_type_typelib,
+                                             fix_query_cache_type,
+                                             check_query_cache_type);
 static sys_var_thd_bool
 sys_query_cache_wlock_invalidate(&vars, "query_cache_wlock_invalidate",
 				 &SV::query_cache_wlock_invalidate);
+static sys_var_bool_ptr       sys_query_cache_strip_comments(&vars, "query_cache_strip_comments",
+                                                       &opt_query_cache_strip_comments);
 #endif /* HAVE_QUERY_CACHE */
 static sys_var_bool_ptr	sys_secure_auth(&vars, "secure_auth", &opt_secure_auth);
 static sys_var_const_str_ptr sys_secure_file_priv(&vars, "secure_file_priv",
@@ -980,6 +1007,12 @@ static sys_var_enum_const     sys_plugin_maturity(&vars, "plugin_maturity",
                                                   &plugin_maturity,
                                                   &plugin_maturity_values);
 
+static sys_var_readonly       sys_in_transaction(&vars, "in_transaction",
+                                                 OPT_SESSION, SHOW_BOOL,
+                                                 in_transaction);
+
+
+
 bool sys_var::check(THD *thd, set_var *var)
 {
   var->save_result.ulonglong_value= var->value->val_int();
@@ -1206,10 +1239,9 @@ static void fix_net_retry_count(THD *thd __attribute__((unused)),
 {}
 #endif /* HAVE_REPLICATION */
 
-
+#ifdef HAVE_QUERY_CACHE
 static void fix_query_cache_size(THD *thd, enum_var_type type)
 {
-#ifdef HAVE_QUERY_CACHE
   ulong new_cache_size= query_cache.resize(query_cache_size);
 
   /*
@@ -1223,11 +1255,60 @@ static void fix_query_cache_size(THD *thd, enum_var_type type)
 			query_cache_size, new_cache_size);
   
   query_cache_size= new_cache_size;
-#endif
 }
 
 
-#ifdef HAVE_QUERY_CACHE
+/**
+  Trigger before query_cache_type variable is updated.
+  @param thd Thread handler
+  @param var Pointer to the new variable status
+
+  @return Status code
+   @retval TRUE  Failure
+   @retval FALSE Success
+*/
+
+static int check_query_cache_type(THD *thd, set_var *var)
+{
+  /*
+    Don't allow changes of the query_cache_type if the query cache
+    is disabled.
+  */
+  if (query_cache.is_disable_in_progress())
+  {
+    my_error(ER_QUERY_CACHE_IS_DISABLED, MYF(0));
+    return TRUE;
+  }
+  if (var->type != OPT_GLOBAL &&
+      global_system_variables.query_cache_type == 0 &&
+      var->value->val_int() != 0)
+  {
+    my_error(ER_QUERY_CACHE_IS_GLOBALY_DISABLED, MYF(0));
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+static void fix_query_cache_type(THD *thd, enum_var_type type)
+{
+  if (type == OPT_GLOBAL)
+  {
+    if (global_system_variables.query_cache_type != 0 &&
+        query_cache.is_disabled())
+    {
+      /* if disabling in progress variable will not be set */
+      DBUG_ASSERT(!query_cache.is_disable_in_progress());
+      /* Enable query cache because it was disabled */
+      fix_query_cache_size(thd, type);
+    }
+    else if (global_system_variables.query_cache_type == 0)
+      query_cache.disable_query_cache(thd);
+  }
+}
+
+
 static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type)
 {
   query_cache_min_res_unit= 
@@ -2835,38 +2916,43 @@ int set_var_collation_client::update(THD *thd)
 
 bool sys_var_timestamp::check(THD *thd, set_var *var)
 {
-  longlong val;
-  var->save_result.ulonglong_value= var->value->val_int();
-  val= (longlong) var->save_result.ulonglong_value;
-  if (val != 0 &&          // this is how you set the default value
-      (val < TIMESTAMP_MIN_VALUE || val > TIMESTAMP_MAX_VALUE))
+  ulonglong sec;
+  ulong sec_part;
+  char buf[64], *errval= 0;
+  if (var->value->get_seconds(&sec, &sec_part))
+    errval= llstr(sec, buf);
+  else if (sec > TIMESTAMP_MAX_VALUE)
+    errval= ullstr(sec, buf);
+
+  if (errval)
   {
-    char buf[64];
-    my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "timestamp", llstr(val, buf));
+    my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "timestamp", errval);
     return TRUE;
   }
+  var->save_result.ulonglong_value= hrtime_from_time(sec)+sec_part;
   return FALSE;
 }
 
 
 bool sys_var_timestamp::update(THD *thd,  set_var *var)
 {
-  thd->set_time((time_t) var->save_result.ulonglong_value);
+  my_hrtime_t hrtime = { var->save_result.ulonglong_value };
+  thd->set_time(hrtime);
   return FALSE;
 }
 
 
 void sys_var_timestamp::set_default(THD *thd, enum_var_type type)
 {
-  thd->user_time=0;
+  thd->user_time.val= 0;
 }
 
 
 uchar *sys_var_timestamp::value_ptr(THD *thd, enum_var_type type,
 				   LEX_STRING *base)
 {
-  thd->sys_var_tmp.long_value= (long) thd->start_time;
-  return (uchar*) &thd->sys_var_tmp.long_value;
+  thd->sys_var_tmp.double_value= thd->start_time + thd->start_time_sec_part/1e6;
+  return (uchar*) &thd->sys_var_tmp.double_value;
 }
 
 
@@ -3001,7 +3087,19 @@ void sys_var_thd_time_zone::set_default(THD *thd, enum_var_type type)
 bool sys_var_max_user_conn::check(THD *thd, set_var *var)
 {
   if (var->type == OPT_GLOBAL)
+  {
+    if (! max_user_connections_checking)
+    {
+      /*
+        We can't change the value of max_user_connections from 0 as then
+        connect counting would be wrong.
+      */
+      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
+               "--max-user-connections=0");
+      return TRUE;
+    }
     return sys_var_thd::check(thd, var);
+  }
   else
   {
     /*
@@ -3017,7 +3115,7 @@ bool sys_var_max_user_conn::update(THD *thd, set_var *var)
 {
   DBUG_ASSERT(var->type == OPT_GLOBAL);
   pthread_mutex_lock(&LOCK_global_system_variables);
-  max_user_connections= (uint)var->save_result.ulonglong_value;
+  max_user_connections= (int) var->save_result.ulonglong_value;
   pthread_mutex_unlock(&LOCK_global_system_variables);
   return 0;
 }
@@ -3119,7 +3217,7 @@ void sys_var_thd_lc_time_names::set_default(THD *thd, enum_var_type type)
 }
 
 /*
-  Handling of microseoncds given as seconds.part_seconds
+  Handling of microseconds given as seconds.part_seconds
 
   NOTES
     The argument to long query time is in seconds in decimal
@@ -3165,10 +3263,10 @@ void sys_var_microseconds::set_default(THD *thd, enum_var_type type)
 uchar *sys_var_microseconds::value_ptr(THD *thd, enum_var_type type,
                                           LEX_STRING *base)
 {
-  thd->tmp_double_value= (double) ((type == OPT_GLOBAL) ?
+  thd->sys_var_tmp.double_value= (double) ((type == OPT_GLOBAL) ?
                                    global_system_variables.*offset :
                                    thd->variables.*offset) / 1000000.0;
-  return (uchar*) &thd->tmp_double_value;
+  return (uchar*) &thd->sys_var_tmp.double_value;
 }
 
 
@@ -3199,35 +3297,26 @@ static bool set_option_log_bin_bit(THD *thd, set_var *var)
 
 static bool set_option_autocommit(THD *thd, set_var *var)
 {
-  /* The test is negative as the flag we use is NOT autocommit */
-
-  ulonglong org_options= thd->options;
+  ulonglong new_options= thd->options;
 
-  if (var->save_result.ulong_value != 0)
-    thd->options&= ~((sys_var_thd_bit*) var->var)->bit_flag;
+  /* The test is negative as the flag we use is NOT autocommit */
+  if (var->save_result.ulong_value)
+    new_options&= ~OPTION_NOT_AUTOCOMMIT;
   else
-    thd->options|= ((sys_var_thd_bit*) var->var)->bit_flag;
+    new_options|= OPTION_NOT_AUTOCOMMIT;
 
-  if ((org_options ^ thd->options) & OPTION_NOT_AUTOCOMMIT)
+  if ((new_options ^ thd->options) & OPTION_NOT_AUTOCOMMIT)
   {
-    if ((org_options & OPTION_NOT_AUTOCOMMIT))
+    if ((thd->options & OPTION_NOT_AUTOCOMMIT))
     {
-      /* We changed to auto_commit mode */
-      if (thd->transaction.xid_state.xa_state != XA_NOTR)
-      {
-        thd->options= org_options;
-        my_error(ER_XAER_RMFAIL, MYF(0),
-                 xa_state_names[thd->transaction.xid_state.xa_state]);
+      if (end_active_trans(thd))
         return 1;
-      }
-      thd->options&= ~(ulonglong) (OPTION_BEGIN | OPTION_KEEP_LOG);
-      thd->transaction.all.modified_non_trans_table= FALSE;
       thd->server_status|= SERVER_STATUS_AUTOCOMMIT;
-      if (ha_commit(thd))
-	return 1;
+      thd->options= new_options;
     }
     else
     {
+      thd->options= new_options;
       thd->transaction.all.modified_non_trans_table= FALSE;
       thd->server_status&= ~SERVER_STATUS_AUTOCOMMIT;
     }
@@ -3329,6 +3418,12 @@ static uchar *get_myisam_mmap_size(THD *thd)
   return (uchar *)&myisam_mmap_size;
 }
 
+static uchar *in_transaction(THD *thd)
+{
+  thd->sys_var_tmp.my_bool_value=
+    test(thd->server_status & SERVER_STATUS_IN_TRANS);
+  return (uchar*) &thd->sys_var_tmp.my_bool_value;
+}
 
 /****************************************************************************
   Main handling of variables:
@@ -3651,6 +3746,16 @@ bool not_all_support_one_shot(List<set_var_base> *var_list)
   Functions to handle SET mysql_internal_variable=const_expr
 *****************************************************************************/
 
+/**
+  Verify that the supplied value is correct.
+
+  @param thd Thread handler
+
+  @return status code
+    @retval -1 Failure
+    @retval 0 Success
+*/
+
 int set_var::check(THD *thd)
 {
   if (var->is_readonly())
@@ -4044,7 +4149,7 @@ bool
 sys_var_thd_optimizer_switch::
 symbolic_mode_representation(THD *thd, ulonglong val, LEX_STRING *rep)
 {
-  char buff[STRING_BUFFER_USUAL_SIZE*8];
+  char buff[STRING_BUFFER_USUAL_SIZE*18];
   String tmp(buff, sizeof(buff), &my_charset_latin1);
   int i;
   ulonglong bit;
diff --git a/sql/set_var.h b/sql/set_var.h
index 95885357b83..c591ab5b866 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -516,10 +516,15 @@ public:
   { chain_sys_var(chain); }
   bool check(THD *thd, set_var *var)
   {
-    int ret= 0;
-    if (check_func)
-      ret= (*check_func)(thd, var);
-    return ret ? ret : check_enum(thd, var, enum_names);
+    /*
+      check_enum fails if the character representation supplied was wrong
+      or that the integer value was wrong or missing.
+    */
+    if (check_enum(thd, var, enum_names))
+      return TRUE;
+    if ((check_func && (*check_func)(thd, var)))
+      return TRUE;
+    return FALSE;
   }
   bool update(THD *thd, set_var *var);
   void set_default(THD *thd, enum_var_type type);
@@ -675,8 +680,12 @@ public:
   void set_default(THD *thd, enum_var_type type);
   bool check_type(enum_var_type type)    { return type == OPT_GLOBAL; }
   bool check_default(enum_var_type type) { return 0; }
-  SHOW_TYPE show_type() { return SHOW_LONG; }
+  SHOW_TYPE show_type() { return SHOW_DOUBLE; }
   uchar *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
+  virtual bool check_update_type(Item_result type)
+  {
+    return type != INT_RESULT && type != REAL_RESULT && type != DECIMAL_RESULT;
+  }
 };
 
 
diff --git a/sql/share/Makefile.am b/sql/share/Makefile.am
index 892a720900e..8b7792481e6 100644
--- a/sql/share/Makefile.am
+++ b/sql/share/Makefile.am
@@ -49,6 +49,7 @@ install-data-local:
 	$(INSTALL_DATA) $(srcdir)/charsets/*.xml $(DESTDIR)$(pkgdatadir)/charsets
 
 # FIXME maybe shouldn't remove, could be needed by other installation?
+# Note that this removes the directory that support-files are using!
 uninstall-local:
 	@RM@ -f -r $(DESTDIR)$(pkgdatadir)
 
diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt
index d3d038e9b82..e8d9f8b8e01 100644
--- a/sql/share/errmsg.txt
+++ b/sql/share/errmsg.txt
@@ -5321,7 +5321,7 @@ ER_NO_DEFAULT_FOR_FIELD
 ER_DIVISION_BY_ZERO 22012 
         eng "Division by 0"
         ger "Division durch 0"
-ER_TRUNCATED_WRONG_VALUE_FOR_FIELD  
+ER_TRUNCATED_WRONG_VALUE_FOR_FIELD 22007
         eng "Incorrect %-.32s value: '%-.128s' for column '%.192s' at row %lu"
         ger "Falscher %-.32s-Wert: '%-.128s' f�r Feld '%.192s' in Zeile %lu"
 ER_ILLEGAL_VALUE_FOR_TYPE 22007 
@@ -5513,11 +5513,11 @@ ER_SP_NO_RECURSION
         eng "Recursive stored functions and triggers are not allowed."
         ger "Rekursive gespeicherte Routinen und Triggers sind nicht erlaubt"
 ER_TOO_BIG_SCALE 42000 S1009
-        eng "Too big scale %d specified for column '%-.192s'. Maximum is %lu."
-        ger "Zu gro�er Skalierungsfaktor %d f�r Feld '%-.192s' angegeben. Maximum ist %lu"
+        eng "Too big scale %u specified for '%-.192s'. Maximum is %lu."
+        ger "Zu gro�er Skalierungsfaktor %u f�r '%-.192s' angegeben. Maximum ist %lu"
 ER_TOO_BIG_PRECISION 42000 S1009
-        eng "Too big precision %d specified for column '%-.192s'. Maximum is %lu."
-        ger "Zu gro�e Genauigkeit %d f�r Feld '%-.192s' angegeben. Maximum ist %lu"
+        eng "Too big precision %u specified for '%-.192s'. Maximum is %lu."
+        ger "Zu gro�e Genauigkeit %u f�r '%-.192s' angegeben. Maximum ist %lu"
 ER_M_BIGGER_THAN_D 42000 S1009
         eng "For float(M,D), double(M,D) or decimal(M,D), M must be >= D (column '%-.192s')."
         ger "F�r FLOAT(M,D), DOUBLE(M,D) oder DECIMAL(M,D) muss M >= D sein (Feld '%-.192s')"
@@ -5555,8 +5555,8 @@ ER_WARN_CANT_DROP_DEFAULT_KEYCACHE
         eng "Cannot drop default keycache"
         ger "Der vorgabem��ige Schl�ssel-Cache kann nicht gel�scht werden"
 ER_TOO_BIG_DISPLAYWIDTH 42000 S1009
-        eng "Display width out of range for column '%-.192s' (max = %lu)"
-        ger "Anzeigebreite au�erhalb des zul�ssigen Bereichs f�r Spalte '%-.192s' (Maximum: %lu)"
+        eng "Display width out of range for '%-.192s' (max = %lu)"
+        ger "Anzeigebreite au�erhalb des zul�ssigen Bereichs f�r '%-.192s' (Maximum: %lu)"
 ER_XAER_DUPID XAE08
         eng "XAER_DUPID: The XID already exists"
         ger "XAER_DUPID: Die XID existiert bereits"
@@ -6214,6 +6214,19 @@ ER_DEBUG_SYNC_HIT_LIMIT
   eng "debug sync point hit limit reached"
   ger "Debug Sync Point Hit Limit erreicht"
 
+#
+# MariaDB error messages section starts here
+#
+
+# The following is here to allow us to detect if there was missing
+# error messages in the errmsg.sys file
+
+ER_LAST_MYSQL_ERROR_MESSAGE
+   eng ""
+
+# MariaDB error numbers starts from 1900
+start-error-number 1900
+
 ER_VCOL_BASED_ON_VCOL
         eng "A computed column cannot be based on a computed column"
 
@@ -6249,3 +6262,38 @@ ER_UNKNOWN_OPTION
         eng "Unknown option '%-.64s'"
 ER_BAD_OPTION_VALUE
         eng "Incorrect value '%-.64s' for option '%-.64s'"
+ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
+  eng "Replication event checksum verification failed while reading from network."
+ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE
+  eng "Replication event checksum verification failed while reading from a log file."
+ER_CANT_DO_ONLINE
+        eng "Can't execute the given '%s' command as online"
+ER_DATA_OVERFLOW 22003
+        eng "Got overflow when converting '%-.128s' to %-.32s. Value truncated."
+ER_DATA_TRUNCATED 22003
+        eng "Truncated value '%-.128s' when converting to %-.32s"
+ER_BAD_DATA 22007
+        eng "Encountered illegal value '%-.128s' when converting to %-.32s"
+ER_DYN_COL_WRONG_FORMAT
+        eng "Encountered illegal format of dynamic column string"
+ER_DYN_COL_IMPLEMENTATION_LIMIT
+        eng "Dynamic column implementation limit reached"
+ER_DYN_COL_DATA 22007
+        eng "Illegal value used as argument of dynamic column function"
+ER_DYN_COL_WRONG_CHARSET
+        eng "Dynamic column contains unknown character set"
+ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
+        eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."
+ER_QUERY_CACHE_IS_DISABLED
+        eng "Query cache is disabled (resize or similar command in progress); repeat this command later"
+ER_QUERY_CACHE_IS_GLOBALY_DISABLED
+        eng "Query cache is globally disabled and you can't enable it only for this session"
+ER_VIEW_ORDERBY_IGNORED
+        eng "View '%-.192s'.'%-.192s' ORDER BY clause ignored because there is other ORDER BY clause already."
+ER_CONNECTION_KILLED 70100 
+        eng "Connection was killed"
+ER_INTERNAL_ERROR
+        eng "Internal error: '%-.192s'"
+ER_SPATIAL_MUST_HAVE_GEOM_COL 42000
+  eng "A SPATIAL index may only contain a geometrical type column"
+
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index 3847b7a5f6d..d58c28c2936 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -102,6 +102,8 @@ extern "C" sig_handler handle_fatal_signal(int sig)
     "diagnose the problem, but since we have already crashed, \n"
     "something is definitely wrong and this may fail.\n\n");
 
+  set_server_version();
+  my_safe_printf_stderr("Server version: %s\n", server_version);
   my_safe_printf_stderr("key_buffer_size=%lu\n",
                         (ulong) dflt_key_cache->key_cache_mem_size);
 
@@ -165,37 +167,53 @@ extern "C" sig_handler handle_fatal_signal(int sig)
   {
     const char *kreason= "UNKNOWN";
     switch (thd->killed) {
-    case THD::NOT_KILLED:
+    case NOT_KILLED:
+    case KILL_HARD_BIT:
       kreason= "NOT_KILLED";
       break;
-    case THD::KILL_BAD_DATA:
+    case KILL_BAD_DATA:
+    case KILL_BAD_DATA_HARD:
       kreason= "KILL_BAD_DATA";
       break;
-    case THD::KILL_CONNECTION:
+    case KILL_CONNECTION:
+    case KILL_CONNECTION_HARD:
       kreason= "KILL_CONNECTION";
       break;
-    case THD::KILL_QUERY:
+    case KILL_QUERY:
+    case KILL_QUERY_HARD:
       kreason= "KILL_QUERY";
       break;
-    case THD::KILL_SYSTEM_THREAD:
+    case KILL_SYSTEM_THREAD:
+    case KILL_SYSTEM_THREAD_HARD:
       kreason= "KILL_SYSTEM_THREAD";
       break;
-    case THD::KILL_SERVER:
+    case KILL_SERVER:
+    case KILL_SERVER_HARD:
       kreason= "KILL_SERVER";
       break;
-    case THD::KILLED_NO_VALUE:
-      kreason= "KILLED_NO_VALUE";
-      break;
     }
     my_safe_printf_stderr("%s", "\n"
       "Trying to get some variables.\n"
       "Some pointers may be invalid and cause the dump to abort.\n");
 
     my_safe_printf_stderr("Query (%p): ", thd->query());
-    my_safe_print_str(thd->query(), min(1024U, thd->query_length()));
+    my_safe_print_str(thd->query(), min(65535, thd->query_length()));
     my_safe_printf_stderr("Connection ID (thread ID): %lu\n",
                           (ulong) thd->thread_id);
-    my_safe_printf_stderr("Status: %s\n\n", kreason);
+    my_safe_printf_stderr("Status: %s\n", kreason);
+
+    ulonglong optsw= thd->variables.optimizer_switch;
+    const char **optimizer_switch_names= optimizer_switch_typelib.type_names;
+    my_safe_printf_stderr("Optimizer switch: ");
+    for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1)
+    {
+      if (i)
+        my_safe_printf_stderr(",");
+      my_safe_printf_stderr("%s=%s",
+                            optimizer_switch_names[i],
+                            optsw & 1 ? "on" : "off");
+    }
+    my_safe_printf_stderr("\n\n");
   }
   my_safe_printf_stderr("%s",
     "The manual page at "
diff --git a/sql/slave.cc b/sql/slave.cc
index ff12a660883..7935754e25c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -526,7 +526,7 @@ terminate_slave_thread(THD *thd,
     IF_DBUG(int err= ) pthread_kill(thd->real_id, thr_client_alarm);
     DBUG_ASSERT(err != EINVAL);
 #endif
-    thd->awake(THD::NOT_KILLED);
+    thd->awake(KILL_CONNECTION);
     pthread_mutex_unlock(&thd->LOCK_thd_data);
 
     /*
@@ -843,6 +843,7 @@ bool is_network_error(uint errorno)
       errorno == CR_SERVER_GONE_ERROR ||
       errorno == CR_SERVER_LOST ||
       errorno == ER_CON_COUNT_ERROR ||
+      errorno == ER_CONNECTION_KILLED ||
       errorno == ER_NEW_ABORTING_CONNECTION ||
       errorno == ER_SERVER_SHUTDOWN)
     return TRUE;
@@ -945,6 +946,48 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
   }
 
   /*
+    FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
+    It's necessary to adjust FD_q.(A) at this point because in the following
+    course FD_q is going to be dumped to RL.
+    Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m) 
+    in queue_event and the master's (A) is installed.
+    At one step with the assignment the Relay-Log's checksum alg is set to 
+    a new value: RL.(A) := FD_q.(A). If the slave service is stopped
+    the last time assigned RL.(A) will be passed over to the restarting
+    service (to the current execution point).
+    RL.A is a "codec" to verify checksum in queue_event() almost all the time
+    the first fake Rotate event.
+    Starting from this point IO thread will executes the following checksum
+    warmup sequence  of actions:
+
+    FD_q.A := RL.A,
+    A_m^0 := master.@@global.binlog_checksum,
+    {queue_event(R_f): verifies(R_f, A_m^0)},
+    {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
+                        FD_q := FD_m, RL.A := FD_q.A)}
+
+    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
+    docs lines (binlog.h).
+    In above A_m^0 - the value of master's
+    @@binlog_checksum determined in the upcoming handshake (stored in
+    mi->checksum_alg_before_fd).
+
+
+    After the warm-up sequence IO gets to "normal" checksum verification mode
+    to use RL.A in 
+    
+    {queue_event(E_m): verifies(E_m, RL.A)}
+
+    until it has received a new FD_m.
+  */
+  mi->rli.relay_log.description_event_for_queue->checksum_alg=
+    mi->rli.relay_log.relay_log_checksum_alg;
+
+  DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF); 
+  /*
     Compare the master and slave's clock. Do not die if master's clock is
     unavailable (very old master not supporting UNIX_TIMESTAMP()?).
   */
@@ -1183,6 +1226,103 @@ when it try to get the value of TIME_ZONE global variable from master.";
     }
   }
 
+  /*
+    Querying if master is capable to checksum and notifying it about own
+    CRC-awareness. The master's side instant value of @@global.binlog_checksum 
+    is stored in the dump thread's uservar area as well as cached locally
+    to become known in consensus by master and slave.
+  */
+  DBUG_EXECUTE_IF("simulate_slave_unaware_checksum",
+                  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
+                  goto past_checksum;);
+  {
+    int rc;
+    const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
+    master_res= NULL;
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
+    /*
+      @c checksum_alg_before_fd is queried from master in this block.
+      If master is old checksum-unaware the value stays undefined.
+      Once the first FD will be received its alg descriptor will replace
+      the being queried one.
+    */
+    rc= mysql_real_query(mysql, query, strlen(query));
+    if (rc != 0)
+    {
+      if (check_io_slave_killed(mi->io_thd, mi, NULL))
+        goto slave_killed_err;
+
+      if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
+      {
+        // this is tolerable as OM -> NS is supported
+        mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                   "Notifying master by %s failed with "
+                   "error: %s", query, mysql_error(mysql));
+      }
+      else
+      {
+        if (is_network_error(mysql_errno(mysql)))
+        {
+          mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                     "Notifying master by %s failed with "
+                     "error: %s", query, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto network_err;
+        }
+        else
+        {
+          errmsg= "The slave I/O thread stops because a fatal error is encountered "
+            "when it tried to SET @master_binlog_checksum on master.";
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+          mysql_free_result(mysql_store_result(mysql));
+          goto err;
+        }
+      }
+    }
+    else
+    {
+      mysql_free_result(mysql_store_result(mysql));
+      if (!mysql_real_query(mysql,
+                            STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
+          (master_res= mysql_store_result(mysql)) &&
+          (master_row= mysql_fetch_row(master_res)) &&
+          (master_row[0] != NULL))
+      {
+        mi->checksum_alg_before_fd= (uint8)
+          find_type(master_row[0], &binlog_checksum_typelib, 1) - 1;
+        // valid outcome is either of
+        DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
+                    mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
+      }
+      else if (check_io_slave_killed(mi->io_thd, mi, NULL))
+        goto slave_killed_err;
+      else if (is_network_error(mysql_errno(mysql)))
+      {
+        mi->report(WARNING_LEVEL, mysql_errno(mysql),
+                   "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
+        goto network_err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because a fatal error is encountered "
+          "when it tried to SELECT @master_binlog_checksum.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
+        mysql_free_result(mysql_store_result(mysql));
+        goto err;
+      }
+    }
+    if (master_res)
+    {
+      mysql_free_result(master_res);
+      master_res= NULL;
+    }
+  }
+
+#ifndef DBUG_OFF
+past_checksum:
+#endif
 err:
   if (errmsg)
   {
@@ -1199,6 +1339,11 @@ network_err:
   if (master_res)
     mysql_free_result(master_res);
   DBUG_RETURN(2);
+
+slave_killed_err:
+  if (master_res)
+    mysql_free_result(master_res);
+  DBUG_RETURN(2);
 }
 
 /*
@@ -1875,6 +2020,9 @@ static int request_dump(MYSQL* mysql, Master_info* mi,
   
   *suppress_warnings= FALSE;
 
+  if (opt_log_slave_updates && opt_replicate_annotate_row_events)
+    binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
+
   // TODO if big log files: Change next to int8store()
   int4store(buf, (ulong) mi->master_log_pos);
   int2store(buf + 4, binlog_flags);
@@ -2120,7 +2268,11 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
   thd->set_time();                            // time the query
   thd->lex->current_select= 0;
   if (!ev->when)
-    ev->when= my_time(0);
+  {
+    my_hrtime_t hrtime= my_hrtime();
+    ev->when= hrtime_to_my_time(hrtime);
+    ev->when_sec_part= hrtime_sec_part(hrtime);
+  }
   ev->thd = thd; // because up to this point, ev->thd == 0
 
   int reason= ev->shall_skip(rli);
@@ -2269,17 +2421,41 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
     }
     exec_res= apply_event_and_update_pos(ev, thd, rli);
 
-    /*
-      Format_description_log_event should not be deleted because it will be
-      used to read info about the relay log's format; it will be deleted when
-      the SQL thread does not need it, i.e. when this thread terminates.
-    */
-    if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
-    {
-      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
-      delete ev;
+    switch (ev->get_type_code()) {
+      case FORMAT_DESCRIPTION_EVENT:
+        /*
+          Format_description_log_event should not be deleted because it
+          will be used to read info about the relay log's format;
+          it will be deleted when the SQL thread does not need it,
+          i.e. when this thread terminates.
+        */
+        break;
+      case ANNOTATE_ROWS_EVENT:
+        /*
+          Annotate_rows event should not be deleted because after it has
+          been applied, thd->query points to the string inside this event.
+          The thd->query will be used to generate new Annotate_rows event
+          during applying the subsequent Rows events.
+        */
+        rli->set_annotate_event((Annotate_rows_log_event*) ev);
+        break;
+      case DELETE_ROWS_EVENT:
+      case UPDATE_ROWS_EVENT:
+      case WRITE_ROWS_EVENT:
+        /*
+          After the last Rows event has been applied, the saved Annotate_rows
+          event (if any) is not needed anymore and can be deleted.
+        */
+        if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F))
+          rli->free_annotate_event();
+        /* fall through */
+      default:
+        DBUG_PRINT("info", ("Deleting the event after it has been executed"));
+        delete ev;
+        break;
     }
 
+
     /*
       update_log_pos failed: this should not happen, so we don't
       retry.
@@ -2507,9 +2683,8 @@ pthread_handler_t handle_slave_io(void *arg)
   if (init_slave_thread(thd, SLAVE_THD_IO))
   {
     pthread_cond_broadcast(&mi->start_cond);
-    pthread_mutex_unlock(&mi->run_lock);
     sql_print_error("Failed during slave I/O thread initialization");
-    goto err;
+    goto err_during_init;
   }
   pthread_mutex_lock(&LOCK_thread_count);
   threads.append(thd);
@@ -2777,6 +2952,7 @@ err:
   thd_proc_info(thd, "Waiting for slave mutex on exit");
   pthread_mutex_lock(&mi->run_lock);
 
+err_during_init:
   /* Forget the relay log's format */
   delete mi->rli.relay_log.description_event_for_queue;
   mi->rli.relay_log.description_event_for_queue= 0;
@@ -2900,14 +3076,19 @@ pthread_handler_t handle_slave_sql(void *arg)
       will be stuck if we fail here
     */
     pthread_cond_broadcast(&rli->start_cond);
-    pthread_mutex_unlock(&rli->run_lock);
     rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, 
                 "Failed during slave thread initialization");
-    goto err;
+    goto err_during_init;
   }
   thd->init_for_queries();
   thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
   set_thd_in_use_temporary_tables(rli);   // (re)set sql_thd in use for saved temp tables
+  /*
+    binlog_annotate_row_events must be TRUE only after an Annotate_rows event
+    has been recieved and only till the last corresponding rbr event has been
+    applied. In all other cases it must be FALSE.
+  */
+  thd->variables.binlog_annotate_row_events= 0;
   pthread_mutex_lock(&LOCK_thread_count);
   threads.append(thd);
   pthread_mutex_unlock(&LOCK_thread_count);
@@ -3150,6 +3331,7 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
   thd->reset_db(NULL, 0);
   thd_proc_info(thd, "Waiting for slave mutex on exit");
   pthread_mutex_lock(&rli->run_lock);
+err_during_init:
   /* We need data_lock, at least to wake up any waiting master_pos_wait() */
   pthread_mutex_lock(&rli->data_lock);
   DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
@@ -3360,10 +3542,15 @@ static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
   */
   if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
   {
+    DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                mi->rli.relay_log.relay_log_checksum_alg);
+    
     delete mi->rli.relay_log.description_event_for_queue;
     /* start from format 3 (MySQL 4.0) again */
     mi->rli.relay_log.description_event_for_queue= new
       Format_description_log_event(3);
+    mi->rli.relay_log.description_event_for_queue->checksum_alg=
+      mi->rli.relay_log.relay_log_checksum_alg;    
   }
   /*
     Rotate the relay log makes binlog format detection easier (at next slave
@@ -3390,7 +3577,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
     If we get Load event, we need to pass a non-reusable buffer
     to read_log_event, so we do a trick
   */
-  if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
+  if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
   {
     if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
     {
@@ -3416,8 +3603,9 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
     Append_block/Exec_load (the SQL thread needs the data, as that thread is not
     connected to the master).
   */
-  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
-                                            mi->rli.relay_log.description_event_for_queue);
+  Log_event *ev=
+    Log_event::read_log_event(buf, event_len, &errmsg,
+                              mi->rli.relay_log.description_event_for_queue, 0);
   if (unlikely(!ev))
   {
     sql_print_error("Read invalid event from master: '%s',\
@@ -3504,8 +3692,9 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
   DBUG_ENTER("queue_binlog_ver_3_event");
 
   /* read_log_event() will adjust log_pos to be end_log_pos */
-  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
-                                            mi->rli.relay_log.description_event_for_queue);
+  Log_event *ev=
+    Log_event::read_log_event(buf,event_len, &errmsg,
+                              mi->rli.relay_log.description_event_for_queue, 0);
   if (unlikely(!ev))
   {
     sql_print_error("Read invalid event from master: '%s',\
@@ -3531,6 +3720,7 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
     inc_pos= event_len;
     break;
   }
+
   if (unlikely(rli->relay_log.append(ev)))
   {
     delete ev;
@@ -3592,18 +3782,79 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
   ulong inc_pos;
   Relay_log_info *rli= &mi->rli;
   pthread_mutex_t *log_lock= rli->relay_log.get_log_lock();
+  bool unlock_data_lock= TRUE;
+  /*
+    FD_q must have been prepared for the first R_a event
+    inside get_master_version_and_clock()
+    Show-up of FD:s affects checksum_alg at once because
+    that changes FD_queue.
+  */
+  uint8 checksum_alg= mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ? 
+    mi->checksum_alg_before_fd :
+    mi->rli.relay_log.relay_log_checksum_alg;
+
+  char *save_buf= NULL; // needed for checksumming the fake Rotate event
+  char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
+
+  DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || 
+              checksum_alg == BINLOG_CHECKSUM_ALG_CRC32); 
+
   DBUG_ENTER("queue_event");
+  /*
+    FD_queue checksum alg description does not apply in a case of
+    FD itself. The one carries both parts of the checksum data.
+  */
+  if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
+  {
+    checksum_alg= get_checksum_alg(buf, event_len);
+  }
+  else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3)
+  {
+    // checksum behaviour is similar to the pre-checksum FD handling
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
+    mi->rli.relay_log.description_event_for_queue->checksum_alg=
+      mi->rli.relay_log.relay_log_checksum_alg= checksum_alg=
+      BINLOG_CHECKSUM_ALG_OFF;
+  }
+
+  // does not hold always because of old binlog can work with NM 
+  // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
+
+  // should hold unless manipulations with RL. Tests that do that
+  // will have to refine the clause.
+  DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
+              BINLOG_CHECKSUM_ALG_UNDEF);
+              
+  // Emulate the network corruption
+  DBUG_EXECUTE_IF("corrupt_queue_event",
+    if (buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
+    {
+      char *debug_event_buf_c = (char*) buf;
+      int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
+      debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
+      DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
+      DBUG_SET("-d,corrupt_queue_event");
+    }
+  );
+                                              
+  if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
+  {
+    error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
+    unlock_data_lock= FALSE;
+    goto err;
+  }
 
   LINT_INIT(inc_pos);
 
   if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
-      buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
+      (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
     DBUG_RETURN(queue_old_event(mi,buf,event_len));
 
   LINT_INIT(inc_pos);
   pthread_mutex_lock(&mi->data_lock);
 
-  switch (buf[EVENT_TYPE_OFFSET]) {
+  switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
   case STOP_EVENT:
     /*
       We needn't write this event to the relay log. Indeed, it just indicates a
@@ -3620,12 +3871,67 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     goto err;
   case ROTATE_EVENT:
   {
-    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);
-    if (unlikely(process_io_rotate(mi,&rev)))
+    Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
+                         event_len - BINLOG_CHECKSUM_LEN : event_len,
+                         mi->rli.relay_log.description_event_for_queue);
+
+    if (unlikely(process_io_rotate(mi, &rev)))
     {
       error= 1;
       goto err;
     }
+    /* 
+       Checksum special cases for the fake Rotate (R_f) event caused by the protocol
+       of events generation and serialization in RL where Rotate of master is 
+       queued right next to FD of slave.
+       Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
+       Two special rules apply only to the first R_f which comes in before any FD_m.
+       The 2nd R_f should be compatible with the FD_s that must have taken over
+       the last seen FD_m's (A).
+       
+       RSC_1: If OM \and fake Rotate \and slave is configured to
+              to compute checksum for its first FD event for RL
+              the fake Rotate gets checksummed here.
+    */
+    if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
+        mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+    {
+      ha_checksum rot_crc= my_checksum(0L, NULL, 0);
+      event_len += BINLOG_CHECKSUM_LEN;
+      memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[EVENT_LEN_OFFSET],
+                uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN);
+      rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
+                           event_len - BINLOG_CHECKSUM_LEN);
+      int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
+      DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+      DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                  mi->rli.relay_log.relay_log_checksum_alg);
+      /* the first one */
+      DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+      save_buf= (char *) buf;
+      buf= rot_buf;
+    }
+    else
+      /*
+        RSC_2: If NM \and fake Rotate \and slave does not compute checksum
+        the fake Rotate's checksum is stripped off before relay-logging.
+      */
+      if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+          mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
+      {
+        event_len -= BINLOG_CHECKSUM_LEN;
+        memcpy(rot_buf, buf, event_len);
+        int4store(&rot_buf[EVENT_LEN_OFFSET],
+                  uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN);
+        DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
+        DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
+                    mi->rli.relay_log.relay_log_checksum_alg);
+        /* the first one */
+        DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
+        save_buf= (char *) buf;
+        buf= rot_buf;
+      }
     /*
       Now the I/O thread has just changed its mi->master_log_name, so
       incrementing mi->master_log_pos is nonsense.
@@ -3646,15 +3952,24 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     */
     Format_description_log_event* tmp;
     const char* errmsg;
+    // mark it as undefined that is irrelevant anymore
+    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
     if (!(tmp= (Format_description_log_event*)
           Log_event::read_log_event(buf, event_len, &errmsg,
-                                    mi->rli.relay_log.description_event_for_queue)))
+                                    mi->rli.relay_log.description_event_for_queue,
+                                    1)))
     {
       error= 2;
       goto err;
     }
     delete mi->rli.relay_log.description_event_for_queue;
     mi->rli.relay_log.description_event_for_queue= tmp;
+    if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+      tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
+
+    /* installing new value of checksum Alg for relay log */
+    mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
+
     /*
        Though this does some conversion to the slave's format, this will
        preserve the master's binlog format version, and number of event types.
@@ -3706,9 +4021,9 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
       the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
       mi->master_log_pos.
     */
-    if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
-        buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
-        buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
+    if ((uchar)buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
+        (uchar)buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
+        (uchar)buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
     {
       mi->master_log_pos+= inc_pos;
       memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
@@ -3731,12 +4046,15 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     else
       error= 3;
     rli->ign_master_log_name_end[0]= 0; // last event is not ignored
+    if (save_buf != NULL)
+      buf= save_buf;
   }
   pthread_mutex_unlock(log_lock);
 
 
 err:
-  pthread_mutex_unlock(&mi->data_lock);
+  if (unlock_data_lock)
+    pthread_mutex_unlock(&mi->data_lock);
   DBUG_PRINT("info", ("error: %d", error));
   DBUG_RETURN(error);
 }
@@ -4126,8 +4444,9 @@ static Log_event* next_event(Relay_log_info* rli)
       But if the relay log is created by new_file(): then the solution is:
       MYSQL_BIN_LOG::open() will write the buffered description event.
     */
-    if ((ev=Log_event::read_log_event(cur_log,0,
-                                      rli->relay_log.description_event_for_exec)))
+    if ((ev= Log_event::read_log_event(cur_log,0,
+                                       rli->relay_log.description_event_for_exec,
+                                       opt_slave_sql_verify_checksum)))
 
     {
       DBUG_ASSERT(thd==rli->sql_thd);
@@ -4525,9 +4844,9 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
     {37426, { 5, 1,  0 }, { 5, 1, 26 } },
   };
   const uchar *master_ver=
-    rli->relay_log.description_event_for_exec->server_version_split;
+    rli->relay_log.description_event_for_exec->server_version_split.ver;
 
-  DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split) == 3);
+  DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split.ver) == 3);
 
   for (uint i= 0;
        i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
diff --git a/sql/slave.h b/sql/slave.h
index f174b40c108..30574f42d69 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -108,6 +108,7 @@ extern MYSQL_PLUGIN_IMPORT char *relay_log_info_file;
 extern char *opt_relay_logname, *opt_relaylog_index_name;
 extern my_bool opt_skip_slave_start, opt_reckless_slave;
 extern my_bool opt_log_slave_updates;
+extern my_bool opt_replicate_annotate_row_events;
 extern ulonglong relay_log_space_limit;
 
 /*
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index f1f453bada5..b30347a5ca7 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -51,19 +51,7 @@ static void reset_start_time_for_sp(THD *thd)
     constant during the execution of those.
   */
   if (!thd->in_sub_stmt)
-  {
-    /*
-      First investigate if there is a cached time stamp
-    */
-    if (thd->user_time)
-    {
-      thd->start_time= thd->user_time;
-    }
-    else
-    {
-      my_micro_time_and_time(&thd->start_time);
-    }
-  }
+    thd->set_time();
 }
 
 Item_result
@@ -1036,17 +1024,18 @@ subst_spvars(THD *thd, sp_instr *instr, LEX_STRING *query_str)
        buffer :==
             <statement>   The input statement(s)
             '\0'          Terminating null char
-            <length>      Length of following current database name (size_t)
+            <length>      Length of following current database name 2
             <db_name>     Name of current database
             <flags>       Flags struct
   */
-  buf_len= qbuf.length() + 1 + sizeof(size_t) + thd->db_length + 
-           QUERY_CACHE_FLAGS_SIZE + 1;
+  buf_len= (qbuf.length() + 1 + QUERY_CACHE_DB_LENGTH_SIZE + thd->db_length +
+            QUERY_CACHE_FLAGS_SIZE + 1);
   if ((pbuf= (char *) alloc_root(thd->mem_root, buf_len)))
   {
+    char *ptr= pbuf + qbuf.length();
     memcpy(pbuf, qbuf.ptr(), qbuf.length());
-    pbuf[qbuf.length()]= 0;
-    memcpy(pbuf+qbuf.length()+1, (char *) &thd->db_length, sizeof(size_t));
+    *ptr= 0;
+    int2store(ptr+1, thd->db_length);
   }
   else
     DBUG_RETURN(TRUE);
@@ -1336,7 +1325,7 @@ sp_head::execute(THD *thd)
 	ctx->enter_handler(hip);
         thd->clear_error();
         thd->is_fatal_error= 0;
-	thd->killed= THD::NOT_KILLED;
+	thd->killed= NOT_KILLED;
         thd->mysys_var->abort= 0;
 	continue;
       }
@@ -1386,10 +1375,7 @@ sp_head::execute(THD *thd)
     If the DB has changed, the pointer has changed too, but the
     original thd->db will then have been freed
   */
-  if (cur_db_changed &&
-      thd->killed != THD::KILL_CONNECTION &&
-      thd->killed != THD::KILL_SERVER &&
-      thd->killed != THD::KILL_SYSTEM_THREAD)
+  if (cur_db_changed && thd->killed < KILL_CONNECTION)
   {
     /*
       Force switching back to the saved current database, because it may be
@@ -1823,7 +1809,7 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount,
     thd->options= binlog_save_options;
     if (thd->binlog_evt_union.unioned_events)
     {
-      int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
+      int errcode = query_error_code(thd, thd->killed == NOT_KILLED);
       Query_log_event qinfo(thd, binlog_buf.ptr(), binlog_buf.length(),
                             thd->binlog_evt_union.unioned_events_trans, FALSE, errcode);
       if (mysql_bin_log.write(&qinfo) &&
@@ -2878,6 +2864,9 @@ int sp_instr::exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables)
     result= -1;
   else
     result= 0;
+  /* Prepare all derived tables/views to catch possible errors. */
+  if (!result)
+    result= mysql_handle_derived(thd->lex, DT_PREPARE) ? -1 : 0;
 
   return result;
 }
diff --git a/sql/spatial.cc b/sql/spatial.cc
index 6e2e03bf5bb..288c7dabbaf 100644
--- a/sql/spatial.cc
+++ b/sql/spatial.cc
@@ -40,7 +40,7 @@
   17
 */
 
-#define MAX_DIGITS_IN_DOUBLE 22
+#define MAX_DIGITS_IN_DOUBLE 30
 
 /***************************** Gis_class_info *******************************/
 
@@ -180,6 +180,7 @@ Geometry *Geometry::create_from_wkt(Geometry_buffer *buffer,
 {
   LEX_STRING name;
   Class_info *ci;
+  char next_sym;
 
   if (trs->get_next_word(&name))
   {
@@ -192,9 +193,13 @@ Geometry *Geometry::create_from_wkt(Geometry_buffer *buffer,
   Geometry *result= (*ci->m_create_func)(buffer->data);
   wkt->q_append((char) wkb_ndr);
   wkt->q_append((uint32) result->get_class_info()->m_type_id);
-  if (trs->check_next_symbol('(') ||
+  if (!(next_sym= trs->next_symbol()))
+    return NULL;
+  if (!(next_sym= trs->next_symbol()))
+    return NULL;
+  if ((next_sym == '(' && trs->check_next_symbol('(')) ||
       result->init_from_wkt(trs, wkt) ||
-      trs->check_next_symbol(')'))
+      (next_sym == '(' && trs->check_next_symbol(')')))
     return NULL;
   if (init_stream)  
   {
@@ -205,6 +210,22 @@ Geometry *Geometry::create_from_wkt(Geometry_buffer *buffer,
 }
 
 
+int Geometry::as_wkt(String *wkt, const char **end)
+{
+  uint32 len= (uint) get_class_info()->m_name.length;
+  if (wkt->reserve(len + 2, 512))
+    return 1;
+  wkt->qs_append(get_class_info()->m_name.str, len);
+  if (get_class_info() != &geometrycollection_class)
+    wkt->qs_append('(');
+  if (get_data_as_wkt(wkt, end))
+    return 1;
+  if (get_class_info() != &geometrycollection_class)
+    wkt->qs_append(')');
+  return 0;
+}
+
+
 static double wkb_get_double(const char *ptr, Geometry::wkbByteOrder bo)
 {
   double res;
@@ -266,12 +287,37 @@ Geometry *Geometry::create_from_wkb(Geometry_buffer *buffer,
 }
 
 
+int Geometry::create_from_opresult(Geometry_buffer *g_buf,
+                                   String *res, Gcalc_result_receiver &rr)
+{
+  uint32 geom_type= rr.get_result_typeid();
+  Geometry *obj= create_by_typeid(g_buf, geom_type);
+
+  if (!obj || res->reserve(WKB_HEADER_SIZE, 512))
+    return 1;
+
+  res->q_append((char) wkb_ndr);
+  res->q_append(geom_type);
+  return obj->init_from_opresult(res, rr.result(), rr.length());
+}
+
+
 bool Geometry::envelope(String *result) const
 {
   MBR mbr;
   const char *end;
 
-  if (get_mbr(&mbr, &end) || result->reserve(1+4*3+SIZEOF_STORED_DOUBLE*10))
+  if (get_mbr(&mbr, &end))
+  {
+    /* Empty geometry */
+    if (result->reserve(1 + 4*2))
+      return 1;
+    result->q_append((char) wkb_ndr);
+    result->q_append((uint32) wkb_geometrycollection);
+    result->q_append((uint32) 0);
+    return 0;
+  }
+  if (result->reserve(1 + 4 * 3 + SIZEOF_STORED_DOUBLE * 10))
     return 1;
 
   result->q_append((char) wkb_ndr);
@@ -308,13 +354,13 @@ bool Geometry::envelope(String *result) const
 
 bool Geometry::create_point(String *result, const char *data) const
 {
-  if (no_data(data, SIZEOF_STORED_DOUBLE * 2) ||
-      result->reserve(1 + 4 + SIZEOF_STORED_DOUBLE * 2))
+  if (no_data(data, POINT_DATA_SIZE) ||
+      result->reserve(1 + 4 + POINT_DATA_SIZE))
     return 1;
   result->q_append((char) wkb_ndr);
   result->q_append((uint32) wkb_point);
   /* Copy two double in same format */
-  result->q_append(data, SIZEOF_STORED_DOUBLE*2);
+  result->q_append(data, POINT_DATA_SIZE);
   return 0;
 }
 
@@ -334,7 +380,7 @@ bool Geometry::create_point(String *result, const char *data) const
 
 bool Geometry::create_point(String *result, double x, double y) const
 {
-  if (result->reserve(1 + 4 + SIZEOF_STORED_DOUBLE * 2))
+  if (result->reserve(1 + 4 + POINT_DATA_SIZE))
     return 1;
 
   result->q_append((char) wkb_ndr);
@@ -366,7 +412,7 @@ const char *Geometry::append_points(String *txt, uint32 n_points,
     double x,y;
     data+= offset;
     get_point(&x, &y, data);
-    data+= SIZEOF_STORED_DOUBLE * 2;
+    data+= POINT_DATA_SIZE;
     txt->qs_append(x);
     txt->qs_append(' ');
     txt->qs_append(y);
@@ -401,7 +447,7 @@ const char *Geometry::get_mbr_for_points(MBR *mbr, const char *data,
   points= uint4korr(data);
   data+= 4;
 
-  if (no_data(data, (SIZEOF_STORED_DOUBLE * 2 + offset) * points))
+  if (no_data(data, (POINT_DATA_SIZE + offset) * points))
     return 0;
 
   /* Calculate MBR for points */
@@ -409,7 +455,7 @@ const char *Geometry::get_mbr_for_points(MBR *mbr, const char *data,
   {
     data+= offset;
     mbr->add_xy(data, data + SIZEOF_STORED_DOUBLE);
-    data+= SIZEOF_STORED_DOUBLE * 2;
+    data+= POINT_DATA_SIZE;
   }
   return data;
 }
@@ -427,7 +473,7 @@ bool Gis_point::init_from_wkt(Gis_read_stream *trs, String *wkb)
 {
   double x, y;
   if (trs->get_next_number(&x) || trs->get_next_number(&y) ||
-      wkb->reserve(SIZEOF_STORED_DOUBLE * 2))
+      wkb->reserve(POINT_DATA_SIZE))
     return 1;
   wkb->q_append(x);
   wkb->q_append(y);
@@ -474,6 +520,31 @@ bool Gis_point::get_mbr(MBR *mbr, const char **end) const
   return 0;
 }
 
+
+int Gis_point::area(double *ar, const char **end) const
+{
+  *ar= 0;
+  *end= m_data+ POINT_DATA_SIZE;
+  return 0;
+}
+
+
+int Gis_point::geom_length(double *len, const char **end) const
+{
+  *len= 0;
+  *end= m_data+ POINT_DATA_SIZE;
+  return 0;
+}
+
+
+int Gis_point::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  double x, y;
+
+  return get_xy(&x, &y) || trn->single_point(x, y);
+}
+
+
 const Geometry::Class_info *Gis_point::get_class_info() const
 {
   return &point_class;
@@ -525,12 +596,12 @@ uint Gis_line_string::init_from_wkb(const char *wkb, uint len,
   const char *wkb_end;
   Gis_point p;
 
-  if (len < 4)
+  if (len < 4 ||
+      (n_points= wkb_get_uint(wkb, bo))<1)
     return 0;
-  n_points= wkb_get_uint(wkb, bo);
   proper_length= 4 + n_points * POINT_DATA_SIZE;
 
-  if (!n_points || len < proper_length || res->reserve(proper_length))
+  if (len < proper_length || res->reserve(proper_length))
     return 0;
 
   res->q_append(n_points);
@@ -556,7 +627,7 @@ bool Gis_line_string::get_data_as_wkt(String *txt, const char **end) const
   data += 4;
 
   if (n_points < 1 ||
-      no_data(data, SIZEOF_STORED_DOUBLE * 2 * n_points) ||
+      no_data(data, POINT_DATA_SIZE * n_points) ||
       txt->reserve(((MAX_DIGITS_IN_DOUBLE + 1)*2 + 1) * n_points))
     return 1;
 
@@ -564,7 +635,7 @@ bool Gis_line_string::get_data_as_wkt(String *txt, const char **end) const
   {
     double x, y;
     get_point(&x, &y, data);
-    data+= SIZEOF_STORED_DOUBLE * 2;
+    data+= POINT_DATA_SIZE;
     txt->qs_append(x);
     txt->qs_append(' ');
     txt->qs_append(y);
@@ -582,7 +653,7 @@ bool Gis_line_string::get_mbr(MBR *mbr, const char **end) const
 }
 
 
-int Gis_line_string::geom_length(double *len) const
+int Gis_line_string::geom_length(double *len, const char **end) const
 {
   uint32 n_points;
   double prev_x, prev_y;
@@ -593,21 +664,35 @@ int Gis_line_string::geom_length(double *len) const
     return 1;
   n_points= uint4korr(data);
   data+= 4;
-  if (n_points < 1 || no_data(data, SIZEOF_STORED_DOUBLE * 2 * n_points))
+  if (n_points < 1 || no_data(data, POINT_DATA_SIZE * n_points))
     return 1;
 
   get_point(&prev_x, &prev_y, data);
-  data+= SIZEOF_STORED_DOUBLE*2;
-
+  data+= POINT_DATA_SIZE;
   while (--n_points)
   {
     double x, y;
     get_point(&x, &y, data);
-    data+= SIZEOF_STORED_DOUBLE * 2;
+    data+= POINT_DATA_SIZE;
     *len+= sqrt(pow(prev_x-x,2)+pow(prev_y-y,2));
     prev_x= x;
     prev_y= y;
   }
+  *end= data;
+  return 0;
+}
+
+
+int Gis_line_string::area(double *ar, const char **end) const
+{
+  uint32 n_points;
+  *ar= 0.0;
+
+  /* read number of points */
+  if (no_data(m_data, 4))
+    return 1;
+  n_points= uint4korr(m_data);
+  *end= m_data + 4 + POINT_DATA_SIZE * n_points;
   return 0;
 }
 
@@ -627,14 +712,14 @@ int Gis_line_string::is_closed(int *closed) const
     return 0;
   }
   data+= 4;
-  if (no_data(data, SIZEOF_STORED_DOUBLE * 2 * n_points))
+  if (no_data(data, POINT_DATA_SIZE * n_points))
     return 1;
 
   /* Get first point */
   get_point(&x1, &y1, data);
 
   /* get last point */
-  data+= SIZEOF_STORED_DOUBLE*2 + (n_points-2)*POINT_DATA_SIZE;
+  data+= POINT_DATA_SIZE + (n_points-2)*POINT_DATA_SIZE;
   get_point(&x2, &y2, data);
 
   *closed= (x1==x2) && (y1==y2);
@@ -678,6 +763,42 @@ int Gis_line_string::point_n(uint32 num, String *result) const
   return create_point(result, m_data + 4 + (num - 1) * POINT_DATA_SIZE);
 }
 
+
+int Gis_line_string::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_points;
+  double x, y;
+  double prev_x= 0.0;
+  double prev_y= 0.0;
+  int first_point= 1;
+  const char *data= m_data;
+
+  if (no_data(m_data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  data+= 4;
+  if (n_points < 1 || no_data(data, POINT_DATA_SIZE * n_points))
+    return 1;
+
+  trn->start_line();
+
+  while (n_points--)
+  {
+    get_point(&x, &y, data);
+    data+= POINT_DATA_SIZE;
+    if (!first_point && x == prev_x && y == prev_y)
+      continue;
+    if (trn->add_point(x, y))
+      return 1;
+    first_point= 0;
+    prev_x= x;
+    prev_y= y;
+  }
+
+  return trn->complete_line();
+}
+
+
 const Geometry::Class_info *Gis_line_string::get_class_info() const
 {
   return &linestring_class;
@@ -739,6 +860,52 @@ bool Gis_polygon::init_from_wkt(Gis_read_stream *trs, String *wkb)
 }
 
 
+uint Gis_polygon::init_from_opresult(String *bin,
+                                     const char *opres, uint res_len)
+{
+  const char *opres_orig= opres;
+  uint32 position= bin->length();
+  uint32 poly_shapes= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(poly_shapes);
+
+  while (opres_orig + res_len > opres)
+  {
+    uint32 n_points, proper_length;
+    const char *op_end, *p1_position;
+    Gis_point p;
+    Gcalc_function::shape_type st;
+
+    st= (Gcalc_function::shape_type) uint4korr(opres);
+    if (poly_shapes && st != Gcalc_function::shape_hole)
+      break;
+    poly_shapes++;
+    n_points= uint4korr(opres + 4) + 1; /* skip shape type id */
+    proper_length= 4 + n_points * POINT_DATA_SIZE;
+
+    if (bin->reserve(proper_length, 512))
+      return 0;
+
+    bin->q_append(n_points);
+    op_end= opres + 8 + (n_points-1) * 8 * 2;
+    p1_position= (opres+= 8);
+    for (; opres<op_end; opres+= POINT_DATA_SIZE)
+    {
+      if (!p.init_from_wkb(opres, POINT_DATA_SIZE, wkb_ndr, bin))
+        return 0;
+    }
+    if (!p.init_from_wkb(p1_position, POINT_DATA_SIZE, wkb_ndr, bin))
+      return 0;
+  }
+
+  bin->write_at_position(position, poly_shapes);
+
+  return (uint) (opres - opres_orig);
+}
+
+
 uint Gis_polygon::init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
                                 String *res)
 {
@@ -748,9 +915,7 @@ uint Gis_polygon::init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
   if (len < 4)
     return 0;
 
-  if (!(n_linear_rings= wkb_get_uint(wkb, bo)))
-    return 0;
-
+  n_linear_rings= wkb_get_uint(wkb, bo);
   if (res->reserve(4, 512))
     return 0;
   wkb+= 4;
@@ -796,7 +961,7 @@ bool Gis_polygon::get_data_as_wkt(String *txt, const char **end) const
       return 1;
     n_points= uint4korr(data);
     data+= 4;
-    if (no_data(data, (SIZEOF_STORED_DOUBLE*2) * n_points) ||
+    if (no_data(data, POINT_DATA_SIZE * n_points) ||
 	txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
       return 1;
     txt->qs_append('(');
@@ -850,16 +1015,16 @@ int Gis_polygon::area(double *ar, const char **end_of_data) const
     if (no_data(data, 4))
       return 1;
     n_points= uint4korr(data);
-    if (no_data(data, (SIZEOF_STORED_DOUBLE*2) * n_points))
+    if (no_data(data, POINT_DATA_SIZE * n_points))
       return 1;
     get_point(&prev_x, &prev_y, data+4);
-    data+= (4+SIZEOF_STORED_DOUBLE*2);
+    data+= (4+POINT_DATA_SIZE);
 
     while (--n_points)				// One point is already read
     {
       double x, y;
       get_point(&x, &y, data);
-      data+= (SIZEOF_STORED_DOUBLE*2);
+      data+= POINT_DATA_SIZE;
       lr_area+= (prev_x + x)* (prev_y - y);
       prev_x= x;
       prev_y= y;
@@ -886,7 +1051,7 @@ int Gis_polygon::exterior_ring(String *result) const
   n_points= uint4korr(data);
   data+= 4;
   length= n_points * POINT_DATA_SIZE;
-  if (no_data(data, length) || result->reserve(1+4+4+ length))
+  if (no_data(data, length) || result->reserve(1 + 4 + 4 + length))
     return 1;
 
   result->q_append((char) wkb_ndr);
@@ -932,7 +1097,7 @@ int Gis_polygon::interior_ring_n(uint32 num, String *result) const
   n_points= uint4korr(data);
   points_size= n_points * POINT_DATA_SIZE;
   data+= 4;
-  if (no_data(data, points_size) || result->reserve(1+4+4+ points_size))
+  if (no_data(data, points_size) || result->reserve(1 + 4 + 4 + points_size))
     return 1;
 
   result->q_append((char) wkb_ndr);
@@ -971,16 +1136,16 @@ int Gis_polygon::centroid_xy(double *x, double *y) const
       return 1;
     org_n_points= n_points= uint4korr(data);
     data+= 4;
-    if (no_data(data, (SIZEOF_STORED_DOUBLE*2) * n_points))
+    if (no_data(data, POINT_DATA_SIZE * n_points))
       return 1;
     get_point(&prev_x, &prev_y, data);
-    data+= (SIZEOF_STORED_DOUBLE*2);
+    data+= POINT_DATA_SIZE;
 
     while (--n_points)				// One point is already read
     {
       double tmp_x, tmp_y;
       get_point(&tmp_x, &tmp_y, data);
-      data+= (SIZEOF_STORED_DOUBLE*2);
+      data+= POINT_DATA_SIZE;
       cur_area+= (prev_x + tmp_x) * (prev_y - tmp_y);
       cur_cx+= tmp_x;
       cur_cy+= tmp_y;
@@ -1020,6 +1185,74 @@ int Gis_polygon::centroid(String *result) const
   return create_point(result, x, y);
 }
 
+
+int Gis_polygon::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_linear_rings;
+  const char *data= m_data;
+  double first_x, first_y;
+  double prev_x, prev_y;
+  int was_equal_first= 0;
+
+  if (trn->start_poly())
+    return 1;
+
+  if (no_data(data, 4))
+    return 1;
+  n_linear_rings= uint4korr(data);
+  data+= 4;
+
+  while (n_linear_rings--)
+  {
+    uint32 n_points;
+
+    if (no_data(data, 4))
+      return 1;
+    n_points= uint4korr(data);
+    data+= 4;
+    if (!n_points || no_data(data, POINT_DATA_SIZE * n_points))
+      return 1;
+
+    trn->start_ring();
+    get_point(&first_x, &first_y, data);
+    data+= POINT_DATA_SIZE;
+    n_points--;
+    prev_x= first_x;
+    prev_y= first_y;
+    if (trn->add_point(first_x, first_y))
+      return 1;
+    while (--n_points)
+    {
+      double x, y;
+      get_point(&x, &y, data);
+      data+= POINT_DATA_SIZE;
+      if (x == prev_x && y == prev_y)
+        continue;
+      prev_x= x;
+      prev_y= y;
+      if (was_equal_first)
+      {
+        if (trn->add_point(first_x, first_y))
+          return 1;
+        was_equal_first= 0;
+      }
+      if (x == first_x && y == first_y)
+      {
+        was_equal_first= 1;
+        continue;
+      }
+      if (trn->add_point(x, y))
+        return 1;
+    }
+    data+= POINT_DATA_SIZE;
+    trn->complete_ring();
+  }
+
+  trn->complete_poly();
+  return 0;
+}
+
+
 const Geometry::Class_info *Gis_polygon::get_class_info() const
 {
   return &polygon_class;
@@ -1048,7 +1281,7 @@ bool Gis_multi_point::init_from_wkt(Gis_read_stream *trs, String *wkb)
 
   for (;;)
   {
-    if (wkb->reserve(1+4, 512))
+    if (wkb->reserve(1 + 4, 512))
       return 1;
     wkb->q_append((char) wkb_ndr);
     wkb->q_append((uint32) wkb_point);
@@ -1063,6 +1296,32 @@ bool Gis_multi_point::init_from_wkt(Gis_read_stream *trs, String *wkb)
 }
 
 
+uint Gis_multi_point::init_from_opresult(String *bin,
+                                         const char *opres, uint res_len)
+{
+  uint bin_size, n_points;
+  Gis_point p;
+  const char *opres_end;
+
+  n_points= res_len/(4+8*2);
+  bin_size= n_points * (WKB_HEADER_SIZE + POINT_DATA_SIZE) + 4;
+ 
+  if (bin->reserve(bin_size, 512))
+    return 0;
+    
+  bin->q_append(n_points);
+  opres_end= opres + res_len;
+  for (; opres < opres_end; opres+= (4 + 8*2))
+  {
+    bin->q_append((char)wkb_ndr);
+    bin->q_append((uint32)wkb_point);
+    if (!p.init_from_wkb(opres + 4, POINT_DATA_SIZE, wkb_ndr, bin))
+      return 0;
+  }
+  return res_len;
+}
+
+
 uint Gis_multi_point::init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
                                     String *res)
 {
@@ -1101,7 +1360,7 @@ bool Gis_multi_point::get_data_as_wkt(String *txt, const char **end) const
 
   n_points= uint4korr(m_data);
   if (no_data(m_data+4,
-	      n_points * (SIZEOF_STORED_DOUBLE * 2 + WKB_HEADER_SIZE)) ||
+	      n_points * (POINT_DATA_SIZE + WKB_HEADER_SIZE)) ||
       txt->reserve(((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
     return 1;
   *end= append_points(txt, n_points, m_data+4, WKB_HEADER_SIZE);
@@ -1142,6 +1401,35 @@ int Gis_multi_point::geometry_n(uint32 num, String *result) const
   return 0;
 }
 
+
+int Gis_multi_point::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_points;
+  Gis_point pt;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_points= uint4korr(data);
+  data+= 4;
+
+  if (trn->start_collection(n_points))
+    return 1;
+
+  while (n_points--)
+  {
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    data+= WKB_HEADER_SIZE;
+    pt.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (pt.store_shapes(trn))
+      return 1;
+    data+= pt.get_data_size();
+  }
+  return 0;
+}
+
+
 const Geometry::Class_info *Gis_multi_point::get_class_info() const
 {
   return &multipoint_class;
@@ -1184,10 +1472,9 @@ bool Gis_multi_line_string::init_from_wkt(Gis_read_stream *trs, String *wkb)
   {
     Gis_line_string ls;
 
-    if (wkb->reserve(1+4, 512))
+    if (wkb->reserve(1 + 4, 512))
       return 1;
-    wkb->q_append((char) wkb_ndr);
-    wkb->q_append((uint32) wkb_linestring);
+    wkb->q_append((char) wkb_ndr); wkb->q_append((uint32) wkb_linestring);
 
     if (trs->check_next_symbol('(') ||
 	ls.init_from_wkt(trs, wkb) ||
@@ -1202,15 +1489,48 @@ bool Gis_multi_line_string::init_from_wkt(Gis_read_stream *trs, String *wkb)
 }
 
 
+uint Gis_multi_line_string::init_from_opresult(String *bin,
+                                               const char *opres, uint res_len)
+{
+  const char *opres_orig= opres;
+  int ns_pos= bin->length();
+  uint n_linestring= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(n_linestring);
+  
+  while (res_len)
+  {
+    Gis_line_string ls;
+    int ls_len;
+
+    if (bin->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    bin->q_append((char) wkb_ndr);
+    bin->q_append((uint32) wkb_linestring);
+
+    if (!(ls_len= ls.init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= ls_len;
+    res_len-= ls_len;
+    n_linestring++;
+  }
+  bin->write_at_position(ns_pos, n_linestring);
+  return (uint) (opres - opres_orig);
+}
+
+
 uint Gis_multi_line_string::init_from_wkb(const char *wkb, uint len,
                                           wkbByteOrder bo, String *res)
 {
   uint32 n_line_strings;
   const char *wkb_orig= wkb;
 
-  if (len < 4)
+  if (len < 4 ||
+      (n_line_strings= wkb_get_uint(wkb, bo))< 1)
     return 0;
-  n_line_strings= wkb_get_uint(wkb, bo);
 
   if (res->reserve(4, 512))
     return 0;
@@ -1258,7 +1578,7 @@ bool Gis_multi_line_string::get_data_as_wkt(String *txt,
       return 1;
     n_points= uint4korr(data + WKB_HEADER_SIZE);
     data+= WKB_HEADER_SIZE + 4;
-    if (no_data(data, n_points * (SIZEOF_STORED_DOUBLE*2)) ||
+    if (no_data(data, n_points * POINT_DATA_SIZE) ||
 	txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points))
       return 1;
     txt->qs_append('(');
@@ -1329,10 +1649,11 @@ int Gis_multi_line_string::geometry_n(uint32 num, String *result) const
 }
 
 
-int Gis_multi_line_string::geom_length(double *len) const
+int Gis_multi_line_string::geom_length(double *len, const char **end) const
 {
   uint32 n_line_strings;
   const char *data= m_data;
+  const char *line_end;
 
   if (no_data(data, 4))
     return 1;
@@ -1346,7 +1667,7 @@ int Gis_multi_line_string::geom_length(double *len) const
     Gis_line_string ls;
     data+= WKB_HEADER_SIZE;
     ls.set_data_ptr(data, (uint32) (m_data_end - data));
-    if (ls.geom_length(&ls_len))
+    if (ls.geom_length(&ls_len, &line_end))
       return 1;
     *len+= ls_len;
     /*
@@ -1355,6 +1676,7 @@ int Gis_multi_line_string::geom_length(double *len) const
     */
     data+= ls.get_data_size();
   }
+  *end= data;
   return 0;
 }
 
@@ -1388,6 +1710,35 @@ int Gis_multi_line_string::is_closed(int *closed) const
   return 0;
 }
 
+
+int Gis_multi_line_string::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_lines;
+  Gis_line_string ls;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_lines= uint4korr(data);
+  data+= 4;
+
+  if (trn->start_collection(n_lines))
+    return 1;
+
+  while (n_lines--)
+  {
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    data+= WKB_HEADER_SIZE;
+    ls.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (ls.store_shapes(trn))
+      return 1;
+    data+= ls.get_data_size();
+  }
+  return 0;
+}
+
+
 const Geometry::Class_info *Gis_multi_line_string::get_class_info() const
 {
   return &multilinestring_class;
@@ -1438,7 +1789,7 @@ bool Gis_multi_polygon::init_from_wkt(Gis_read_stream *trs, String *wkb)
 
   for (;;)  
   {
-    if (wkb->reserve(1+4, 512))
+    if (wkb->reserve(1 + 4, 512))
       return 1;
     wkb->q_append((char) wkb_ndr);
     wkb->q_append((uint32) wkb_polygon);
@@ -1493,6 +1844,36 @@ uint Gis_multi_polygon::init_from_wkb(const char *wkb, uint len,
 }
 
 
+uint Gis_multi_polygon::init_from_opresult(String *bin,
+                                           const char *opres, uint res_len)
+{
+  Gis_polygon p;
+  const char *opres_orig= opres;
+  uint p_len;
+  uint32 n_poly= 0;
+  uint32 np_pos= bin->length();
+
+  if (bin->reserve(4, 512))
+    return 0;
+    
+  bin->q_append(n_poly);
+  while (res_len)
+  {
+    if (bin->reserve(1 + 4, 512))
+      return 0;
+    bin->q_append((char)wkb_ndr);
+    bin->q_append((uint32)wkb_polygon);
+    if (!(p_len= p.init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= p_len;
+    res_len-= p_len;
+    n_poly++;
+  }
+  bin->write_at_position(np_pos, n_poly);
+  return opres - opres_orig;
+}
+
+
 bool Gis_multi_polygon::get_data_as_wkt(String *txt, const char **end) const
 {
   uint32 n_polygons;
@@ -1519,7 +1900,7 @@ bool Gis_multi_polygon::get_data_as_wkt(String *txt, const char **end) const
         return 1;
       uint32 n_points= uint4korr(data);
       data+= 4;
-      if (no_data(data, (SIZEOF_STORED_DOUBLE * 2) * n_points) ||
+      if (no_data(data, POINT_DATA_SIZE * n_points) ||
 	  txt->reserve(2 + ((MAX_DIGITS_IN_DOUBLE + 1) * 2 + 1) * n_points,
 		       512))
 	return 1;
@@ -1680,6 +2061,35 @@ int Gis_multi_polygon::centroid(String *result) const
   return create_point(result, res_cx, res_cy);
 }
 
+
+int Gis_multi_polygon::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_polygons;
+  Gis_polygon p;
+  const char *data= m_data;
+
+  if (no_data(data, 4))
+    return 1;
+  n_polygons= uint4korr(data);
+  data+= 4;
+
+  if (trn->start_collection(n_polygons))
+    return 1;
+
+  while (n_polygons--)
+  {
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    data+= WKB_HEADER_SIZE;
+    p.set_data_ptr(data, (uint32) (m_data_end - data));
+    if (p.store_shapes(trn))
+      return 1;
+    data+= p.get_data_size();
+  }
+  return 0;
+}
+
+
 const Geometry::Class_info *Gis_multi_polygon::get_class_info() const
 {
   return &multipolygon_class;
@@ -1726,24 +2136,41 @@ bool Gis_geometry_collection::init_from_wkt(Gis_read_stream *trs, String *wkb)
   uint32 no_pos= wkb->length();
   Geometry_buffer buffer;
   Geometry *g;
+  char next_sym;
 
   if (wkb->reserve(4, 512))
     return 1;
   wkb->length(wkb->length()+4);			// Reserve space for points
 
-  for (;;)
+  if (!(next_sym= trs->next_symbol()))
+    return 1;
+
+  if (next_sym != ')')
   {
-    if (!(g= create_from_wkt(&buffer, trs, wkb)))
+    LEX_STRING next_word;
+    if (trs->lookup_next_word(&next_word))
       return 1;
 
-    if (g->get_class_info()->m_type_id == wkb_geometrycollection)
+    if (next_word.length != 5 ||
+	(my_strnncoll(&my_charset_latin1,
+		      (const uchar*) "empty", 5,
+		      (const uchar*) next_word.str, 5) != 0))
     {
-      trs->set_error_msg("Unexpected GEOMETRYCOLLECTION");
-      return 1;
+      for (;;)
+      {
+        if (!(g= create_from_wkt(&buffer, trs, wkb)))
+          return 1;
+
+        if (g->get_class_info()->m_type_id == wkb_geometrycollection)
+        {
+          trs->set_error_msg("Unexpected GEOMETRYCOLLECTION");
+          return 1;
+        }
+        n_objects++;
+        if (trs->skip_char(','))			// Didn't find ','
+          break;
+      }
     }
-    n_objects++;
-    if (trs->skip_char(','))			// Didn't find ','
-      break;
   }
 
   wkb->write_at_position(no_pos, n_objects);
@@ -1751,6 +2178,50 @@ bool Gis_geometry_collection::init_from_wkt(Gis_read_stream *trs, String *wkb)
 }
 
 
+uint Gis_geometry_collection::init_from_opresult(String *bin,
+                                                 const char *opres,
+                                                 uint res_len)
+{
+  const char *opres_orig= opres;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  int g_len;
+  uint32 wkb_type;
+  int no_pos= bin->length();
+  uint32 n_objects= 0;
+
+  if (bin->reserve(4, 512))
+    return 0;
+  bin->q_append(n_objects);
+  
+  while (res_len)
+  {
+    switch ((Gcalc_function::shape_type) uint4korr(opres))
+    {
+      case Gcalc_function::shape_point:   wkb_type= wkb_point; break;
+      case Gcalc_function::shape_line:    wkb_type= wkb_linestring; break;
+      case Gcalc_function::shape_polygon: wkb_type= wkb_polygon; break;
+      default: wkb_type= 0; DBUG_ASSERT(FALSE);
+    };
+
+    if (bin->reserve(WKB_HEADER_SIZE, 512))
+      return 0;
+
+    bin->q_append((char) wkb_ndr);
+    bin->q_append(wkb_type);
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)) ||
+        !(g_len= geom->init_from_opresult(bin, opres, res_len)))
+      return 0;
+    opres+= g_len;
+    res_len-= g_len;
+    n_objects++;
+  }
+  bin->write_at_position(no_pos, n_objects);
+  return (uint) (opres - opres_orig);
+}
+
+
 uint Gis_geometry_collection::init_from_wkb(const char *wkb, uint len,
                                             wkbByteOrder bo, String *res)
 {
@@ -1806,6 +2277,13 @@ bool Gis_geometry_collection::get_data_as_wkt(String *txt,
   n_objects= uint4korr(data);
   data+= 4;
 
+  if (n_objects == 0)
+  {
+    txt->append(STRING_WITH_LEN(" EMPTY"), 512);
+    goto exit;
+  }
+
+  txt->qs_append('(');
   while (n_objects--)
   {
     uint32 wkb_type;
@@ -1820,10 +2298,11 @@ bool Gis_geometry_collection::get_data_as_wkt(String *txt,
     geom->set_data_ptr(data, (uint) (m_data_end - data));
     if (geom->as_wkt(txt, &data))
       return 1;
-    if (txt->append(STRING_WITH_LEN(","), 512))
+    if (n_objects && txt->append(STRING_WITH_LEN(","), 512))
       return 1;
   }
-  txt->length(txt->length() - 1);
+  txt->qs_append(')');
+exit:
   *end= data;
   return 0;
 }
@@ -1840,6 +2319,8 @@ bool Gis_geometry_collection::get_mbr(MBR *mbr, const char **end) const
     return 1;
   n_objects= uint4korr(data);
   data+= 4;
+  if (n_objects == 0)
+    return 1;
 
   while (n_objects--)
   {
@@ -1861,6 +2342,82 @@ bool Gis_geometry_collection::get_mbr(MBR *mbr, const char **end) const
 }
 
 
+int Gis_geometry_collection::area(double *ar,  const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  double result;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+  if (n_objects == 0)
+    return 1;
+
+  result= 0.0;
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->area(ar, &data))
+      return 1;
+    result+= *ar;
+  }
+  *end= data;
+  *ar= result;
+  return 0;
+}
+
+
+int Gis_geometry_collection::geom_length(double *len, const char **end) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+  double result;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+  if (n_objects == 0)
+    return 1;
+
+  result= 0.0;
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->geom_length(len, &data))
+      return 1;
+    result+= *len;
+  }
+  *end= data;
+  *len= result;
+  return 0;
+}
+
+
 int Gis_geometry_collection::num_geometries(uint32 *num) const
 {
   if (no_data(m_data, 4))
@@ -1900,7 +2457,7 @@ int Gis_geometry_collection::geometry_n(uint32 num, String *result) const
   } while (--num);
 
   /* Copy found object to result */
-  if (result->reserve(1+4+length))
+  if (result->reserve(1 + 4 + length))
     return 1;
   result->q_append((char) wkb_ndr);
   result->q_append((uint32) wkb_type);
@@ -1961,6 +2518,48 @@ bool Gis_geometry_collection::dimension(uint32 *res_dim, const char **end) const
   return 0;
 }
 
+
+int Gis_geometry_collection::store_shapes(Gcalc_shape_transporter *trn) const
+{
+  uint32 n_objects;
+  const char *data= m_data;
+  Geometry_buffer buffer;
+  Geometry *geom;
+
+  if (no_data(data, 4))
+    return 1;
+  n_objects= uint4korr(data);
+  data+= 4;
+
+  if (!n_objects)
+  {
+    trn->empty_shape();
+    return 0;
+  }
+
+  if (trn->start_collection(n_objects))
+    return 1;
+
+  while (n_objects--)
+  {
+    uint32 wkb_type;
+
+    if (no_data(data, WKB_HEADER_SIZE))
+      return 1;
+    wkb_type= uint4korr(data + 1);
+    data+= WKB_HEADER_SIZE;
+    if (!(geom= create_by_typeid(&buffer, wkb_type)))
+      return 1;
+    geom->set_data_ptr(data, (uint32) (m_data_end - data));
+    if (geom->store_shapes(trn))
+      return 1;
+
+    data+= geom->get_data_size();
+  }
+  return 0;
+}
+
+
 const Geometry::Class_info *Gis_geometry_collection::get_class_info() const
 {
   return &geometrycollection_class;
diff --git a/sql/spatial.h b/sql/spatial.h
index 662c377dc1d..cd29b14ff38 100644
--- a/sql/spatial.h
+++ b/sql/spatial.h
@@ -18,8 +18,13 @@
 #ifndef _spatial_h
 #define _spatial_h
 
+#include "sql_string.h"                         /* String, LEX_STRING */
+#include <my_compiler.h>
+
 #ifdef HAVE_SPATIAL
 
+#include "gcalc_tools.h"
+
 const uint SRID_SIZE= 4;
 const uint SIZEOF_STORED_DOUBLE= 8;
 const uint POINT_DATA_SIZE= SIZEOF_STORED_DOUBLE*2; 
@@ -95,6 +100,13 @@ struct MBR
     if (mbr->ymax > ymax)
       ymax= mbr->ymax;
   }
+  void buffer(double d)
+  {
+    xmin-= d;
+    ymin-= d;
+    xmax+= d;
+    ymax+= d;
+  }
 
   int equals(const MBR *mbr)
   {
@@ -244,16 +256,19 @@ public:
   virtual const Class_info *get_class_info() const=0;
   virtual uint32 get_data_size() const=0;
   virtual bool init_from_wkt(Gis_read_stream *trs, String *wkb)=0;
-
   /* returns the length of the wkb that was read */
   virtual uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo,
                              String *res)=0;
+  virtual uint init_from_opresult(String *bin,
+                                  const char *opres, uint res_len)
+  { return init_from_wkb(opres + 4, UINT_MAX32, wkb_ndr, bin) + 4; }
+
   virtual bool get_data_as_wkt(String *txt, const char **end) const=0;
   virtual bool get_mbr(MBR *mbr, const char **end) const=0;
   virtual bool dimension(uint32 *dim, const char **end) const=0;
   virtual int get_x(double *x) const { return -1; }
   virtual int get_y(double *y) const { return -1; }
-  virtual int geom_length(double *len) const  { return -1; }
+  virtual int geom_length(double *len, const char **end) const  { return -1; }
   virtual int area(double *ar, const char **end) const { return -1;}
   virtual int is_closed(int *closed) const { return -1; }
   virtual int num_interior_ring(uint32 *n_int_rings) const { return -1; }
@@ -266,28 +281,21 @@ public:
   virtual int point_n(uint32 num, String *result) const { return -1; }
   virtual int interior_ring_n(uint32 num, String *result) const { return -1; }
   virtual int geometry_n(uint32 num, String *result) const { return -1; }
+  virtual int store_shapes(Gcalc_shape_transporter *trn) const=0;
 
 public:
   static Geometry *create_by_typeid(Geometry_buffer *buffer, int type_id);
+
   static Geometry *construct(Geometry_buffer *buffer,
                              const char *data, uint32 data_len);
   static Geometry *create_from_wkt(Geometry_buffer *buffer,
 				   Gis_read_stream *trs, String *wkt,
 				   bool init_stream=1);
-  static Geometry *create_from_wkb(Geometry_buffer *buffer, const char *wkb, 
-                                   uint32 len, String *res);
-  int as_wkt(String *wkt, const char **end)
-  {
-    uint32 len= (uint) get_class_info()->m_name.length;
-    if (wkt->reserve(len + 2, 512))
-      return 1;
-    wkt->qs_append(get_class_info()->m_name.str, len);
-    wkt->qs_append('(');
-    if (get_data_as_wkt(wkt, end))
-      return 1;
-    wkt->qs_append(')');
-    return 0;
-  }
+  static Geometry *create_from_wkb(Geometry_buffer *buffer,
+                                   const char *wkb, uint32 len, String *res);
+  static int create_from_opresult(Geometry_buffer *g_buf,
+                                  String *res, Gcalc_result_receiver &rr);
+  int as_wkt(String *wkt, const char **end);
 
   inline void set_data_ptr(const char *data, uint32 data_len)
   {
@@ -365,12 +373,15 @@ public:
     return 0;
   }
 
+  int geom_length(double *len, const char **end) const;
+  int area(double *ar, const char **end) const;
   bool dimension(uint32 *dim, const char **end) const
   {
     *dim= 0;
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
@@ -387,7 +398,8 @@ public:
   uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
   bool get_data_as_wkt(String *txt, const char **end) const;
   bool get_mbr(MBR *mbr, const char **end) const;
-  int geom_length(double *len) const;
+  int geom_length(double *len, const char **end) const;
+  int area(double *ar, const char **end) const;
   int is_closed(int *closed) const;
   int num_points(uint32 *n_points) const;
   int start_point(String *point) const;
@@ -399,6 +411,7 @@ public:
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
@@ -413,6 +426,7 @@ public:
   uint32 get_data_size() const;
   bool init_from_wkt(Gis_read_stream *trs, String *wkb);
   uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
   bool get_data_as_wkt(String *txt, const char **end) const;
   bool get_mbr(MBR *mbr, const char **end) const;
   int area(double *ar, const char **end) const;
@@ -427,6 +441,7 @@ public:
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
@@ -441,6 +456,7 @@ public:
   uint32 get_data_size() const;
   bool init_from_wkt(Gis_read_stream *trs, String *wkb);
   uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
   bool get_data_as_wkt(String *txt, const char **end) const;
   bool get_mbr(MBR *mbr, const char **end) const;
   int num_geometries(uint32 *num) const;
@@ -451,6 +467,7 @@ public:
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
@@ -465,11 +482,12 @@ public:
   uint32 get_data_size() const;
   bool init_from_wkt(Gis_read_stream *trs, String *wkb);
   uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
   bool get_data_as_wkt(String *txt, const char **end) const;
   bool get_mbr(MBR *mbr, const char **end) const;
   int num_geometries(uint32 *num) const;
   int geometry_n(uint32 num, String *result) const;
-  int geom_length(double *len) const;
+  int geom_length(double *len, const char **end) const;
   int is_closed(int *closed) const;
   bool dimension(uint32 *dim, const char **end) const
   {
@@ -477,6 +495,7 @@ public:
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
@@ -503,7 +522,9 @@ public:
     *end= 0;					/* No default end */
     return 0;
   }
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
 };
 
 
@@ -517,11 +538,15 @@ public:
   uint32 get_data_size() const;
   bool init_from_wkt(Gis_read_stream *trs, String *wkb);
   uint init_from_wkb(const char *wkb, uint len, wkbByteOrder bo, String *res);
+  uint init_from_opresult(String *bin, const char *opres, uint res_len);
   bool get_data_as_wkt(String *txt, const char **end) const;
   bool get_mbr(MBR *mbr, const char **end) const;
+  int area(double *ar, const char **end) const;
+  int geom_length(double *len, const char **end) const;
   int num_geometries(uint32 *num) const;
   int geometry_n(uint32 num, String *result) const;
   bool dimension(uint32 *dim, const char **end) const;
+  int store_shapes(Gcalc_shape_transporter *trn) const;
   const Class_info *get_class_info() const;
 };
 
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 0920c18ad24..dc664b2779b 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -2088,7 +2088,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
       table->field[next_field+2]->store((longlong) mqh.conn_per_hour, TRUE);
     if (table->s->fields >= 36 &&
         (mqh.specified_limits & USER_RESOURCES::USER_CONNECTIONS))
-      table->field[next_field+3]->store((longlong) mqh.user_conn, TRUE);
+      table->field[next_field+3]->store((longlong) mqh.user_conn, FALSE);
     mqh_used= mqh_used || mqh.questions || mqh.updates || mqh.conn_per_hour;
 
     next_field+=4;
@@ -3057,7 +3057,8 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list,
       class LEX_COLUMN *column;
       List_iterator <LEX_COLUMN> column_iter(columns);
 
-      if (open_and_lock_tables(thd, table_list))
+      if (open_and_lock_tables(thd, table_list) ||
+          mysql_handle_derived(thd->lex, DT_PREPARE))
         DBUG_RETURN(TRUE);
 
       while ((column = column_iter++))
@@ -4576,7 +4577,8 @@ ulong get_column_grant(THD *thd, GRANT_INFO *grant,
 
 /* Help function for mysql_show_grants */
 
-static void add_user_option(String *grant, ulong value, const char *name)
+static void add_user_option(String *grant, long value, const char *name,
+                            my_bool is_signed)
 {
   if (value)
   {
@@ -4584,7 +4586,7 @@ static void add_user_option(String *grant, ulong value, const char *name)
     grant->append(' ');
     grant->append(name, strlen(name));
     grant->append(' ');
-    p=int10_to_str(value, buff, 10);
+    p=int10_to_str(value, buff, is_signed ? -10 : 10);
     grant->append(buff,p-buff);
   }
 }
@@ -4766,13 +4768,13 @@ bool mysql_show_grants(THD *thd,LEX_USER *lex_user)
       if (want_access & GRANT_ACL)
 	global.append(STRING_WITH_LEN(" GRANT OPTION"));
       add_user_option(&global, acl_user->user_resource.questions,
-		      "MAX_QUERIES_PER_HOUR");
+		      "MAX_QUERIES_PER_HOUR", 0);
       add_user_option(&global, acl_user->user_resource.updates,
-		      "MAX_UPDATES_PER_HOUR");
+		      "MAX_UPDATES_PER_HOUR", 0);
       add_user_option(&global, acl_user->user_resource.conn_per_hour,
-		      "MAX_CONNECTIONS_PER_HOUR");
+		      "MAX_CONNECTIONS_PER_HOUR", 0);
       add_user_option(&global, acl_user->user_resource.user_conn,
-		      "MAX_USER_CONNECTIONS");
+		      "MAX_USER_CONNECTIONS", 1);
     }
     protocol->prepare_for_resend();
     protocol->store(global.ptr(),global.length(),global.charset());
@@ -7475,7 +7477,6 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
   THD *thd= mpvio->thd;
   NET *net= &thd->net;
   char *end;
-
   DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
 
   if (pkt_len < MIN_HANDSHAKE_SIZE)
@@ -8161,10 +8162,16 @@ bool acl_authenticate(THD *thd, uint connect_errors,
       DBUG_RETURN(1);
     }
 
-    /* Don't allow the user to connect if he has done too many queries */
-    if ((acl_user->user_resource.questions || acl_user->user_resource.updates ||
+    /*
+      Don't allow the user to connect if he has done too many queries.
+      As we are testing max_user_connections == 0 here, it means that we
+      can't let the user change max_user_connections from 0 in the server
+      without a restart as it would lead to wrong connect counting.
+    */
+    if ((acl_user->user_resource.questions ||
+         acl_user->user_resource.updates ||
          acl_user->user_resource.conn_per_hour ||
-         acl_user->user_resource.user_conn || max_user_connections) &&
+         acl_user->user_resource.user_conn || max_user_connections_checking) &&
         get_or_create_user_conn(thd,
           (opt_old_style_user_limits ? sctx->user : sctx->priv_user),
           (opt_old_style_user_limits ? sctx->host_or_ip : sctx->priv_host),
@@ -8177,7 +8184,7 @@ bool acl_authenticate(THD *thd, uint connect_errors,
   if (thd->user_connect &&
       (thd->user_connect->user_resources.conn_per_hour ||
        thd->user_connect->user_resources.user_conn ||
-       max_user_connections) &&
+       max_user_connections_checking) &&
       check_for_max_user_connections(thd, thd->user_connect))
   {
     /* Ensure we don't decrement thd->user_connections->connections twice */
diff --git a/sql/sql_array.h b/sql/sql_array.h
index e1b22921519..233b3f24263 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -66,3 +66,75 @@ public:
   }
 };
 
+
+/* 
+  Array of pointers to Elem that uses memory from MEM_ROOT
+
+  MEM_ROOT has no realloc() so this is supposed to be used for cases when
+  reallocations are rare.
+*/
+
+template <class Elem> class Array
+{
+  enum {alloc_increment = 16};
+  Elem **buffer;
+  uint n_elements, max_element;
+public:
+  Array(MEM_ROOT *mem_root, uint prealloc=16)
+  {
+    buffer= (Elem**)alloc_root(mem_root, prealloc * sizeof(Elem**));
+    max_element = buffer? prealloc : 0;
+    n_elements= 0;
+  }
+
+  Elem& at(int idx)
+  {
+    return *(((Elem*)buffer) + idx);
+  }
+
+  Elem **front()
+  {
+    return buffer;
+  }
+
+  Elem **back()
+  {
+    return buffer + n_elements;
+  }
+
+  bool append(MEM_ROOT *mem_root, Elem *el)
+  {
+    if (n_elements == max_element)
+    {
+      Elem **newbuf;
+      if (!(newbuf= (Elem**)alloc_root(mem_root, (n_elements + alloc_increment)*
+                                                  sizeof(Elem**))))
+      {
+        return FALSE;
+      }
+      memcpy(newbuf, buffer, n_elements*sizeof(Elem*));
+      buffer= newbuf;
+    }
+    buffer[n_elements++]= el;
+    return FALSE;
+  }
+
+  int elements()
+  {
+    return n_elements;
+  }
+
+  void clear()
+  {
+    n_elements= 0;
+  }
+
+  typedef int (*CMP_FUNC)(Elem * const *el1, Elem *const *el2);
+
+  void sort(CMP_FUNC cmp_func)
+  {
+    my_qsort(buffer, n_elements, sizeof(Elem*), (qsort_cmp)cmp_func);
+  }
+};
+
+
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index bd02bbca7ea..19fabd27d72 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -22,6 +22,7 @@
 #include "sp_head.h"
 #include "sp.h"
 #include "sql_trigger.h"
+#include "sql_handler.h"
 #include <m_ctype.h>
 #include <my_dir.h>
 #include <hash.h>
@@ -682,7 +683,7 @@ void close_handle_and_leave_table_as_lock(TABLE *table)
   */
   if (table->child_l || table->parent)
     detach_merge_children(table, FALSE);
-  table->file->close();
+  table->file->ha_close();
   table->db_stat= 0;                            // Mark file closed
   release_table_share(table->s, RELEASE_NORMAL);
   table->s= share;
@@ -1390,10 +1391,10 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
   bool found_old_table= 0;
   TABLE *table= *table_ptr;
   DBUG_ENTER("close_thread_table");
-  DBUG_ASSERT(table->key_read == 0);
-  DBUG_ASSERT(!table->file || table->file->inited == handler::NONE);
   DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
                         table->s->table_name.str, (long) table));
+  DBUG_ASSERT(table->key_read == 0);
+  DBUG_ASSERT(!table->file || table->file->inited == handler::NONE);
 
  if (table->file)
  {
@@ -1706,11 +1707,12 @@ TABLE_LIST* unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list,
   t_name= table->table_name;
   t_alias= table->alias;
 
+retry:
   DBUG_PRINT("info", ("real table: %s.%s", d_name, t_name));
-  for (;;)
+  for (TABLE_LIST *tl= table_list;;)
   {
-    if (((! (res= find_table_in_global_list(table_list, d_name, t_name))) &&
-         (! (res= mysql_lock_have_duplicate(thd, table, table_list)))) ||
+    if (((! (res= find_table_in_global_list(tl, d_name, t_name))) &&
+         (! (res= mysql_lock_have_duplicate(thd, table, tl)))) ||
         ((!res->table || res->table != table->table) &&
          (!check_alias || !(lower_case_table_names ?
           my_strcasecmp(files_charset_info, t_alias, res->alias) :
@@ -1723,10 +1725,23 @@ TABLE_LIST* unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list,
       processed in derived table or top select of multi-update/multi-delete
       (exclude_from_table_unique_test) or prelocking placeholder.
     */
-    table_list= res->next_global;
+    tl= res->next_global;
     DBUG_PRINT("info",
                ("found same copy of table or table which we should skip"));
   }
+  if (res && res->belong_to_derived)
+  {
+    /* Try to fix */
+    TABLE_LIST *derived=  res->belong_to_derived;
+    if (derived->is_merged_derived())
+    {
+      DBUG_PRINT("info",
+                 ("convert merged to materialization to resolve the conflict"));
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
+    }
+    goto retry;
+  }
   DBUG_RETURN(res);
 }
 
@@ -2970,7 +2985,9 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
     }
 
     error= open_unireg_entry(thd, table, table_list, alias, key, key_length,
-                             mem_root, (flags & OPEN_VIEW_NO_PARSE));
+                             mem_root,
+                             (flags & (OPEN_VIEW_NO_PARSE |
+                                       MYSQL_LOCK_IGNORE_FLUSH)));
     if (error > 0)
     {
       my_free((uchar*)table, MYF(0));
@@ -3039,6 +3056,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
   */
   DBUG_ASSERT(table->file->pushed_cond == NULL);
   table->reginfo.impossible_range= 0;
+  table->created= TRUE;
   /* Catch wrong handling of the auto_increment_field_not_null. */
   DBUG_ASSERT(!table->auto_increment_field_not_null);
   table->auto_increment_field_not_null= FALSE;
@@ -3047,13 +3065,19 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
   table->pos_in_table_list= table_list;
   table_list->updatable= 1; // It is not derived table nor non-updatable VIEW
   table->clear_column_bitmaps();
-#if !defined(DBUG_OFF) && !defined(HAVE_valgrind)
   /*
     Fill record with random values to find bugs where we access fields
     without first reading them.
   */
-  bfill(table->record[0], table->s->reclength, 254);
-#endif
+  TRASH(table->record[0], table->s->reclength);
+  /*
+    Initialize the null marker bits, to ensure that if we are doing a read
+    of only selected columns (like in keyread), all null markers are
+    initialized.
+  */
+  bfill(table->record[0], table->s->null_bytes, 255); 
+  bfill(table->record[1], table->s->null_bytes, 255); 
+
   DBUG_ASSERT(table->key_read == 0);
   DBUG_RETURN(table);
 }
@@ -3804,7 +3828,7 @@ TABLE *drop_locked_tables(THD *thd,const char *db, const char *table_name)
         if (table->db_stat)
         {
           table->db_stat= 0;
-          table->file->close();
+          table->file->ha_close();
         }
       }
       else
@@ -4052,8 +4076,11 @@ retry:
                                       HA_GET_INDEX | HA_TRY_READ_ONLY),
                               READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD |
                               (flags & OPEN_VIEW_NO_PARSE),
-                              thd->open_options, entry, table_list,
-                              mem_root);
+                              thd->open_options |
+                              (thd->version == 0 &&
+                               (flags & MYSQL_LOCK_IGNORE_FLUSH) ?
+                               HA_OPEN_FOR_STATUS : 0),
+                              entry, table_list, mem_root);
     if (error)
       goto err;
     /* TODO: Don't free this */
@@ -4091,7 +4118,11 @@ retry:
                                                HA_TRY_READ_ONLY),
                                        (READ_KEYINFO | COMPUTE_TYPES |
                                         EXTRA_RECORD),
-                                       thd->open_options, entry, FALSE)))
+                                       thd->open_options |
+                                       (thd->version == 0 &&
+                                        (flags & MYSQL_LOCK_IGNORE_FLUSH) ?
+                                        HA_OPEN_FOR_STATUS : 0),
+                                       entry, FALSE)))
   {
     if (error == 7)                             // Table def changed
     {
@@ -5219,9 +5250,10 @@ int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived)
     close_tables_for_reopen(thd, &tables);
   }
   if (derived &&
-      (mysql_handle_derived(thd->lex, &mysql_derived_prepare) ||
-       (thd->fill_derived_tables() &&
-        mysql_handle_derived(thd->lex, &mysql_derived_filling))))
+      (mysql_handle_derived(thd->lex, DT_INIT)))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  if (thd->prepare_derived_at_open && derived &&
+      (mysql_handle_derived(thd->lex, DT_PREPARE)))
     DBUG_RETURN(TRUE); /* purecov: inspected */
   DBUG_RETURN(0);
 }
@@ -5237,6 +5269,7 @@ int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived)
     flags       - bitmap of flags to modify how the tables will be open:
                   MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has
                   done a flush on it.
+    dt_phases   - set of flags to pass to the mysql_handle_derived
 
   RETURN
     FALSE - ok
@@ -5247,13 +5280,14 @@ int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived)
     data from the tables.
 */
 
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags)
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+                                    uint dt_phases)
 {
   uint counter;
   DBUG_ENTER("open_normal_and_derived_tables");
   DBUG_ASSERT(!thd->fill_derived_tables());
   if (open_tables(thd, &tables, &counter, flags) ||
-      mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+      mysql_handle_derived(thd->lex, dt_phases))
     DBUG_RETURN(TRUE); /* purecov: inspected */
   DBUG_RETURN(0);
 }
@@ -5925,9 +5959,7 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
   Field_iterator_view field_it;
   field_it.set(table_list);
   Query_arena *arena= 0, backup;  
-  
-  DBUG_ASSERT(table_list->schema_table_reformed ||
-              (ref != 0 && table_list->view != 0));
+
   for (; !field_it.end_of_fields(); field_it.next())
   {
     if (!my_strcasecmp(system_charset_info, field_it.name(), name))
@@ -5946,6 +5978,8 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
       
       if (!item)
         DBUG_RETURN(0);
+      if (!ref)
+        DBUG_RETURN((Field*) view_ref_found);
       /*
        *ref != NULL means that *ref contains the item that we need to
        replace. If the item was aliased by the user, set the alias to
@@ -5954,10 +5988,15 @@ find_field_in_view(THD *thd, TABLE_LIST *table_list,
       */
       if (*ref && !(*ref)->is_autogenerated_name)
       {
+        if (register_tree_change &&
+            thd->stmt_arena->is_stmt_prepare_or_first_stmt_execute())
+          arena= thd->activate_stmt_arena_if_needed(&backup);
         item->set_name((*ref)->name, (*ref)->name_length,
                        system_charset_info);
         item->real_item()->set_name((*ref)->name, (*ref)->name_length,
                        system_charset_info);
+        if (arena)
+          thd->restore_active_arena(arena, &backup);
       }
       if (register_tree_change)
         thd->change_item_tree(ref, item);
@@ -6350,6 +6389,8 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list,
         Field *field_to_set= NULL;
         if (fld == view_ref_found)
         {
+          if (!ref)
+            DBUG_RETURN(fld);
           Item *it= (*ref)->real_item();
           if (it->type() == Item::FIELD_ITEM)
             field_to_set= ((Item_field*)it)->field;
@@ -6357,6 +6398,8 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list,
           {
             if (thd->mark_used_columns == MARK_COLUMNS_READ)
               it->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+            else
+              it->walk(&Item::register_field_in_write_map, 1, (uchar *) 0);
           }
         }
         else
@@ -6496,8 +6539,11 @@ find_field_in_tables(THD *thd, Item_ident *item,
       find_field_in_table even in the case of information schema tables
       when table_ref->field_translation != NULL.
       */
-    if (table_ref->table && !table_ref->view)
+    if (table_ref->table && !table_ref->view &&
+        (!table_ref->is_merged_derived() ||
+         (!table_ref->is_multitable() && table_ref->merged_for_insert)))
     {
+
       found= find_field_in_table(thd, table_ref->table, name, length,
                                  TRUE, &(item->cached_field_index));
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -6522,21 +6568,40 @@ find_field_in_tables(THD *thd, Item_ident *item,
         Only views fields should be marked as dependent, not an underlying
         fields.
       */
-      if (!table_ref->belong_to_view)
+      if (!table_ref->belong_to_view &&
+          !table_ref->belong_to_derived)
       {
         SELECT_LEX *current_sel= thd->lex->current_select;
         SELECT_LEX *last_select= table_ref->select_lex;
+        bool all_merged= TRUE;
+        for (SELECT_LEX *sl= current_sel; sl && sl!=last_select;
+             sl=sl->outer_select())
+        {
+          Item *subs= sl->master_unit()->item;
+          if (subs->type() == Item::SUBSELECT_ITEM && 
+              ((Item_subselect*)subs)->substype() == Item_subselect::IN_SUBS &&
+              ((Item_in_subselect*)subs)->test_strategy(SUBS_SEMI_JOIN))
+          {
+            continue;
+          }
+          all_merged= FALSE;
+          break;
+        }
         /*
           If the field was an outer referencee, mark all selects using this
           sub query as dependent on the outer query
         */
-        if (current_sel != last_select)
+        if (!all_merged && current_sel != last_select)
+        {
           mark_select_range_as_dependent(thd, last_select, current_sel,
                                          found, *ref, item);
+        }
       }
       return found;
     }
   }
+  else
+    item->can_be_depended= TRUE;
 
   if (db && lower_case_table_names)
   {
@@ -7108,6 +7173,10 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2,
     */
     if (nj_col_2 && (!using_fields ||is_using_column_1))
     {
+      /*
+        Create non-fixed fully qualified field and let fix_fields to
+        resolve it.
+      */
       Item *item_1=   nj_col_1->create_item(thd);
       Item *item_2=   nj_col_2->create_item(thd);
       Field *field_1= nj_col_1->field();
@@ -7561,7 +7630,11 @@ static bool setup_natural_join_row_types(THD *thd,
   for (left_neighbor= table_ref_it++; left_neighbor ; )
   {
     table_ref= left_neighbor;
-    left_neighbor= table_ref_it++;
+    do
+    {
+      left_neighbor= table_ref_it++;
+    }
+    while (left_neighbor && left_neighbor->sj_subq_pred);
     /* 
       Do not redo work if already done:
       1) for stored procedures,
@@ -7605,13 +7678,11 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list,
 	       uint wild_num)
 {
-  if (!wild_num)
-    return(0);
-
   Item *item;
   List_iterator<Item> it(fields);
   Query_arena *arena, backup;
   DBUG_ENTER("setup_wild");
+  DBUG_ASSERT(wild_num != 0);
 
   /*
     Don't use arena if we are not in prepared statements or stored procedures
@@ -7700,6 +7771,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
   List_iterator<Item> it(fields);
   bool save_is_item_list_lookup;
   DBUG_ENTER("setup_fields");
+  DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array));
 
   thd->mark_used_columns= mark_used_columns;
   DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
@@ -7777,27 +7849,36 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
     make_leaves_list()
     list    pointer to pointer on list first element
     tables  table list
+    full_table_list whether to include tables from mergeable derived table/view.
+                    we need them for checks for INSERT/UPDATE statements only.
 
   RETURN pointer on pointer to next_leaf of last element
 */
 
-TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+                      bool full_table_list, TABLE_LIST *boundary)
+ 
 {
   for (TABLE_LIST *table= tables; table; table= table->next_local)
   {
-    if (table->merge_underlying_list)
+    if (table == boundary)
+      full_table_list= !full_table_list;
+    if (full_table_list && table->is_merged_derived())
     {
-      DBUG_ASSERT(table->view &&
-                  table->effective_algorithm == VIEW_ALGORITHM_MERGE);
-      list= make_leaves_list(list, table->merge_underlying_list);
+      SELECT_LEX *select_lex= table->get_single_select();
+      /*
+        It's safe to use select_lex->leaf_tables because all derived
+        tables/views were already prepared and has their leaf_tables
+        set properly.
+      */
+      make_leaves_list(list, select_lex->get_table_list(),
+      full_table_list, boundary);
     }
     else
     {
-      *list= table;
-      list= &table->next_leaf;
+      list.push_back(table);
     }
   }
-  return list;
 }
 
 /*
@@ -7812,6 +7893,7 @@ TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
     leaves        List of join table leaves list (select_lex->leaf_tables)
     refresh       It is onle refresh for subquery
     select_insert It is SELECT ... INSERT command
+    full_table_list a parameter to pass to the make_leaves_list function
 
   NOTE
     Check also that the 'used keys' and 'ignored keys' exists and set up the
@@ -7830,9 +7912,13 @@ TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
 
 bool setup_tables(THD *thd, Name_resolution_context *context,
                   List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
-                  TABLE_LIST **leaves, bool select_insert)
+                  List<TABLE_LIST> &leaves, bool select_insert,
+                  bool full_table_list)
 {
   uint tablenr= 0;
+  List_iterator<TABLE_LIST> ti(leaves);
+  TABLE_LIST *table_list;
+
   DBUG_ENTER("setup_tables");
 
   DBUG_ASSERT ((select_insert && !tables->next_name_resolution_table) || !tables || 
@@ -7844,40 +7930,86 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
   TABLE_LIST *first_select_table= (select_insert ?
                                    tables->next_local:
                                    0);
-  if (!(*leaves))
-    make_leaves_list(leaves, tables);
-
-  TABLE_LIST *table_list;
-  for (table_list= *leaves;
-       table_list;
-       table_list= table_list->next_leaf, tablenr++)
+  SELECT_LEX *select_lex= select_insert ? &thd->lex->select_lex :
+                                          thd->lex->current_select;
+  if (select_lex->first_cond_optimization)
   {
-    TABLE *table= table_list->table;
-    table->pos_in_table_list= table_list;
-    if (first_select_table &&
-        table_list->top_table() == first_select_table)
+    leaves.empty();
+    if (!select_lex->is_prep_leaf_list_saved)
+    {
+      make_leaves_list(leaves, tables, full_table_list, first_select_table);
+      select_lex->leaf_tables_exec.empty();
+    }
+    else
     {
-      /* new counting for SELECT of INSERT ... SELECT command */
-      first_select_table= 0;
-      tablenr= 0;
+      List_iterator_fast <TABLE_LIST> ti(select_lex->leaf_tables_prep);
+      while ((table_list= ti++))
+        leaves.push_back(table_list);
     }
-    setup_table_map(table, table_list, tablenr);
-    if (table_list->process_index_hints(table))
+      
+    while ((table_list= ti++))
+    {
+      TABLE *table= table_list->table;
+      if (table)
+        table->pos_in_table_list= table_list;
+      if (first_select_table &&
+          table_list->top_table() == first_select_table)
+      {
+        /* new counting for SELECT of INSERT ... SELECT command */
+        first_select_table= 0;
+        thd->lex->select_lex.insert_tables= tablenr;
+        tablenr= 0;
+      }
+      if(table_list->jtbm_subselect)
+      {
+        table_list->jtbm_table_no= tablenr;
+      }
+      else if (table)
+      {
+        table->pos_in_table_list= table_list;
+        setup_table_map(table, table_list, tablenr);
+
+        if (table_list->process_index_hints(table))
+          DBUG_RETURN(1);
+      }
+      tablenr++;
+    }
+    if (tablenr > MAX_TABLES)
+    {
+      my_error(ER_TOO_MANY_TABLES,MYF(0), (int) MAX_TABLES);
       DBUG_RETURN(1);
+    }
   }
-  if (tablenr > MAX_TABLES)
-  {
-    my_error(ER_TOO_MANY_TABLES, MYF(0), static_cast<int>(MAX_TABLES));
-    DBUG_RETURN(1);
-  }
+  else
+  { 
+    List_iterator_fast <TABLE_LIST> ti(select_lex->leaf_tables_exec);
+    select_lex->leaf_tables.empty();
+    while ((table_list= ti++))
+    {
+      if(table_list->jtbm_subselect)
+      {
+        table_list->jtbm_table_no= table_list->tablenr_exec;
+      }
+      else
+      {
+        table_list->table->tablenr= table_list->tablenr_exec;
+        table_list->table->map= table_list->map_exec;
+        table_list->table->maybe_null= table_list->maybe_null_exec;
+        table_list->table->pos_in_table_list= table_list;
+        if (table_list->process_index_hints(table_list->table))
+          DBUG_RETURN(1);
+      }
+      select_lex->leaf_tables.push_back(table_list);
+    }
+  }    
+
   for (table_list= tables;
        table_list;
        table_list= table_list->next_local)
   {
     if (table_list->merge_underlying_list)
     {
-      DBUG_ASSERT(table_list->view &&
-                  table_list->effective_algorithm == VIEW_ALGORITHM_MERGE);
+      DBUG_ASSERT(table_list->is_merged_derived());
       Query_arena *arena= thd->stmt_arena, backup;
       bool res;
       if (arena->is_conventional())
@@ -7890,6 +8022,17 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
       if (res)
         DBUG_RETURN(1);
     }
+
+    if (table_list->jtbm_subselect)
+    {
+      Item *item= table_list->jtbm_subselect->optimizer;
+      if (table_list->jtbm_subselect->optimizer->fix_fields(thd, &item))
+      {
+        my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); /* psergey-todo: WHY ER_TOO_MANY_TABLES ???*/
+        DBUG_RETURN(1);
+      }
+      DBUG_ASSERT(item == table_list->jtbm_subselect->optimizer);
+    }
   }
 
   /* Precompute and store the row types of NATURAL/USING joins. */
@@ -7904,7 +8047,7 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
   prepare tables and check access for the view tables
 
   SYNOPSIS
-    setup_tables_and_check_view_access()
+    setup_tables_and_check_access()
     thd		  Thread handler
     context       name resolution contest to setup table list there
     from_clause   Top-level list of table references in the FROM clause
@@ -7914,6 +8057,7 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
     refresh       It is onle refresh for subquery
     select_insert It is SELECT ... INSERT command
     want_access   what access is needed
+    full_table_list a parameter to pass to the make_leaves_list function
 
   NOTE
     a wrapper for check_tables that will also check the resulting
@@ -7927,33 +8071,33 @@ bool setup_tables_and_check_access(THD *thd,
                                    Name_resolution_context *context,
                                    List<TABLE_LIST> *from_clause,
                                    TABLE_LIST *tables,
-                                   TABLE_LIST **leaves,
+                                   List<TABLE_LIST> &leaves,
                                    bool select_insert,
                                    ulong want_access_first,
-                                   ulong want_access)
+                                   ulong want_access,
+                                   bool full_table_list)
 {
-  TABLE_LIST *leaves_tmp= NULL;
   bool first_table= true;
+  DBUG_ENTER("setup_tables_and_check_access");
 
   if (setup_tables(thd, context, from_clause, tables,
-                   &leaves_tmp, select_insert))
-    return TRUE;
-
-  if (leaves)
-    *leaves= leaves_tmp;
+                   leaves, select_insert, full_table_list))
+    DBUG_RETURN(TRUE);
 
-  for (; leaves_tmp; leaves_tmp= leaves_tmp->next_leaf)
+  List_iterator<TABLE_LIST> ti(leaves);
+  TABLE_LIST *table_list;
+  while((table_list= ti++))
   {
-    if (leaves_tmp->belong_to_view && 
+    if (table_list->belong_to_view && !table_list->view && 
         check_single_table_access(thd, first_table ? want_access_first :
-                                  want_access, leaves_tmp, FALSE))
+                                  want_access, table_list, FALSE))
     {
       tables->hide_view_error(thd);
-      return TRUE;
+      DBUG_RETURN(TRUE);
     }
     first_table= 0;
   }
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -8089,8 +8233,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
        information_schema table, or a nested table reference. See the comment
        for TABLE_LIST.
     */
-    if (!((table && !tables->view && (table->grant.privilege & SELECT_ACL)) ||
-          (tables->view && (tables->grant.privilege & SELECT_ACL))) &&
+    if (!((table && tables->is_non_derived() &&
+          (table->grant.privilege & SELECT_ACL)) ||
+	  ((!tables->is_non_derived() && 
+	    (tables->grant.privilege & SELECT_ACL)))) &&
         !any_privileges)
     {
       field_iterator.set(tables);
@@ -8120,7 +8266,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 
       if (!(item= field_iterator.create_item(thd)))
         DBUG_RETURN(TRUE);
-      DBUG_ASSERT(item->fixed);
+//      DBUG_ASSERT(item->fixed);
       /* cache the table for the Item_fields inserted by expanding stars */
       if (item->type() == Item::FIELD_ITEM && tables->cacheable_table)
         ((Item_field *)item)->cached_table= tables;
@@ -8232,6 +8378,29 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 }
 
 
+/**
+  Wrap Item_ident
+
+  @param thd             thread handle
+  @param conds           pointer to the condition which should be wrapped
+*/
+
+void wrap_ident(THD *thd, Item **conds)
+{
+  Item_direct_ref_to_ident *wrapper;
+  DBUG_ASSERT((*conds)->type() == Item::FIELD_ITEM || (*conds)->type() == Item::REF_ITEM);
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+  if ((wrapper= new Item_direct_ref_to_ident((Item_ident *)(*conds))))
+    (*conds)= (Item*) wrapper;
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+}
+
+
 /*
   Fix all conditions and outer join expressions.
 
@@ -8250,12 +8419,13 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
     FALSE if all is OK
 */
 
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
                 COND **conds)
 {
   SELECT_LEX *select_lex= thd->lex->current_select;
   Query_arena *arena= thd->stmt_arena, backup;
   TABLE_LIST *table= NULL;	// For HP compilers
+  List_iterator<TABLE_LIST> ti(leaves);
   /*
     it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
     which belong to LEX, i.e. most up SELECT) will be updated by
@@ -8267,9 +8437,15 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
   bool it_is_update= (select_lex == &thd->lex->select_lex) &&
     thd->lex->which_check_option_applicable();
   bool save_is_item_list_lookup= select_lex->is_item_list_lookup;
-  select_lex->is_item_list_lookup= 0;
+  TABLE_LIST *derived= select_lex->master_unit()->derived;
   DBUG_ENTER("setup_conds");
 
+  /* Do not fix conditions for the derived tables that have been merged */
+  if (derived && derived->merged)
+    DBUG_RETURN(0);
+
+  select_lex->is_item_list_lookup= 0;
+
   if (select_lex->conds_processed_with_permanent_arena ||
       arena->is_conventional())
     arena= 0;                                   // For easier test
@@ -8282,13 +8458,27 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
 
   for (table= tables; table; table= table->next_local)
   {
-    if (table->prepare_where(thd, conds, FALSE))
+    if (select_lex == &thd->lex->select_lex &&
+        select_lex->first_cond_optimization &&
+        table->merged_for_insert &&
+        table->prepare_where(thd, conds, FALSE))
       goto err_no_arena;
   }
 
   if (*conds)
   {
     thd->where="where clause";
+    DBUG_EXECUTE("where",
+                 print_where(*conds,
+                             "WHERE in setup_conds",
+                             QT_ORDINARY););
+    /*
+      Wrap alone field in WHERE clause in case it will be outer field of subquery
+      which need persistent pointer on it, but conds could be changed by optimizer
+    */
+    if ((*conds)->type() == Item::FIELD_ITEM && !derived)
+      wrap_ident(thd, conds);
+    (*conds)->mark_as_condition_AND_part(NO_JOIN_NEST);
     if ((!(*conds)->fixed && (*conds)->fix_fields(thd, conds)) ||
 	(*conds)->check_cols(1))
       goto err_no_arena;
@@ -8298,7 +8488,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
     Apply fix_fields() to all ON clauses at all levels of nesting,
     including the ones inside view definitions.
   */
-  for (table= leaves; table; table= table->next_leaf)
+  while ((table= ti++))
   {
     TABLE_LIST *embedded; /* The table at the current level of nesting. */
     TABLE_LIST *embedding= table; /* The parent nested table reference. */
@@ -8309,12 +8499,24 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       {
         /* Make a join an a expression */
         thd->where="on clause";
+        embedded->on_expr->mark_as_condition_AND_part(embedded);
         if ((!embedded->on_expr->fixed &&
              embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
 	    embedded->on_expr->check_cols(1))
 	  goto err_no_arena;
         select_lex->cond_count++;
       }
+      /*
+        If it's a semi-join nest, fix its "left expression", as it is used by
+        the SJ-Materialization
+      */
+      if (embedded->sj_subq_pred)
+      {
+        Item **left_expr= &embedded->sj_subq_pred->left_expr;
+        if (!(*left_expr)->fixed && (*left_expr)->fix_fields(thd, left_expr))
+          goto err_no_arena;
+      }
+
       embedding= embedded->embedding;
     }
     while (embedding &&
@@ -8387,9 +8589,11 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
   Item *value, *fld;
   Item_field *field;
   TABLE *table= 0, *vcol_table= 0;
-  bool abort_on_warning_saved= thd->abort_on_warning;
+  bool save_abort_on_warning= thd->abort_on_warning;
+  bool save_no_errors= thd->no_errors;
   DBUG_ENTER("fill_record");
 
+  thd->no_errors= ignore_errors;
   /*
     Reset the table->auto_increment_field_not_null as it is valid for
     only one row.
@@ -8429,14 +8633,12 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
         value->type() != Item::NULL_ITEM &&
         table->s->table_category != TABLE_CATEGORY_TEMPORARY)
     {
-      thd->abort_on_warning= FALSE;
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                           ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
                           ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
                           rfield->field_name, table->s->table_name.str);
-      thd->abort_on_warning= abort_on_warning_saved;
     }
-    if ((value->save_in_field(rfield, 0) < 0) && !ignore_errors)
+    if ((value->save_in_field(rfield, 0)) < 0 && !ignore_errors)
     {
       my_message(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR), MYF(0));
       goto err;
@@ -8454,10 +8656,13 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
         goto err;
     }
   }
-  thd->abort_on_warning= abort_on_warning_saved;
+  thd->abort_on_warning= save_abort_on_warning;
+  thd->no_errors=        save_no_errors;
   DBUG_RETURN(thd->is_error());
+
 err:
-  thd->abort_on_warning= abort_on_warning_saved;
+  thd->abort_on_warning= save_abort_on_warning;
+  thd->no_errors=        save_no_errors;
   if (table)
     table->auto_increment_field_not_null= FALSE;
   DBUG_RETURN(TRUE);
@@ -8530,6 +8735,7 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
     ptr           pointer on pointer to record
     values        list of fields
     ignore_errors TRUE if we should ignore errors
+    use_value     forces usage of value of the items instead of result
 
   NOTE
     fill_record() may set table->auto_increment_field_not_null and a
@@ -8542,7 +8748,8 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
 */
 
 bool
-fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors)
+fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors,
+            bool use_value)
 {
   List_iterator_fast<Item> v(values);
   List<TABLE> tbl_list;
@@ -8582,15 +8789,17 @@ fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors)
         value->type() != Item::NULL_ITEM &&
         table->s->table_category != TABLE_CATEGORY_TEMPORARY)
     {
-      thd->abort_on_warning= FALSE;
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                           ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
                           ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
                           field->field_name, table->s->table_name.str);
-      thd->abort_on_warning= abort_on_warning_saved;
     }
-    if (value->save_in_field(field, 0) < 0)
-      goto err;
+
+    if (use_value)
+      value->save_val(field);
+    else
+      if (value->save_in_field(field, 0) < 0)
+        goto err;
   }
   /* Update virtual fields*/
   thd->abort_on_warning= FALSE;
@@ -8636,7 +8845,7 @@ fill_record_n_invoke_before_triggers(THD *thd, Field **ptr,
                                      enum trg_event_type event)
 {
   bool result;
-  result= (fill_record(thd, ptr, values, ignore_errors) ||
+  result= (fill_record(thd, ptr, values, ignore_errors, FALSE) ||
            (triggers && triggers->process_triggers(thd, event,
                                                    TRG_ACTION_BEFORE, TRUE)));
   /*
@@ -8851,7 +9060,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name,
         {
           if (!in_use->killed)
           {
-            in_use->killed= THD::KILL_SYSTEM_THREAD;
+            in_use->killed= KILL_SYSTEM_THREAD;
             pthread_mutex_lock(&in_use->mysys_var->mutex);
             if (in_use->mysys_var->current_cond)
             {
@@ -9185,7 +9394,7 @@ void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table
       if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) &&
           ! in_use->killed)
       {
-        in_use->killed= THD::KILL_SYSTEM_THREAD;
+        in_use->killed= KILL_SYSTEM_THREAD;
         pthread_mutex_lock(&in_use->mysys_var->mutex);
         if (in_use->mysys_var->current_cond)
         {
@@ -9417,13 +9626,12 @@ open_performance_schema_table(THD *thd, TABLE_LIST *one_table,
   else
   {
     /*
-      If error in mysql_lock_tables(), open_ltable doesn't close the
-      table. Thread kill during mysql_lock_tables() is such error. But
-      open tables cannot be accepted when restoring the open tables
-      state.
+      This can happen during a thd->kill or while we are trying to log
+      data for a stored procedure/trigger and someone causes the table
+      to be flushed (for example by creating a new trigger for the
+      table)
     */
-    if (thd->killed)
-      close_thread_tables(thd);
+    close_thread_tables(thd);
     thd->restore_backup_open_tables_state(backup);
   }
 
@@ -9483,6 +9691,61 @@ void close_performance_schema_table(THD *thd, Open_tables_state *backup)
   thd->restore_backup_open_tables_state(backup);
 }
 
+
+/**
+  @brief
+  Remove 'fixed' flag from items in a list
+
+  @param items list of items to un-fix
+
+  @details
+  This function sets to 0 the 'fixed' flag for items in the 'items' list.
+  It's needed to force correct marking of views' fields for INSERT/UPDATE
+  statements.
+*/
+
+void unfix_fields(List<Item> &fields)
+{
+  List_iterator<Item> li(fields);
+  Item *item;
+  while ((item= li++))
+    item->fixed= 0;
+}
+
+
+/**
+  Check result of dynamic column function and issue error if it is needed
+
+  @param rc              The result code of dynamic column function
+
+  @return the result code which was get as an argument\
+*/
+
+int dynamic_column_error_message(enum_dyncol_func_result rc)
+{
+  switch (rc) {
+  case ER_DYNCOL_YES:
+  case ER_DYNCOL_OK:
+    break; // it is not an error
+  case ER_DYNCOL_FORMAT:
+    my_error(ER_DYN_COL_WRONG_FORMAT, MYF(0));
+    break;
+  case ER_DYNCOL_LIMIT:
+    my_error(ER_DYN_COL_IMPLEMENTATION_LIMIT, MYF(0));
+    break;
+  case ER_DYNCOL_RESOURCE:
+    my_error(ER_OUT_OF_RESOURCES, MYF(0));
+    break;
+  case ER_DYNCOL_DATA:
+    my_error(ER_DYN_COL_DATA, MYF(0));
+    break;
+  case ER_DYNCOL_UNKNOWN_CHARSET:
+    my_error(ER_DYN_COL_WRONG_CHARSET, MYF(0));
+    break;
+  }
+  return rc;
+}
+
 /**
   @} (end of group Data_Dictionary)
 */
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index 1624a9639a1..58683a5f1e9 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -176,7 +176,7 @@ void mysql_client_binlog_statement(THD* thd)
       */
       if (!have_fd_event)
       {
-        int type = bufptr[EVENT_TYPE_OFFSET];
+        int type = (uchar)bufptr[EVENT_TYPE_OFFSET];
         if (type == FORMAT_DESCRIPTION_EVENT || type == START_EVENT_V3)
           have_fd_event= TRUE;
         else
@@ -188,7 +188,8 @@ void mysql_client_binlog_statement(THD* thd)
       }
 
       ev= Log_event::read_log_event(bufptr, event_len, &error,
-                                    rli->relay_log.description_event_for_exec);
+                                    rli->relay_log.description_event_for_exec,
+                                    0);
 
       DBUG_PRINT("info",("binlog base64 err=%s", error));
       if (!ev)
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index 5385acc934f..25725ff6c42 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -91,6 +91,10 @@ public:
     DBUG_ASSERT(sizeof(buffer) >= 4);
     return (ulonglong) uint4korr(buffer);
   }
+  uint bits_set()
+  {
+    return bitmap_bits_set(&map);
+  }
 };
 
 /* An iterator to quickly walk over bits in unlonglong bitmap. */
@@ -169,5 +173,16 @@ public:
   public:
     Iterator(Bitmap<64> &bmp) : Table_map_iterator(bmp.map) {}
   };
+  uint bits_set()
+  {
+    //TODO: use my_count_bits()
+    uint res= 0, i= 0;
+    for (; i < 64 ; i++)
+    {
+      if (map & ((ulonglong)1<<i))
+        res++;
+    }
+    return res;
+  }
 };
 
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index de9447fa356..fbe42c85711 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -288,6 +288,7 @@ functions:
          if (and only if) this query has a registered result set writer
          (thd->net.query_cache_query).
  4. Query_cache::invalidate
+    Query_cache::invalidate_locked_for_write
        - Called from various places to invalidate query cache based on data-
          base, table and myisam file name. During an on going invalidation
          the query cache is temporarily disabled.
@@ -336,6 +337,8 @@ TODO list:
 #include "../storage/myisammrg/ha_myisammrg.h"
 #include "../storage/myisammrg/myrg_def.h"
 
+const uchar *query_state_map;
+
 #ifdef EMBEDDED_LIBRARY
 #include "emb_qcache.h"
 #endif
@@ -390,7 +393,7 @@ static void debug_wait_for_kill(const char *info)
   sql_print_information("%s", info);
   while(!thd->killed)
     my_sleep(1000);
-  thd->killed= THD::NOT_KILLED;
+  thd->killed= NOT_KILLED;
   /*
     Remove the set debug variable, to ensure we don't get stuck on it again
     This is needed as for MyISAM, invalidate_table() may be called twice
@@ -424,6 +427,141 @@ TYPELIB query_cache_type_typelib=
 };
 
 
+/*
+  Check if character is a white space.
+*/
+
+inline bool is_white_space(char c)
+{
+  return (query_state_map[(uint) ((uchar) c)] == MY_LEX_SKIP);
+}
+
+
+/**
+  Generate a query_string without query comments or duplicated space
+
+  @param new_query	    New query without 'fluff' is stored here
+  @param query		    Original query
+  @param query_length	    Length of original query
+  @param additional_length  Extra space for query cache we need to allocate
+  			    in new_query buffer.
+
+  Note:
+    If there is no space to allocate new_query, we will put original query
+    into new_query.
+*/
+
+static void make_base_query(String *new_query,
+                            const char *query, size_t query_length,
+                            size_t additional_length)
+{
+  char *buffer;
+  const char *query_end, *last_space;
+
+  /* The following is guaranteed by the query_cache interface */
+  DBUG_ASSERT(query[query_length] == 0);
+  DBUG_ASSERT(!is_white_space(query[0]));
+
+  new_query->length(0);           // Don't copy anything from old buffer
+  if (new_query->realloc(query_length + additional_length))
+  {
+    /*
+      We could not allocate the query.  Use original query for
+      the query cache;  Better than nothing....
+    */
+    new_query->set(query, query_length, system_charset_info);
+    return;
+  }
+
+  buffer= (char*) new_query->ptr();             // Store base query here
+  query_end= query + query_length;
+  last_space= 0;                                // No space found yet
+
+  while (query < query_end)
+  {
+    char current = *(query++);
+    switch (current) {
+    case '\'':
+    case '`':
+    case '"':
+      *(buffer++)= current;                     // copy first quote
+      while (query < query_end)
+      {
+        *(buffer++)= *query;
+        if (*(query++) == current)              // found pair quote
+          break;
+      }
+      continue;                                 // Continue with next symbol
+    case '/':                                   // Start of comment ?
+      /*
+        Comment of format /#!number #/ or /#M!number #/, must be skipped.
+        These may include '"' and other comments, but it should
+        be safe to parse the content as a normal string.
+      */
+      if (query[0] != '*' || query[1] == '!' ||
+          (query[1] == 'M' && query[2] == '!'))
+        break;
+
+      query++;                               // skip "/"
+      while (++query < query_end)
+      {
+        if (query[0] == '*' && query[1] == '/')
+        {
+          query+= 2;
+          goto insert_space;
+        }
+      }
+      continue;                                 // Will end outer loop
+    case '-':
+      if (*query != '-' || !is_white_space(query[1])) // Not a comment
+        break;
+      query++;                 // skip second "-", and go to search of "\n"
+      /* fall through */
+    case '#':
+      while (query < query_end)
+      {
+        if (*(query++) == '\n')
+          goto insert_space;
+      }
+      continue;                                 // Will end outer loop
+    default:
+      if (is_white_space(current))
+        goto insert_space;
+      break;
+    }
+    *(buffer++)= current;
+    continue;
+
+insert_space:
+    if (buffer != last_space)
+    {
+      *(buffer++)= ' ';
+      last_space= buffer;
+    }
+  }
+  if (buffer == last_space)
+    buffer--;                                   // Remove the last space
+  *buffer= 0;                                   // End zero after query
+  new_query->length((size_t) (buffer - new_query->ptr()));
+
+  /* Copy db_length */
+  memcpy(buffer+1, query_end+1, QUERY_CACHE_DB_LENGTH_SIZE);
+}
+
+
+/**
+  Check and change local variable if global one is switched
+
+  @param thd             thread handle
+*/
+
+void inline fix_local_query_cache_mode(THD *thd)
+{
+  if (global_system_variables.query_cache_type == 0)
+    thd->variables.query_cache_type= 0;
+}
+
+
 /**
   Serialize access to the query cache.
   If the lock cannot be granted the thread hangs in a conditional wait which
@@ -433,31 +571,47 @@ TYPELIB query_cache_type_typelib=
   effect by another thread. This enables a quick path in execution to skip waits
   when the outcome is known.
 
-  @param use_timeout TRUE if the lock can abort because of a timeout.
+  @param mode TIMEOUT the lock can abort because of a timeout
+              TRY the lock can abort because it is locked now
+              WAIT wait for lock (default)
 
-  @note use_timeout is optional and default value is FALSE.
+  @note mode is optional and default value is WAIT.
 
   @return
    @retval FALSE An exclusive lock was taken
    @retval TRUE The locking attempt failed
 */
 
-bool Query_cache::try_lock(bool use_timeout)
+bool Query_cache::try_lock(THD *thd, Cache_try_lock_mode mode)
 {
-  bool interrupt= FALSE;
+  bool interrupt= TRUE;
+  const char* old_proc_info;
   DBUG_ENTER("Query_cache::try_lock");
 
+  old_proc_info= thd->proc_info;
+  thd_proc_info(thd,"Waiting on query cache mutex");
+
   pthread_mutex_lock(&structure_guard_mutex);
+  DBUG_EXECUTE_IF("status_wait_query_cache_mutex_sleep", {
+      sleep(5);
+    });
+  if (m_cache_status == DISABLED)
+  {
+    pthread_mutex_unlock(&structure_guard_mutex);
+    thd->proc_info= old_proc_info;
+    DBUG_RETURN(TRUE);
+  }
+  m_requests_in_progress++;
+  fix_local_query_cache_mode(thd);
   while (1)
   {
     if (m_cache_lock_status == Query_cache::UNLOCKED)
     {
       m_cache_lock_status= Query_cache::LOCKED;
 #ifndef DBUG_OFF
-      THD *thd= current_thd;
-      if (thd)
-        m_cache_lock_thread_id= thd->thread_id;
+      m_cache_lock_thread_id= thd->thread_id;
 #endif
+      interrupt= FALSE;
       break;
     }
     else if (m_cache_lock_status == Query_cache::LOCKED_NO_WAIT)
@@ -466,7 +620,6 @@ bool Query_cache::try_lock(bool use_timeout)
         If query cache is protected by a LOCKED_NO_WAIT lock this thread
         should avoid using the query cache as it is being evicted.
       */
-      interrupt= TRUE;
       break;
     }
     else
@@ -476,25 +629,36 @@ bool Query_cache::try_lock(bool use_timeout)
         To prevent send_result_to_client() and query_cache_insert() from
         blocking execution for too long a timeout is put on the lock.
       */
-      if (use_timeout)
+      if (mode == WAIT)
+      {
+        pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+      }
+      else if (mode == TIMEOUT)
       {
         struct timespec waittime;
         set_timespec_nsec(waittime,(ulong)(50000000L));  /* Wait for 50 msec */
         int res= pthread_cond_timedwait(&COND_cache_status_changed,
                                         &structure_guard_mutex,&waittime);
         if (res == ETIMEDOUT)
-        {
-          interrupt= TRUE;
           break;
-        }
       }
       else
       {
-        pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
+        /**
+          If we are here, then mode is == TRY and there was someone else using
+          the query cache. (m_cache_lock_status != Query_cache::UNLOCKED).
+          Signal that we didn't get a lock.
+        */
+        DBUG_ASSERT(m_requests_in_progress > 1);
+        DBUG_ASSERT(mode == TRY);
+        break;
       }
     }
   }
+  if (interrupt)
+    m_requests_in_progress--;
   pthread_mutex_unlock(&structure_guard_mutex);
+  thd->proc_info = old_proc_info;
 
   DBUG_RETURN(interrupt);
 }
@@ -516,10 +680,12 @@ void Query_cache::lock_and_suspend(void)
   DBUG_ENTER("Query_cache::lock_and_suspend");
 
   pthread_mutex_lock(&structure_guard_mutex);
+  m_requests_in_progress++;
   while (m_cache_lock_status != Query_cache::UNLOCKED)
     pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
   m_cache_lock_status= Query_cache::LOCKED_NO_WAIT;
 #ifndef DBUG_OFF
+  /* Here thd may not be set during shutdown */
   THD *thd= current_thd;
   if (thd)
     m_cache_lock_thread_id= thd->thread_id;
@@ -539,18 +705,18 @@ void Query_cache::lock_and_suspend(void)
   It is used by all methods which invalidates one or more tables.
  */
 
-void Query_cache::lock(void)
+void Query_cache::lock(THD *thd)
 {
   DBUG_ENTER("Query_cache::lock");
 
   pthread_mutex_lock(&structure_guard_mutex);
+  m_requests_in_progress++;
+  fix_local_query_cache_mode(thd);
   while (m_cache_lock_status != Query_cache::UNLOCKED)
     pthread_cond_wait(&COND_cache_status_changed, &structure_guard_mutex);
   m_cache_lock_status= Query_cache::LOCKED;
 #ifndef DBUG_OFF
-  THD *thd= current_thd;
-  if (thd)
-    m_cache_lock_thread_id= thd->thread_id;
+  m_cache_lock_thread_id= thd->thread_id;
 #endif
   pthread_mutex_unlock(&structure_guard_mutex);
 
@@ -567,6 +733,7 @@ void Query_cache::unlock(void)
   DBUG_ENTER("Query_cache::unlock");
   pthread_mutex_lock(&structure_guard_mutex);
 #ifndef DBUG_OFF
+  /* Thd may not be set in resize() at mysqld start */
   THD *thd= current_thd;
   if (thd)
     DBUG_ASSERT(m_cache_lock_thread_id == thd->thread_id);
@@ -576,6 +743,14 @@ void Query_cache::unlock(void)
   m_cache_lock_status= Query_cache::UNLOCKED;
   DBUG_PRINT("Query_cache",("Sending signal"));
   pthread_cond_signal(&COND_cache_status_changed);
+  DBUG_ASSERT(m_requests_in_progress > 0);
+  m_requests_in_progress--;
+  if (m_requests_in_progress == 0 && m_cache_status == DISABLE_REQUEST)
+  {
+    /* No clients => just free query cache */
+    free_cache();
+    m_cache_status= DISABLED;
+  }
   pthread_mutex_unlock(&structure_guard_mutex);
   DBUG_VOID_RETURN;
 }
@@ -592,25 +767,24 @@ void Query_cache::unlock(void)
    @retval FALSE No directive found.
 */
  
-static bool has_no_cache_directive(char *sql)
+static bool has_no_cache_directive(const char *sql)
 {
-  int i=0;
-  while (sql[i] == ' ')
-    ++i;
+  while (is_white_space(*sql))
+    sql++;
     
-  if (my_toupper(system_charset_info, sql[i])    == 'S' &&
-      my_toupper(system_charset_info, sql[i+1])  == 'Q' &&
-      my_toupper(system_charset_info, sql[i+2])  == 'L' &&
-      my_toupper(system_charset_info, sql[i+3])  == '_' &&
-      my_toupper(system_charset_info, sql[i+4])  == 'N' &&
-      my_toupper(system_charset_info, sql[i+5])  == 'O' &&
-      my_toupper(system_charset_info, sql[i+6])  == '_' &&
-      my_toupper(system_charset_info, sql[i+7])  == 'C' &&
-      my_toupper(system_charset_info, sql[i+8])  == 'A' &&
-      my_toupper(system_charset_info, sql[i+9])  == 'C' &&
-      my_toupper(system_charset_info, sql[i+10]) == 'H' &&
-      my_toupper(system_charset_info, sql[i+11]) == 'E' &&
-      my_toupper(system_charset_info, sql[i+12]) == ' ')
+  if (my_toupper(system_charset_info, sql[0])  == 'S' &&
+      my_toupper(system_charset_info, sql[1])  == 'Q' &&
+      my_toupper(system_charset_info, sql[2])  == 'L' &&
+      my_toupper(system_charset_info, sql[3])  == '_' &&
+      my_toupper(system_charset_info, sql[4])  == 'N' &&
+      my_toupper(system_charset_info, sql[5])  == 'O' &&
+      my_toupper(system_charset_info, sql[6])  == '_' &&
+      my_toupper(system_charset_info, sql[7])  == 'C' &&
+      my_toupper(system_charset_info, sql[8])  == 'A' &&
+      my_toupper(system_charset_info, sql[9])  == 'C' &&
+      my_toupper(system_charset_info, sql[10]) == 'H' &&
+      my_toupper(system_charset_info, sql[11]) == 'E' &&
+      my_isspace(system_charset_info, sql[12]))
     return TRUE;
   
   return FALSE;       
@@ -868,14 +1042,23 @@ void query_cache_insert(NET *net, const char *packet, ulong length)
 {
   DBUG_ENTER("query_cache_insert");
 
-  /* See the comment on double-check locking usage above. */
   if (net->query_cache_query == 0)
     DBUG_VOID_RETURN;
 
+  DBUG_ASSERT(current_thd);
+
   DBUG_EXECUTE_IF("wait_in_query_cache_insert",
                   debug_wait_for_kill("wait_in_query_cache_insert"); );
 
-  if (query_cache.try_lock())
+  /* First we check if query cache is disable without doing a mutex lock */
+  if(query_cache.is_disabled())
+    DBUG_VOID_RETURN;
+
+  /*
+    Lock the cache with try_lock(). try_lock() will fail if
+    cache was disabled between the above test and lock.
+  */
+  if (query_cache.try_lock(current_thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   Query_cache_block *query_block= (Query_cache_block*)net->query_cache_query;
@@ -933,7 +1116,7 @@ void query_cache_abort(NET *net)
   if (net->query_cache_query == 0)
     DBUG_VOID_RETURN;
 
-  if (query_cache.try_lock())
+  if (query_cache.try_lock(current_thd, Query_cache::WAIT))
   {
     net->query_cache_query = 0;
     DBUG_VOID_RETURN;
@@ -986,7 +1169,7 @@ void query_cache_end_of_result(THD *thd)
                      emb_count_querycache_size(thd));
 #endif
 
-  if (query_cache.try_lock())
+  if (query_cache.try_lock(thd, Query_cache::WAIT))
   {
     thd->net.query_cache_query= 0;
     DBUG_VOID_RETURN;
@@ -1075,6 +1258,7 @@ Query_cache::Query_cache(ulong query_cache_limit_arg,
    query_cache_limit(query_cache_limit_arg),
    queries_in_cache(0), hits(0), inserts(0), refused(0),
    total_blocks(0), lowmem_prunes(0),
+   m_cache_status(OK),
    min_allocation_unit(ALIGN_SIZE(min_allocation_unit_arg)),
    min_result_data_size(ALIGN_SIZE(min_result_data_size_arg)),
    def_query_hash_size(ALIGN_SIZE(def_query_hash_size_arg)),
@@ -1098,6 +1282,13 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
 			query_cache_size_arg));
   DBUG_ASSERT(initialized);
 
+  if (global_system_variables.query_cache_type == 0)
+  {
+    if (query_cache_size_arg != 0)
+      my_error(ER_QUERY_CACHE_IS_DISABLED, MYF(0));
+    DBUG_RETURN(0);
+  }
+
   lock_and_suspend();
 
   /*
@@ -1130,8 +1321,17 @@ ulong Query_cache::resize(ulong query_cache_size_arg)
   query_cache_size= query_cache_size_arg;
   new_query_cache_size= init_cache();
 
+  /*
+    m_cache_status is internal query cache switch so switching it on/off
+    will not be reflected on global_system_variables.query_cache_type
+  */
   if (new_query_cache_size)
+  {
     DBUG_EXECUTE("check_querycache",check_integrity(1););
+    m_cache_status= OK;                         // size > 0 => enable cache
+  }
+  else
+    m_cache_status= DISABLED;                   // size 0 means the cache disabled
 
   unlock();
   DBUG_RETURN(new_query_cache_size);
@@ -1150,6 +1350,9 @@ void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used)
 {
   TABLE_COUNTER_TYPE local_tables;
   ulong tot_length;
+  const char *query;
+  size_t query_length;
+  uint8 tables_type;
   DBUG_ENTER("Query_cache::store_query");
   /*
     Testing 'query_cache_size' without a lock here is safe: the thing
@@ -1159,12 +1362,23 @@ void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used)
 
     See also a note on double-check locking usage above.
   */
-  if (thd->locked_tables || query_cache_size == 0)
+  if (!thd->query_cache_is_applicable || query_cache_size == 0)
+  {
+    DBUG_PRINT("qcache", ("Query cache not ready"));
+    DBUG_VOID_RETURN;
+  }
+  if (thd->lex->sql_command != SQLCOM_SELECT)
+  {
+    DBUG_PRINT("qcache", ("Ignoring not SELECT command"));
     DBUG_VOID_RETURN;
-  uint8 tables_type= 0;
+  }
+
+  /* The following assert fails if we haven't called send_result_to_client */
+  DBUG_ASSERT(thd->base_query.is_alloced() ||
+              thd->base_query.ptr() == thd->query());
 
-  if ((local_tables= is_cacheable(thd, thd->query_length(),
-				  thd->query(), thd->lex, tables_used,
+  tables_type= 0;
+  if ((local_tables= is_cacheable(thd, thd->lex, tables_used,
 				  &tables_type)))
   {
     NET *net= &thd->net;
@@ -1238,10 +1452,10 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       In case the wait time can't be determined there is an upper limit which
       causes try_lock() to abort with a time out.
 
-      The 'TRUE' parameter indicate that the lock is allowed to timeout
+      The 'TIMEOUT' parameter indicate that the lock is allowed to timeout
 
     */
-    if (try_lock(TRUE))
+    if (try_lock(thd, Query_cache::TIMEOUT))
       DBUG_VOID_RETURN;
     if (query_cache_size == 0)
     {
@@ -1257,10 +1471,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_VOID_RETURN;
     }
 
+    query=        thd->base_query.ptr();
+    query_length= thd->base_query.length();
+
     /* Key is query + database + flag */
     if (thd->db_length)
     {
-      memcpy(thd->query() + thd->query_length() + 1 + sizeof(size_t), 
+      memcpy((char*) (query + query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE),
              thd->db, thd->db_length);
       DBUG_PRINT("qcache", ("database: %s  length: %u",
 			    thd->db, (unsigned) thd->db_length)); 
@@ -1269,24 +1486,24 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
     {
       DBUG_PRINT("qcache", ("No active database"));
     }
-    tot_length= thd->query_length() + thd->db_length + 1 +
-      sizeof(size_t) + QUERY_CACHE_FLAGS_SIZE;
+    tot_length= (query_length + thd->db_length + 1 + 
+                 QUERY_CACHE_DB_LENGTH_SIZE + QUERY_CACHE_FLAGS_SIZE);
     /*
       We should only copy structure (don't use it location directly)
       because of alignment issue
     */
-    memcpy((void*) (thd->query() + (tot_length - QUERY_CACHE_FLAGS_SIZE)),
+    memcpy((void*) (query + (tot_length - QUERY_CACHE_FLAGS_SIZE)),
 	   &flags, QUERY_CACHE_FLAGS_SIZE);
 
     /* Check if another thread is processing the same query? */
     Query_cache_block *competitor = (Query_cache_block *)
-      hash_search(&queries, (uchar*) thd->query(), tot_length);
+      hash_search(&queries, (uchar*) query, tot_length);
     DBUG_PRINT("qcache", ("competitor 0x%lx", (ulong) competitor));
     if (competitor == 0)
     {
       /* Query is not in cache and no one is working with it; Store it */
       Query_cache_block *query_block;
-      query_block= write_block_data(tot_length, (uchar*) thd->query(),
+      query_block= write_block_data(tot_length, (uchar*) query,
 				    ALIGN_SIZE(sizeof(Query_cache_query)),
 				    Query_cache_block::QUERY, local_tables);
       if (query_block != 0)
@@ -1343,7 +1560,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_PRINT("qcache", ("Another thread process same query"));
     }
   }
-  else if (thd->lex->sql_command == SQLCOM_SELECT)
+  else
     statistic_increment(refused, &structure_guard_mutex);
 
 end:
@@ -1418,7 +1635,7 @@ send_data_in_chunks(NET *net, const uchar *packet, ulong len)
 */
 
 int
-Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
+Query_cache::send_result_to_client(THD *thd, char *org_sql, uint query_length)
 {
   ulonglong engine_data;
   Query_cache_query *query;
@@ -1429,6 +1646,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
   Query_cache_block_table *block_table, *block_table_end;
   ulong tot_length;
   Query_cache_query_flags flags;
+  const char *sql, *sql_end, *found_brace= 0;
   DBUG_ENTER("Query_cache::send_result_to_client");
 
   /*
@@ -1438,52 +1656,115 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
 
     See also a note on double-check locking usage above.
   */
-  if (thd->locked_tables || thd->variables.query_cache_type == 0 ||
-      query_cache_size == 0)
+  if (is_disabled() || thd->locked_tables ||
+      thd->variables.query_cache_type == 0)
     goto err;
 
+  /*
+    The following can only happen for prepared statements that was found
+    during parsing or later that the query was not cacheable.
+  */
   if (!thd->lex->safe_to_cache_query)
   {
     DBUG_PRINT("qcache", ("SELECT is non-cacheable"));
     goto err;
   }
 
-  {
-    uint i= 0;
-    /*
-      Skip '(' characters in queries like following:
-      (select a from t1) union (select a from t1);
-    */
-    while (sql[i]=='(')
-      i++;
+  DBUG_ASSERT(query_cache_size != 0);           // otherwise cache would be disabled
 
-    /*
-      Test if the query is a SELECT
-      (pre-space is removed in dispatch_command).
+  thd->query_cache_is_applicable= 1;
+  sql= org_sql; sql_end= sql + query_length;
 
-      First '/' looks like comment before command it is not
-      frequently appeared in real life, consequently we can
-      check all such queries, too.
-    */
-    if ((my_toupper(system_charset_info, sql[i])     != 'S' ||
-         my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
-         my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
-        sql[i] != '/')
-    {
-      DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
-      goto err;
-    }
-    
-    if (query_length > 20 && has_no_cache_directive(&sql[i+6]))
+  /*
+    Skip all comments at start of query. The following tests is false for
+    all normal queries.
+  */
+  if (!my_isalpha(system_charset_info, *sql))
+  {
+    while (sql < sql_end)
     {
-      /*
-        We do not increase 'refused' statistics here since it will be done
-        later when the query is parsed.
-      */
-      DBUG_PRINT("qcache", ("The statement has a SQL_NO_CACHE directive"));
-      goto err;
+      char current= *sql;
+      switch (current) {
+      case '/':
+        if (sql[1] != '*')
+          break;
+        sql+= 2;                              // Skip '/*'
+        if (*sql == '!')
+        {
+          /*
+            Found / *!number comment; Skip number to see if sql
+            starts with 'select'
+          */
+          sql++;
+          while (my_isdigit(system_charset_info, *sql))
+            sql++;
+        }
+        else
+        {
+          while (sql++ < sql_end)
+          {
+            if (sql[-1] == '*' && *sql == '/')
+            {
+              sql++;
+              break;
+            }
+          }
+        }
+        continue;
+      case '-':
+        if (sql[1] != '-' || !is_white_space(sql[2])) // Not a comment
+          break;
+        sql++;                               // Skip first '-'
+        /* Fall through */
+      case '#':
+        while (++sql < sql_end)
+        {
+          if (*sql == '\n')
+          {
+            sql++;                            // Skip '\n'
+            break;
+          }
+        }
+        /* Continue with analyzing current symbol */
+        continue;
+      case '\r':
+      case '\n':
+      case '\t':
+      case ' ':
+        sql++;
+        continue;
+      case '(':    // To handle (select a from t1) union (select a from t1);
+        if (!found_brace)
+        {
+          found_brace= sql;
+          sql++;
+          continue;
+        }
+        /* fall trough */
+      default:
+        break;
+      }
+      /* We only come here when we found the first word of the sql */
+      break;
     }
   }
+  if ((my_toupper(system_charset_info, sql[0]) != 'S' ||
+       my_toupper(system_charset_info, sql[1]) != 'E' ||
+       my_toupper(system_charset_info, sql[2]) != 'L'))
+  {
+    DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
+    goto err;
+  }
+
+  if ((sql_end - sql) > 20 && has_no_cache_directive(sql+6))
+  {
+    /*
+      We do not increase 'refused' statistics here since it will be done
+      later when the query is parsed.
+    */
+    DBUG_PRINT("qcache", ("The statement has a SQL_NO_CACHE directive"));
+    goto err;
+  }
   {
     /*
       We have allocated buffer space (in alloc_query) to hold the
@@ -1493,8 +1774,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
       sure the new current database has a name with the same length
       as the previous one.
     */
-    size_t db_len;
-    memcpy((char *) &db_len, (sql + query_length + 1), sizeof(size_t));
+    size_t db_len= uint2korr(sql_end+1);
     if (thd->db_length != db_len)
     {
       /*
@@ -1512,9 +1792,9 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
     disabled or if a full cache flush is in progress, the attempt to
     get the lock is aborted.
 
-    The 'TRUE' parameter indicate that the lock is allowed to timeout
+    The TIMEOUT parameter indicate that the lock is allowed to timeout.
   */
-  if (try_lock(TRUE))
+  if (try_lock(thd, Query_cache::TIMEOUT))
     goto err;
 
   if (query_cache_size == 0)
@@ -1527,15 +1807,31 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
   DBUG_ASSERT(thd->net.query_cache_query == 0);
 
   Query_cache_block *query_block;
+  if (opt_query_cache_strip_comments)
+  {
+    if (found_brace)
+      sql= found_brace;
+    make_base_query(&thd->base_query, sql, (size_t) (sql_end - sql),
+                    thd->db_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE +
+                    QUERY_CACHE_FLAGS_SIZE);
+    sql=          thd->base_query.ptr();
+    query_length= thd->base_query.length();
+  }
+  else
+  {
+    sql= org_sql;
+    thd->base_query.set(sql, query_length, system_charset_info);
+  }
 
-  tot_length= query_length + 1 + sizeof(size_t) + 
-              thd->db_length + QUERY_CACHE_FLAGS_SIZE;
+  tot_length= (query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE +
+               thd->db_length + QUERY_CACHE_FLAGS_SIZE);
 
   if (thd->db_length)
   {
-    memcpy(sql + query_length + 1 + sizeof(size_t), thd->db, thd->db_length);
+    memcpy((uchar*) sql + query_length + 1 + QUERY_CACHE_DB_LENGTH_SIZE,
+           thd->db, thd->db_length);
     DBUG_PRINT("qcache", ("database: '%s'  length: %u",
-			  thd->db, (unsigned)thd->db_length));
+			  thd->db, (uint) thd->db_length));
   }
   else
   {
@@ -1657,14 +1953,15 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       {
         DBUG_PRINT("qcache",
                    ("Temporary table detected: '%s.%s'",
-                    table_list.db, table_list.alias));
+                    tmptable->s->db.str, tmptable->alias.c_ptr()));
         unlock();
         /*
           We should not store result of this query because it contain
           temporary tables => assign following variable to make check
           faster.
         */
-        thd->lex->safe_to_cache_query=0;
+        thd->query_cache_is_applicable= 0;      // Query can't be cached
+        thd->lex->safe_to_cache_query= 0;       // For prepared statements
         BLOCK_UNLOCK_RD(query_block);
         DBUG_RETURN(-1);
       }
@@ -1680,7 +1977,8 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 		 ("probably no SELECT access to %s.%s =>  return to normal processing",
 		  table_list.db, table_list.alias));
       unlock();
-      thd->lex->safe_to_cache_query=0;		// Don't try to cache this
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
+      thd->lex->safe_to_cache_query= 0;         // For prepared statements
       BLOCK_UNLOCK_RD(query_block);
       DBUG_RETURN(-1);				// Privilege error
     }
@@ -1689,7 +1987,8 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
       DBUG_PRINT("qcache", ("Need to check column privileges for %s.%s",
 			    table_list.db, table_list.alias));
       BLOCK_UNLOCK_RD(query_block);
-      thd->lex->safe_to_cache_query= 0;		// Don't try to cache this
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
+      thd->lex->safe_to_cache_query= 0;         // For prepared statements
       goto err_unlock;				// Parse query
     }
 #endif /*!NO_EMBEDDED_ACCESS_CHECKS*/
@@ -1713,7 +2012,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
                                   table->key_length());
       }
       else
-        thd->lex->safe_to_cache_query= 0;       // Don't try to cache this
+      {
+        /*
+          As this can change from call to call, don't reset set
+          thd->lex->safe_to_cache_query
+        */
+        thd->query_cache_is_applicable= 0;      // Query can't be cached
+      }
       goto err_unlock;				// Parse query
     }
     else
@@ -1764,12 +2069,15 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 
 err_unlock:
   unlock();
-err:
   /*
     query_plan_flags doesn't have to be changed here as it contains
     QPLAN_QC_NO by default
   */
   DBUG_RETURN(0);				// Query was not cached
+
+err:
+  thd->query_cache_is_applicable= 0;            // Query can't be cached
+  DBUG_RETURN(0);				// Query was not cached
 }
 
 
@@ -1781,6 +2089,8 @@ void Query_cache::invalidate(THD *thd, TABLE_LIST *tables_used,
 			     my_bool using_transactions)
 {
   DBUG_ENTER("Query_cache::invalidate (table list)");
+  if (is_disabled())
+    DBUG_VOID_RETURN;
 
   using_transactions= using_transactions &&
     (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
@@ -1808,10 +2118,12 @@ void Query_cache::invalidate(THD *thd, TABLE_LIST *tables_used,
   DBUG_VOID_RETURN;
 }
 
-void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used)
+void Query_cache::invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used)
 {
   DBUG_ENTER("Query_cache::invalidate (changed table list)");
-  THD *thd= current_thd;
+  if (is_disabled())
+    DBUG_VOID_RETURN;
+
   for (; tables_used; tables_used= tables_used->next)
   {
     thd_proc_info(thd, "invalidating query cache entries (table list)");
@@ -1834,10 +2146,13 @@ void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used)
   NOTE
     can be used only for opened tables
 */
-void Query_cache::invalidate_locked_for_write(TABLE_LIST *tables_used)
+void Query_cache::invalidate_locked_for_write(THD *thd,
+                                              TABLE_LIST *tables_used)
 {
-  THD *thd= current_thd;
   DBUG_ENTER("Query_cache::invalidate_locked_for_write");
+  if (is_disabled())
+    DBUG_VOID_RETURN;
+
   for (; tables_used; tables_used= tables_used->next_local)
   {
     thd_proc_info(thd, "invalidating query cache entries (table)");
@@ -1858,7 +2173,9 @@ void Query_cache::invalidate(THD *thd, TABLE *table,
 			     my_bool using_transactions)
 {
   DBUG_ENTER("Query_cache::invalidate (table)");
-  
+  if (is_disabled())
+    DBUG_VOID_RETURN;
+
   using_transactions= using_transactions &&
     (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
   if (using_transactions && 
@@ -1875,6 +2192,8 @@ void Query_cache::invalidate(THD *thd, const char *key, uint32  key_length,
 			     my_bool using_transactions)
 {
   DBUG_ENTER("Query_cache::invalidate (key)");
+  if (is_disabled())
+   DBUG_VOID_RETURN;
 
   using_transactions= using_transactions &&
     (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
@@ -1891,18 +2210,19 @@ void Query_cache::invalidate(THD *thd, const char *key, uint32  key_length,
    Remove all cached queries that uses the given database.
 */
 
-void Query_cache::invalidate(char *db)
+void Query_cache::invalidate(THD *thd, char *db)
 {
-  bool restart= FALSE;
+  bool restart;
   DBUG_ENTER("Query_cache::invalidate (db)");
+  if (is_disabled())
+    DBUG_VOID_RETURN;
 
+  restart= FALSE;
   /*
     Lock the query cache and queue all invalidation attempts to avoid
     the risk of a race between invalidation, cache inserts and flushes.
   */
-  lock();
-
-  THD *thd= current_thd;
+  lock(thd);
 
   if (query_cache_size > 0)
   {
@@ -1980,6 +2300,9 @@ void Query_cache::invalidate_by_MyISAM_filename(const char *filename)
 void Query_cache::flush()
 {
   DBUG_ENTER("Query_cache::flush");
+  if (is_disabled())
+    DBUG_VOID_RETURN;
+
   DBUG_EXECUTE_IF("wait_in_query_cache_flush1",
                   debug_wait_for_kill("wait_in_query_cache_flush1"););
 
@@ -2007,7 +2330,7 @@ void Query_cache::flush()
 
 */
 
-void Query_cache::pack(ulong join_limit, uint iteration_limit)
+void Query_cache::pack(THD *thd, ulong join_limit, uint iteration_limit)
 {
   DBUG_ENTER("Query_cache::pack");
 
@@ -2015,7 +2338,7 @@ void Query_cache::pack(ulong join_limit, uint iteration_limit)
     If the entire qc is being invalidated we can bail out early
     instead of waiting for the lock.
   */
-  if (try_lock())
+  if (try_lock(thd, Query_cache::WAIT))
     DBUG_VOID_RETURN;
 
   if (query_cache_size == 0)
@@ -2052,11 +2375,25 @@ void Query_cache::destroy()
     pthread_cond_destroy(&COND_cache_status_changed);
     pthread_mutex_destroy(&structure_guard_mutex);
     initialized = 0;
+    DBUG_ASSERT(m_requests_in_progress == 0);
   }
   DBUG_VOID_RETURN;
 }
 
 
+void Query_cache::disable_query_cache(THD *thd)
+{
+  m_cache_status= DISABLE_REQUEST;
+  /*
+    If there is no requests in progress try to free buffer.
+    try_lock(TRY) will exit immediately if there is lock.
+    unlock() should free block.
+  */
+  if (m_requests_in_progress == 0 && !try_lock(thd, TRY))
+    unlock();
+}
+
+
 /*****************************************************************************
   init/destroy
 *****************************************************************************/
@@ -2067,7 +2404,21 @@ void Query_cache::init()
   pthread_mutex_init(&structure_guard_mutex,MY_MUTEX_INIT_FAST);
   pthread_cond_init(&COND_cache_status_changed, NULL);
   m_cache_lock_status= Query_cache::UNLOCKED;
+  m_cache_status= Query_cache::OK;
+  m_requests_in_progress= 0;
   initialized = 1;
+  query_state_map= default_charset_info->state_map;
+  /*
+    If we explicitly turn off query cache from the command line query
+    cache will be disabled for the reminder of the server life
+    time. This is because we want to avoid locking the QC specific
+    mutex if query cache isn't going to be used.
+  */
+  if (global_system_variables.query_cache_type == 0)
+  {
+    free_cache();
+    m_cache_status= DISABLED;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -2773,7 +3124,7 @@ void Query_cache::invalidate_table(THD *thd, uchar * key, uint32  key_length)
     Lock the query cache and queue all invalidation attempts to avoid
     the risk of a race between invalidation, cache inserts and flushes.
   */
-  lock();
+  lock(thd);
 
   DBUG_EXECUTE_IF("wait_in_query_cache_invalidate2",
                   debug_wait_for_kill("wait_in_query_cache_invalidate2"); );
@@ -3554,7 +3905,8 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
     {
       DBUG_PRINT("qcache", ("Don't cache statement as it refers to "
                             "tables with column privileges."));
-      thd->lex->safe_to_cache_query= 0;
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
+      thd->lex->safe_to_cache_query= 0;         // For prepared statements
       DBUG_RETURN(0);
     }
 #endif
@@ -3567,16 +3919,17 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
     }
     else
     {
-      DBUG_PRINT("qcache", ("table: %s  db:  %s  type: %u",
-                            tables_used->table->s->table_name.str,
-                            tables_used->table->s->db.str,
-                            tables_used->table->s->db_type()->db_type));
       if (tables_used->derived)
       {
+        DBUG_PRINT("qcache", ("table: %s", tables_used->alias));
         table_count--;
         DBUG_PRINT("qcache", ("derived table skipped"));
         continue;
       }
+      DBUG_PRINT("qcache", ("table: %s  db:  %s  type: %u",
+                            tables_used->table->s->table_name.str,
+                            tables_used->table->s->db.str,
+                            tables_used->table->s->db_type()->db_type));
       *tables_type|= tables_used->table->file->table_cache_type();
 
       /*
@@ -3619,13 +3972,13 @@ Query_cache::process_and_count_tables(THD *thd, TABLE_LIST *tables_used,
 */
 
 TABLE_COUNTER_TYPE
-Query_cache::is_cacheable(THD *thd, uint32 query_len, char *query, LEX *lex,
+Query_cache::is_cacheable(THD *thd, LEX *lex,
                           TABLE_LIST *tables_used, uint8 *tables_type)
 {
   TABLE_COUNTER_TYPE table_count;
   DBUG_ENTER("Query_cache::is_cacheable");
 
-  if (query_cache_is_cacheable_query(lex) &&
+  if (thd->lex->safe_to_cache_query &&
       (thd->variables.query_cache_type == 1 ||
        (thd->variables.query_cache_type == 2 && (lex->select_lex.options &
 						 OPTION_TO_QUERY_CACHE))))
@@ -3690,7 +4043,11 @@ my_bool Query_cache::ask_handler_allowance(THD *thd,
     {
       DBUG_PRINT("qcache", ("Handler does not allow caching for %s.%s",
 			    tables_used->db, tables_used->alias));
-      thd->lex->safe_to_cache_query= 0;          // Don't try to cache this
+      /*
+        As this can change from call to call, don't reset set
+        thd->lex->safe_to_cache_query
+      */
+      thd->query_cache_is_applicable= 0;        // Query can't be cached
       DBUG_RETURN(1);
     }
   }
@@ -4148,7 +4505,7 @@ void Query_cache::wreck(uint line, const char *message)
   DBUG_PRINT("warning", ("%5d QUERY CACHE WRECK => DISABLED",line));
   DBUG_PRINT("warning", ("=================================="));
   if (thd)
-    thd->killed= THD::KILL_CONNECTION;
+    thd->killed= KILL_CONNECTION;
   cache_dump();
   /* check_integrity(0); */ /* Can't call it here because of locks */
   bins_dump();
@@ -4783,3 +5140,4 @@ err2:
 #endif /* DBUG_OFF */
 
 #endif /*HAVE_QUERY_CACHE*/
+
diff --git a/sql/sql_cache.h b/sql/sql_cache.h
index a3a91127129..b2e88a3c619 100644
--- a/sql/sql_cache.h
+++ b/sql/sql_cache.h
@@ -279,8 +279,11 @@ private:
   my_thread_id m_cache_lock_thread_id;
 #endif
   pthread_cond_t COND_cache_status_changed;
+  uint m_requests_in_progress;
   enum Cache_lock_status { UNLOCKED, LOCKED_NO_WAIT, LOCKED };
   Cache_lock_status m_cache_lock_status;
+  enum Cache_staus {OK, DISABLE_REQUEST, DISABLED};
+  Cache_staus m_cache_status;
 
   void free_query_internal(Query_cache_block *point);
   void invalidate_table_internal(THD *thd, uchar *key, uint32 key_length);
@@ -295,7 +298,7 @@ protected:
       2. query block (for operation inside query (query block/results))
 
     Thread doing cache flush releases the mutex once it sets
-    m_cache_status flag, so other threads may bypass the cache as
+    m_cache_lock_status flag, so other threads may bypass the cache as
     if it is disabled, not waiting for reset to finish.  The exception
     is other threads that were going to do cache flush---they'll wait
     till the end of a flush operation.
@@ -410,7 +413,7 @@ protected:
     If query is cacheable return number tables in query
     (query without tables not cached)
   */
-  TABLE_COUNTER_TYPE is_cacheable(THD *thd, uint32 query_len, char *query,
+  TABLE_COUNTER_TYPE is_cacheable(THD *thd,
                                   LEX *lex, TABLE_LIST *tables_used,
                                   uint8 *tables_type);
   TABLE_COUNTER_TYPE process_and_count_tables(THD *thd,
@@ -426,6 +429,10 @@ protected:
 	      uint def_query_hash_size = QUERY_CACHE_DEF_QUERY_HASH_SIZE,
 	      uint def_table_hash_size = QUERY_CACHE_DEF_TABLE_HASH_SIZE);
 
+  inline bool is_disabled(void) { return m_cache_status != OK; }
+  inline bool is_disable_in_progress(void)
+  { return m_cache_status == DISABLE_REQUEST; }
+
   /* initialize cache (mutex) */
   void init();
   /* resize query cache (return real query size, 0 if disabled) */
@@ -445,22 +452,23 @@ protected:
   int send_result_to_client(THD *thd, char *query, uint query_length);
 
   /* Remove all queries that uses any of the listed following tables */
-  void invalidate(THD* thd, TABLE_LIST *tables_used,
+  void invalidate(THD *thd, TABLE_LIST *tables_used,
 		  my_bool using_transactions);
-  void invalidate(CHANGED_TABLE_LIST *tables_used);
-  void invalidate_locked_for_write(TABLE_LIST *tables_used);
-  void invalidate(THD* thd, TABLE *table, my_bool using_transactions);
+  void invalidate(THD *thd, CHANGED_TABLE_LIST *tables_used);
+  void invalidate_locked_for_write(THD *thd, TABLE_LIST *tables_used);
+  void invalidate(THD *thd, TABLE *table, my_bool using_transactions);
   void invalidate(THD *thd, const char *key, uint32  key_length,
 		  my_bool using_transactions);
 
   /* Remove all queries that uses any of the tables in following database */
-  void invalidate(char *db);
+  void invalidate(THD *thd, char *db);
 
   /* Remove all queries that uses any of the listed following table */
   void invalidate_by_MyISAM_filename(const char *filename);
 
   void flush();
-  void pack(ulong join_limit = QUERY_CACHE_PACK_LIMIT,
+  void pack(THD *thd,
+            ulong join_limit = QUERY_CACHE_PACK_LIMIT,
 	    uint iteration_limit = QUERY_CACHE_PACK_ITERATION);
 
   void destroy();
@@ -488,10 +496,13 @@ protected:
 			const char *name);
   my_bool in_blocks(Query_cache_block * point);
 
-  bool try_lock(bool use_timeout= FALSE);
-  void lock(void);
+  enum Cache_try_lock_mode {WAIT, TIMEOUT, TRY};
+  bool try_lock(THD *thd, Cache_try_lock_mode mode= WAIT);
+  void lock(THD *thd);
   void lock_and_suspend(void);
   void unlock(void);
+
+  void disable_query_cache(THD *thd);
 };
 
 extern Query_cache query_cache;
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index a31c4728720..9a72b647261 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -44,7 +44,9 @@
 
 #include "sp_rcontext.h"
 #include "sp_cache.h"
+#include "sql_select.h" /* declares create_tmp_table() */
 #include "debug_sync.h"
+#include "sql_handler.h"
 
 /*
   The following is used to initialise Table_ident with a internal
@@ -663,7 +665,7 @@ THD::THD()
               /* statement id */ 0),
    Open_tables_state(refresh_version), rli_fake(0),
    lock_id(&main_lock_id),
-   user_time(0), in_sub_stmt(0),
+   in_sub_stmt(0),
    sql_log_bin_toplevel(false), log_all_errors(0),
    binlog_table_maps(0), binlog_flags(0UL),
    table_map_for_update(0),
@@ -674,6 +676,7 @@ THD::THD()
    stmt_depends_on_first_successful_insert_id_in_prev_stmt(FALSE),
    examined_row_count(0),
    global_read_lock(0),
+   global_disable_checkpoint(0),
    is_fatal_error(0),
    transaction_rollback_request(0),
    is_fatal_sub_stmt_error(0),
@@ -703,8 +706,8 @@ THD::THD()
   catalog= (char*)"std"; // the only catalog we have for now
   main_security_ctx.init();
   security_ctx= &main_security_ctx;
-  locked=some_tables_deleted=no_errors=password= 0;
-  query_start_used= 0;
+  some_tables_deleted=no_errors=password= 0;
+  query_start_used= query_start_sec_part_used= 0;
   count_cuted_fields= CHECK_FIELD_IGNORE;
   killed= NOT_KILLED;
   col_access=0;
@@ -720,9 +723,11 @@ THD::THD()
 #endif
   // Must be reset to handle error with THD's created for init of mysqld
   lex->current_select= 0;
-  start_time=(time_t) 0;
+  user_time.val= start_time= start_time_sec_part= 0;
   start_utime= prior_thr_create_utime= 0L;
   utime_after_lock= 0L;
+  progress.report_to_client= 0;
+  progress.max_counter= 0;
   current_linfo =  0;
   slave_thread = 0;
   bzero(&variables, sizeof(variables));
@@ -762,6 +767,8 @@ THD::THD()
   active_vio = 0;
 #endif
   pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
+  pthread_cond_init(&COND_wakeup_ready, 0);
 
   /* Variables with default values */
   proc_info="login";
@@ -809,6 +816,9 @@ THD::THD()
   m_binlog_invoker= FALSE;
   memset(&invoker_user, 0, sizeof(invoker_user));
   memset(&invoker_host, 0, sizeof(invoker_host));
+  prepare_derived_at_open= FALSE;
+  create_tmp_table_for_derived= FALSE;
+  save_prep_leaf_list= FALSE;
 }
 
 
@@ -982,17 +992,21 @@ void THD::update_stats(void)
 
 void THD::update_all_stats()
 {
-  time_t save_time;
   ulonglong end_cpu_time, end_utime;
   double busy_time, cpu_time;
 
+  /* Reset status variables used by information_schema.processlist */
+  progress.max_counter= 0;
+  progress.max_stage= 0;
+  progress.report= 0;
+
   /* This is set at start of query if opt_userstat_running was set */
   if (!userstat_running)
     return;
 
   end_cpu_time= my_getcputime();
-  end_utime=    my_micro_time_and_time(&save_time);
-  busy_time= (end_utime - start_utime)  / 1000000.0;
+  end_utime=    microsecond_interval_timer();
+  busy_time= (end_utime - start_utime) / 1000000.0;
   cpu_time=  (end_cpu_time - start_cpu_time) / 10000000.0;
   /* In case there are bad values, 2629743 is the #seconds in a month. */
   if (cpu_time > 2629743.0)
@@ -1000,7 +1014,8 @@ void THD::update_all_stats()
   status_var_add(status_var.cpu_time, cpu_time);
   status_var_add(status_var.busy_time, busy_time);
 
-  update_global_user_stats(this, TRUE, save_time);
+  update_global_user_stats(this, TRUE, my_time(0));
+  // Has to be updated after update_global_user_stats()
   userstat_running= 0;
 }
 
@@ -1153,6 +1168,8 @@ THD::~THD()
   free_root(&transaction.mem_root,MYF(0));
 #endif
   mysys_var=0;					// Safety (shouldn't be needed)
+  pthread_cond_destroy(&COND_wakeup_ready);
+  pthread_mutex_destroy(&LOCK_wakeup_ready);
   pthread_mutex_destroy(&LOCK_thd_data);
 #ifndef DBUG_OFF
   dbug_sentry= THD_SENTRY_GONE;
@@ -1198,6 +1215,7 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
   to_var->bytes_sent+=          from_var->bytes_sent;
   to_var->rows_read+=           from_var->rows_read;
   to_var->rows_sent+=           from_var->rows_sent;
+  to_var->rows_tmp_read+=       from_var->rows_tmp_read;
   to_var->binlog_bytes_written+= from_var->binlog_bytes_written;
   to_var->cpu_time+=            from_var->cpu_time;
   to_var->busy_time+=           from_var->busy_time;
@@ -1233,6 +1251,7 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
   to_var->bytes_sent+=           from_var->bytes_sent - dec_var->bytes_sent;
   to_var->rows_read+=            from_var->rows_read - dec_var->rows_read;
   to_var->rows_sent+=            from_var->rows_sent - dec_var->rows_sent;
+  to_var->rows_tmp_read+=        from_var->rows_tmp_read - dec_var->rows_tmp_read;
   to_var->binlog_bytes_written+= from_var->binlog_bytes_written -
                                  dec_var->binlog_bytes_written;
   to_var->cpu_time+=             from_var->cpu_time - dec_var->cpu_time;
@@ -1248,7 +1267,7 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
 #endif
 
 
-void THD::awake(THD::killed_state state_to_set)
+void THD::awake(killed_state state_to_set)
 {
   DBUG_ENTER("THD::awake");
   DBUG_PRINT("enter", ("this: 0x%lx", (long) this));
@@ -1265,7 +1284,7 @@ void THD::awake(THD::killed_state state_to_set)
                       "KILLED");
   }
   killed= state_to_set;
-  if (state_to_set != THD::KILL_QUERY)
+  if (state_to_set >= KILL_CONNECTION)
   {
     thr_alarm_kill(thread_id);
     if (!slave_thread)
@@ -1345,6 +1364,38 @@ void THD::awake(THD::killed_state state_to_set)
   DBUG_VOID_RETURN;
 }
 
+
+/*
+  Get error number for killed state
+  Note that the error message can't have any parameters.
+  See thd::kill_message()
+*/
+
+int killed_errno(killed_state killed)
+{
+  switch (killed) {
+  case NOT_KILLED:
+  case KILL_HARD_BIT:
+    return 0;                                   // Probably wrong usage
+  case KILL_BAD_DATA:
+  case KILL_BAD_DATA_HARD:
+    return 0;                                   // Not a real error
+  case KILL_CONNECTION:
+  case KILL_CONNECTION_HARD:
+  case KILL_SYSTEM_THREAD:
+  case KILL_SYSTEM_THREAD_HARD:
+    return ER_CONNECTION_KILLED;
+  case KILL_QUERY:
+  case KILL_QUERY_HARD:
+    return ER_QUERY_INTERRUPTED;
+  case KILL_SERVER:
+  case KILL_SERVER_HARD:
+    return ER_SERVER_SHUTDOWN;
+  }
+  return 0;                                     // Keep compiler happy
+}
+
+
 /*
   Remember the location of thread info, the structure needed for
   sql_alloc() and the structure for the net buffer
@@ -2432,7 +2483,6 @@ int select_export::send_data(List<Item> &items)
     {						// Fill with space
       if (item->max_length > used_length)
       {
-	/* QQ:  Fix by adding a my_b_fill() function */
 	if (!space_inited)
 	{
 	  space_inited=1;
@@ -2535,7 +2585,8 @@ int select_singlerow_subselect::send_data(List<Item> &items)
   Item_singlerow_subselect *it= (Item_singlerow_subselect *)item;
   if (it->assigned())
   {
-    my_message(ER_SUBQUERY_NO_1_ROW, ER(ER_SUBQUERY_NO_1_ROW), MYF(0));
+    my_message(ER_SUBQUERY_NO_1_ROW, ER(ER_SUBQUERY_NO_1_ROW),
+               MYF(current_thd->lex->ignore ? ME_JUST_WARNING : 0));
     DBUG_RETURN(1);
   }
   if (unit->offset_limit_cnt)
@@ -2592,6 +2643,7 @@ int select_max_min_finder_subselect::send_data(List<Item> &items)
         op= &select_max_min_finder_subselect::cmp_decimal;
         break;
       case ROW_RESULT:
+      case TIME_RESULT:
       case IMPOSSIBLE_RESULT:
         // This case should never be choosen
 	DBUG_ASSERT(0);
@@ -2737,6 +2789,7 @@ void Query_arena::free_items()
   for (; free_list; free_list= next)
   {
     next= free_list->next;
+    DBUG_ASSERT(free_list != next);
     free_list->delete_self();
   }
   /* Postcondition: free_list is 0 */
@@ -3080,6 +3133,99 @@ bool select_dumpvar::send_eof()
   return 0;
 }
 
+
+bool
+select_materialize_with_stats::
+create_result_table(THD *thd_arg, List<Item> *column_types,
+                    bool is_union_distinct, ulonglong options,
+                    const char *table_alias, bool bit_fields_as_long,
+                    bool create_table)
+{
+  DBUG_ASSERT(table == 0);
+  tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
+
+  if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
+                                 (ORDER*) 0, is_union_distinct, 1,
+                                 options, HA_POS_ERROR, (char*) table_alias)))
+    return TRUE;
+
+  col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields *
+                                                      sizeof(Column_statistics));
+  if (!col_stat)
+    return TRUE;
+
+  reset();
+  table->file->extra(HA_EXTRA_WRITE_CACHE);
+  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  return FALSE;
+}
+
+
+void select_materialize_with_stats::reset()
+{
+  memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
+  max_nulls_in_row= 0;
+  count_rows= 0;
+}
+
+
+void select_materialize_with_stats::cleanup()
+{
+  reset();
+  select_union::cleanup();
+}
+
+
+/**
+  Override select_union::send_data to analyze each row for NULLs and to
+  update null_statistics before sending data to the client.
+
+  @return TRUE if fatal error when sending data to the client
+  @return FALSE on success
+*/
+
+int select_materialize_with_stats::send_data(List<Item> &items)
+{
+  List_iterator_fast<Item> item_it(items);
+  Item *cur_item;
+  Column_statistics *cur_col_stat= col_stat;
+  uint nulls_in_row= 0;
+  int res;
+
+  if ((res= select_union::send_data(items)))
+    return res;
+  if (table->null_catch_flags & REJECT_ROW_DUE_TO_NULL_FIELDS)
+  {
+    table->null_catch_flags&= ~REJECT_ROW_DUE_TO_NULL_FIELDS;
+    return 0;
+  }
+  /* Skip duplicate rows. */
+  if (write_err == HA_ERR_FOUND_DUPP_KEY ||
+      write_err == HA_ERR_FOUND_DUPP_UNIQUE)
+    return 0;
+
+  ++count_rows;
+
+  while ((cur_item= item_it++))
+  {
+    if (cur_item->is_null_result())
+    {
+      ++cur_col_stat->null_count;
+      cur_col_stat->max_null_row= count_rows;
+      if (!cur_col_stat->min_null_row)
+        cur_col_stat->min_null_row= count_rows;
+      ++nulls_in_row;
+    }
+    ++cur_col_stat;
+  }
+  if (nulls_in_row > max_nulls_in_row)
+    max_nulls_in_row= nulls_in_row;
+
+  return 0;
+}
+
+
 /****************************************************************************
   TMP_TABLE_PARAM
 ****************************************************************************/
@@ -3093,6 +3239,10 @@ void TMP_TABLE_PARAM::init()
   quick_group= 1;
   table_charset= 0;
   precomputed_group_by= 0;
+  bit_fields_as_long= 0;
+  materialized_subquery= 0;
+  force_not_null_cols= 0;
+  skip_create_table= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -3306,15 +3456,128 @@ void THD::restore_backup_open_tables_state(Open_tables_state *backup)
   @param thd  user thread
   @retval 0 the user thread is active
   @retval 1 the user thread has been killed
+
+  This is used to signal a storage engine if it should be killed.
 */
+
 extern "C" int thd_killed(const MYSQL_THD thd)
 {
-  if (thd->killed == THD::NOT_KILLED || thd->killed == THD::KILL_BAD_DATA ||
-      thd->killed == THD::KILL_SYSTEM_THREAD)
+  if (!(thd->killed & KILL_HARD_BIT))
     return 0;
   return thd->killed;
 }
 
+
+/**
+   Send an out-of-band progress report to the client
+
+   The report is sent every 'thd->...progress_report_time' second,
+   however not more often than global.progress_report_time.
+   If global.progress_report_time is 0, then don't send progress reports, but
+   check every second if the value has changed
+*/
+
+static void thd_send_progress(THD *thd)
+{
+  /* Check if we should send the client a progress report */
+  ulonglong report_time= my_interval_timer();
+  if (report_time > thd->progress.next_report_time)
+  {
+    uint seconds_to_next= max(thd->variables.progress_report_time,
+                              global_system_variables.progress_report_time);
+    if (seconds_to_next == 0)             // Turned off
+      seconds_to_next= 1;                 // Check again after 1 second
+
+    thd->progress.next_report_time= (report_time +
+                                     seconds_to_next * 1000000000ULL);
+    if (global_system_variables.progress_report_time &&
+        thd->variables.progress_report_time)
+      net_send_progress_packet(thd);
+  }
+}
+
+
+/** Initialize progress report handling **/
+
+extern "C" void thd_progress_init(MYSQL_THD thd, uint max_stage)
+{
+  /*
+    Send progress reports to clients that supports it, if the command
+    is a high level command (like ALTER TABLE) and we are not in a
+    stored procedure
+  */
+  thd->progress.report= ((thd->client_capabilities & CLIENT_PROGRESS) &&
+                         thd->progress.report_to_client &&
+                         !thd->in_sub_stmt);
+  thd->progress.next_report_time= 0;
+  thd->progress.stage= 0;
+  thd->progress.counter= thd->progress.max_counter= 0;
+  thd->progress.max_stage= max_stage;
+}
+
+
+/* Inform processlist and the client that some progress has been made */
+
+extern "C" void thd_progress_report(MYSQL_THD thd,
+                                    ulonglong progress, ulonglong max_progress)
+{
+  if (thd->progress.max_counter != max_progress)        // Simple optimization
+  {
+    pthread_mutex_lock(&thd->LOCK_thd_data);
+    thd->progress.counter= progress;
+    thd->progress.max_counter= max_progress;
+    pthread_mutex_unlock(&thd->LOCK_thd_data);
+  }
+  else
+    thd->progress.counter= progress;
+
+  if (thd->progress.report)
+    thd_send_progress(thd);
+}
+
+/**
+  Move to next stage in process list handling
+
+  This will reset the timer to ensure the progress is sent to the client
+  if client progress reports are activated.
+*/
+
+extern "C" void thd_progress_next_stage(MYSQL_THD thd)
+{
+  pthread_mutex_lock(&thd->LOCK_thd_data);
+  thd->progress.stage++;
+  thd->progress.counter= 0;
+  DBUG_ASSERT(thd->progress.stage < thd->progress.max_stage);
+  pthread_mutex_unlock(&thd->LOCK_thd_data);
+  if (thd->progress.report)
+  {
+    thd->progress.next_report_time= 0;          // Send new stage info
+    thd_send_progress(thd);
+  }
+}
+
+/**
+  Disable reporting of progress in process list.
+
+  @note
+  This function is safe to call even if one has not called thd_progress_init.
+
+  This function should be called by all parts that does progress
+  reporting to ensure that progress list doesn't contain 100 % done
+  forever.
+*/
+
+
+extern "C" void thd_progress_end(MYSQL_THD thd)
+{
+  /*
+    It's enough to reset max_counter to set disable progress indicator
+    in processlist.
+  */
+  thd->progress.max_counter= 0;
+}
+
+
 /**
   Return the thread id of a user thread
   @param thd user thread
@@ -3574,16 +3837,6 @@ void mark_transaction_to_rollback(THD *thd, bool all)
   {
     thd->is_fatal_sub_stmt_error= TRUE;
     thd->transaction_rollback_request= all;
-    /*
-      Aborted transactions can not be IGNOREd.
-      Switch off the IGNORE flag for the current
-      SELECT_LEX. This should allow my_error()
-      to report the error and abort the execution
-      flow, even in presence
-      of IGNORE clause.
-    */
-    if (thd->lex->current_select)
-      thd->lex->current_select->no_error= FALSE;
   }
 }
 /***************************************************************************
@@ -4204,6 +4457,25 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
   DBUG_RETURN(0);
 }
 
+void
+THD::wait_for_wakeup_ready()
+{
+  pthread_mutex_lock(&LOCK_wakeup_ready);
+  while (!wakeup_ready)
+    pthread_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
+  pthread_mutex_unlock(&LOCK_wakeup_ready);
+}
+
+void
+THD::signal_wakeup_ready()
+{
+  pthread_mutex_lock(&LOCK_wakeup_ready);
+  wakeup_ready= true;
+  pthread_mutex_unlock(&LOCK_wakeup_ready);
+  pthread_cond_signal(&COND_wakeup_ready);
+}
+
+
 bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
                                  ulonglong incr)
 {
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 4bf1bc3964c..20ca9bd47c5 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -26,6 +26,92 @@
 #include "log.h"
 #include "rpl_tblmap.h"
 
+
+/**
+  Interface for Item iterator
+*/
+
+class Item_iterator
+{
+public:
+  /**
+    Shall set this iterator to the position before the first item
+
+    @note
+    This method also may perform some other initialization actions like
+    allocation of certain resources.
+  */
+  virtual void open()= 0;
+  /**
+    Shall return the next Item (or NULL if there is no next item) and
+    move pointer to position after it.
+  */
+  virtual Item *next()= 0;
+  /**
+    Shall force iterator to free resources (if it holds them)
+
+    @note
+    One should not use the iterator without open() call after close()
+  */
+  virtual void close()= 0;
+
+  virtual ~Item_iterator() {}
+};
+
+
+/**
+  Item iterator over List_iterator_fast for Item references
+*/
+
+class Item_iterator_ref_list: public Item_iterator
+{
+  List_iterator<Item*> list;
+public:
+  Item_iterator_ref_list(List_iterator<Item*> &arg_list):
+    list(arg_list) {}
+  void open() { list.rewind(); }
+  Item *next() { return *(list++); }
+  void close() {}
+};
+
+
+/**
+  Item iterator over List_iterator_fast for Items
+*/
+
+class Item_iterator_list: public Item_iterator
+{
+  List_iterator<Item> list;
+public:
+  Item_iterator_list(List_iterator<Item> &arg_list):
+    list(arg_list) {}
+  void open() { list.rewind(); }
+  Item *next() { return (list++); }
+  void close() {}
+};
+
+
+/**
+  Item iterator over Item interface for rows
+*/
+
+class Item_iterator_row: public Item_iterator
+{
+  Item *base_item;
+  uint current;
+public:
+  Item_iterator_row(Item *base) : base_item(base), current(0) {}
+  void open() { current= 0; }
+  Item *next()
+  {
+    if (current >= base_item->cols())
+      return NULL;
+    return base_item->element_index(current++);
+  }
+  void close() {}
+};
+
+
 /**
   An interface that is used to take an action when
   the locking module notices that a table version has changed
@@ -120,7 +206,7 @@ typedef struct st_user_var_events
 
 #define RP_LOCK_LOG_IS_ALREADY_LOCKED 1
 #define RP_FORCE_ROTATE               2
-
+#define RP_BINLOG_CHECKSUM_ALG_CHANGE 4
 /*
   The COPY_INFO structure is used by INSERT/REPLACE code.
   The schema of the row counting by the INSERT/INSERT ... ON DUPLICATE KEY
@@ -278,6 +364,38 @@ public:
   LEX_COLUMN (const String& x,const  uint& y ): column (x),rights (y) {}
 };
 
+
+/* Note: these states are actually bit coded with HARD */
+enum killed_state
+{
+  NOT_KILLED= 0,
+  KILL_HARD_BIT= 1,                             /* Bit for HARD KILL */
+  KILL_BAD_DATA= 2,
+  KILL_BAD_DATA_HARD= 3,
+  KILL_QUERY= 4,
+  KILL_QUERY_HARD= 5,
+  /*
+    All of the following killed states will kill the connection
+    KILL_CONNECTION must be the first of these!
+  */
+  KILL_CONNECTION= 6,
+  KILL_CONNECTION_HARD= 7,
+  KILL_SYSTEM_THREAD= 8,
+  KILL_SYSTEM_THREAD_HARD= 9,
+  KILL_SERVER= 10,
+  KILL_SERVER_HARD= 11
+};
+
+extern int killed_errno(killed_state killed);
+#define killed_mask_hard(killed) ((killed_state) ((killed) & ~KILL_HARD_BIT))
+
+enum killed_type
+{
+  KILL_TYPE_ID,
+  KILL_TYPE_USER
+};
+
+
 #include "sql_lex.h"				/* Must be here */
 
 class Delayed_insert;
@@ -313,11 +431,13 @@ struct system_variables
   ulonglong max_heap_table_size;
   ulonglong tmp_table_size;
   ulonglong long_query_time;
+  ulonglong join_buff_space_limit;
   ha_rows select_limit;
   ha_rows max_join_size;
   ulong auto_increment_increment, auto_increment_offset;
   ulong bulk_insert_buff_size;
   ulong join_buff_size;
+  ulong join_cache_level;
   ulong max_allowed_packet;
   ulong max_error_count;
   ulong max_length_for_sort_data;
@@ -325,7 +445,6 @@ struct system_variables
   ulong max_tmp_tables;
   ulong max_insert_delayed_threads;
   ulong min_examined_row_limit;
-  ulong multi_range_count;
   ulong myisam_repair_threads;
   ulong myisam_sort_buff_size;
   ulong myisam_stats_method;
@@ -344,8 +463,12 @@ struct system_variables
   ulong query_cache_type;
   ulong read_buff_size;
   ulong read_rnd_buff_size;
+  ulong mrr_buff_size;
   ulong div_precincrement;
   ulong sortbuff_size;
+  /* Total size of all buffers used by the subselect_rowid_merge_engine. */
+  ulong rowid_merge_buff_size;
+  ulong thread_handling;
   ulong tx_isolation;
   ulong completion_type;
   /* Determines which non-standard SQL behaviour should be enabled */
@@ -370,6 +493,8 @@ struct system_variables
   ulong ndb_index_stat_cache_entries;
   ulong ndb_index_stat_update_freq;
   ulong binlog_format; // binlog format for this thd (see enum_binlog_format)
+  ulong progress_report_time;
+  my_bool binlog_annotate_row_events;
   my_bool binlog_direct_non_trans_update;
   /*
     In slave thread we need to know in behalf of which
@@ -446,10 +571,25 @@ typedef struct system_status_var
   ulong ha_read_prev_count;
   ulong ha_read_rnd_count;
   ulong ha_read_rnd_next_count;
+  ulong ha_read_rnd_deleted_count;
+  /*
+    This number doesn't include calls to the default implementation and
+    calls made by range access. The intent is to count only calls made by
+    BatchedKeyAccess.
+  */
+  ulong ha_mrr_init_count;
+  ulong ha_mrr_key_refills_count;
+  ulong ha_mrr_rowid_refills_count;
+
   ulong ha_rollback_count;
   ulong ha_update_count;
   ulong ha_write_count;
+  /* The following are for internal temporary tables */
+  ulong ha_tmp_update_count;
+  ulong ha_tmp_write_count;
   ulong ha_prepare_count;
+  ulong ha_icp_attempts;
+  ulong ha_icp_match;
   ulong ha_discover_count;
   ulong ha_savepoint_count;
   ulong ha_savepoint_rollback_count;
@@ -501,6 +641,7 @@ typedef struct system_status_var
   ulonglong bytes_sent;
   ulonglong rows_read;
   ulonglong rows_sent;
+  ulonglong rows_tmp_read;
   ulonglong binlog_bytes_written;
   double last_query_cost;
   double cpu_time, busy_time;
@@ -678,8 +819,12 @@ public:
     ENGINE INNODB STATUS.
   */
   LEX_STRING query_string;
+  /*
+    If opt_query_cache_strip_comments is set, this contains query without
+    comments. If not set, it contains pointer to query_string.
+  */
+  String base_query;
   Server_side_cursor *cursor;
-
   inline char *query() { return query_string.str; }
   inline uint32 query_length() { return (uint32)query_string.length; }
   void set_query_inner(char *query_arg, uint32 query_length_arg);
@@ -700,7 +845,8 @@ public:
   char *db;
   size_t db_length;
 
-public:
+  /* This is set to 1 of last call to send_result_to_client() was ok */
+  my_bool query_cache_is_applicable;
 
   /* This constructor is called for backup statements */
   Statement() {}
@@ -1431,7 +1577,6 @@ public:
   */
   const char *where;
 
-  double tmp_double_value;                    /* Used in set_var.cc */
   ulong client_capabilities;		/* What the client supports */
   ulong max_client_packet_length;
 
@@ -1455,11 +1600,32 @@ public:
   uint32     file_id;			// for LOAD DATA INFILE
   /* remote (peer) port */
   uint16     peer_port;
-  time_t     start_time, user_time;
+  my_time_t  start_time;             // start_time and its sec_part 
+  ulong      start_time_sec_part;    // are almost always used separately
+  my_hrtime_t user_time;
   // track down slow pthread_create
   ulonglong  prior_thr_create_utime, thr_create_utime;
   ulonglong  start_utime, utime_after_lock;
 
+  // Process indicator
+  struct {
+    /*
+      true, if the currently running command can send progress report
+      packets to a client. Set by mysql_execute_command() for safe commands
+      See CF_REPORT_PROGRESS
+    */
+    bool       report_to_client;
+    /*
+      true, if we will send progress report packets to a client
+      (client has requested them, see CLIENT_PROGRESS; report_to_client
+      is true; not in sub-statement)
+    */
+    bool       report;
+    uint       stage, max_stage;
+    ulonglong  counter, max_counter;
+    ulonglong  next_report_time;
+  } progress;
+
   thr_lock_type update_lock_default;
   Delayed_insert *di;
 
@@ -1475,6 +1641,16 @@ public:
   /* container for handler's private per-connection data */
   Ha_data ha_data[MAX_HA];
 
+  bool prepare_derived_at_open;
+
+  /* 
+    To signal that the tmp table to be created is created for materialized
+    derived table or a view.
+  */ 
+  bool create_tmp_table_for_derived;
+
+  bool save_prep_leaf_list;
+
 #ifndef MYSQL_CLIENT
   int binlog_setup_trx_data();
 
@@ -1483,7 +1659,8 @@ public:
   */
   void binlog_start_trans_and_stmt();
   void binlog_set_stmt_begin();
-  int binlog_write_table_map(TABLE *table, bool is_transactional);
+  int binlog_write_table_map(TABLE *table, bool is_transactional,
+                             my_bool *with_annotate= 0);
   int binlog_write_row(TABLE* table, bool is_transactional,
                        MY_BITMAP const* cols, size_t colcnt,
                        const uchar *buf);
@@ -1817,7 +1994,7 @@ public:
   ulong      query_plan_fsort_passes; 
   pthread_t  real_id;                           /* For debugging */
   my_thread_id  thread_id;
-  uint	     tmp_table, global_read_lock;
+  uint	     tmp_table, global_read_lock, global_disable_checkpoint;
   uint	     server_status,open_options;
   enum enum_thread_type system_thread;
   uint       select_number;             //number of select (used for EXPLAIN)
@@ -1828,16 +2005,6 @@ public:
   DYNAMIC_ARRAY user_var_events;        /* For user variables replication */
   MEM_ROOT      *user_var_events_alloc; /* Allocate above array elements here */
 
-  enum killed_state
-  {
-    NOT_KILLED=0,
-    KILL_BAD_DATA=1,
-    KILL_CONNECTION=ER_SERVER_SHUTDOWN,
-    KILL_SYSTEM_THREAD=ER_NEW_ABORTING_CONNECTION, /* Kill connection nicely */
-    KILL_QUERY=ER_QUERY_INTERRUPTED,
-    KILL_SERVER,	 /* Placeholder for shortdown */
-    KILLED_NO_VALUE      /* means neither of the states */
-  };
   killed_state volatile killed;
 
   /* scramble - random string sent to client on handshake */
@@ -1846,7 +2013,7 @@ public:
   bool       slave_thread, one_shot_set;
   /* tells if current statement should binlog row-based(1) or stmt-based(0) */
   bool       current_stmt_binlog_row_based;
-  bool	     locked, some_tables_deleted;
+  bool	     some_tables_deleted;
   bool       last_cuted_field;
   bool	     no_errors, password;
   bool       extra_port;                        /* If extra connection */
@@ -1878,6 +2045,7 @@ public:
   */
   bool       is_fatal_sub_stmt_error;
   bool	     query_start_used, rand_used, time_zone_used;
+  bool       query_start_sec_part_used;
   /* for IS NULL => = last_insert_id() fix in remove_eq_conds() */
   bool       substitute_null_with_insert_id;
   bool	     in_lock_tables;
@@ -1938,6 +2106,7 @@ public:
     long      long_value;
     ulong     ulong_value;
     ulonglong ulonglong_value;
+    double    double_value;
   } sys_var_tmp;
 
   struct {
@@ -2019,7 +2188,7 @@ public:
   }
   void close_active_vio();
 #endif
-  void awake(THD::killed_state state_to_set);
+  void awake(killed_state state_to_set);
 
 #ifndef MYSQL_CLIENT
   enum enum_binlog_query_type {
@@ -2077,30 +2246,41 @@ public:
     proc_info = old_msg;
     pthread_mutex_unlock(&mysys_var->mutex);
   }
-  inline time_t query_start() { query_start_used=1; return start_time; }
+  inline my_time_t query_start() { query_start_used=1; return start_time; }
+  inline ulong query_start_sec_part()
+  { query_start_sec_part_used=1; return start_time_sec_part; }
+  inline void set_current_time()
+  {
+    my_hrtime_t hrtime= my_hrtime();
+    start_time= hrtime_to_my_time(hrtime);
+    start_time_sec_part= hrtime_sec_part(hrtime);
+  }
   inline void set_time()
   {
-    if (user_time)
+    if (user_time.val)
     {
-      start_time= user_time;
-      start_utime= utime_after_lock= my_micro_time();
+      start_time= hrtime_to_my_time(user_time);
+      start_time_sec_part= hrtime_sec_part(user_time);
     }
     else
-      start_utime= utime_after_lock= my_micro_time_and_time(&start_time);
+      set_current_time();
+    start_utime= utime_after_lock= microsecond_interval_timer();
   }
-  inline void	set_current_time()    { start_time= my_time(MY_WME); }
-  inline void	set_time(time_t t)
+  inline void	set_time(my_hrtime_t t)
   {
-    start_time= user_time= t;
-    start_utime= utime_after_lock= my_micro_time();
+    user_time= t;
+    start_time= hrtime_to_my_time(user_time);
+    start_time_sec_part= hrtime_sec_part(user_time);
+    start_utime= utime_after_lock= microsecond_interval_timer();
   }
-  /*TODO: this will be obsolete when we have support for 64 bit my_time_t */
-  inline bool	is_valid_time() 
-  { 
-    return (IS_TIME_T_VALID_FOR_TIMESTAMP(start_time));
+  inline void	set_time(my_time_t t, ulong sec_part)
+  {
+    my_hrtime_t hrtime= { hrtime_from_time(t) + sec_part };
+    set_time(hrtime);
   }
-  void set_time_after_lock()  { utime_after_lock= my_micro_time(); }
-  ulonglong current_utime()  { return my_micro_time(); }
+  void set_time_after_lock()  { utime_after_lock= microsecond_interval_timer(); }
+  ulonglong current_utime()  { return microsecond_interval_timer(); }
+
   inline ulonglong found_rows(void)
   {
     return limit_found_rows;
@@ -2223,7 +2403,11 @@ public:
   {
     if (!stmt_arena->is_conventional())
       check_and_register_item_tree_change(place, new_value, mem_root);
-    *place= *new_value;
+    /*
+      We have to use memcpy instead of  *place= *new_value merge to
+      avoid problems with strict aliasing.
+    */
+    memcpy((char*) place, new_value, sizeof(*new_value));
   }
   void nocheck_register_item_tree_change(Item **place, Item *old_value,
                                          MEM_ROOT *runtime_memroot);
@@ -2238,18 +2422,13 @@ public:
   void end_statement();
   inline int killed_errno() const
   {
-    killed_state killed_val; /* to cache the volatile 'killed' */
-    return (killed_val= killed) != KILL_BAD_DATA ? killed_val : 0;
+    return ::killed_errno(killed);
   }
   inline void send_kill_message() const
   {
     int err= killed_errno();
     if (err)
-    {
-      if ((err == KILL_CONNECTION) && !shutdown_in_progress)
-        err = KILL_QUERY;
       my_message(err, ER(err), MYF(0));
-    }
   }
   /* return TRUE if we will abort query if we make a warning now */
   inline bool really_abort_on_warning()
@@ -2453,6 +2632,14 @@ public:
     return backup;
   }
 
+  void clear_wakeup_ready() { wakeup_ready= false; }
+  /*
+    Sleep waiting for others to wake us up with signal_wakeup_ready().
+    Must call clear_wakeup_ready() before waiting.
+  */
+  void wait_for_wakeup_ready();
+  /* Wake this thread up from wait_for_wakeup_ready(). */
+  void signal_wakeup_ready();
 private:
   /** The current internal error handler for this thread, or NULL. */
   Internal_error_handler *m_internal_handler;
@@ -2491,6 +2678,16 @@ private:
    */
   LEX_STRING invoker_user;
   LEX_STRING invoker_host;
+  /*
+    Flag, mutex and condition for a thread to wait for a signal from another
+    thread.
+
+    Currently used to wait for group commit to complete, can also be used for
+    other purposes.
+  */
+  bool wakeup_ready;
+  pthread_mutex_t LOCK_wakeup_ready;
+  pthread_cond_t COND_wakeup_ready;
 };
 
 /** A short cut for thd->main_da.set_ok_status(). */
@@ -2519,6 +2716,27 @@ my_eof(THD *thd)
 
 
 /*
+  These functions are for making it later easy to add strict
+  checking for all date handling.
+*/
+
+extern my_bool strict_date_checking;
+
+inline ulong sql_mode_for_dates(THD *thd)
+{
+  if (unlikely(strict_date_checking))
+    return (thd->variables.sql_mode &
+            (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE |
+             MODE_INVALID_DATES));
+  return (thd->variables.sql_mode & MODE_INVALID_DATES);
+}
+
+inline ulong sql_mode_for_dates()
+{
+  return sql_mode_for_dates(current_thd);
+}
+
+/*
   Used to hold information about file and file structure in exchange
   via non-DB file (...INTO OUTFILE..., ...LOAD DATA...)
   XXX: We never call destructor for objects of this class.
@@ -2613,7 +2831,12 @@ public:
 class select_result_interceptor: public select_result
 {
 public:
-  select_result_interceptor() {}              /* Remove gcc warning */
+  select_result_interceptor()
+  {
+    DBUG_ENTER("select_result_interceptor::select_result_interceptor");
+    DBUG_PRINT("enter", ("this 0x%lx", (ulong) this));
+    DBUG_VOID_RETURN;
+  }              /* Remove gcc warning */
   uint field_count(List<Item> &fields) const { return 0; }
   bool send_fields(List<Item> &fields, uint flag) { return FALSE; }
 };
@@ -2856,6 +3079,10 @@ public:
   uint  convert_blob_length;
   CHARSET_INFO *table_charset;
   bool schema_table;
+  /* TRUE if the temp table is created for subquery materialization. */
+  bool materialized_subquery;
+  /* TRUE if all columns of the table are guaranteed to be non-nullable */
+  bool force_not_null_cols;
   /*
     True if GROUP BY and its aggregate functions are already computed
     by a table access method (e.g. by loose index scan). In this case
@@ -2864,11 +3091,24 @@ public:
   */
   bool precomputed_group_by;
   bool force_copy_fields;
+  /*
+    If TRUE, create_tmp_field called from create_tmp_table will convert
+    all BIT fields to 64-bit longs. This is a workaround the limitation
+    that MEMORY tables cannot index BIT columns.
+  */
+  bool bit_fields_as_long;
+  /*
+    Whether to create or postpone actual creation of this temporary table.
+    TRUE <=> create_tmp_table will create only the TABLE structure.
+  */
+  bool skip_create_table;
 
   TMP_TABLE_PARAM()
     :copy_field(0), group_parts(0),
      group_length(0), group_null_parts(0), convert_blob_length(0),
-     schema_table(0), precomputed_group_by(0), force_copy_fields(0)
+    schema_table(0), materialized_subquery(0), force_not_null_cols(0),
+    precomputed_group_by(0),
+    force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
   {}
   ~TMP_TABLE_PARAM()
   {
@@ -2887,19 +3127,25 @@ public:
 
 class select_union :public select_result_interceptor
 {
+public:
   TMP_TABLE_PARAM tmp_table_param;
+  int write_err; /* Error code from the last send_data->ha_write_row call. */
 public:
   TABLE *table;
+  ha_rows records;
 
-  select_union() :table(0) {}
+  select_union() :write_err(0), table(0), records(0) { tmp_table_param.init(); }
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
   int send_data(List<Item> &items);
   bool send_eof();
   bool flush();
-
-  bool create_result_table(THD *thd, List<Item> *column_types,
-                           bool is_distinct, ulonglong options,
-                           const char *alias);
+  void cleanup();
+  virtual bool create_result_table(THD *thd, List<Item> *column_types,
+                                   bool is_distinct, ulonglong options,
+                                   const char *alias, 
+                                   bool bit_fields_as_long,
+                                   bool create_table);
+  TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
 };
 
 /* Base subselect interface class */
@@ -2923,6 +3169,74 @@ public:
   int send_data(List<Item> &items);
 };
 
+
+/*
+  This class specializes select_union to collect statistics about the
+  data stored in the temp table. Currently the class collects statistcs
+  about NULLs.
+*/
+
+class select_materialize_with_stats : public select_union
+{
+protected:
+  class Column_statistics
+  {
+  public:
+    /* Count of NULLs per column. */
+    ha_rows null_count;
+    /* The row number that contains the first NULL in a column. */
+    ha_rows min_null_row;
+    /* The row number that contains the last NULL in a column. */
+    ha_rows max_null_row;
+  };
+
+  /* Array of statistics data per column. */
+  Column_statistics* col_stat;
+
+  /*
+    The number of columns in the biggest sub-row that consists of only
+    NULL values.
+  */
+  uint max_nulls_in_row;
+  /*
+    Count of rows writtent to the temp table. This is redundant as it is
+    already stored in handler::stats.records, however that one is relatively
+    expensive to compute (given we need that for evry row).
+  */
+  ha_rows count_rows;
+
+protected:
+  void reset();
+
+public:
+  select_materialize_with_stats() { tmp_table_param.init(); }
+  bool create_result_table(THD *thd, List<Item> *column_types,
+                           bool is_distinct, ulonglong options,
+                           const char *alias, 
+                           bool bit_fields_as_long,
+                           bool create_table);
+  bool init_result_table(ulonglong select_options);
+  int send_data(List<Item> &items);
+  void cleanup();
+  ha_rows get_null_count_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].null_count;
+  }
+  ha_rows get_max_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].max_null_row;
+  }
+  ha_rows get_min_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].min_null_row;
+  }
+  uint get_max_nulls_in_row() { return max_nulls_in_row; }
+};
+
+
 /* used in independent ALL/ANY optimisation */
 class select_max_min_finder_subselect :public select_subselect
 {
@@ -2952,6 +3266,67 @@ public:
   int send_data(List<Item> &items);
 };
 
+
+
+
+/*
+  Optimizer and executor structure for the materialized semi-join info. This
+  structure contains
+   - The sj-materialization temporary table
+   - Members needed to make index lookup or a full scan of the temptable.
+*/
+class SJ_MATERIALIZATION_INFO : public Sql_alloc
+{
+public:
+  /* Optimal join sub-order */
+  struct st_position *positions;
+
+  uint tables; /* Number of tables in the sj-nest */
+
+  /* Expected #rows in the materialized table */
+  double rows;
+
+  /* 
+    Cost to materialize - execute the sub-join and write rows into temp.table
+  */
+  COST_VECT materialization_cost;
+
+  /* Cost to make one lookup in the temptable */
+  COST_VECT lookup_cost;
+  
+  /* Cost of scanning the materialized table */
+  COST_VECT scan_cost;
+
+  /* --- Execution structures ---------- */
+  
+  /*
+    TRUE <=> This structure is used for execution. We don't necessarily pick
+    sj-materialization, so some of SJ_MATERIALIZATION_INFO structures are not
+    used by materialization
+  */
+  bool is_used;
+  
+  bool materialized; /* TRUE <=> materialization already performed */
+  /*
+    TRUE  - the temptable is read with full scan
+    FALSE - we use the temptable for index lookups
+  */
+  bool is_sj_scan; 
+  
+  /* The temptable and its related info */
+  TMP_TABLE_PARAM sjm_table_param;
+  List<Item> sjm_table_cols;
+  TABLE *table;
+
+  /* Structure used to make index lookups */
+  struct st_table_ref *tab_ref;
+  Item *in_equality; /* See create_subq_in_equalities() */
+
+  Item *join_cond; /* See comments in make_join_select() */
+  Copy_field *copy_field; /* Needed for SJ_Materialization scan */
+};
+
+
 /* Structs used when sorting */
 
 typedef struct st_sort_field {
@@ -3035,6 +3410,7 @@ class user_var_entry
   DTCollation collation;
 };
 
+
 /*
    Unique -- class for unique (removing of duplicates).
    Puts all values to the TREE. If the tree becomes too big,
@@ -3051,28 +3427,44 @@ class Unique :public Sql_alloc
   IO_CACHE file;
   TREE tree;
   uchar *record_pointers;
+  ulong filtered_out_elems;
   bool flush();
   uint size;
+  uint full_size;
+  uint min_dupl_count;   /* always 0 for unions, > 0 for intersections */
 
 public:
   ulong elements;
   Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
-	 uint size_arg, ulonglong max_in_memory_size_arg);
+	 uint size_arg, ulonglong max_in_memory_size_arg,
+         uint min_dupl_count_arg= 0);
   ~Unique();
   ulong elements_in_tree() { return tree.elements_in_tree; }
   inline bool unique_add(void *ptr)
   {
     DBUG_ENTER("unique_add");
     DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
-    if (tree.elements_in_tree > max_elements && flush())
+    if (!(tree.flag & TREE_ONLY_DUPS) && 
+        tree.elements_in_tree >= max_elements && flush())
       DBUG_RETURN(1);
     DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
   }
 
+  bool is_in_memory() { return (my_b_tell(&file) == 0); }
+  void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
+
   bool get(TABLE *table);
-  static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
-                             ulonglong max_in_memory_size);
-  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size,
+  
+  /* Cost of searching for an element in the tree */
+  inline static double get_search_cost(ulonglong tree_elems, uint compare_factor)
+  {
+    return log((double) tree_elems) / (compare_factor * M_LN2);
+  }  
+
+  static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+                             ulonglong max_in_memory_size, uint compare_factor,
+                             bool intersect_fl, bool *in_memory);
+  inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
                                             ulonglong max_in_memory_size)
   {
     register ulonglong max_elems_in_tree=
@@ -3088,6 +3480,11 @@ public:
 
   friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
   friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
+
+  friend int unique_write_to_file_with_count(uchar* key, element_count count,
+                                             Unique *unique);
+  friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, 
+				            Unique *unique);
 };
 
 
@@ -3127,7 +3524,7 @@ public:
 class multi_update :public select_result_interceptor
 {
   TABLE_LIST *all_tables; /* query/update command tables */
-  TABLE_LIST *leaves;     /* list of leves of join table tree */
+  List<TABLE_LIST> *leaves;     /* list of leves of join table tree */
   TABLE_LIST *update_tables, *table_being_updated;
   TABLE **tmp_tables, *main_table, *table_to_update;
   TMP_TABLE_PARAM *tmp_table_param;
@@ -3153,7 +3550,7 @@ class multi_update :public select_result_interceptor
   bool error_handled;
 
 public:
-  multi_update(TABLE_LIST *ut, TABLE_LIST *leaves_list,
+  multi_update(TABLE_LIST *ut, List<TABLE_LIST> *leaves_list,
 	       List<Item> *fields, List<Item> *values,
 	       enum_duplicates handle_duplicates, bool ignore);
   ~multi_update();
@@ -3215,6 +3612,7 @@ public:
 #define CF_STATUS_COMMAND	4
 #define CF_SHOW_TABLE_COMMAND	8
 #define CF_WRITE_LOGS_COMMAND  16
+
 /**
   Must be set for SQL statements that may contain
   Item expressions and/or use joins and tables.
@@ -3229,6 +3627,7 @@ public:
   joins are currently prohibited in these statements.
 */
 #define CF_REEXECUTION_FRAGILE 32
+#define CF_REPORT_PROGRESS     64
 
 /* Functions in sql_class.cc */
 
@@ -3264,16 +3663,24 @@ inline int handler::ha_index_read_map(uchar * buf, const uchar * key,
   return error;
 }
 
+
+/*
+  @note: Other index lookup/navigation functions require prior
+  handler->index_init() call. This function is different, it requires
+  that the scan is not initialized, and accepts "uint index" as an argument.
+*/
+
 inline int handler::ha_index_read_idx_map(uchar * buf, uint index,
                                           const uchar * key,
                                           key_part_map keypart_map,
                                           enum ha_rkey_function find_flag)
 {
+  DBUG_ASSERT(inited==NONE);
   increment_statistics(&SSV::ha_read_key_count);
   int error= index_read_idx_map(buf, index, key, keypart_map, find_flag);
   if (!error)
   {
-    rows_read++;
+    update_rows_read();
     index_rows_read[index]++;
   }
   table->status=error ? STATUS_NOT_FOUND: 0;
@@ -3340,17 +3747,25 @@ inline int handler::ha_ft_read(uchar *buf)
 {
   int error= ft_read(buf);
   if (!error)
-    rows_read++;
+    update_rows_read();
+
   table->status=error ? STATUS_NOT_FOUND: 0;
   return error;
 }
 
 inline int handler::ha_rnd_next(uchar *buf)
 {
-  increment_statistics(&SSV::ha_read_rnd_next_count);
   int error= rnd_next(buf);
   if (!error)
-    rows_read++;
+  {
+    update_rows_read();
+    increment_statistics(&SSV::ha_read_rnd_next_count);
+  }
+  else if (error == HA_ERR_RECORD_DELETED)
+    increment_statistics(&SSV::ha_read_rnd_deleted_count);
+  else
+    increment_statistics(&SSV::ha_read_rnd_next_count);
+
   table->status=error ? STATUS_NOT_FOUND: 0;
   return error;
 }
@@ -3360,7 +3775,7 @@ inline int handler::ha_rnd_pos(uchar *buf, uchar *pos)
   increment_statistics(&SSV::ha_read_rnd_count);
   int error= rnd_pos(buf, pos);
   if (!error)
-    rows_read++;
+    update_rows_read();
   table->status=error ? STATUS_NOT_FOUND: 0;
   return error;
 }
@@ -3369,7 +3784,7 @@ inline int handler::ha_rnd_pos_by_record(uchar *buf)
 {
   int error= rnd_pos_by_record(buf);
   if (!error)
-    rows_read++;
+    update_rows_read();
   table->status=error ? STATUS_NOT_FOUND: 0;
   return error;
 }
@@ -3378,10 +3793,21 @@ inline int handler::ha_read_first_row(uchar *buf, uint primary_key)
 {
   int error= read_first_row(buf, primary_key);
   if (!error)
-    rows_read++;
+    update_rows_read();
   table->status=error ? STATUS_NOT_FOUND: 0;
   return error;
 }
 
+inline int handler::ha_write_tmp_row(uchar *buf)
+{
+  increment_statistics(&SSV::ha_tmp_write_count);
+  return write_row(buf);
+}
+
+inline int handler::ha_update_tmp_row(const uchar *old_data, uchar *new_data)
+{
+  increment_statistics(&SSV::ha_tmp_update_count);
+  return update_row(old_data, new_data);
+}
 
 #endif /* MYSQL_SERVER */
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index e32dde04de0..2734b942947 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -115,8 +115,11 @@ int check_for_max_user_connections(THD *thd, USER_CONN *uc)
   DBUG_ENTER("check_for_max_user_connections");
 
   (void) pthread_mutex_lock(&LOCK_user_conn);
+
+  /* Root is not affected by the value of max_user_connections */
   if (max_user_connections && !uc->user_resources.user_conn &&
-      max_user_connections < (uint) uc->connections)
+      max_user_connections < uc->connections &&
+      !(thd->security_ctx->master_access & SUPER_ACL))
   {
     my_error(ER_TOO_MANY_USER_CONNECTIONS, MYF(0), uc->user);
     goto end;
@@ -204,7 +207,7 @@ void time_out_user_resource_limits(THD *thd, USER_CONN *uc)
   /* If more than a hour since last check, reset resource checking */
   if (check_time  - uc->reset_utime >= LL(3600000000))
   {
-    uc->questions=1;
+    uc->questions=0;
     uc->updates=0;
     uc->conn_per_hour=0;
     uc->reset_utime= check_time;
@@ -233,7 +236,7 @@ bool check_mqh(THD *thd, uint check_command)
   if (uc->user_resources.questions &&
       uc->questions++ >= uc->user_resources.questions)
   {
-    my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_questions",
+    my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_queries_per_hour",
              (long) uc->user_resources.questions);
     error=1;
     goto end;
@@ -245,7 +248,7 @@ bool check_mqh(THD *thd, uint check_command)
         (sql_command_flags[check_command] & CF_CHANGES_DATA) &&
 	uc->updates++ >= uc->user_resources.updates)
     {
-      my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_updates",
+      my_error(ER_USER_LIMIT_REACHED, MYF(0), uc->user, "max_updates_per_hour",
                (long) uc->user_resources.updates);
       error=1;
       goto end;
@@ -691,6 +694,7 @@ static void update_global_user_stats_with_user(THD *thd,
   user_stats->binlog_bytes_written+=
     (thd->status_var.binlog_bytes_written -
      thd->org_status_var.binlog_bytes_written);
+  /* We are not counting rows in internal temporary tables here ! */
   user_stats->rows_read+=      (thd->status_var.rows_read -
                                 thd->org_status_var.rows_read);
   user_stats->rows_sent+=      (thd->status_var.rows_sent -
@@ -1096,7 +1100,7 @@ void prepare_new_connection_state(THD* thd)
     execute_init_command(thd, &sys_init_connect, &LOCK_sys_init_connect);
     if (thd->is_error())
     {
-      thd->killed= THD::KILL_CONNECTION;
+      thd->killed= KILL_CONNECTION;
       sql_print_warning(ER(ER_NEW_ABORTING_CONNECTION),
                         thd->thread_id,(thd->db ? thd->db : "unconnected"),
                         sctx->user ? sctx->user : "unauthenticated",
@@ -1131,7 +1135,9 @@ pthread_handler_t handle_one_connection(void *arg)
 {
   THD *thd= (THD*) arg;
 
-  thd->thr_create_utime= my_micro_time();
+  thd->thr_create_utime= microsecond_interval_timer();
+  /* We need to set this because of time_out_user_resource_limits */
+  thd->start_utime= thd->thr_create_utime;
 
   if (thread_scheduler.init_new_connection_thread())
   {
@@ -1182,8 +1188,7 @@ pthread_handler_t handle_one_connection(void *arg)
     prepare_new_connection_state(thd);
 
     while (!net->error && net->vio != 0 &&
-           thd->killed != THD::KILL_CONNECTION &&
-           thd->killed != THD::KILL_SERVER)
+           thd->killed < KILL_CONNECTION)
     {
       if (do_command(thd))
 	break;
diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc
index 630ba787c72..c171017a993 100644
--- a/sql/sql_cursor.cc
+++ b/sql/sql_cursor.cc
@@ -365,7 +365,6 @@ Sensitive_cursor::open(JOIN *join_arg)
   join= join_arg;
   THD *thd= join->thd;
   /* First non-constant table */
-  JOIN_TAB *join_tab= join->join_tab + join->const_tables;
   DBUG_ENTER("Sensitive_cursor::open");
 
   join->change_result(result);
@@ -383,26 +382,29 @@ Sensitive_cursor::open(JOIN *join_arg)
 
   /* Prepare JOIN for reading rows. */
   join->tmp_table= 0;
-  join->join_tab[join->tables-1].next_select= setup_end_select_func(join);
+  join->join_tab[join->top_join_tab_count - 1].next_select= setup_end_select_func(join);
   join->send_records= 0;
   join->fetch_limit= join->unit->offset_limit_cnt;
 
   /* Disable JOIN CACHE as it is not working with cursors yet */
-  for (JOIN_TAB *tab= join_tab;
-       tab != join->join_tab + join->tables - 1;
-       tab++)
+  for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab != join->join_tab + join->top_join_tab_count - 1;
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
     if (tab->next_select == sub_select_cache)
       tab->next_select= sub_select;
   }
 
-  DBUG_ASSERT(join_tab->table->reginfo.not_exists_optimize == 0);
-  DBUG_ASSERT(join_tab->not_used_in_distinct == 0);
+#ifndef DBUG_OFF
+  JOIN_TAB *first_tab= first_linear_tab(join, WITHOUT_CONST_TABLES);
+  DBUG_ASSERT(first_tab->table->reginfo.not_exists_optimize == 0);
+  DBUG_ASSERT(first_tab->not_used_in_distinct == 0);
   /*
     null_row is set only if row not found and it's outer join: should never
     happen for the first table in join_tab list
   */
-  DBUG_ASSERT(join_tab->table->null_row == 0);
+  DBUG_ASSERT(first_tab->table->null_row == 0);
+#endif
   DBUG_RETURN(0);
 }
 
@@ -721,8 +723,9 @@ bool Select_materialize::send_fields(List<Item> &list, uint flags)
 {
   DBUG_ASSERT(table == 0);
   if (create_result_table(unit->thd, unit->get_unit_column_types(),
-                          FALSE, thd->options | TMP_TABLE_ALL_COLUMNS, ""))
-    return TRUE;
+                          FALSE, thd->options | TMP_TABLE_ALL_COLUMNS, "",
+                          FALSE, TRUE))
+   return TRUE;
 
   materialized_cursor= new (&table->mem_root)
                        Materialized_cursor(result, table);
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 2a2efe27952..07dd1984d80 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -23,6 +23,7 @@
 #include "sp_head.h"
 #include "sp.h"
 #include "events.h"
+#include "sql_handler.h"
 #include <my_dir.h>
 #include <m_ctype.h>
 #include "log.h"
@@ -976,7 +977,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
     {
       ha_drop_database(path);
       tmp_disable_binlog(thd);
-      query_cache_invalidate1(db);
+      query_cache_invalidate1(thd, db);
       (void) sp_drop_db_routines(thd, db); /* @todo Do not ignore errors */
 #ifdef HAVE_EVENT_SCHEDULER
       Events::drop_schema_events(thd, db);
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 72a5b9703b3..84e88196f20 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -25,6 +25,7 @@
 #include "sql_select.h"
 #include "sp_head.h"
 #include "sql_trigger.h"
+#include "sql_handler.h"
 
 /**
   Implement DELETE SQL word.
@@ -50,7 +51,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   bool          triggers_applicable;
   uint usable_index= MAX_KEY;
   SELECT_LEX   *select_lex= &thd->lex->select_lex;
-  THD::killed_state killed_status= THD::NOT_KILLED;
+  killed_state killed_status= NOT_KILLED;
   DBUG_ENTER("mysql_delete");
   bool save_binlog_row_based;
 
@@ -61,10 +62,21 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
 
   if (open_and_lock_tables(thd, table_list))
     DBUG_RETURN(TRUE);
-  if (!(table= table_list->table))
+
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE);
+
+  if (!table_list->single_table_updatable())
   {
-    my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
-	     table_list->view_db.str, table_list->view_name.str);
+     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
+     DBUG_RETURN(TRUE);
+  }
+  if (!(table= table_list->table) || !table->created)
+  {
+      my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+	       table_list->view_db.str, table_list->view_name.str);
     DBUG_RETURN(TRUE);
   }
   thd_proc_info(thd, "init");
@@ -73,6 +85,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   if (mysql_prepare_delete(thd, table_list, &conds))
     DBUG_RETURN(TRUE);
 
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
   /* check ORDER BY even if it can be ignored */
   if (order && order->elements)
   {
@@ -94,6 +111,10 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     }
   }
 
+  /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+  if (select_lex->optimize_unflattened_subqueries())
+    DBUG_RETURN(TRUE);
+
   const_cond= (!conds || conds->const_item());
   safe_update=test(thd->options & OPTION_SAFE_UPDATES);
   if (safe_update && const_cond)
@@ -103,8 +124,6 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     DBUG_RETURN(TRUE);
   }
 
-  select_lex->no_error= thd->lex->ignore;
-
   const_cond_result= const_cond && (!conds || conds->val_int());
   if (thd->is_error())
   {
@@ -344,16 +363,9 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
       }
       else
       {
-	table->file->print_error(error,MYF(0));
-	/*
-	  In < 4.0.14 we set the error number to 0 here, but that
-	  was not sensible, because then MySQL would not roll back the
-	  failed DELETE, and also wrote it to the binlog. For MyISAM
-	  tables a DELETE probably never should fail (?), but for
-	  InnoDB it can fail in a FOREIGN KEY error or an
-	  out-of-tablespace error.
-	*/
-        if (!select_lex->no_error)
+	table->file->print_error(error,
+                                 MYF(thd->lex->ignore ? ME_JUST_WARNING : 0));
+        if (thd->is_error())
         {
           error= 1;
           break;
@@ -364,7 +376,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
       table->file->unlock_row();  // Row failed selection, release lock on it
   }
   killed_status= thd->killed;
-  if (killed_status != THD::NOT_KILLED || thd->is_error())
+  if (killed_status != NOT_KILLED || thd->is_error())
     error= 1;					// Aborted
   if (will_batch && (loc_error= table->file->end_bulk_delete()))
   {
@@ -402,6 +414,12 @@ cleanup:
     query_cache_invalidate3(thd, table_list, 1);
   }
 
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
+
   delete select;
   transactional_table= table->file->has_transactions();
 
@@ -425,7 +443,7 @@ cleanup:
       if (error < 0)
         thd->clear_error();
       else
-        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+        errcode= query_error_code(thd, killed_status == NOT_KILLED);
       
       /*
         [binlog]: If 'handler::delete_all_rows()' was called and the
@@ -503,8 +521,8 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list, 
-                                    &select_lex->leaf_tables, FALSE, 
-                                    DELETE_ACL, SELECT_ACL) ||
+                                    select_lex->leaf_tables, FALSE, 
+                                    DELETE_ACL, SELECT_ACL, TRUE) ||
       setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
       setup_ftfuncs(select_lex))
     DBUG_RETURN(TRUE);
@@ -527,7 +545,7 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
     fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
     DBUG_RETURN(TRUE);
 
-  select_lex->fix_prepare_information(thd, conds, &fake_conds);
+  select_lex->fix_prepare_information(thd, conds, &fake_conds); 
   DBUG_RETURN(FALSE);
 }
 
@@ -563,6 +581,12 @@ int mysql_multi_delete_prepare(THD *thd)
   TABLE_LIST *target_tbl;
   DBUG_ENTER("mysql_multi_delete_prepare");
 
+  if (mysql_handle_derived(lex, DT_INIT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_PREPARE))
+    DBUG_RETURN(TRUE);
   /*
     setup_tables() need for VIEWs. JOIN::prepare() will not do it second
     time.
@@ -572,10 +596,12 @@ int mysql_multi_delete_prepare(THD *thd)
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     lex->query_tables,
-                                    &lex->select_lex.leaf_tables, FALSE, 
-                                    DELETE_ACL, SELECT_ACL))
+                                    lex->select_lex.leaf_tables, FALSE, 
+                                    DELETE_ACL, SELECT_ACL, FALSE))
     DBUG_RETURN(TRUE);
 
+  if (lex->select_lex.handle_derived(thd->lex, DT_MERGE))  
+    DBUG_RETURN(TRUE);
 
   /*
     Multi-delete can't be constructed over-union => we always have
@@ -587,16 +613,14 @@ int mysql_multi_delete_prepare(THD *thd)
        target_tbl;
        target_tbl= target_tbl->next_local)
   {
-    if (!(target_tbl->table= target_tbl->correspondent_table->table))
+
+    target_tbl->table= target_tbl->correspondent_table->table;
+    if (target_tbl->correspondent_table->is_multitable())
     {
-      DBUG_ASSERT(target_tbl->correspondent_table->view &&
-                  target_tbl->correspondent_table->merge_underlying_list &&
-                  target_tbl->correspondent_table->merge_underlying_list->
-                  next_local);
-      my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
-               target_tbl->correspondent_table->view_db.str,
-               target_tbl->correspondent_table->view_name.str);
-      DBUG_RETURN(TRUE);
+       my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+                target_tbl->correspondent_table->view_db.str,
+                target_tbl->correspondent_table->view_name.str);
+       DBUG_RETURN(TRUE);
     }
 
     if (!target_tbl->correspondent_table->single_table_updatable() ||
@@ -626,6 +650,10 @@ int mysql_multi_delete_prepare(THD *thd)
     with further calls to unique_table
   */
   lex->select_lex.exclude_from_table_unique_test= FALSE;
+  
+  if (lex->select_lex.save_prep_leaf_tables(thd))
+    DBUG_RETURN(TRUE);
+  
   DBUG_RETURN(FALSE);
 }
 
@@ -646,6 +674,12 @@ multi_delete::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
   unit= u;
   do_delete= 1;
   thd_proc_info(thd, "deleting from main table");
+  SELECT_LEX *select_lex= u->first_select();
+  if (select_lex->first_cond_optimization)
+  {
+    if (select_lex->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);
+  }
   DBUG_RETURN(0);
 }
 
@@ -679,9 +713,10 @@ multi_delete::initialize_tables(JOIN *join)
 
 
   walk= delete_tables;
-  for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
-       tab < end;
-       tab++)
+
+  for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
   {
     if (tab->table->map & tables_to_delete_from)
     {
@@ -766,7 +801,7 @@ int multi_delete::send_data(List<Item> &values)
   TABLE_LIST *del_table;
   DBUG_ENTER("multi_delete::send_data");
 
-  bool ignore= thd->lex->current_select->no_error;
+  bool ignore= thd->lex->ignore;
 
   for (del_table= delete_tables;
        del_table;
@@ -875,7 +910,7 @@ void multi_delete::abort()
     */
     if (mysql_bin_log.is_open())
     {
-      int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+      int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
       /* possible error of writing binary log is ignored deliberately */
       (void) thd->binlog_query(THD::ROW_QUERY_TYPE,
                               thd->query(), thd->query_length(),
@@ -915,11 +950,11 @@ int multi_delete::do_deletes()
        table_being_deleted= table_being_deleted->next_local, counter++)
   { 
     TABLE *table = table_being_deleted->table;
+    int local_error; 
     if (tempfiles[counter]->get(table))
       DBUG_RETURN(1);
 
-    int local_error= 
-      do_table_deletes(table, thd->lex->current_select->no_error);
+    local_error= do_table_deletes(table, thd->lex->ignore);
 
     if (thd->killed && !local_error)
       DBUG_RETURN(1);
@@ -1025,7 +1060,7 @@ int multi_delete::do_table_deletes(TABLE *table, bool ignore)
 
 bool multi_delete::send_eof()
 {
-  THD::killed_state killed_status= THD::NOT_KILLED;
+  killed_state killed_status= NOT_KILLED;
   thd_proc_info(thd, "deleting from reference tables");
 
   /* Does deletes for the last n - 1 tables, returns 0 if ok */
@@ -1033,7 +1068,7 @@ bool multi_delete::send_eof()
 
   /* compute a total error to know if something failed */
   local_error= local_error || error;
-  killed_status= (local_error == 0)? THD::NOT_KILLED : thd->killed;
+  killed_status= (local_error == 0)? NOT_KILLED : thd->killed;
   /* reset used flags */
   thd_proc_info(thd, "end");
 
@@ -1053,7 +1088,7 @@ bool multi_delete::send_eof()
       if (local_error == 0)
         thd->clear_error();
       else
-        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+        errcode= query_error_code(thd, killed_status == NOT_KILLED);
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query(), thd->query_length(),
                             transactional_tables, FALSE, errcode) &&
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 713a9d86863..99d20090623 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -25,38 +25,82 @@
 #include "mysql_priv.h"
 #include "sql_select.h"
 
+typedef bool (*dt_processor)(THD *thd, LEX *lex, TABLE_LIST *derived);
 
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived);
+
+
+dt_processor processors[]=
+{
+  &mysql_derived_init,
+  &mysql_derived_prepare,
+  &mysql_derived_optimize,
+  &mysql_derived_merge,
+  &mysql_derived_merge_for_insert,
+  &mysql_derived_create,
+  &mysql_derived_fill,
+  &mysql_derived_reinit,
+};
 
 /*
-  Call given derived table processor (preparing or filling tables)
+  @brief
+  Run specified phases on all derived tables/views in given LEX.
 
-  SYNOPSIS
-    mysql_handle_derived()
-    lex                 LEX for this thread
-    processor           procedure of derived table processing
+  @param lex              LEX for this thread
+  @param phases           phases to run derived tables/views through
 
-  RETURN
-    FALSE  OK
-    TRUE   Error
+  @return FALSE  OK
+  @return TRUE   Error
 */
-
 bool
-mysql_handle_derived(LEX *lex, bool (*processor)(THD*, LEX*, TABLE_LIST*))
+mysql_handle_derived(LEX *lex, uint phases)
 {
   bool res= FALSE;
-  if (lex->derived_tables)
+  THD *thd= lex->thd;
+  if (!lex->derived_tables)
+    return FALSE;
+
+  lex->thd->derived_tables_processing= TRUE;
+
+  for (uint phase= 0; phase < DT_PHASES && !res; phase++)
   {
-    lex->thd->derived_tables_processing= TRUE;
+    uint phase_flag= DT_INIT << phase;
+    if (phase_flag > phases)
+      break;
+    if (!(phases & phase_flag))
+      continue;
+    if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+      break;
+
     for (SELECT_LEX *sl= lex->all_selects_list;
-	 sl;
+	 sl && !res;
 	 sl= sl->next_select_in_list())
     {
       for (TABLE_LIST *cursor= sl->get_table_list();
-	   cursor;
+	   cursor && !res;
 	   cursor= cursor->next_local)
       {
-	if ((res= (*processor)(lex->thd, lex, cursor)))
-	  goto out;
+        if (!cursor->is_view_or_derived() && phases == DT_MERGE_FOR_INSERT)
+          continue;
+        uint8 allowed_phases= (cursor->is_merged_derived() ? DT_PHASES_MERGE :
+                               DT_PHASES_MATERIALIZE | DT_MERGE_FOR_INSERT);
+        /*
+          Skip derived tables to which the phase isn't applicable.
+          TODO: mark derived at the parse time, later set it's type
+          (merged or materialized)
+        */
+        if ((phase_flag != DT_PREPARE && !(allowed_phases & phase_flag)) ||
+            (cursor->merged_for_insert && phase_flag != DT_REINIT &&
+             phase_flag != DT_PREPARE))
+          continue;
+	res= (*processors[phase])(lex->thd, lex, cursor);
       }
       if (lex->describe)
       {
@@ -69,30 +113,441 @@ mysql_handle_derived(LEX *lex, bool (*processor)(THD*, LEX*, TABLE_LIST*))
       }
     }
   }
-out:
+  lex->thd->derived_tables_processing= FALSE;
+  return res;
+}
+
+/*
+  @brief
+  Run through phases for the given derived table/view.
+
+  @param lex             LEX for this thread
+  @param derived         the derived table to handle
+  @param phase_map       phases to process tables/views through
+
+  @details
+
+  This function process the derived table (view) 'derived' to performs all
+  actions that are to be done on the table at the phases specified by
+  phase_map. The processing is carried out starting from the actions
+  performed at the earlier phases (those having smaller ordinal numbers).
+
+  @note
+  This function runs specified phases of the derived tables handling on the
+  given derived table/view. This function is used in the chain of calls:
+    SELECT_LEX::handle_derived ->
+      TABLE_LIST::handle_derived ->
+        mysql_handle_single_derived
+  This chain of calls implements the bottom-up handling of the derived tables:
+  i.e. most inner derived tables/views are handled first. This order is
+  required for the all phases except the merge and the create steps.
+  For the sake of code simplicity this order is kept for all phases.
+
+  @return FALSE ok
+  @return TRUE  error
+*/
+
+bool
+mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
+{
+  bool res= FALSE;
+  THD *thd= lex->thd;
+  uint8 allowed_phases= (derived->is_merged_derived() ? DT_PHASES_MERGE :
+                         DT_PHASES_MATERIALIZE);
+  if (!lex->derived_tables)
+    return FALSE;
+
+  lex->thd->derived_tables_processing= TRUE;
+
+  for (uint phase= 0; phase < DT_PHASES; phase++)
+  {
+    uint phase_flag= DT_INIT << phase;
+    if (phase_flag > phases)
+      break;
+    if (!(phases & phase_flag))
+      continue;
+    /* Skip derived tables to which the phase isn't applicable.  */
+    if (phase_flag != DT_PREPARE &&
+        !(allowed_phases & phase_flag))
+      continue;
+    if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+      break;
+
+    if ((res= (*processors[phase])(lex->thd, lex, derived)))
+      break;
+  }
   lex->thd->derived_tables_processing= FALSE;
   return res;
 }
 
 
 /**
-  @brief Create temporary table structure (but do not fill it).
+  @brief
+  Run specified phases for derived tables/views in the given list
 
-  @param thd Thread handle
-  @param lex LEX for this thread
-  @param orig_table_list TABLE_LIST for the upper SELECT
+  @param lex        LEX for this thread
+  @param table_list list of derived tables/view to handle
+  @param phase_map  phases to process tables/views through
 
-  @details 
+  @details
+  This function runs phases specified by the 'phases_map' on derived
+  tables/views found in the 'dt_list' with help of the
+  TABLE_LIST::handle_derived function.
+  'lex' is passed as an argument to the TABLE_LIST::handle_derived.
 
-  This function is called before any command containing derived tables is
-  executed. Currently the function is used for derived tables, i.e.
+  @return FALSE ok
+  @return TRUE  error
+*/
 
-  - Anonymous derived tables, or 
-  - Named derived tables (aka views) with the @c TEMPTABLE algorithm.
-   
-  The table reference, contained in @c orig_table_list, is updated with the
-  fields of a new temporary table.
+bool
+mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *table_list, uint phases)
+{
+  for (TABLE_LIST *tl= table_list; tl; tl= tl->next_local)
+  {
+    if (tl->is_view_or_derived() &&
+        tl->handle_derived(lex, phases))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Merge a derived table/view into the embedding select
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function merges the given derived table / view into the parent select
+  construction. Any derived table/reference to view occurred in the FROM
+  clause of the embedding select is represented by a TABLE_LIST structure a
+  pointer to which is passed to the function as in the parameter 'derived'.
+  This structure contains  the number/map, alias, a link to SELECT_LEX of the
+  derived table and other info. If the 'derived' table is used in a nested join
+  then additionally the structure contains a reference to the ON expression
+  for this join.
+
+  The merge process results in elimination of the derived table (or the
+  reference to a view) such that:
+    - the FROM list of the derived table/view is wrapped into a nested join
+      after which the nest is added to the FROM list of the embedding select
+    - the WHERE condition of the derived table (view) is ANDed with the ON
+      condition attached to the table.
+
+  @note
+  Tables are merged into the leaf_tables list, original derived table is removed
+  from this list also. SELECT_LEX::table_list list is left untouched.
+  Where expression is merged with derived table's on_expr and can be found after
+  the merge through the SELECT_LEX::table_list.
+
+  Examples of the derived table/view merge:
+
+  Schema:
+  Tables: t1(f1), t2(f2), t3(f3)
+  View v1: SELECT f1 FROM t1 WHERE f1 < 1
+
+  Example with a view:
+    Before merge:
+
+    The query (Q1): SELECT f1,f2 FROM t2 LEFT JOIN v1 ON f1 = f2
+
+       (LEX of the main query)
+                 |
+           (select_lex)
+                 |
+         (FROM table list)
+                 |
+            (join list)= t2, v1
+                             / \
+                            /  (on_expr)= (f1 = f2)
+                            |
+                    (LEX of the v1 view)
+                            |
+                       (select_lex)= SELECT f1 FROM t1 WHERE f1 < 1
+
+
+    After merge:
+
+    The rewritten query Q1 (Q1'):
+      SELECT f1,f2 FROM t2 LEFT JOIN (t1) ON ((f1 = f2) and (f1 < 1))
+
+        (LEX of the main query)
+                   |
+             (select_lex)
+                   |
+           (FROM table list)
+                   |
+               (join list)= t2, (t1)
+                                    \
+                                   (on_expr)= (f1 = f2) and (f1 < 1)
+
+    In this example table numbers are assigned as follows:
+      (outer select): t2 - 1, v1 - 2
+      (inner select): t1 - 1
+    After the merge table numbers will be:
+      (outer select): t2 - 1, t1 - 2
+
+  Example with a derived table:
+    The query Q2:
+      SELECT f1,f2
+       FROM (SELECT f1 FROM t1, t3 WHERE f1=f3 and f1 < 1) tt, t2
+       WHERE f1 = f2
+
+    Before merge:
+              (LEX of the main query)
+                        |
+                  (select_lex)
+                  /           \
+       (FROM table list)   (WHERE clause)= (f1 = f2)
+                  |
+           (join list)= tt, t2
+                       / \
+                      /  (on_expr)= (empty)
+                     /
+           (select_lex)= SELECT f1 FROM t1, t3 WHERE f1 = f3 and f1 < 1
+
+    After merge:
+
+    The rewritten query Q2 (Q2'):
+      SELECT f1,f2
+       FROM (t1, t3) JOIN t2 ON (f1 = f3 and f1 < 1)
+       WHERE f1 = f2
+
+              (LEX of the main query)
+                        |
+                  (select_lex)
+                  /           \
+       (FROM table list)   (WHERE clause)= (f1 = f2)
+                 |
+          (join list)= t2, (t1, t3)
+                                   \
+                                 (on_expr)= (f1 = f3 and f1 < 1)
+
+  In this example table numbers are assigned as follows:
+    (outer select): tt - 1, t2 - 2
+    (inner select): t1 - 1, t3 - 2
+  After the merge table numbers will be:
+    (outer select): t1 - 1, t2 - 2, t3 - 3
+
+  @return FALSE if derived table/view were successfully merged.
+  @return TRUE if an error occur.
+*/
+
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  bool res= FALSE;
+  SELECT_LEX *dt_select= derived->get_single_select();
+  table_map map;
+  uint tablenr;
+  SELECT_LEX *parent_lex= derived->select_lex;
+  Query_arena *arena, backup;
+
+  if (derived->merged)
+    return FALSE;
+
+ if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+     thd->lex->sql_command == SQLCOM_DELETE_MULTI)
+   thd->save_prep_leaf_list= TRUE;
+
+  arena= thd->activate_stmt_arena_if_needed(&backup);  // For easier test
+  derived->merged= TRUE;
+
+  if (!derived->merged_for_insert || 
+      (derived->is_multitable() && 
+       (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+        thd->lex->sql_command == SQLCOM_DELETE_MULTI)))
+  {
+    /*
+      Check whether there is enough free bits in table map to merge subquery.
+      If not - materialize it. This check isn't cached so when there is a big
+      and small subqueries, and the bigger one can't be merged it wouldn't
+      block the smaller one.
+    */
+    if (parent_lex->get_free_table_map(&map, &tablenr))
+    {
+      /* There is no enough table bits, fall back to materialization. */
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
+      goto exit_merge;
+    }
+
+    if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES)
+    {
+      /* There is no enough table bits, fall back to materialization. */
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
+      goto exit_merge;
+    }
+
+    if (dt_select->options & OPTION_SCHEMA_TABLE)
+      parent_lex->options |= OPTION_SCHEMA_TABLE;
+
+    parent_lex->cond_count+= dt_select->cond_count;
+
+    if (!derived->get_unit()->prepared)
+    {
+      dt_select->leaf_tables.empty();
+      make_leaves_list(dt_select->leaf_tables, derived, TRUE, 0);
+    } 
+
+    derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN));
+    if (!derived->nested_join)
+    {
+      res= TRUE;
+      goto exit_merge;
+    }
+
+    /* Merge derived table's subquery in the parent select. */
+    if (parent_lex->merge_subquery(thd, derived, dt_select, tablenr, map))
+    {
+      res= TRUE;
+      goto exit_merge;
+    }
+
+    /*
+      exclude select lex so it doesn't show up in explain.
+      do this only for derived table as for views this is already done.
+
+      From sql_view.cc
+        Add subqueries units to SELECT into which we merging current view.
+        unit(->next)* chain starts with subqueries that are used by this
+        view and continues with subqueries that are used by other views.
+        We must not add any subquery twice (otherwise we'll form a loop),
+        to do this we remember in end_unit the first subquery that has
+        been already added.
+    */
+    derived->get_unit()->exclude_level();
+    if (parent_lex->join) 
+      parent_lex->join->table_count+= dt_select->join->table_count - 1;
+  }
+  if (derived->get_unit()->prepared)
+  {
+    Item *expr= derived->on_expr;
+    expr= and_conds(expr, dt_select->join ? dt_select->join->conds : 0);
+    if (expr && (derived->prep_on_expr || expr != derived->on_expr))
+    {
+      derived->on_expr= expr;
+      derived->prep_on_expr= expr->copy_andor_structure(thd);
+    }
+    if (derived->on_expr &&
+        ((!derived->on_expr->fixed &&
+          derived->on_expr->fix_fields(thd, &derived->on_expr)) ||
+          derived->on_expr->check_cols(1)))
+    {
+      res= TRUE; /* purecov: inspected */
+      goto exit_merge;
+    }
+    // Update used tables cache according to new table map
+    if (derived->on_expr)
+    {
+      derived->on_expr->fix_after_pullout(parent_lex, &derived->on_expr);
+      fix_list_after_tbl_changes(parent_lex, &derived->nested_join->join_list);
+    }
+  }
+
+exit_merge:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  return res;
+}
+
+
+/**
+  @brief
+  Merge a view for the embedding INSERT/UPDATE/DELETE
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function substitutes the derived table for the first table from
+  the query of the derived table thus making it a correct target table for the
+  INSERT/UPDATE/DELETE statements. As this operation is correct only for
+  single table views only, for multi table views this function does nothing.
+  The derived parameter isn't checked to be a view as derived tables aren't
+  allowed for INSERT/UPDATE/DELETE statements.
+
+  @return FALSE if derived table/view were successfully merged.
+  @return TRUE if an error occur.
+*/
+
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  if (derived->merged_for_insert)
+    return FALSE;
+  if (derived->is_materialized_derived())
+    return mysql_derived_prepare(thd, lex, derived);
+  if (!derived->is_multitable())
+  {
+    if (!derived->single_table_updatable())
+      return derived->create_field_translation(thd);
+    if (derived->merge_underlying_list)
+    {
+      derived->table= derived->merge_underlying_list->table;
+      derived->schema_table= derived->merge_underlying_list->schema_table;
+      derived->merged_for_insert= TRUE;
+    }
+  }  
+  return FALSE;
+}
+
+
+/*
+  @brief
+  Initialize a derived table/view
+
+  @param thd	     Thread handle
+  @param lex         LEX of the embedding query.
+  @param derived     reference to the derived table.
+
+  @detail
+  Fill info about derived table/view without preparing an
+  underlying select. Such as: create a field translation for views, mark it as
+  a multitable if it is and so on.
+
+  @return
+    false  OK
+    true   Error
+*/
+
+
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  DBUG_ENTER("mysql_derived_init");
+
+  // Skip already prepared views/DT
+  if (!unit || unit->prepared)
+    DBUG_RETURN(FALSE);
+
+  DBUG_RETURN(derived->init_derived(thd, TRUE));
+}
+
+
+/*
+  @brief
+  Create temporary table structure (but do not fill it)
+
+  @param thd	     Thread handle
+  @param lex         LEX of the embedding query.
+  @param derived     reference to the derived table.
+
+  @detail
+  Prepare underlying select for a derived table/view. To properly resolve
+  names in the embedding query the TABLE structure is created. Actual table
+  is created later by the mysql_derived_create function.
 
+  This function is called before any command containing derived table
+  is executed. All types of derived tables are handled by this function:
+  - Anonymous derived tables, or
+  - Named derived tables (aka views).
+
+  The table reference, contained in @c derived, is updated with the
+  fields of a new temporary table.
   Derived tables are stored in @c thd->derived_tables and closed by
   close_thread_tables().
 
@@ -116,200 +571,366 @@ out:
   the state of privilege checking (GRANT_INFO struct) is copied as-is to the
   temporary table.
 
-  This function implements a signature called "derived table processor", and
-  is passed as a function pointer to mysql_handle_derived().
+  Only the TABLE structure is created here, actual table is created by the
+  mysql_derived_create function.
 
   @note This function sets @c SELECT_ACL for @c TEMPTABLE views as well as
   anonymous derived tables, but this is ok since later access checking will
   distinguish between them.
 
-  @see mysql_handle_derived(), mysql_derived_filling(), GRANT_INFO
+  @see mysql_handle_derived(), mysql_derived_fill(), GRANT_INFO
 
   @return
     false  OK
     true   Error
 */
 
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
-  SELECT_LEX_UNIT *unit= orig_table_list->derived;
-  ulonglong create_options;
+  SELECT_LEX_UNIT *unit= derived->get_unit();
   DBUG_ENTER("mysql_derived_prepare");
   bool res= FALSE;
-  if (unit)
+
+  // Skip already prepared views/DT
+  if (!unit || unit->prepared ||
+      (derived->merged_for_insert && 
+       !(derived->is_multitable() &&
+         (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+          thd->lex->sql_command == SQLCOM_DELETE_MULTI))))
+    DBUG_RETURN(FALSE);
+
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                   // For easier test
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  SELECT_LEX *first_select= unit->first_select();
+
+  /* prevent name resolving out of derived table */
+  for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
   {
-    SELECT_LEX *first_select= unit->first_select();
-    TABLE *table= 0;
-    select_union *derived_result;
-
-    /* prevent name resolving out of derived table */
-    for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
-      sl->context.outer_context= 0;
-
-    if (!(derived_result= new select_union))
-      DBUG_RETURN(TRUE); // out of memory
-
-    lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
-    // st_select_lex_unit::prepare correctly work for single select
-    if ((res= unit->prepare(thd, derived_result, 0)))
-      goto exit;
-    lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
-    if ((res= check_duplicate_names(unit->types, 0)))
-      goto exit;
-
-    create_options= (first_select->options | thd->options |
-                     TMP_TABLE_ALL_COLUMNS);
-    /*
-      Temp table is created so that it hounours if UNION without ALL is to be 
-      processed
-
-      As 'distinct' parameter we always pass FALSE (0), because underlying
-      query will control distinct condition by itself. Correct test of
-      distinct underlying query will be is_union &&
-      !unit->union_distinct->next_select() (i.e. it is union and last distinct
-      SELECT is last SELECT of UNION).
-    */
-    if ((res= derived_result->create_result_table(thd, &unit->types, FALSE,
-                                                 create_options,
-                                                 orig_table_list->alias)))
-      goto exit;
+    sl->context.outer_context= 0;
+    // Prepare underlying views/DT first.
+    sl->handle_derived(lex, DT_PREPARE);
+  }
 
-    table= derived_result->table;
+  unit->derived= derived;
+
+  if (!(derived->derived_result= new select_union))
+    DBUG_RETURN(TRUE); // out of memory
+
+  lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
+  // st_select_lex_unit::prepare correctly work for single select
+  if ((res= unit->prepare(thd, derived->derived_result, 0)))
+    goto exit;
+  lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
+  if ((res= check_duplicate_names(unit->types, 0)))
+    goto exit;
+
+  /*
+    Check whether we can merge this derived table into main select.
+    Depending on the result field translation will or will not
+    be created.
+  */
+  if (derived->init_derived(thd, FALSE))
+    goto exit;
+
+  /*
+    Temp table is created so that it hounours if UNION without ALL is to be 
+    processed
+
+    As 'distinct' parameter we always pass FALSE (0), because underlying
+    query will control distinct condition by itself. Correct test of
+    distinct underlying query will be is_union &&
+    !unit->union_distinct->next_select() (i.e. it is union and last distinct
+    SELECT is last SELECT of UNION).
+  */
+  thd->create_tmp_table_for_derived= TRUE;
+  if (derived->derived_result->create_result_table(thd, &unit->types, FALSE,
+                                                (first_select->options |
+                                                 thd->options |
+                                                 TMP_TABLE_ALL_COLUMNS),
+                                                derived->alias,
+                                                FALSE, FALSE))
+  { 
+    thd->create_tmp_table_for_derived= FALSE;
+    goto exit;
+  }
+  thd->create_tmp_table_for_derived= FALSE;
+
+  derived->table= derived->derived_result->table;
+  if (derived->is_derived() && derived->is_merged_derived())
+    first_select->mark_as_belong_to_derived(derived);
 
 exit:
-    /* Hide "Unknown column" or "Unknown function" error */
-    if (orig_table_list->view)
-    {
-      if (thd->is_error() &&
+  /* Hide "Unknown column" or "Unknown function" error */
+  if (derived->view)
+  {
+    if (thd->is_error() &&
           (thd->main_da.sql_errno() == ER_BAD_FIELD_ERROR ||
-          thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
-          thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST))
-      {
-        thd->clear_error();
-        my_error(ER_VIEW_INVALID, MYF(0), orig_table_list->db,
-                 orig_table_list->table_name);
-      }
-    }
-
-    /*
-      if it is preparation PS only or commands that need only VIEW structure
-      then we do not need real data and we can skip execution (and parameters
-      is not defined, too)
-    */
-    if (res)
+           thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
+           thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST))
     {
-      if (table)
-	free_tmp_table(thd, table);
-      delete derived_result;
+      thd->clear_error();
+      my_error(ER_VIEW_INVALID, MYF(0), derived->db,
+               derived->table_name);
     }
+  }
+
+  /*
+    if it is preparation PS only or commands that need only VIEW structure
+    then we do not need real data and we can skip execution (and parameters
+    is not defined, too)
+  */
+  if (res)
+  {
+    if (derived->table)
+      free_tmp_table(thd, derived->table);
+    delete derived->derived_result;
+  }
+  else
+  {
+    TABLE *table= derived->table;
+    table->derived_select_number= first_select->select_number;
+    table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+    if (derived->referencing_view)
+      table->grant= derived->grant;
     else
     {
-      if (!thd->fill_derived_tables())
-      {
-	delete derived_result;
-	derived_result= NULL;
-      }
-      orig_table_list->derived_result= derived_result;
-      orig_table_list->table= table;
-      table->derived_select_number= first_select->select_number;
-      table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
-#ifndef NO_EMBEDDED_ACCESS_CHECKS
-      if (orig_table_list->referencing_view)
-        table->grant= orig_table_list->grant;
-      else
-        table->grant.privilege= SELECT_ACL;
-#endif
-      orig_table_list->db= (char *)"";
-      orig_table_list->db_length= 0;
-      // Force read of table stats in the optimizer
-      table->file->info(HA_STATUS_VARIABLE);
-      /* Add new temporary table to list of open derived tables */
-      table->next= thd->derived_tables;
-      thd->derived_tables= table;
+      table->grant.privilege= SELECT_ACL;
+      if (derived->is_derived())
+        derived->grant.privilege= SELECT_ACL;
     }
+#endif
+    /* Add new temporary table to list of open derived tables */
+    table->next= thd->derived_tables;
+    thd->derived_tables= table;
   }
-  else if (orig_table_list->merge_underlying_list)
-    orig_table_list->set_underlying_merge();
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
   DBUG_RETURN(res);
 }
 
 
-/*
-  fill derived table
-
-  SYNOPSIS
-    mysql_derived_filling()
-    thd			Thread handle
-    lex                 LEX for this thread
-    unit                node that contains all SELECT's for derived tables
-    orig_table_list     TABLE_LIST for the upper SELECT
-
-  IMPLEMENTATION
-    Derived table is resolved with temporary table. It is created based on the
-    queries defined. After temporary table is filled, if this is not EXPLAIN,
-    then the entire unit / node is deleted. unit is deleted if UNION is used
-    for derived table and node is deleted is it is a  simple SELECT.
-    If you use this function, make sure it's not called at prepare.
-    Due to evaluation of LIMIT clause it can not be used at prepared stage.
-
-  RETURN
-    FALSE  OK
-    TRUE   Error
+/**
+  @brief
+  Runs optimize phase for a derived table/view.
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  Runs optimize phase for given 'derived' derived table/view.
+  If optimizer finds out that it's of the type "SELECT a_constant" then this
+  functions also materializes it.
+
+  @return FALSE ok.
+  @return TRUE if an error occur.
 */
 
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
-  TABLE *table= orig_table_list->table;
-  SELECT_LEX_UNIT *unit= orig_table_list->derived;
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  SELECT_LEX *first_select= unit->first_select();
+  SELECT_LEX *save_current_select= lex->current_select;
+
   bool res= FALSE;
 
-  /*check that table creation pass without problem and it is derived table */
-  if (table && unit)
+  if (unit->optimized)
+    return FALSE;
+  lex->current_select= first_select;
+
+  if (unit->is_union())
   {
-    SELECT_LEX *first_select= unit->first_select();
-    select_union *derived_result= orig_table_list->derived_result;
-    SELECT_LEX *save_current_select= lex->current_select;
-    if (unit->is_union())
-    {
-      // execute union without clean up
-      res= unit->exec();
-    }
-    else
+    // optimize union without execution
+    res= unit->optimize();
+  }
+  else if (unit->derived)
+  {
+    if (!derived->is_merged_derived())
     {
+      JOIN *join= first_select->join;
       unit->set_limit(first_select);
-      if (unit->select_limit_cnt == HA_POS_ERROR)
-	first_select->options&= ~OPTION_FOUND_ROWS;
-
-      lex->current_select= first_select;
-      res= mysql_select(thd, &first_select->ref_pointer_array,
-			first_select->table_list.first,
-			first_select->with_wild,
-			first_select->item_list, first_select->where,
-			(first_select->order_list.elements+
-			 first_select->group_list.elements),
-			first_select->order_list.first,
-			first_select->group_list.first,
-			first_select->having, (ORDER*) NULL,
-			(first_select->options | thd->options |
-			 SELECT_NO_UNLOCK),
-			derived_result, unit, first_select);
+      unit->optimized= TRUE;
+      if ((res= join->optimize()))
+        goto err;
+      if (join->table_count == join->const_tables)
+        derived->fill_me= TRUE;
     }
-
-    if (!res)
+  }
+  /*
+    Materialize derived tables/views of the "SELECT a_constant" type.
+    Such tables should be materialized at the optimization phase for
+    correct constant evaluation.
+  */
+  if (!res && derived->fill_me && !derived->merged_for_insert)
+  {
+    if (derived->is_merged_derived())
     {
-      /*
-        Here we entirely fix both TABLE_LIST and list of SELECT's as
-        there were no derived tables
-      */
-      if (derived_result->flush())
-        res= TRUE;
-
-      if (!lex->describe)
-        unit->cleanup();
+      derived->change_refs_to_fields();
+      derived->set_materialized_derived();
     }
-    else
-      unit->cleanup();
-    lex->current_select= save_current_select;
+    if ((res= mysql_derived_create(thd, lex, derived)))
+      goto err;
+    if ((res= mysql_derived_fill(thd, lex, derived)))
+      goto err;
+  }
+err:
+  lex->current_select= save_current_select;
+  return res;
+}
+
+
+/**
+  @brief
+  Actually create result table for a materialized derived table/view.
+
+  @param thd     thread handle
+  @param lex     LEX of the embedding query.
+  @param derived reference to the derived table.
+
+  @details
+  This function actually creates the result table for given 'derived'
+  table/view, but it doesn't fill it.
+  'thd' and 'lex' parameters are not used  by this function.
+
+  @return FALSE ok.
+  @return TRUE if an error occur.
+*/
+
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  TABLE *table= derived->table;
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+
+  if (table->created)
+    return FALSE;
+  select_union *result= (select_union*)unit->result;
+  if (table->s->db_type() == TMP_ENGINE_HTON)
+  {
+    result->tmp_table_param.keyinfo= table->s->key_info;
+    if (create_internal_tmp_table(table, result->tmp_table_param.keyinfo,
+                                  result->tmp_table_param.start_recinfo,
+                                  &result->tmp_table_param.recinfo,
+                                  (unit->first_select()->options |
+                                   thd->options | TMP_TABLE_ALL_COLUMNS)))
+      return(TRUE);
+  }
+  if (open_tmp_table(table))
+    return TRUE;
+  table->file->extra(HA_EXTRA_WRITE_CACHE);
+  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  return FALSE;
+}
+
+
+/*
+  @brief
+  Execute subquery of a materialized derived table/view and fill the result
+  table.
+
+  @param thd      Thread handle
+  @param lex      LEX for this thread
+  @param derived  reference to the derived table.
+
+  @details
+  Execute subquery of given 'derived' table/view and fill the result
+  table. After result table is filled, if this is not the EXPLAIN statement,
+  the entire unit / node is deleted. unit is deleted if UNION is used
+  for derived table and node is deleted is it is a simple SELECT.
+  'lex' is unused and 'thd' is passed as an argument to an underlying function.
+
+  @note
+  If you use this function, make sure it's not called at prepare.
+  Due to evaluation of LIMIT clause it can not be used at prepared stage.
+
+  @return FALSE  OK
+  @return TRUE   Error
+*/
+
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  SELECT_LEX_UNIT *unit= derived->get_unit();
+  bool res= FALSE;
+
+  if (unit->executed && !unit->uncacheable && !unit->describe)
+    return FALSE;
+  /*check that table creation passed without problems. */
+  DBUG_ASSERT(derived->table && derived->table->created);
+  SELECT_LEX *first_select= unit->first_select();
+  select_union *derived_result= derived->derived_result;
+  SELECT_LEX *save_current_select= lex->current_select;
+  if (unit->is_union())
+  {
+    // execute union without clean up
+    res= unit->exec();
+  }
+  else
+  {
+    unit->set_limit(first_select);
+    if (unit->select_limit_cnt == HA_POS_ERROR)
+      first_select->options&= ~OPTION_FOUND_ROWS;
+
+    lex->current_select= first_select;
+    res= mysql_select(thd, &first_select->ref_pointer_array,
+                      (TABLE_LIST*) first_select->table_list.first,
+                      first_select->with_wild,
+                      first_select->item_list, first_select->where,
+                      (first_select->order_list.elements+
+                       first_select->group_list.elements),
+                      (ORDER *) first_select->order_list.first,
+                      (ORDER *) first_select->group_list.first,
+                      first_select->having, (ORDER*) NULL,
+                      (first_select->options | thd->options |
+                       SELECT_NO_UNLOCK),
+                      derived_result, unit, first_select);
+  }
+
+  if (!res)
+  {
+    if (derived_result->flush())
+      res= TRUE;
+    unit->executed= TRUE;
   }
+  if (res || !lex->describe) 
+    unit->cleanup();
+  lex->current_select= save_current_select;
+
   return res;
 }
+
+
+/**
+  @brief
+  Re-initialize given derived table/view for the next execution.
+
+  @param  thd         thread handle
+  @param  lex         LEX for this thread
+  @param  derived     reference to the derived table.
+
+  @details
+  Re-initialize given 'derived' table/view for the next execution.
+  All underlying views/derived tables are recursively reinitialized prior
+  to re-initialization of given derived table.
+  'thd' and 'lex' are passed as arguments to called functions.
+
+  @return FALSE  OK
+  @return TRUE   Error
+*/
+
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+  st_select_lex_unit *unit= derived->get_unit();
+
+  if (derived->table)
+    derived->merged_for_insert= FALSE;
+  unit->unclean();
+  unit->types.empty();
+  /* for derived tables & PS (which can't be reset by Item_subquery) */
+  unit->reinit_exec_mechanism();
+  unit->set_thd(thd);
+  return FALSE;
+}
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index 55d85625eae..a4c98daac1f 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -134,7 +134,7 @@ MYSQL_ERROR *push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
     thd->no_warnings_for_error= 1;
     thd->spcont= NULL;
 
-    thd->killed= THD::KILL_BAD_DATA;
+    thd->killed= KILL_BAD_DATA;
     my_message(code, msg, MYF(0));
 
     thd->spcont= spcont;
diff --git a/sql/sql_expression_cache.cc b/sql/sql_expression_cache.cc
new file mode 100644
index 00000000000..4174f72f080
--- /dev/null
+++ b/sql/sql_expression_cache.cc
@@ -0,0 +1,322 @@
+/* Copyright (C) 2010-2011 Monty Program Ab & Oleksandr Byelkin
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/**
+  Minimum hit ration to proceed on disk if in memory table overflowed.
+  hit_rate = hit / (miss + hit);
+*/
+#define EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE 0.7
+/**
+  Minimum hit ratio to keep in memory table (do not switch cache off)
+  hit_rate = hit / (miss + hit);
+*/
+#define EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE  0.2
+/**
+  Number of cache miss to check hit ratio (maximum cache performance
+  impact in the case when the cache is not applicable)
+*/
+#define EXPCACHE_CHECK_HIT_RATIO_AFTER 200
+
+/*
+  Expression cache is used only for caching subqueries now, so its statistic
+  variables we call subquery_cache*.
+*/
+ulong subquery_cache_miss, subquery_cache_hit;
+
+Expression_cache_tmptable::Expression_cache_tmptable(THD *thd,
+                                                     List<Item> &dependants,
+                                                     Item *value)
+  :cache_table(NULL), table_thd(thd), items(dependants), val(value),
+   hit(0), miss(0), inited (0)
+{
+  DBUG_ENTER("Expression_cache_tmptable::Expression_cache_tmptable");
+  DBUG_VOID_RETURN;
+};
+
+
+/**
+  Disable cache
+*/
+
+void Expression_cache_tmptable::disable_cache()
+{
+  free_tmp_table(table_thd, cache_table);
+  cache_table= NULL;
+}
+
+
+/**
+  Field enumerator for TABLE::add_tmp_key
+
+  @param arg             reference variable with current field number
+
+  @return field number
+*/
+
+static uint field_enumerator(uchar *arg)
+{
+  return ((uint*)arg)[0]++;
+}
+
+
+/**
+  Initialize temporary table and auxiliary structures for the expression
+  cache
+
+  @details
+  The function creates a temporary table for the expression cache, defines
+  the search index and initializes auxiliary search structures used to check
+  whether a given set of of values of the expression parameters is in some
+  cache entry.
+*/
+
+void Expression_cache_tmptable::init()
+{
+  List_iterator<Item> li(items);
+  Item_iterator_list it(li);
+  uint field_counter;
+  DBUG_ENTER("Expression_cache_tmptable::init");
+  DBUG_ASSERT(!inited);
+  inited= TRUE;
+  cache_table= NULL;
+
+  if (items.elements == 0)
+  {
+    DBUG_PRINT("info", ("All parameters were removed by optimizer."));
+    DBUG_VOID_RETURN;
+  }
+
+  /* add result field */
+  items.push_front(val);
+
+  cache_table_param.init();
+  /* dependent items and result */
+  cache_table_param.field_count= items.elements;
+  /* postpone table creation to index description */
+  cache_table_param.skip_create_table= 1;
+
+  if (!(cache_table= create_tmp_table(table_thd, &cache_table_param,
+                                      items, (ORDER*) NULL,
+                                      FALSE, TRUE,
+                                      ((table_thd->options |
+                                        TMP_TABLE_ALL_COLUMNS) &
+                                       ~(OPTION_BIG_TABLES |
+                                         TMP_TABLE_FORCE_MYISAM)),
+                                      HA_POS_ERROR,
+                                      (char *)"subquery-cache-table",
+                                      TRUE)))
+  {
+    DBUG_PRINT("error", ("create_tmp_table failed, caching switched off"));
+    DBUG_VOID_RETURN;
+  }
+
+  if (cache_table->s->db_type() != heap_hton)
+  {
+    DBUG_PRINT("error", ("we need only heap table"));
+    goto error;
+  }
+
+  field_counter= 1;
+
+  if (cache_table->alloc_keys(1) ||
+      cache_table->add_tmp_key(0, items.elements - 1, &field_enumerator,
+                                (uchar*)&field_counter, TRUE) ||
+      ref.tmp_table_index_lookup_init(table_thd, cache_table->key_info, it,
+                                      TRUE, 1 /* skip result field*/))
+  {
+    DBUG_PRINT("error", ("creating index failed"));
+    goto error;
+  }
+  cache_table->s->keys= 1;
+  ref.null_rejecting= 1;
+  ref.disable_cache= FALSE;
+  ref.has_record= 0;
+  ref.use_count= 0;
+
+
+  if (open_tmp_table(cache_table))
+  {
+    DBUG_PRINT("error", ("Opening (creating) temporary table failed"));
+    goto error;
+  }
+
+  if (!(cached_result= new Item_field(cache_table->field[0])))
+  {
+    DBUG_PRINT("error", ("Creating Item_field failed"));
+    goto error;
+  }
+
+  DBUG_VOID_RETURN;
+
+error:
+  disable_cache();
+  DBUG_VOID_RETURN;
+}
+
+
+Expression_cache_tmptable::~Expression_cache_tmptable()
+{
+  /* Add accumulated statistics */
+  statistic_add(subquery_cache_miss, miss, &LOCK_status);
+  statistic_add(subquery_cache_hit, hit, &LOCK_status);
+
+  if (cache_table)
+    disable_cache();
+}
+
+
+/**
+  Check if a given set of parameters of the expression is in the cache
+
+  @param [out] value     the expression value found in the cache if any
+
+  @details
+  For a given set of the parameters of the expression the function
+  checks whether it can be found in some entry of the cache. If so
+  the function returns the result of the expression extracted from
+  the cache.
+
+  @retval Expression_cache::HIT if the set of parameters is in the cache
+  @retval Expression_cache::MISS - otherwise
+*/
+
+Expression_cache::result Expression_cache_tmptable::check_value(Item **value)
+{
+  int res;
+  DBUG_ENTER("Expression_cache_tmptable::check_value");
+
+  if (cache_table)
+  {
+    DBUG_PRINT("info", ("status: %u  has_record %u",
+                        (uint)cache_table->status, (uint)ref.has_record));
+    if ((res= join_read_key2(table_thd, NULL, cache_table, &ref)) == 1)
+      DBUG_RETURN(ERROR);
+
+    if (res)
+    {
+      if (((++miss) == EXPCACHE_CHECK_HIT_RATIO_AFTER) &&
+          ((double)hit / ((double)hit + miss)) <
+          EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE)
+      {
+        DBUG_PRINT("info",
+                   ("Early check: hit rate is not so good to keep the cache"));
+        disable_cache();
+      }
+
+      DBUG_RETURN(MISS);
+    }
+
+    hit++;
+    *value= cached_result;
+    DBUG_RETURN(Expression_cache::HIT);
+  }
+  DBUG_RETURN(Expression_cache::MISS);
+}
+
+
+/**
+  Put a new entry into the expression cache
+
+  @param value     the result of the expression to be put into the cache
+
+  @details
+  The function evaluates 'value' and puts the result into the cache as the
+  result of the expression for the current set of parameters.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+my_bool Expression_cache_tmptable::put_value(Item *value)
+{
+  int error;
+  DBUG_ENTER("Expression_cache_tmptable::put_value");
+  DBUG_ASSERT(inited);
+
+  if (!cache_table)
+  {
+    DBUG_PRINT("info", ("No table so behave as we successfully put value"));
+    DBUG_RETURN(FALSE);
+  }
+
+  *(items.head_ref())= value;
+  fill_record(table_thd, cache_table->field, items, TRUE, TRUE);
+  if (table_thd->is_error())
+    goto err;;
+
+  if ((error= cache_table->file->ha_write_tmp_row(cache_table->record[0])))
+  {
+    /* create_myisam_from_heap will generate error if needed */
+    if (cache_table->file->is_fatal_error(error, HA_CHECK_DUP))
+      goto err;
+    else
+    {
+      double hit_rate= ((double)hit / ((double)hit + miss));
+      DBUG_ASSERT(miss > 0);
+      if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_MEM_TABLE)
+      {
+        DBUG_PRINT("info", ("hit rate is not so good to keep the cache"));
+        disable_cache();
+        DBUG_RETURN(FALSE);
+      }
+      else if (hit_rate < EXPCACHE_MIN_HIT_RATE_FOR_DISK_TABLE)
+      {
+        DBUG_PRINT("info", ("hit rate is not so good to go to disk"));
+        if (cache_table->file->ha_delete_all_rows() ||
+            cache_table->file->ha_write_tmp_row(cache_table->record[0]))
+          goto err;
+      }
+      else
+      {
+        if (create_internal_tmp_table_from_heap(table_thd, cache_table,
+                                                cache_table_param.start_recinfo,
+                                                &cache_table_param.recinfo,
+                                                error, 1))
+          goto err;
+      }
+    }
+  }
+  cache_table->status= 0; /* cache_table->record contains an existed record */
+  ref.has_record= TRUE; /* the same as above */
+  DBUG_PRINT("info", ("has_record: TRUE  status: 0"));
+
+  DBUG_RETURN(FALSE);
+
+err:
+  disable_cache();
+  DBUG_RETURN(TRUE);
+}
+
+
+void Expression_cache_tmptable::print(String *str, enum_query_type query_type)
+{
+  List_iterator<Item> li(items);
+  Item *item;
+  bool is_first= TRUE;
+
+  str->append('<');
+  li++;  // skip result field
+  while ((item= li++))
+  {
+    if (!is_first)
+      str->append(',');
+    item->print(str, query_type);
+    is_first= FALSE;
+  }
+  str->append('>');
+}
diff --git a/sql/sql_expression_cache.h b/sql/sql_expression_cache.h
new file mode 100644
index 00000000000..32aecc61dc9
--- /dev/null
+++ b/sql/sql_expression_cache.h
@@ -0,0 +1,95 @@
+#ifndef SQL_EXPRESSION_CACHE_INCLUDED
+#define SQL_EXPRESSION_CACHE_INCLUDED
+
+#include "sql_select.h"
+
+/**
+  Interface for expression cache
+
+  @note
+  Parameters of an expression cache interface are set on the creation of the
+  cache. They are passed when a cache object of the implementation class is
+  constructed. That's why they are not visible in this interface.
+*/
+
+extern ulong subquery_cache_miss, subquery_cache_hit;
+
+class Expression_cache :public Sql_alloc
+{
+public:
+  enum result {ERROR, HIT, MISS};
+
+  Expression_cache(){};
+  virtual ~Expression_cache() {};
+  /**
+    Shall check the presence of expression value in the cache for a given
+    set of values of the expression parameters.  Return the result of the
+    expression if it's found in the cache.
+  */
+  virtual result check_value(Item **value)= 0;
+  /**
+    Shall put the value of an expression for given set of its parameters
+    into the expression cache
+  */
+  virtual my_bool put_value(Item *value)= 0;
+
+  /**
+    Print cache parameters
+  */
+  virtual void print(String *str, enum_query_type query_type)= 0;
+
+  /**
+    Is this cache initialized
+  */
+  virtual bool is_inited()= 0;
+  /**
+    Initialize this cache
+  */
+  virtual void init()= 0;
+};
+
+struct st_table_ref;
+struct st_join_table;
+class Item_field;
+
+
+/**
+  Implementation of expression cache over a temporary table
+*/
+
+class Expression_cache_tmptable :public Expression_cache
+{
+public:
+  Expression_cache_tmptable(THD *thd, List<Item> &dependants, Item *value);
+  virtual ~Expression_cache_tmptable();
+  virtual result check_value(Item **value);
+  virtual my_bool put_value(Item *value);
+
+  void print(String *str, enum_query_type query_type);
+  bool is_inited() { return inited; };
+  void init();
+
+private:
+  void disable_cache();
+
+  /* tmp table parameters */
+  TMP_TABLE_PARAM cache_table_param;
+  /* temporary table to store this cache */
+  TABLE *cache_table;
+  /* Thread handle for the temporary table */
+  THD *table_thd;
+  /* TABLE_REF for index lookup */
+  struct st_table_ref ref;
+  /* Cached result */
+  Item_field *cached_result;
+  /* List of parameter items */
+  List<Item> &items;
+  /* Value Item example */
+  Item *val;
+  /* hit/miss counters */
+  uint hit, miss;
+  /* Set on if the object has been succesfully initialized with init() */
+  bool inited;
+};
+
+#endif /* SQL_EXPRESSION_CACHE_INCLUDED */
diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc
index 60d3740931e..4f1c63930d6 100644
--- a/sql/sql_handler.cc
+++ b/sql/sql_handler.cc
@@ -58,9 +58,13 @@
   second container. When the table is flushed, the pointer is cleared.
 */
 
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
 #include "mysql_priv.h"
 #include "sql_select.h"
-#include <assert.h>
+#include "sql_handler.h"
 
 #define HANDLER_TABLES_HASH_SIZE 120
 
@@ -68,6 +72,28 @@ static enum enum_ha_read_modes rkey_to_rnext[]=
 { RNEXT_SAME, RNEXT, RPREV, RNEXT, RPREV, RNEXT, RPREV, RPREV };
 
 /*
+  Set handler to state after create, but keep base information about
+  which table is used
+*/
+
+void SQL_HANDLER::reset()
+{
+  fields.empty();
+  arena.free_items();
+  free_root(&mem_root, MYF(0));
+  my_free(lock, MYF(MY_ALLOW_ZERO_PTR));
+  init();
+}  
+  
+/* Free all allocated data */
+
+SQL_HANDLER::~SQL_HANDLER()
+{
+  reset();
+  my_free(base_data, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+/*
   Get hash key and hash key length.
 
   SYNOPSIS
@@ -86,11 +112,11 @@ static enum enum_ha_read_modes rkey_to_rnext[]=
     Pointer to the TABLE_LIST struct.
 */
 
-static char *mysql_ha_hash_get_key(TABLE_LIST *tables, size_t *key_len_p,
+static char *mysql_ha_hash_get_key(SQL_HANDLER *table, size_t *key_len,
                                    my_bool first __attribute__((unused)))
 {
-  *key_len_p= strlen(tables->alias) + 1 ; /* include '\0' in comparisons */
-  return tables->alias;
+  *key_len= table->handler_name.length + 1 ; /* include '\0' in comparisons */
+  return table->handler_name.str;
 }
 
 
@@ -108,9 +134,9 @@ static char *mysql_ha_hash_get_key(TABLE_LIST *tables, size_t *key_len_p,
     Nothing
 */
 
-static void mysql_ha_hash_free(TABLE_LIST *tables)
+static void mysql_ha_hash_free(SQL_HANDLER *table)
 {
-  my_free((char*) tables, MYF(0));
+  delete table;
 }
 
 /**
@@ -122,14 +148,21 @@ static void mysql_ha_hash_free(TABLE_LIST *tables)
 
   @note Though this function takes a list of tables, only the first list entry
   will be closed.
+  @mote handler_object is not deleted!
   @note Broadcasts refresh if it closed a table with old version.
 */
 
-static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables,
+static void mysql_ha_close_table(SQL_HANDLER *handler,
                                  bool is_locked)
 {
+  THD *thd= handler->thd;
+  TABLE *table= handler->table;
   TABLE **table_ptr;
 
+  /* check if table was already closed */
+  if (!table)
+    return;
+
   /*
     Though we could take the table pointer from hash_tables->table,
     we must follow the thd->handler_tables chain anyway, as we need the
@@ -137,13 +170,18 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables,
     for close_thread_table().
   */
   for (table_ptr= &(thd->handler_tables);
-       *table_ptr && (*table_ptr != tables->table);
+       *table_ptr && (*table_ptr != table);
          table_ptr= &(*table_ptr)->next)
     ;
 
   if (*table_ptr)
   {
-    (*table_ptr)->file->ha_index_or_rnd_end();
+    if (handler->lock)
+    {
+      // Mark it unlocked, like in reset_lock_data()
+      reset_lock_data(handler->lock, 1);
+    }
+    table->file->ha_index_or_rnd_end();
     if (! is_locked)
       VOID(pthread_mutex_lock(&LOCK_open));
     if (close_thread_table(thd, table_ptr))
@@ -154,17 +192,15 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables,
     if (! is_locked)
       VOID(pthread_mutex_unlock(&LOCK_open));
   }
-  else if (tables->table)
+  else
   {
     /* Must be a temporary table */
-    TABLE *table= tables->table;
     table->file->ha_index_or_rnd_end();
     table->query_id= thd->query_id;
     table->open_by_handler= 0;
   }
-
-  /* Mark table as closed, ready for re-open if necessary. */
-  tables->table= NULL;
+  my_free(handler->lock, MYF(MY_ALLOW_ZERO_PTR));
+  handler->init();
 }
 
 /*
@@ -180,7 +216,7 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables,
     Though this function takes a list of tables, only the first list entry
     will be opened.
     'reopen' is set when a handler table is to be re-opened. In this case,
-    'tables' is the pointer to the hashed TABLE_LIST object which has been
+    'tables' is the pointer to the hashed SQL_HANDLER object which has been
     saved on the original open.
     'reopen' is also used to suppress the sending of an 'ok' message.
 
@@ -189,17 +225,17 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables,
     TRUE  Error
 */
 
-bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
+bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen)
 {
-  TABLE_LIST    *hash_tables = NULL;
-  char          *db, *name, *alias;
-  uint          dblen, namelen, aliaslen, counter;
+  SQL_HANDLER   *sql_handler= 0;
+  uint          counter;
   int           error;
-  TABLE         *backup_open_tables;
+  TABLE         *table, *backup_open_tables, *write_lock_used;
+  Query_arena backup_arena;
   DBUG_ENTER("mysql_ha_open");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'  reopen: %d",
                       tables->db, tables->table_name, tables->alias,
-                      (int) reopen));
+                      reopen != 0));
 
   if (tables->schema_table)
   {
@@ -212,7 +248,7 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
   if (! hash_inited(&thd->handler_tables_hash))
   {
     /*
-      HASH entries are of type TABLE_LIST.
+      HASH entries are of type SQL_HANDLER
     */
     if (hash_init(&thd->handler_tables_hash, &my_charset_latin1,
                   HANDLER_TABLES_HASH_SIZE, 0, 0,
@@ -290,8 +326,10 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
   if (error)
     goto err;
 
+  table= tables->table;
+
   /* There can be only one table in '*tables'. */
-  if (! (tables->table->file->ha_table_flags() & HA_CAN_SQL_HANDLER))
+  if (! (table->file->ha_table_flags() & HA_CAN_SQL_HANDLER))
   {
     my_error(ER_ILLEGAL_HA, MYF(0), tables->alias);
     goto err;
@@ -299,36 +337,69 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
 
   if (! reopen)
   {
-    /* copy the TABLE_LIST struct */
-    dblen= strlen(tables->db) + 1;
-    namelen= strlen(tables->table_name) + 1;
-    aliaslen= strlen(tables->alias) + 1;
-    if (!(my_multi_malloc(MYF(MY_WME),
-                          &hash_tables, (uint) sizeof(*hash_tables),
-                          &db, (uint) dblen,
-                          &name, (uint) namelen,
-                          &alias, (uint) aliaslen,
+    /* copy data to sql_handler */
+    if (!(sql_handler= new SQL_HANDLER(thd)))
+      goto err;
+    init_alloc_root(&sql_handler->mem_root, 1024, 0);
+
+    sql_handler->table= table;
+    sql_handler->db.length= strlen(tables->db);
+    sql_handler->table_name.length= strlen(tables->table_name);
+    sql_handler->handler_name.length= strlen(tables->alias);
+
+    if (!(my_multi_malloc(MY_WME,
+                          &sql_handler->db.str,
+                          (uint) sql_handler->db.length + 1,
+                          &sql_handler->table_name.str,
+                          (uint) sql_handler->table_name.length + 1,
+                          &sql_handler->handler_name.str,
+                          (uint) sql_handler->handler_name.length + 1,
                           NullS)))
       goto err;
-    /* structure copy */
-    *hash_tables= *tables;
-    hash_tables->db= db;
-    hash_tables->table_name= name;
-    hash_tables->alias= alias;
-    memcpy(hash_tables->db, tables->db, dblen);
-    memcpy(hash_tables->table_name, tables->table_name, namelen);
-    memcpy(hash_tables->alias, tables->alias, aliaslen);
+    sql_handler->base_data= sql_handler->db.str;  // Free this
+    memcpy(sql_handler->db.str, tables->db, sql_handler->db.length +1);
+    memcpy(sql_handler->table_name.str, tables->table_name,
+           sql_handler->table_name.length+1);
+    memcpy(sql_handler->handler_name.str, tables->alias,
+           sql_handler->handler_name.length +1);
 
     /* add to hash */
-    if (my_hash_insert(&thd->handler_tables_hash, (uchar*) hash_tables))
+    if (my_hash_insert(&thd->handler_tables_hash, (uchar*) sql_handler))
       goto err;
   }
+  else
+  {
+    sql_handler= reopen;
+    sql_handler->reset();
+  }    
+  sql_handler->table= table;
+
+  if (!(sql_handler->lock= get_lock_data(thd, &sql_handler->table, 1,
+                                         GET_LOCK_STORE_LOCKS,
+                                         &write_lock_used)))
+    goto err;
+
+  /* Get a list of all fields for send_fields */
+  thd->set_n_backup_active_arena(&sql_handler->arena, &backup_arena);
+  error= table->fill_item_list(&sql_handler->fields);
+  thd->restore_active_arena(&sql_handler->arena, &backup_arena);
+
+  if (error)
+  {
+    if (reopen)
+      sql_handler= 0;
+    goto err;
+  }
+
+  /* Always read all columns */
+  table->read_set= &table->s->all_set;
+  table->vcol_set= &table->s->all_set;
 
   /*
     If it's a temp table, don't reset table->query_id as the table is
     being used by this handler. Otherwise, no meaning at all.
   */
-  tables->table->open_by_handler= 1;
+  table->open_by_handler= 1;
 
   if (! reopen)
     my_ok(thd);
@@ -336,10 +407,13 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen)
   DBUG_RETURN(FALSE);
 
 err:
-  if (hash_tables)
-    my_free((char*) hash_tables, MYF(0));
+  delete sql_handler;
   if (tables->table)
-    mysql_ha_close_table(thd, tables, FALSE);
+  {
+    SQL_HANDLER tmp_sql_handler(thd);
+    tmp_sql_handler.table= tables->table;
+    mysql_ha_close_table(&tmp_sql_handler, FALSE);
+  }
   DBUG_PRINT("exit",("ERROR"));
   DBUG_RETURN(TRUE);
 }
@@ -364,17 +438,17 @@ err:
 
 bool mysql_ha_close(THD *thd, TABLE_LIST *tables)
 {
-  TABLE_LIST    *hash_tables;
+  SQL_HANDLER *handler;
   DBUG_ENTER("mysql_ha_close");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'",
                       tables->db, tables->table_name, tables->alias));
 
-  if ((hash_tables= (TABLE_LIST*) hash_search(&thd->handler_tables_hash,
-                                              (uchar*) tables->alias,
-                                              strlen(tables->alias) + 1)))
+  if ((handler= (SQL_HANDLER*) hash_search(&thd->handler_tables_hash,
+                                           (uchar*) tables->alias,
+                                           strlen(tables->alias) + 1)))
   {
-    mysql_ha_close_table(thd, hash_tables, FALSE);
-    hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
+    mysql_ha_close_table(handler, FALSE);
+    hash_delete(&thd->handler_tables_hash, (uchar*) handler);
   }
   else
   {
@@ -389,6 +463,167 @@ bool mysql_ha_close(THD *thd, TABLE_LIST *tables)
 }
 
 
+/**
+   Finds an open HANDLER table.
+
+   @params name		Name of handler to open
+
+   @return 0 failure
+   @return handler
+*/  
+
+SQL_HANDLER *mysql_ha_find_handler(THD *thd, const char *name)
+{
+  SQL_HANDLER *handler;
+  if ((handler= (SQL_HANDLER*) hash_search(&thd->handler_tables_hash,
+                                           (uchar*) name,
+                                           strlen(name) + 1)))
+  {
+    DBUG_PRINT("info-in-hash",("'%s'.'%s' as '%s' table: %p",
+                               handler->db.str,
+                               handler->table_name.str,
+                               handler->handler_name.str, handler->table));
+    if (!handler->table)
+    {
+      /* The handler table has been closed. Re-open it. */
+      TABLE_LIST tmp;
+      tmp.init_one_table(handler->db.str, handler->table_name.str,
+                         TL_READ);
+      tmp.alias= handler->handler_name.str;
+
+      if (mysql_ha_open(thd, &tmp, handler))
+      {
+        DBUG_PRINT("exit",("reopen failed"));
+        return 0;
+      }
+    }
+  }
+  else
+  {
+    my_error(ER_UNKNOWN_TABLE, MYF(0), name, "HANDLER");
+    return 0;
+  }
+  return handler;
+}
+
+
+/**
+   Check that condition and key name are ok
+
+   @param handler
+   @param mode		Read mode (RFIRST, RNEXT etc...)
+   @param keyname	Key to use.
+   @param key_expr      List of key column values
+   @param cond		Where clause
+   @param in_prepare	If we are in prepare phase (we can't evalute items yet)
+
+   @return 0 ok
+   @return 1 error
+
+   In ok, then values of used key and mode is stored in sql_handler
+*/
+
+static bool
+mysql_ha_fix_cond_and_key(SQL_HANDLER *handler, 
+                          enum enum_ha_read_modes mode, char *keyname,
+                          List<Item> *key_expr,
+                          Item *cond, bool in_prepare)
+{
+  THD *thd= handler->thd;
+  TABLE *table= handler->table;
+  if (cond)
+  {
+    /* This can only be true for temp tables */
+    if (table->query_id != thd->query_id)
+      cond->cleanup();                          // File was reopened
+    if ((!cond->fixed &&
+	 cond->fix_fields(thd, &cond)) || cond->check_cols(1))
+      return 1;
+  }
+
+  if (keyname)
+  {
+    /* Check if same as last keyname. If not, do a full lookup */
+    if (handler->keyno < 0 ||
+        my_strcasecmp(&my_charset_latin1,
+                      keyname,
+                      table->s->key_info[handler->keyno].name))
+    {
+      if ((handler->keyno= find_type(keyname, &table->s->keynames, 1+2)-1)<0)
+      {
+        my_error(ER_KEY_DOES_NOT_EXITS, MYF(0), keyname,
+                 handler->handler_name.str);
+        return 1;
+      }
+    }
+
+    /* Check key parts */
+    if (mode == RKEY)
+    {
+      TABLE *table= handler->table;
+      KEY *keyinfo= table->key_info + handler->keyno;
+      KEY_PART_INFO *key_part= keyinfo->key_part;
+      List_iterator<Item> it_ke(*key_expr);
+      Item *item;
+      key_part_map keypart_map;
+      uint key_len;
+
+      if (key_expr->elements > keyinfo->key_parts)
+      {
+        my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), keyinfo->key_parts);
+        return 1;
+      }
+      for (keypart_map= key_len=0 ; (item=it_ke++) ; key_part++)
+      {
+        my_bitmap_map *old_map;
+	/* note that 'item' can be changed by fix_fields() call */
+        if ((!item->fixed &&
+             item->fix_fields(thd, it_ke.ref())) ||
+	    (item= *it_ke.ref())->check_cols(1))
+          return 1;
+	if (item->used_tables() & ~(RAND_TABLE_BIT | PARAM_TABLE_BIT))
+        {
+          my_error(ER_WRONG_ARGUMENTS,MYF(0),"HANDLER ... READ");
+	  return 1;
+        }
+        if (!in_prepare)
+        {
+          old_map= dbug_tmp_use_all_columns(table, table->write_set);
+          (void) item->save_in_field(key_part->field, 1);
+          dbug_tmp_restore_column_map(table->write_set, old_map);
+        }
+        key_len+= key_part->store_length;
+        keypart_map= (keypart_map << 1) | 1;
+      }
+      handler->keypart_map= keypart_map;
+      handler->key_len= key_len;
+    }
+    else
+    {
+      /*
+        Check if the same index involved.
+        We need to always do this check because we may not have yet
+        called the handler since the last keyno change.
+      */
+      if ((uint) handler->keyno != table->file->get_index())
+      {
+        if (mode == RNEXT)
+          mode= RFIRST;
+        else if (mode == RPREV)
+          mode= RLAST;
+      }
+    }
+  }
+  else if (table->file->inited != handler::RND)
+  {
+    /* Convert RNEXT to RFIRST if we haven't started row scan */
+    if (mode == RNEXT)
+      mode= RFIRST;
+  }
+  handler->mode= mode;                          // Store adjusted mode
+  return 0;
+}
+
 /*
   Read from a HANDLER table.
 
@@ -415,153 +650,76 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
                    enum ha_rkey_function ha_rkey_mode, Item *cond,
                    ha_rows select_limit_cnt, ha_rows offset_limit_cnt)
 {
-  TABLE_LIST    *hash_tables;
-  TABLE         *table, *backup_open_tables;
-  MYSQL_LOCK    *lock;
+  SQL_HANDLER   *handler;
+  TABLE         *table;
   List<Item>	list;
   Protocol	*protocol= thd->protocol;
   char		buff[MAX_FIELD_WIDTH];
   String	buffer(buff, sizeof(buff), system_charset_info);
-  int           error, keyno= -1;
+  int           error, keyno;
   uint          num_rows;
   uchar		*UNINIT_VAR(key);
-  uint		UNINIT_VAR(key_len);
   bool          need_reopen;
+  List_iterator<Item> it;
   DBUG_ENTER("mysql_ha_read");
   DBUG_PRINT("enter",("'%s'.'%s' as '%s'",
                       tables->db, tables->table_name, tables->alias));
 
-  thd->lex->select_lex.context.resolve_in_table_list_only(tables);
-  list.push_front(new Item_field(&thd->lex->select_lex.context,
-                                 NULL, NULL, "*"));
-  List_iterator<Item> it(list);
-  it++;
 
 retry:
-  if ((hash_tables= (TABLE_LIST*) hash_search(&thd->handler_tables_hash,
-                                              (uchar*) tables->alias,
-                                              strlen(tables->alias) + 1)))
+  if (!(handler= mysql_ha_find_handler(thd, tables->alias)))
+    goto err0;
+
+  table= handler->table;
+  tables->table= table;                         // This is used by fix_fields
+
+  /* save open_tables state */
+  if (handler->lock->lock_count > 0)
   {
-    table= hash_tables->table;
-    DBUG_PRINT("info-in-hash",("'%s'.'%s' as '%s' table: 0x%lx",
-                               hash_tables->db, hash_tables->table_name,
-                               hash_tables->alias, (long) table));
-    if (!table)
+    int lock_error;
+
+    handler->lock->locks[0]->type= handler->lock->locks[0]->org_type;
+    lock_error= mysql_lock_tables(thd, handler->lock, 0, 
+                                  (MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN |
+                                   (handler->table->s->tmp_table ==
+                                    NO_TMP_TABLE ?
+                                    MYSQL_LOCK_NOT_TEMPORARY : 0)),
+                                  &need_reopen);
+    if (need_reopen)
     {
-      /*
-        The handler table has been closed. Re-open it.
-      */
-      if (mysql_ha_open(thd, hash_tables, 1))
+      mysql_ha_close_table(handler, FALSE);
+      if (thd->stmt_arena->is_stmt_execute())
       {
-        DBUG_PRINT("exit",("reopen failed"));
+        /*
+          As we have already sent field list and types to the client, we can't
+          handle any changes in the table format for prepared statements.
+          Better to force a reprepare.
+        */
+        my_error(ER_NEED_REPREPARE, MYF(0));
         goto err0;
       }
 
-      table= hash_tables->table;
-      DBUG_PRINT("info",("re-opened '%s'.'%s' as '%s' tab %p",
-                         hash_tables->db, hash_tables->table_name,
-                         hash_tables->alias, table));
-    }
-
-#if MYSQL_VERSION_ID < 40100
-    if (*tables->db && strcmp(table->table_cache_key, tables->db))
-    {
-      DBUG_PRINT("info",("wrong db"));
-      table= NULL;
+      /*
+        The lock might have been aborted, we need to manually reset
+        thd->some_tables_deleted because handler's tables are closed
+        in a non-standard way. Otherwise we might loop indefinitely.
+      */
+      thd->some_tables_deleted= 0;
+      goto retry;
     }
-#endif
-  }
-  else
-    table= NULL;
-
-  if (!table)
-  {
-#if MYSQL_VERSION_ID < 40100
-    char buff[MAX_DBKEY_LENGTH];
-    if (*tables->db)
-      strxnmov(buff, sizeof(buff)-1, tables->db, ".", tables->table_name,
-               NullS);
-    else
-      strncpy(buff, tables->alias, sizeof(buff));
-    my_error(ER_UNKNOWN_TABLE, MYF(0), buff, "HANDLER");
-#else
-    my_error(ER_UNKNOWN_TABLE, MYF(0), tables->alias, "HANDLER");
-#endif
-    goto err0;
-  }
-  tables->table=table;
-
-  /* save open_tables state */
-  backup_open_tables= thd->open_tables;
-  /*
-    mysql_lock_tables() needs thd->open_tables to be set correctly to
-    be able to handle aborts properly. When the abort happens, it's
-    safe to not protect thd->handler_tables because it won't close any
-    tables.
-  */
-  thd->open_tables= thd->handler_tables;
-
-  lock= mysql_lock_tables(thd, &tables->table, 1,
-                          MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN, &need_reopen);
-
-  /* restore previous context */
-  thd->open_tables= backup_open_tables;
-
-  if (need_reopen)
-  {
-    mysql_ha_close_table(thd, hash_tables, FALSE);
-    /*
-      The lock might have been aborted, we need to manually reset
-      thd->some_tables_deleted because handler's tables are closed
-      in a non-standard way. Otherwise we might loop indefinitely.
-    */
-    thd->some_tables_deleted= 0;
-    goto retry;
-  }
-
-  if (!lock)
-    goto err0; // mysql_lock_tables() printed error message already
 
-  // Always read all columns
-  tables->table->read_set= &tables->table->s->all_set;
-
-  if (cond)
-  {
-    if (table->query_id != thd->query_id)
-      cond->cleanup();                          // File was reopened
-    if ((!cond->fixed &&
-	 cond->fix_fields(thd, &cond)) || cond->check_cols(1))
-      goto err;
+    if (lock_error)
+      goto err0; // mysql_lock_tables() printed error message already
   }
 
-  if (keyname)
-  {
-    if ((keyno=find_type(keyname, &table->s->keynames, 1+2)-1)<0)
-    {
-      my_error(ER_KEY_DOES_NOT_EXITS, MYF(0), keyname, tables->alias);
-      goto err;
-    }
-    /* Check if the same index involved. */
-    if ((uint) keyno != table->file->get_index())
-    {
-      if (mode == RNEXT)
-        mode= RFIRST;
-      else if (mode == RPREV)
-        mode= RLAST;
-    }
-  }
-  else if (table->file->inited != handler::RND)
-  {
-    /* Convert RNEXT to RFIRST if we haven't started row scan */
-    if (mode == RNEXT)
-      mode= RFIRST;
-  }
-
-  if (insert_fields(thd, &thd->lex->select_lex.context,
-                    tables->db, tables->alias, &it, 0))
+  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr, cond, 0))
     goto err;
+  mode= handler->mode;
+  keyno= handler->keyno;
 
-  protocol->send_fields(&list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
+  it.init(handler->fields);
+  protocol->send_fields(&handler->fields,
+                        Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
 
   /*
     In ::external_lock InnoDB resets the fields which tell it that
@@ -586,9 +744,7 @@ retry:
           error= table->file->ha_index_next(table->record[0]);
         }
         else
-        {
           error= table->file->ha_rnd_next(table->record[0]);
-        }
         break;
       }
       /* else fall through */
@@ -605,7 +761,7 @@ retry:
 	if (!(error= table->file->ha_rnd_init(1)))
           error= table->file->ha_rnd_next(table->record[0]);
       }
-      mode=RNEXT;
+      mode= RNEXT;
       break;
     case RPREV:
       DBUG_ASSERT(keyname != 0);
@@ -624,54 +780,28 @@ retry:
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
       error= table->file->ha_index_last(table->record[0]);
-      mode=RPREV;
+      mode= RPREV;
       break;
     case RNEXT_SAME:
       /* Continue scan on "(keypart1,keypart2,...)=(c1, c2, ...)  */
       DBUG_ASSERT(keyname != 0);
-      error= table->file->ha_index_next_same(table->record[0], key, key_len);
+      error= table->file->ha_index_next_same(table->record[0], key,
+                                             handler->key_len);
       break;
     case RKEY:
     {
       DBUG_ASSERT(keyname != 0);
-      KEY *keyinfo=table->key_info+keyno;
-      KEY_PART_INFO *key_part=keyinfo->key_part;
-      if (key_expr->elements > keyinfo->key_parts)
-      {
-	my_error(ER_TOO_MANY_KEY_PARTS, MYF(0), keyinfo->key_parts);
-	goto err;
-      }
-      List_iterator<Item> it_ke(*key_expr);
-      Item *item;
-      key_part_map keypart_map;
-      for (keypart_map= key_len=0 ; (item=it_ke++) ; key_part++)
-      {
-        my_bitmap_map *old_map;
-	// 'item' can be changed by fix_fields() call
-        if ((!item->fixed &&
-             item->fix_fields(thd, it_ke.ref())) ||
-	    (item= *it_ke.ref())->check_cols(1))
-	  goto err;
-	if (item->used_tables() & ~RAND_TABLE_BIT)
-        {
-          my_error(ER_WRONG_ARGUMENTS,MYF(0),"HANDLER ... READ");
-	  goto err;
-        }
-        old_map= dbug_tmp_use_all_columns(table, table->write_set);
-	(void) item->save_in_field(key_part->field, 1);
-        dbug_tmp_restore_column_map(table->write_set, old_map);
-	key_len+=key_part->store_length;
-        keypart_map= (keypart_map << 1) | 1;
-      }
 
-      if (!(key= (uchar*) thd->calloc(ALIGN_SIZE(key_len))))
+      if (!(key= (uchar*) thd->calloc(ALIGN_SIZE(handler->key_len))))
 	goto err;
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
-      key_copy(key, table->record[0], table->key_info + keyno, key_len);
+      key_copy(key, table->record[0], table->key_info + keyno,
+               handler->key_len);
       error= table->file->ha_index_read_map(table->record[0],
-                                            key, keypart_map, ha_rkey_mode);
-      mode=rkey_to_rnext[(int)ha_rkey_mode];
+                                            key, handler->keypart_map,
+                                            ha_rkey_mode);
+      mode= rkey_to_rnext[(int)ha_rkey_mode];
       break;
     }
     default:
@@ -691,6 +821,7 @@ retry:
                           "table '%s'",
                           error, tables->table_name);
         table->file->print_error(error,MYF(0));
+        table->file->ha_index_or_rnd_end();
         goto err;
       }
       goto ok;
@@ -718,13 +849,13 @@ retry:
     num_rows++;
   }
 ok:
-  mysql_unlock_tables(thd,lock);
+  mysql_unlock_tables(thd, handler->lock, 0);
   my_eof(thd);
   DBUG_PRINT("exit",("OK"));
   DBUG_RETURN(FALSE);
 
 err:
-  mysql_unlock_tables(thd,lock);
+  mysql_unlock_tables(thd, handler->lock, 0);
 err0:
   DBUG_PRINT("exit",("ERROR"));
   DBUG_RETURN(TRUE);
@@ -732,6 +863,28 @@ err0:
 
 
 /**
+   Prepare for handler read
+
+   For parameters, see mysql_ha_read()
+*/
+
+SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
+                                   enum enum_ha_read_modes mode, char *keyname,
+                                   List<Item> *key_expr, Item *cond)
+{
+  SQL_HANDLER *handler;
+  DBUG_ENTER("mysql_ha_read_prepare");
+  if (!(handler= mysql_ha_find_handler(thd, tables->alias)))
+    DBUG_RETURN(0);
+  tables->table= handler->table;         // This is used by fix_fields
+  if (mysql_ha_fix_cond_and_key(handler, mode, keyname, key_expr, cond, 1))
+    DBUG_RETURN(0);
+  DBUG_RETURN(handler);
+}
+
+  
+
+/**
   Scan the handler tables hash for matching tables.
 
   @param thd Thread identifier.
@@ -742,30 +895,32 @@ err0:
           table was matched.
 */
 
-static TABLE_LIST *mysql_ha_find(THD *thd, TABLE_LIST *tables)
+static SQL_HANDLER *mysql_ha_find_match(THD *thd, TABLE_LIST *tables)
 {
-  TABLE_LIST *hash_tables, *head= NULL, *first= tables;
-  DBUG_ENTER("mysql_ha_find");
+  SQL_HANDLER *hash_tables, *head= NULL;
+  TABLE_LIST *first= tables;
+  DBUG_ENTER("mysql_ha_find_match");
 
   /* search for all handlers with matching table names */
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) hash_element(&thd->handler_tables_hash, i);
+
     for (tables= first; tables; tables= tables->next_local)
     {
       if ((! *tables->db ||
-          ! my_strcasecmp(&my_charset_latin1, hash_tables->db, tables->db)) &&
-          ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name,
+          ! my_strcasecmp(&my_charset_latin1, hash_tables->db.str,
+                          tables->db)) &&
+          ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name.str,
                           tables->table_name))
+      {
+        /* Link into hash_tables list */
+        hash_tables->next= head;
+        head= hash_tables;
         break;
-    }
-    if (tables)
-    {
-      hash_tables->next_local= head;
-      head= hash_tables;
+      }
     }
   }
-
   DBUG_RETURN(head);
 }
 
@@ -782,18 +937,18 @@ static TABLE_LIST *mysql_ha_find(THD *thd, TABLE_LIST *tables)
 
 void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked)
 {
-  TABLE_LIST *hash_tables, *next;
+  SQL_HANDLER *hash_tables, *next;
   DBUG_ENTER("mysql_ha_rm_tables");
 
   DBUG_ASSERT(tables);
 
-  hash_tables= mysql_ha_find(thd, tables);
+  hash_tables= mysql_ha_find_match(thd, tables);
 
   while (hash_tables)
   {
-    next= hash_tables->next_local;
+    next= hash_tables->next;
     if (hash_tables->table)
-      mysql_ha_close_table(thd, hash_tables, is_locked);
+      mysql_ha_close_table(hash_tables, is_locked);
     hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables);
     hash_tables= next;
   }
@@ -813,16 +968,16 @@ void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked)
 
 void mysql_ha_flush(THD *thd)
 {
-  TABLE_LIST *hash_tables;
+  SQL_HANDLER *hash_tables;
   DBUG_ENTER("mysql_ha_flush");
 
   safe_mutex_assert_owner(&LOCK_open);
 
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) hash_element(&thd->handler_tables_hash, i);
     if (hash_tables->table && hash_tables->table->needs_reopen_or_name_lock())
-      mysql_ha_close_table(thd, hash_tables, TRUE);
+      mysql_ha_close_table(hash_tables, TRUE);
   }
 
   DBUG_VOID_RETURN;
@@ -839,14 +994,14 @@ void mysql_ha_flush(THD *thd)
 
 void mysql_ha_cleanup(THD *thd)
 {
-  TABLE_LIST *hash_tables;
+  SQL_HANDLER *hash_tables;
   DBUG_ENTER("mysql_ha_cleanup");
 
   for (uint i= 0; i < thd->handler_tables_hash.records; i++)
   {
-    hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i);
+    hash_tables= (SQL_HANDLER*) hash_element(&thd->handler_tables_hash, i);
     if (hash_tables->table)
-      mysql_ha_close_table(thd, hash_tables, FALSE);
+      mysql_ha_close_table(hash_tables, FALSE);
    }
 
   hash_free(&thd->handler_tables_hash);
diff --git a/sql/sql_handler.h b/sql/sql_handler.h
new file mode 100644
index 00000000000..54e72e9f50e
--- /dev/null
+++ b/sql/sql_handler.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2010 Monty Program Ab
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+/* Open handlers are stored here */
+
+class SQL_HANDLER {
+public:
+  TABLE *table;
+  List<Item> fields;                            /* Fields, set on open */
+  THD *thd;
+  LEX_STRING handler_name;
+  LEX_STRING db;
+  LEX_STRING table_name;
+  MEM_ROOT mem_root;
+  MYSQL_LOCK *lock;
+
+  key_part_map keypart_map;
+  int keyno;                                    /* Used key */
+  uint key_len;
+  enum enum_ha_read_modes mode;
+
+  /* This is only used when deleting many handler objects */
+  SQL_HANDLER *next;
+
+  Query_arena arena;
+  char *base_data;
+  SQL_HANDLER(THD *thd_arg) :
+    thd(thd_arg), arena(&mem_root, Query_arena::INITIALIZED)
+  { init(); clear_alloc_root(&mem_root); base_data= 0; }
+  void init() { keyno= -1; table= 0; lock= 0; }
+  void reset();
+
+  ~SQL_HANDLER();
+};
+
+bool mysql_ha_open(THD *thd, TABLE_LIST *tables, SQL_HANDLER *reopen);
+bool mysql_ha_close(THD *thd, TABLE_LIST *tables);
+bool mysql_ha_read(THD *, TABLE_LIST *,enum enum_ha_read_modes,char *,
+                   List<Item> *,enum ha_rkey_function,Item *,ha_rows,ha_rows);
+void mysql_ha_flush(THD *thd);
+void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked);
+void mysql_ha_cleanup(THD *thd);
+
+SQL_HANDLER *mysql_ha_read_prepare(THD *thd, TABLE_LIST *tables,
+                                   enum enum_ha_read_modes mode, char *keyname,
+                                   List<Item> *key_expr, Item *cond);
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index a93fd23a57f..7879c3c74b7 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -639,7 +639,7 @@ bool mysqld_help(THD *thd, const char *mask)
   Protocol *protocol= thd->protocol;
   SQL_SELECT *select;
   st_find_field used_fields[array_elements(init_used_fields)];
-  TABLE_LIST *leaves= 0;
+  List<TABLE_LIST> leaves;
   TABLE_LIST tables[4];
   List<String> topics_list, categories_list, subcategories_list;
   String name, description, example;
@@ -678,7 +678,7 @@ bool mysqld_help(THD *thd, const char *mask)
     thd->lex->select_lex.context.first_name_resolution_table= &tables[0];
   if (setup_tables(thd, &thd->lex->select_lex.context,
                    &thd->lex->select_lex.top_join_list,
-                   tables, &leaves, FALSE))
+                   tables, leaves, FALSE, FALSE))
     goto error;
   memcpy((char*) used_fields, (char*) init_used_fields, sizeof(used_fields));
   if (init_fields(thd, tables, used_fields, array_elements(used_fields)))
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index c350d2deeee..a4943d5e13a 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -119,7 +119,7 @@ bool check_view_single_update(List<Item> &fields, List<Item> *values,
   {
     it.init(*values);
     while ((item= it++))
-      tables|= item->used_tables();
+      tables|= item->view_used_tables(view);
   }
 
   /* Convert to real table bits */
@@ -135,6 +135,11 @@ bool check_view_single_update(List<Item> &fields, List<Item> *values,
   if (view->check_single_table(&tbl, tables, view) || tbl == 0)
     goto error;
 
+  /*
+    A buffer for the insert values was allocated for the merged view.
+    Use it.
+  */
+  tbl->table->insert_values= view->table->insert_values;
   view->table= tbl->table;
   if (!tbl->single_table_updatable())
   {
@@ -246,6 +251,10 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
     */
     table_list->next_local= 0;
     context->resolve_in_table_list_only(table_list);
+    /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+    if (table_list->is_view())
+      unfix_fields(fields);
+
     res= setup_fields(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0);
 
     /* Restore the current context. */
@@ -255,7 +264,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
     if (res)
       return -1;
 
-    if (table_list->effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (table_list->is_view() && table_list->is_merged_derived())
     {
       if (check_view_single_update(fields, 
                                    fields_and_values_from_different_maps ?
@@ -344,7 +353,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
   if (setup_fields(thd, 0, update_fields, MARK_COLUMNS_WRITE, 0, 0))
     return -1;
 
-  if (insert_table_list->effective_algorithm == VIEW_ALGORITHM_MERGE &&
+  if (insert_table_list->is_view() &&
+      insert_table_list->is_merged_derived() &&
       check_view_single_update(update_fields, &update_values,
                                insert_table_list, map, false))
     return -1;
@@ -634,8 +644,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
   /*
     We can't write-delayed into a table locked with LOCK TABLES:
     this will lead to a deadlock, since the delayed thread will
-    never be able to get a lock on the table. QQQ: why not
-    upgrade the lock here instead?
+    never be able to get a lock on the table.
   */
   if (table_list->lock_type == TL_WRITE_DELAYED && thd->locked_tables &&
       find_locked_table(thd, table_list->db, table_list->table_name))
@@ -644,6 +653,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
              table_list->table_name);
     DBUG_RETURN(TRUE);
   }
+  /*
+    mark the table_list as a target for insert, to skip the DT/view prepare phase 
+    for correct access rights checks
+    TODO: remove this hack
+  */
+  table_list->skip_prepare_derived= TRUE;
 
   if (table_list->lock_type == TL_WRITE_DELAYED)
   {
@@ -655,6 +670,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
     if (open_and_lock_tables(thd, table_list))
       DBUG_RETURN(TRUE);
   }
+
   lock_type= table_list->lock_type;
 
   thd_proc_info(thd, "init");
@@ -820,7 +836,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
           be overwritten by fill_record() anyway (and fill_record() does not
           use default values in this case).
         */
-        table->record[0][0]= share->default_values[0];
+#ifdef HAVE_valgrind
+        if (table->file->ha_table_flags() && HA_RECORD_MUST_BE_CLEAN_ON_WRITE)
+          restore_record(table,s->default_values);	// Get empty record
+        else
+#endif
+          table->record[0][0]= share->default_values[0];
 
         /* Fix undefined null_bits. */
         if (share->null_bytes > 1 && share->last_null_bit_pos)
@@ -934,7 +955,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 	  thd->clear_error();
 	}
         else
-          errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+          errcode= query_error_code(thd, thd->killed == NOT_KILLED);
         
 	/* bug#22725:
 
@@ -948,7 +969,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 	routines did not result in any error due to the KILLED.  In
 	such case the flag is ignored for constructing binlog event.
 	*/
-	DBUG_ASSERT(thd->killed != THD::KILL_BAD_DATA || error > 0);
+	DBUG_ASSERT(thd->killed != KILL_BAD_DATA || error > 0);
 	if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                               thd->query(), thd->query_length(),
 			      transactional_table, FALSE,
@@ -1013,6 +1034,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
     ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
   }
   thd->abort_on_warning= 0;
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
+  
   DBUG_RETURN(FALSE);
 
 abort:
@@ -1141,6 +1168,11 @@ static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list,
   bool insert_into_view= (table_list->view != 0);
   DBUG_ENTER("mysql_prepare_insert_check_table");
 
+  if (!table_list->single_table_updatable())
+  {
+    my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias, "INSERT");
+    DBUG_RETURN(TRUE);
+  }
   /*
      first table in list is the one we'll INSERT into, requires INSERT_ACL.
      all others require SELECT_ACL only. the ACL requirement below is for
@@ -1151,14 +1183,16 @@ static bool mysql_prepare_insert_check_table(THD *thd, TABLE_LIST *table_list,
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list,
-                                    &thd->lex->select_lex.leaf_tables,
-                                    select_insert, INSERT_ACL, SELECT_ACL))
+                                    thd->lex->select_lex.leaf_tables,
+                                    select_insert, INSERT_ACL, SELECT_ACL,
+                                    TRUE))
     DBUG_RETURN(TRUE);
 
   if (insert_into_view && !fields.elements)
   {
     thd->lex->empty_field_list_on_rset= 1;
-    if (!table_list->table)
+    if (!thd->lex->select_lex.leaf_tables.head()->table ||
+        table_list->is_multitable())
     {
       my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0),
                table_list->view_db.str, table_list->view_name.str);
@@ -1249,6 +1283,12 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   /* INSERT should have a SELECT or VALUES clause */
   DBUG_ASSERT (!select_insert || !values);
 
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    DBUG_RETURN(TRUE); 
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE); 
+  if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE); 
   /*
     For subqueries in VALUES() we should not see the table in which we are
     inserting (for INSERT ... SELECT this is done by changing table_list,
@@ -1548,11 +1588,12 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
           else
             error= 0;
           /*
-            If ON DUP KEY UPDATE updates a row instead of inserting one, it's
-            like a regular UPDATE statement: it should not affect the value of a
-            next SELECT LAST_INSERT_ID() or mysql_insert_id().
-            Except if LAST_INSERT_ID(#) was in the INSERT query, which is
-            handled separately by THD::arg_of_last_insert_id_function.
+            If ON DUP KEY UPDATE updates a row instead of inserting
+            one, it's like a regular UPDATE statement: it should not
+            affect the value of a next SELECT LAST_INSERT_ID() or
+            mysql_insert_id().  Except if LAST_INSERT_ID(#) was in the
+            INSERT query, which is handled separately by
+            THD::arg_of_last_insert_id_function.
           */
           insert_id_for_cur_row= table->file->insert_id_for_cur_row= 0;
           trg_error= (table->triggers &&
@@ -1629,11 +1670,12 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
     }
     
     /*
-        If more than one iteration of the above while loop is done, from the second 
-        one the row being inserted will have an explicit value in the autoinc field, 
-        which was set at the first call of handler::update_auto_increment(). This 
-        value is saved to avoid thd->insert_id_for_cur_row becoming 0. Use this saved
-        autoinc value.
+      If more than one iteration of the above while loop is done, from
+      the second one the row being inserted will have an explicit
+      value in the autoinc field, which was set at the first call of
+      handler::update_auto_increment(). This value is saved to avoid
+      thd->insert_id_for_cur_row becoming 0. Use this saved autoinc
+      value.
      */
     if (table->file->insert_id_for_cur_row == 0)
       table->file->insert_id_for_cur_row= insert_id_for_cur_row;
@@ -1673,9 +1715,6 @@ ok_or_after_trg_err:
 
 err:
   info->last_errno= error;
-  /* current_select is NULL if this is a delayed insert */
-  if (thd->lex->current_select)
-    thd->lex->current_select->no_error= 0;        // Give error
   table->file->print_error(error,MYF(0));
   
 before_trg_err:
@@ -1741,10 +1780,11 @@ class delayed_row :public ilink {
 public:
   char *record;
   enum_duplicates dup;
-  time_t start_time;
+  my_time_t start_time;
+  ulong start_time_sec_part;
   ulong sql_mode;
   bool auto_increment_field_not_null;
-  bool query_start_used, ignore, log_query;
+  bool query_start_used, ignore, log_query, query_start_sec_part_used;
   bool stmt_depends_on_first_successful_insert_id_in_prev_stmt;
   ulonglong first_successful_insert_id_in_prev_stmt;
   ulonglong forced_insert_id;
@@ -1981,6 +2021,11 @@ bool delayed_get_table(THD *thd, TABLE_LIST *table_list)
       pthread_mutex_lock(&LOCK_thread_count);
       thread_count++;
       pthread_mutex_unlock(&LOCK_thread_count);
+      /*
+        Annotating delayed inserts is not supported.
+      */
+      di->thd.variables.binlog_annotate_row_events= 0;
+
       di->thd.set_db(table_list->db, (uint) strlen(table_list->db));
       di->thd.set_query(my_strdup(table_list->table_name, MYF(MY_WME)), 0);
       if (di->thd.db == NULL || di->thd.query() == NULL)
@@ -2240,8 +2285,10 @@ int write_delayed(THD *thd, TABLE *table, enum_duplicates duplic,
   if (!(row->record= (char*) my_malloc(table->s->reclength, MYF(MY_WME))))
     goto err;
   memcpy(row->record, table->record[0], table->s->reclength);
-  row->start_time=		thd->start_time;
-  row->query_start_used=	thd->query_start_used;
+  row->start_time=                thd->start_time;
+  row->query_start_used=          thd->query_start_used;
+  row->start_time_sec_part=       thd->start_time_sec_part;
+  row->query_start_sec_part_used= thd->query_start_sec_part_used;
   /*
     those are for the binlog: LAST_INSERT_ID() has been evaluated at this
     time, so record does not need it, but statement-based binlogging of the
@@ -2329,7 +2376,7 @@ void kill_delayed_threads(void)
   Delayed_insert *di;
   while ((di= it++))
   {
-    di->thd.killed= THD::KILL_SYSTEM_THREAD;
+    di->thd.killed= KILL_SYSTEM_THREAD;
     pthread_mutex_lock(&di->thd.LOCK_thd_data);
     if (di->thd.mysys_var)
     {
@@ -2414,8 +2461,7 @@ static void handle_delayed_insert_impl(THD *thd, Delayed_insert *di)
 
   for (;;)
   {
-    if (thd->killed == THD::KILL_CONNECTION ||
-        thd->killed == THD::KILL_SYSTEM_THREAD || thd->killed == THD::KILL_SERVER)
+    if (thd->killed >= KILL_CONNECTION)
     {
       uint lock_count;
       /*
@@ -2463,7 +2509,7 @@ static void handle_delayed_insert_impl(THD *thd, Delayed_insert *di)
 	  break;
 	if (error == ETIMEDOUT || error == ETIME)
 	{
-	  thd->killed= THD::KILL_SYSTEM_THREAD;
+	  thd->killed= KILL_SYSTEM_THREAD;
 	  break;
 	}
       }
@@ -2496,7 +2542,7 @@ static void handle_delayed_insert_impl(THD *thd, Delayed_insert *di)
       {
 	/* Fatal error */
 	di->dead= 1;
-	thd->killed= THD::KILL_SYSTEM_THREAD;
+	thd->killed= KILL_SYSTEM_THREAD;
       }
       pthread_cond_broadcast(&di->cond_client);
     }
@@ -2506,7 +2552,7 @@ static void handle_delayed_insert_impl(THD *thd, Delayed_insert *di)
       {
 	/* Some fatal error */
 	di->dead= 1;
-	thd->killed= THD::KILL_SYSTEM_THREAD;
+	thd->killed= KILL_SYSTEM_THREAD;
       }
     }
     di->status=0;
@@ -2566,7 +2612,7 @@ pthread_handler_t handle_delayed_insert(void *arg)
   thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
   thd->set_current_time();
   threads.append(thd);
-  thd->killed=abort_loop ? THD::KILL_SYSTEM_THREAD : THD::NOT_KILLED;
+  thd->killed=abort_loop ? KILL_SYSTEM_THREAD : NOT_KILLED;
   pthread_mutex_unlock(&LOCK_thread_count);
 
   /*
@@ -2600,7 +2646,7 @@ end:
 
   di->table=0;
   di->dead= 1;                                  // If error
-  thd->killed= THD::KILL_SYSTEM_THREAD;	        // If error
+  thd->killed= KILL_SYSTEM_THREAD;	        // If error
   pthread_mutex_unlock(&di->mutex);
 
   close_thread_tables(thd);			// Free the table
@@ -2679,7 +2725,7 @@ bool Delayed_insert::handle_inserts(void)
   max_rows= delayed_insert_limit;
   if (thd.killed || table->needs_reopen_or_name_lock())
   {
-    thd.killed= THD::KILL_SYSTEM_THREAD;
+    thd.killed= KILL_SYSTEM_THREAD;
     max_rows= ULONG_MAX;                     // Do as much as possible
   }
 
@@ -2700,6 +2746,8 @@ bool Delayed_insert::handle_inserts(void)
 
     thd.start_time=row->start_time;
     thd.query_start_used=row->query_start_used;
+    thd.start_time_sec_part=row->start_time_sec_part;
+    thd.query_start_sec_part_used=row->query_start_sec_part_used;
     /*
       To get the exact auto_inc interval to store in the binlog we must not
       use values from the previous interval (of the previous rows).
@@ -2790,7 +2838,7 @@ bool Delayed_insert::handle_inserts(void)
       /* if the delayed insert was killed, the killed status is
          ignored while binlogging */
       int errcode= 0;
-      if (thd.killed == THD::NOT_KILLED)
+      if (thd.killed == NOT_KILLED)
         errcode= query_error_code(&thd, TRUE);
       
       /*
@@ -2936,9 +2984,9 @@ bool mysql_insert_select_prepare(THD *thd)
 {
   LEX *lex= thd->lex;
   SELECT_LEX *select_lex= &lex->select_lex;
-  TABLE_LIST *first_select_leaf_table;
   DBUG_ENTER("mysql_insert_select_prepare");
 
+
   /*
     Statement-based replication of INSERT ... SELECT ... LIMIT is not safe
     as order of rows is not defined, so in mixed mode we go to row-based.
@@ -2964,21 +3012,38 @@ bool mysql_insert_select_prepare(THD *thd)
                            &select_lex->where, TRUE, FALSE, FALSE))
     DBUG_RETURN(TRUE);
 
+  DBUG_ASSERT(select_lex->leaf_tables.elements != 0);
+  List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+  TABLE_LIST *table;
+  uint insert_tables;
+
+  if (select_lex->first_cond_optimization)
+  {
+    /* Back up leaf_tables list. */
+    Query_arena *arena= thd->stmt_arena, backup;
+    arena= thd->activate_stmt_arena_if_needed(&backup);  // For easier test
+
+    insert_tables= select_lex->insert_tables;
+    while ((table= ti++) && insert_tables--)
+    {
+      select_lex->leaf_tables_exec.push_back(table);
+      table->tablenr_exec= table->table->tablenr;
+      table->map_exec= table->table->map;
+      table->maybe_null_exec= table->table->maybe_null;
+    }
+    if (arena)
+      thd->restore_active_arena(arena, &backup);
+  }
+  ti.rewind();
   /*
     exclude first table from leaf tables list, because it belong to
     INSERT
   */
-  DBUG_ASSERT(select_lex->leaf_tables != 0);
-  lex->leaf_tables_insert= select_lex->leaf_tables;
   /* skip all leaf tables belonged to view where we are insert */
-  for (first_select_leaf_table= select_lex->leaf_tables->next_leaf;
-       first_select_leaf_table &&
-       first_select_leaf_table->belong_to_view &&
-       first_select_leaf_table->belong_to_view ==
-       lex->leaf_tables_insert->belong_to_view;
-       first_select_leaf_table= first_select_leaf_table->next_leaf)
-  {}
-  select_lex->leaf_tables= first_select_leaf_table;
+  insert_tables= select_lex->insert_tables;
+  while ((table= ti++) && insert_tables--)
+    ti.remove();
+
   DBUG_RETURN(FALSE);
 }
 
@@ -3021,8 +3086,6 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
   */
   lex->current_select= &lex->select_lex;
 
-  /* Errors during check_insert_fields() should not be ignored. */
-  lex->current_select->no_error= FALSE;
   res= (setup_fields(thd, 0, values, MARK_COLUMNS_READ, 0, 0) ||
         check_insert_fields(thd, table_list, *fields, values,
                             !insert_into_view, 1, &map));
@@ -3195,7 +3258,7 @@ void select_insert::cleanup()
 select_insert::~select_insert()
 {
   DBUG_ENTER("~select_insert");
-  if (table)
+  if (table && table->created)
   {
     table->next_number_field=0;
     table->auto_increment_field_not_null= FALSE;
@@ -3302,7 +3365,7 @@ bool select_insert::send_eof()
   bool const trans_table= table->file->has_transactions();
   ulonglong id;
   bool changed;
-  THD::killed_state killed_status= thd->killed;
+  killed_state killed_status= thd->killed;
   DBUG_ENTER("select_insert::send_eof");
   DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'",
                        trans_table, table->file->table_type()));
@@ -3337,7 +3400,7 @@ bool select_insert::send_eof()
     if (!error)
       thd->clear_error();
     else
-      errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+      errcode= query_error_code(thd, killed_status == NOT_KILLED);
 
     if (write_to_binlog(trans_table, errcode))
     {
@@ -3411,7 +3474,7 @@ void select_insert::abort() {
     {
         if (mysql_bin_log.is_open())
         {
-          int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+          int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
           /* error of writing binary log is ignored */
           write_to_binlog(transactional_table, errcode);
         }
@@ -3597,9 +3660,6 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
 
   tmp_table.s->db_create_options=0;
   tmp_table.s->blob_ptr_size= portable_sizeof_char_ptr;
-  tmp_table.s->db_low_byte_first= 
-        test(create_info->db_type == myisam_hton ||
-             create_info->db_type == heap_hton);
   tmp_table.null_row= 0;
   tmp_table.maybe_null= 0;
 
@@ -3775,7 +3835,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
           !table->s->tmp_table &&
           !ptr->get_create_info()->table_existed)
       {
-        int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+        int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
         if (int error= ptr->binlog_show_create_table(tables, count, errcode))
           return error;
       }
@@ -3818,7 +3878,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
                           create_table->table_name);
       if (thd->current_stmt_binlog_row_based)
       {
-        int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+        int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
         binlog_show_create_table(&(create_table->table), 1, errcode);
       }
       table= create_table->table;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
new file mode 100644
index 00000000000..a960c63e782
--- /dev/null
+++ b/sql/sql_join_cache.cc
@@ -0,0 +1,4635 @@
+/* Copyright (C) 2000-2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  @brief
+  join cache optimizations
+
+  @defgroup Query_Optimizer  Query Optimizer
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#define NO_MORE_RECORDS_IN_BUFFER  (uint)(-1)
+
+static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save);
+
+/*****************************************************************************
+ *  Join cache module
+******************************************************************************/
+
+/* 
+  Fill in the descriptor of a flag field associated with a join cache    
+
+  SYNOPSIS
+    add_field_flag_to_join_cache()
+      str           position in a record buffer to copy the field from/to
+      length        length of the field 
+      field  IN/OUT pointer to the field descriptor to fill in 
+
+  DESCRIPTION
+    The function fill in the descriptor of a cache flag field to which
+    the parameter 'field' points to. The function uses the first two
+    parameters to set the position in the record buffer from/to which 
+    the field value is to be copied and the length of the copied fragment. 
+    Before returning the result the function increments the value of
+    *field by 1.
+    The function ignores the fields 'blob_length' and 'ofset' of the
+    descriptor.
+
+  RETURN VALUE
+    the length of the field  
+*/
+
+static
+uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
+{
+  CACHE_FIELD *copy= *field;
+  copy->str= str;
+  copy->length= length;
+  copy->type= 0;
+  copy->field= 0;
+  copy->referenced_field_no= 0;
+  (*field)++;
+  return length;    
+}
+
+
+/* 
+  Fill in the descriptors of table data fields associated with a join cache    
+
+  SYNOPSIS
+    add_table_data_fields_to_join_cache()
+      tab              descriptors of fields from this table are to be filled
+      field_set        descriptors for only these fields are to be created
+      field_cnt IN/OUT     counter of data fields  
+      descr  IN/OUT        pointer to the first descriptor to be filled
+      field_ptr_cnt IN/OUT counter of pointers to the data fields
+      descr_ptr IN/OUT     pointer to the first pointer to blob descriptors 
+
+  DESCRIPTION
+    The function fills in the descriptors of cache data fields from the table
+    'tab'. The descriptors are filled only for the fields marked in the 
+    bitmap 'field_set'. 
+    The function fills the descriptors starting from the position pointed
+    by 'descr'. If an added field is of a BLOB type then a pointer to the 
+    its descriptor is added to the array descr_ptr.   
+    At the return 'descr' points to the position after the last added
+    descriptor  while 'descr_ptr' points to the position right after the
+    last added pointer.  
+
+  RETURN VALUE
+    the total length of the added fields  
+*/
+
+static
+uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, 
+                                         MY_BITMAP *field_set,
+                                         uint *field_cnt, 
+                                         CACHE_FIELD **descr,
+                                         uint *field_ptr_cnt,
+                                         CACHE_FIELD ***descr_ptr)
+{
+  Field **fld_ptr;
+  uint len= 0;
+  CACHE_FIELD *copy= *descr;
+  CACHE_FIELD **copy_ptr= *descr_ptr;
+  uint used_fields= bitmap_bits_set(field_set);
+  for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
+  {
+    if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
+    {
+      len+= (*fld_ptr)->fill_cache_field(copy);
+      if (copy->type == CACHE_BLOB)
+      {
+        *copy_ptr= copy;
+        copy_ptr++;
+        (*field_ptr_cnt)++;
+      }
+      copy->field= *fld_ptr;
+      copy->referenced_field_no= 0;
+      copy++;
+      (*field_cnt)++;
+      used_fields--;
+    }
+  }
+  *descr= copy;
+  *descr_ptr= copy_ptr;
+  return len;
+}
+
+/* 
+  Determine different counters of fields associated with a record in the cache  
+
+  SYNOPSIS
+    calc_record_fields()
+
+  DESCRIPTION
+    The function counts the number of total fields stored in a record
+    of the cache and saves this number in the 'fields' member. It also
+    determines the number of flag fields and the number of blobs.
+    The function sets 'with_match_flag' on if 'join_tab' needs a match flag
+    i.e. if it is the first inner table of an outer join or a semi-join.  
+
+  RETURN VALUE
+    none 
+*/
+
+void JOIN_CACHE::calc_record_fields()
+{
+  JOIN_TAB *tab;
+
+  if (prev_cache)
+    tab= prev_cache->join_tab;
+  else
+  {
+    if (join_tab->bush_root_tab)
+    {
+      /* 
+        --ot1--SJM1--------------ot2--...
+                |
+                |
+                +-it1--...--itN
+                        ^____________ this->join_tab is somewhere here, 
+                                      inside an sjm nest.
+
+        The join buffer should store the values of it1.*, it2.*, ..
+        It should not store values of ot1.*.
+      */
+      tab= join_tab->bush_root_tab->bush_children->start;
+    }
+    else
+    {
+      /*
+        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
+                    |     |                      ^   
+                    |     +-it21--...--it2N      |
+                    |                            \-- we're somewhere here,
+                    +-it11--...--it1N                at the top level
+        
+        The join buffer should store the values of 
+
+          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...
+        
+        that is, we should start from the first non-const top-level table. 
+
+        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
+        not interested in storing the columns of materialization tables
+        themselves. Beause of that, if the first non-const top-level table is a
+        materialized table, we move to its bush_children:
+      */
+      tab= join->join_tab + join->const_tables;
+      if (tab->bush_children)
+        tab= tab->bush_children->start;
+    }
+  }
+  DBUG_ASSERT(!tab->bush_children);
+
+  start_tab= tab;
+  fields= 0;
+  blobs= 0;
+  flag_fields= 0;
+  data_field_count= 0;
+  data_field_ptr_count= 0;
+  referenced_fields= 0;
+
+  /*
+    The following loop will get inside SJM nests, because data may be unpacked
+    to sjm-inner tables.
+  */
+  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {	    
+    tab->calc_used_field_length(FALSE);
+    flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields);
+    flag_fields+= test(tab->table->maybe_null);
+    fields+= tab->used_fields;
+    blobs+= tab->used_blobs;
+  }
+  if ((with_match_flag= join_tab->use_match_flag()))
+    flag_fields++;
+  fields+= flag_fields;
+}
+
+
+/* 
+  Collect information on join key arguments  
+
+  SYNOPSIS
+    collect_info_on_key_args()
+
+  DESCRIPTION
+    The function traverses the ref expressions that are used to access the
+    joined table join_tab. For each table 'tab' whose fields are to be stored
+    in the join buffer of the cache the function finds the fields from 'tab'
+    that occur in the ref expressions and marks these fields in the bitmap
+    tab->table->tmp_set. The function counts the number of them stored
+    in this cache and the total number of them stored in the previous caches
+    and saves the results of the counting in 'local_key_arg_fields' and
+    'external_key_arg_fields' respectively.
+
+  NOTES
+    The function does not do anything if no key is used to join the records
+    from join_tab.
+    
+  RETURN VALUE
+    none 
+*/  
+
+void JOIN_CACHE::collect_info_on_key_args()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+  local_key_arg_fields= 0;
+  external_key_arg_fields= 0;
+
+  if (!is_key_access())
+    return;
+
+  TABLE_REF *ref= &join_tab->ref;
+  cache= this;
+  do
+  {
+    for (tab= cache->start_tab; tab != cache->join_tab;
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    { 
+      uint key_args;
+      bitmap_clear_all(&tab->table->tmp_set);
+      for (uint i= 0; i < ref->key_parts; i++)
+      {
+        Item *ref_item= ref->items[i]; 
+        if (!(tab->table->map & ref_item->used_tables()))
+	  continue;
+	 ref_item->walk(&Item::add_field_to_set_processor, 1,
+                        (uchar *) tab->table);
+      }
+      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
+      {
+        if (cache == this)
+          local_key_arg_fields+= key_args;
+        else
+          external_key_arg_fields+= key_args;
+      }
+    }
+    cache= cache->prev_cache;
+  } 
+  while (cache);
+
+  return;
+}
+
+
+/* 
+  Allocate memory for descriptors and pointers to them associated with the cache  
+
+  SYNOPSIS
+    alloc_fields()
+
+  DESCRIPTION
+    The function allocates memory for the array of fields descriptors
+    and the array of pointers to the field descriptors used to copy
+    join record data from record buffers into the join buffer and
+    backward. Some pointers refer to the field descriptor associated
+    with previous caches. They are placed at the beginning of the array
+    of pointers and its total number is stored in external_key_arg_fields.
+    The pointer of the first array is assigned to field_descr and the number
+    of the elements in it is precalculated by the function calc_record_fields. 
+    The allocated arrays are adjacent.
+  
+  NOTES
+    The memory is allocated in join->thd->memroot
+
+  RETURN VALUE
+    pointer to the first array  
+*/
+
+int JOIN_CACHE::alloc_fields()
+{
+  uint ptr_cnt= external_key_arg_fields+blobs+1;
+  uint fields_size= sizeof(CACHE_FIELD)*fields;
+  field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
+                                        sizeof(CACHE_FIELD*)*ptr_cnt);
+  blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
+  return (field_descr == NULL);
+}  
+
+
+/* 
+  Create descriptors of the record flag fields stored in the join buffer 
+
+  SYNOPSIS
+    create_flag_fields()
+
+  DESCRIPTION
+    The function creates descriptors of the record flag fields stored
+    in the join buffer. These are descriptors for:
+    - an optional match flag field,
+    - table null bitmap fields, 
+    - table null row fields.
+    The match flag field is created when 'join_tab' is the first inner
+    table of an outer join our a semi-join. A null bitmap field is
+    created for any table whose fields are to be stored in the join
+    buffer if at least one of these fields is nullable or is a BIT field
+    whose bits are partially stored with null bits. A null row flag
+    is created for any table assigned to the cache if it is an inner
+    table of an outer join.
+    The descriptor for flag fields are placed one after another at the
+    beginning of the array of field descriptors 'field_descr' that
+    contains 'fields' elements. If there is a match flag field the 
+    descriptor for it is always first in the sequence of flag fields.
+    The descriptors for other flag fields can follow in an arbitrary
+    order. 
+    The flag field values follow in a record stored in the join buffer
+    in the same order as field descriptors, with the match flag always
+    following first.
+    The function sets the value of 'flag_fields' to the total number
+    of the descriptors created for the flag fields.
+    The function sets the value of 'length' to the total length of the
+    flag fields.
+  
+  RETURN VALUE
+    none
+*/
+
+void JOIN_CACHE::create_flag_fields()
+{
+  CACHE_FIELD *copy;
+  JOIN_TAB *tab;
+
+  copy= field_descr;
+
+  length=0;
+
+  /* If there is a match flag the first field is always used for this flag */ 
+  if (with_match_flag)
+    length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
+                                          sizeof(join_tab->found),
+	                                  &copy);
+
+  /* Create fields for all null bitmaps and null row flags that are needed */
+  for (tab= start_tab; tab != join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    TABLE *table= tab->table;
+
+    /* Create a field for the null bitmap from table if needed */
+    if (tab->used_null_fields || tab->used_uneven_bit_fields)			    
+      length+= add_flag_field_to_join_cache(table->null_flags,
+                                            table->s->null_bytes,
+                                            &copy);
+ 
+    /* Create table for the null row flag if needed */
+    if (table->maybe_null)
+      length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
+                                            sizeof(table->null_row),
+                                            &copy);
+  }
+
+  /* Theoretically the new value of flag_fields can be less than the old one */   
+  flag_fields= copy-field_descr;
+}
+
+
+/* 
+  Create descriptors of the fields used to build access keys to the joined table
+
+  SYNOPSIS
+    create_key_arg_fields()
+
+  DESCRIPTION
+    The function creates descriptors of the record fields stored in the join
+    buffer that are used to build access keys to the joined table. These
+    fields are put into the buffer ahead of other records fields stored in
+    the buffer. Such placement helps to optimize construction of access keys.
+    For each field that is used to build access keys to the joined table but
+    is stored in some other join cache buffer the function saves a pointer
+    to the the field descriptor. The array of such pointers are placed in the
+    the join cache structure just before the array of pointers to the
+    blob fields blob_ptr.
+    Any field stored in a join cache buffer that is used to construct keys
+    to access tables associated with other join caches is called a referenced
+    field. It receives a unique number that is saved by the function in the
+    member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
+    This number is used as index to the array of offsets to the referenced
+    fields that are saved and put in the join cache buffer after all record
+    fields.
+    The function also finds out whether that the keys to access join_tab
+    can be considered as embedded and, if so, sets the flag 'use_emb_key' in
+    this join cache appropriately. 
+     
+  NOTES.
+    When a key to access the joined table 'join_tab' is constructed the array
+    of pointers to the field descriptors for the external fields is looked
+    through. For each of this pointers we find out in what previous key cache
+    the referenced field is stored. The value of 'referenced_field_no'
+    provides us with the index into the array of offsets for referenced 
+    fields stored in the join cache. The offset read by the the index allows
+    us to read the field without reading all other fields of the record 
+    stored the join cache buffer. This optimizes the construction of keys
+    to access 'join_tab' when some key arguments are stored in the previous
+    join caches.  
+
+  NOTES
+    The function does not do anything if no key is used to join the records
+    from join_tab.
+ 
+  RETURN VALUE
+    none
+*/
+void JOIN_CACHE::create_key_arg_fields()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+
+  if (!is_key_access())
+    return;
+
+  /* 
+    Save pointers to the cache fields in previous caches
+    that  are used to build keys for this key access.
+  */
+  cache= this;
+  uint ext_key_arg_cnt= external_key_arg_fields;
+  CACHE_FIELD *copy;
+  CACHE_FIELD **copy_ptr= blob_ptr;
+  while (ext_key_arg_cnt)
+  {
+    cache= cache->prev_cache;
+    for (tab= cache->start_tab; tab != cache->join_tab; 
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    { 
+      CACHE_FIELD *copy_end;
+      MY_BITMAP *key_read_set= &tab->table->tmp_set;
+      /* key_read_set contains the bitmap of tab's fields referenced by ref */ 
+      if (bitmap_is_clear_all(key_read_set))
+        continue;
+      copy_end= cache->field_descr+cache->fields;
+      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
+      {
+        /*
+          (1) - when we store rowids for DuplicateWeedout, they have
+                copy->field==NULL
+        */
+        if (copy->field &&  // (1)
+            copy->field->table == tab->table &&
+            bitmap_is_set(key_read_set, copy->field->field_index))
+        {
+          *copy_ptr++= copy; 
+          ext_key_arg_cnt--;
+          if (!copy->referenced_field_no)
+          {
+            /* 
+              Register the referenced field 'copy': 
+              - set the offset number in copy->referenced_field_no,
+              - adjust the value of the flag 'with_length',
+              - adjust the values of 'pack_length' and 
+                of 'pack_length_with_blob_ptrs'.
+	    */
+            copy->referenced_field_no= ++cache->referenced_fields;
+            if (!cache->with_length)
+            {
+              cache->with_length= TRUE;
+              uint sz= cache->get_size_of_rec_length();
+              cache->base_prefix_length+= sz;
+              cache->pack_length+= sz;
+              cache->pack_length_with_blob_ptrs+= sz;
+            }
+	    cache->pack_length+= cache->get_size_of_fld_offset();
+            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
+          }        
+        }
+      }
+    } 
+  }
+  /* After this 'blob_ptr' shall not be be changed */ 
+  blob_ptr= copy_ptr;
+  
+  /* Now create local fields that are used to build ref for this key access */
+  copy= field_descr+flag_fields;
+  for (tab= start_tab; tab != join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
+                                                 &data_field_count, &copy,
+                                                 &data_field_ptr_count, 
+                                                 &copy_ptr);
+  }
+
+  use_emb_key= check_emb_key_usage();
+
+  return;
+}
+
+
+/* 
+  Create descriptors of all remaining data fields stored in the join buffer    
+
+  SYNOPSIS
+    create_remaining_fields()
+
+  DESCRIPTION
+    The function creates descriptors for all remaining data fields of a
+    record from the join buffer. If the value returned by is_key_access() is
+    false the function creates fields for all read record fields that
+    comprise the partial join record joined with join_tab. Otherwise, 
+    for each table tab, the set of the read fields for which the descriptors
+    have to be added is determined as the difference between all read fields
+    and and those for which the descriptors have been already created.
+    The latter are supposed to be marked in the bitmap tab->table->tmp_set.
+    The function increases the value of 'length' to the the total length of
+    the added fields.
+   
+  NOTES
+    If is_key_access() returns true the function modifies the value of
+    tab->table->tmp_set for a each table whose fields are stored in the cache.
+    The function calls the method Field::fill_cache_field to figure out
+    the type of the cache field and the maximal length of its representation
+    in the join buffer. If this is a blob field then additionally a pointer
+    to this field is added as an element of the array blob_ptr. For a blob
+    field only the size of the length of the blob data is taken into account.
+    It is assumed that 'data_field_count' contains the number of descriptors
+    for data fields that have been already created and 'data_field_ptr_count'
+    contains the number of the pointers to such descriptors having been
+    stored up to the moment.
+
+  RETURN VALUE
+    none 
+*/
+
+void JOIN_CACHE::create_remaining_fields()
+{
+  JOIN_TAB *tab;
+  bool all_read_fields= !is_key_access();
+  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
+  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
+
+  for (tab= start_tab; tab != join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    MY_BITMAP *rem_field_set;
+    TABLE *table= tab->table;
+
+    if (all_read_fields)
+      rem_field_set= table->read_set;
+    else
+    {
+      bitmap_invert(&table->tmp_set);
+      bitmap_intersect(&table->tmp_set, table->read_set);
+      rem_field_set= &table->tmp_set;
+    }  
+
+    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
+                                                 &data_field_count, &copy,
+                                                 &data_field_ptr_count,
+                                                 &copy_ptr);
+  
+    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
+    if (tab->keep_current_rowid)
+    {
+      copy->str= table->file->ref;
+      if (copy->str)
+        copy->length= table->file->ref_length;
+      else
+      {
+        /* This may happen only for materialized derived tables and views */
+        copy->length= 0;
+        copy->str= (uchar *) table;
+      } 
+      copy->type= CACHE_ROWID;
+      copy->field= 0;
+      copy->referenced_field_no= 0;
+      /* 
+        Note: this may seem odd, but at this point we have
+        table->file->ref==NULL while table->file->ref_length is already set 
+        to correct value.
+      */
+      length += table->file->ref_length;
+      data_field_count++;
+      copy++;
+    }
+  }
+}
+
+
+
+/* 
+  Calculate and set all cache constants      
+
+  SYNOPSIS
+    set_constants()
+
+  DESCRIPTION
+    The function calculates and set all precomputed constants that are used
+    when writing records into the join buffer and reading them from it.
+    It calculates the size of offsets of a record within the join buffer
+    and of a field within a record. It also calculates the number of bytes
+    used to store record lengths.
+    The function also calculates the maximal length of the representation
+    of record in the cache excluding blob_data. This value is used when
+    making a dicision whether more records should be added into the join
+    buffer or not.
+  
+  RETURN VALUE
+    none 
+*/
+
+void JOIN_CACHE::set_constants()
+{ 
+  /* 
+    Any record from a BKA cache is prepended with the record length.
+    We use the record length when reading the buffer and building key values
+    for each record. The length allows us not to read the fields that are
+    not needed for keys.
+    If a record has match flag it also may be skipped when the match flag
+    is on. It happens if the cache is used for a semi-join operation or
+    for outer join when the 'not exist' optimization can be applied.
+    If some of the fields are referenced from other caches then
+    the record length allows us to easily reach the saved offsets for
+    these fields since the offsets are stored at the very end of the record.
+    However at this moment we don't know whether we have referenced fields for
+    the cache or not. Later when a referenced field is registered for the cache
+    we adjust the value of the flag 'with_length'.
+  */ 
+  with_length= is_key_access() || 
+               join_tab->is_inner_table_of_semi_join_with_first_match() ||
+               join_tab->is_inner_table_of_outer_join();
+  /* 
+     At this moment we don't know yet the value of 'referenced_fields',
+     but in any case it can't be greater than the value of 'fields'.
+  */
+  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
+            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
+            sizeof(ulong);
+  buff_size= max(join->thd->variables.join_buff_size, 2*len);
+  size_of_rec_ofs= offset_size(buff_size);
+  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len); 
+  size_of_fld_ofs= size_of_rec_len;
+  base_prefix_length= (with_length ? size_of_rec_len : 0) +
+                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+  /* 
+    The size of the offsets for referenced fields will be added later.
+    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
+    every time when the first reference to the referenced field is registered.
+  */
+  pack_length= (with_length ? size_of_rec_len : 0) +
+               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) + 
+               length;
+  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
+}
+
+
+/* 
+  Get maximum total length of all affixes of a record in the join cache buffer
+
+  SYNOPSIS
+    get_record_max_affix_length()
+
+  DESCRIPTION
+    The function calculates the maximum possible total length of all affixes
+    of a record in the join cache buffer, that is made of:
+      - the length of all prefixes used in this cache,
+      - the length of the match flag if it's needed
+      - the total length of the maximum possible offsets to the fields of
+        a record in the buffer.
+
+  RETURN VALUE
+    The maximum total length of all affixes of a record in the join buffer  
+*/ 
+     
+uint JOIN_CACHE::get_record_max_affix_length()
+{
+  uint len= get_prefix_length() +
+            test(with_match_flag) + 
+            size_of_fld_ofs * data_field_count;
+  return len;
+}
+
+
+/* 
+  Get the minimum possible size of the cache join buffer 
+
+  SYNOPSIS
+    get_min_join_buffer_size()
+
+  DESCRIPTION
+    At the first its invocation for the cache the function calculates the
+    minimum possible size of the join buffer of the cache. This value depends
+    on the minimal number of records 'min_records' to be stored in the join
+    buffer. The number is supposed to be determined by the procedure that 
+    chooses the best access path to the joined table join_tab in the execution
+    plan. After the calculation of the interesting size the function saves it
+    in the field 'min_buff_size' in order to use it directly at the next     
+    invocations of the function.
+
+  NOTES
+    Currently the number of minimal records is just set to 1.
+
+  RETURN VALUE
+    The minimal possible size of the join buffer of this cache 
+*/
+
+ulong JOIN_CACHE::get_min_join_buffer_size()
+{
+  if (!min_buff_size)
+  {
+    size_t len= 0;
+    for (JOIN_TAB *tab= start_tab; tab != join_tab; 
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    {
+      len+= tab->get_max_used_fieldlength();
+    }
+    len+= get_record_max_affix_length() + get_max_key_addon_space_per_record();  
+    size_t min_sz= len*min_records;
+    size_t add_sz= 0;
+    for (uint i=0; i < min_records; i++)
+      add_sz+= join_tab_scan->aux_buffer_incr(i+1);
+    avg_aux_buffer_incr= add_sz/min_records;
+    min_sz+= add_sz;
+    min_sz+= pack_length_with_blob_ptrs;
+    set_if_bigger(min_sz, 1);
+    min_buff_size= min_sz;
+  }
+  return min_buff_size;
+}
+
+
+/* 
+  Get the maximum possible size of the cache join buffer 
+
+  SYNOPSIS
+    get_max_join_buffer_size()
+
+    optimize_buff_size  FALSE <-> do not take more memory than needed for
+                        the estimated number of records in the partial join 
+
+  DESCRIPTION
+    At the first its invocation for the cache the function calculates the
+    maximum possible size of join buffer for the cache. If the parameter
+    optimize_buff_size true then this value does not exceed the size of the
+    space needed for the estimated number of records 'max_records' in the
+    partial join that joins tables from the first one through join_tab. This
+    value is also capped off by the value of join_tab->join_buffer_size_limit,
+    if it has been set a to non-zero value, and by the value of the system
+    parameter join_buffer_size - otherwise. After the calculation of the
+    interesting size the function saves the value in the field 'max_buff_size'
+    in order to use it directly at the next  invocations of the function.
+
+  NOTES
+    Currently the value of join_tab->join_buffer_size_limit is initialized
+    to 0 and is never reset.
+
+  RETURN VALUE
+    The maximum possible size of the join buffer of this cache 
+*/
+
+ulong JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
+{
+  if (!max_buff_size)
+  {
+    size_t max_sz;
+    size_t min_sz= get_min_join_buffer_size(); 
+    size_t len= 0;
+    for (JOIN_TAB *tab= start_tab; tab != join_tab;
+         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+    {
+      len+= tab->get_used_fieldlength();
+    }
+    len+= get_record_max_affix_length();
+    avg_record_length= len;
+    len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
+    space_per_record= len;
+    
+    size_t limit_sz= join->thd->variables.join_buff_size;
+    if (join_tab->join_buffer_size_limit)
+      set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
+    if (!optimize_buff_size)
+      max_sz= limit_sz;
+    else
+    {    
+      if (limit_sz / max_records > space_per_record)
+        max_sz= space_per_record * max_records;
+      else
+        max_sz= limit_sz;
+      max_sz+= pack_length_with_blob_ptrs;
+      set_if_smaller(max_sz, limit_sz);
+    }
+    set_if_bigger(max_sz, min_sz);
+    max_buff_size= max_sz;
+  }
+  return max_buff_size;
+}    
+      
+
+/* 
+  Allocate memory for a join buffer      
+
+  SYNOPSIS
+    alloc_buffer()
+
+  DESCRIPTION
+    The function allocates a lump of memory for the cache join buffer. 
+    Initially the function sets the size of the buffer buff_size equal to
+    the value returned by get_max_join_buffer_size(). If the total size of
+    the space intended to be used for the join buffers employed by the
+    tables from the first one through join_tab exceeds the value of the
+    system parameter join_buff_space_limit, then the function first tries
+    to shrink the used buffers to make the occupied space fit the maximum
+    memory allowed to be used for all join buffers in total. After
+    this the function tries to allocate a join buffer for join_tab.
+    If it fails to do so, it decrements the requested size of the join
+    buffer, shrinks proportionally the join buffers used for the previous
+    tables and tries to allocate a buffer for join_tab. In the case of a
+    failure the function repeats its attempts with smaller and smaller
+    requested sizes of the buffer, but not more than 4 times.
+  
+  RETURN VALUE
+    0   if the memory has been successfully allocated
+    1   otherwise
+*/
+
+int JOIN_CACHE::alloc_buffer()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+  ulonglong curr_buff_space_sz= 0;
+  ulonglong curr_min_buff_space_sz= 0;
+  ulonglong join_buff_space_limit=
+    join->thd->variables.join_buff_space_limit;
+  bool optimize_buff_size= 
+         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
+  double partial_join_cardinality=  (join_tab-1)->get_partial_join_cardinality();
+  buff= NULL;
+  min_buff_size= 0;
+  max_buff_size= 0;
+  min_records= 1;
+  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
+                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
+  set_if_bigger(max_records, 10);
+  min_buff_size= get_min_join_buffer_size();
+  buff_size= get_max_join_buffer_size(optimize_buff_size);
+
+  for (tab= start_tab; tab!= join_tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
+  {
+    cache= tab->cache;
+    if (cache)
+    {
+      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
+      curr_buff_space_sz+= cache->get_join_buffer_size();
+    }
+  }
+  curr_min_buff_space_sz+= min_buff_size;
+  curr_buff_space_sz+= buff_size;
+
+  if (curr_min_buff_space_sz > join_buff_space_limit ||
+      (curr_buff_space_sz > join_buff_space_limit &&
+       (!optimize_buff_size || 
+        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+                                  join_buff_space_limit))))
+    goto fail;
+                               
+  for (ulong buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
+  {
+    ulong next_buff_size;
+
+    if ((buff= (uchar*) my_malloc(buff_size, MYF(0))))
+      break;
+
+    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
+    if (next_buff_size < min_buff_size ||
+        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
+                                  curr_buff_space_sz-buff_size_decr))
+      goto fail;
+    buff_size= next_buff_size;
+
+    curr_buff_space_sz= 0;
+    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
+    {
+      cache= tab->cache;
+      if (cache)
+        curr_buff_space_sz+= cache->get_join_buffer_size();
+    } 
+  }
+  return 0;
+
+fail:
+  buff_size= 0;
+  return 1;
+}
+
+ 
+/*
+  Shrink the size if the cache join buffer in a given ratio
+
+  SYNOPSIS
+    shrink_join_buffer_in_ratio()
+      n           nominator of the ratio to shrink the buffer in
+      d           denominator if the ratio
+
+  DESCRIPTION
+    The function first deallocates the join buffer of the cache. Then
+    it allocates a buffer that is (n/d) times smaller.
+    
+  RETURN VALUE
+    FALSE   on success with allocation of the smaller join buffer 
+    TRUE    otherwise       
+*/
+
+bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
+{
+  size_t next_buff_size;
+  if (n < d)
+    return FALSE;
+  next_buff_size= (size_t) ((double) buff_size / n * d);
+  set_if_bigger(next_buff_size, min_buff_size);
+  buff_size= next_buff_size;
+  return realloc_buffer();
+}  
+
+
+/*
+  Reallocate the join buffer of a join cache
+ 
+  SYNOPSIS
+    realloc_buffer()
+
+  DESCRITION
+    The function reallocates the join buffer of the join cache. After this
+    it resets the buffer for writing.
+
+  NOTES
+    The function assumes that buff_size contains the new value for the join
+    buffer size.  
+
+  RETURN VALUE
+    0   if the buffer has been successfully reallocated
+    1   otherwise
+*/
+
+int JOIN_CACHE::realloc_buffer()
+{
+  int rc;
+  free();
+  rc= test(!(buff= (uchar*) my_malloc(buff_size, MYF(0))));
+  reset(TRUE);
+  return rc;   	
+}
+  
+
+/* 
+  Initialize a join cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the join cache structure. It supposed to be called
+    by init methods for classes derived from the JOIN_CACHE.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+
+  NOTES
+    The code of this function should have been included into the constructor
+    code itself. However the new operator for the class JOIN_CACHE would
+    never fail while memory allocation for the join buffer is not absolutely
+    unlikely to fail. That's why this memory allocation has to be placed in a
+    separate function that is called in a couple with a cache constructor.
+    It is quite natural to put almost all other constructor actions into
+    this function.     
+  
+  RETURN VALUE
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE::init()
+{
+  DBUG_ENTER("JOIN_CACHE::init");
+
+  calc_record_fields();
+
+  collect_info_on_key_args();
+
+  if (alloc_fields())
+    DBUG_RETURN(1);
+
+  create_flag_fields();
+
+  create_key_arg_fields();
+
+  create_remaining_fields();
+
+  set_constants();
+
+  if (alloc_buffer())
+    DBUG_RETURN(1); 
+  
+  reset(TRUE); 
+
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Check the possibility to read the access keys directly from the join buffer       
+  SYNOPSIS
+    check_emb_key_usage()
+
+  DESCRIPTION
+    The function checks some conditions at which the key values can be read
+    directly from the join buffer. This is possible when the key values can be
+    composed by concatenation of the record fields stored in the join buffer.
+    Sometimes when the access key is multi-component the function has to re-order
+    the fields written into the join buffer to make keys embedded. If key 
+    values for the key access are detected as embedded then 'use_emb_key'
+    is set to TRUE.
+
+  EXAMPLE
+    Let table t2 has an index defined on the columns a,b . Let's assume also
+    that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
+    of the integer type. Then if the query
+      SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b  
+    is executed with a join cache in such a way that t1 is the driving
+    table then the key values to access table t2 can be read directly
+    from the join buffer.
+  
+  NOTES
+    In some cases key values could be read directly from the join buffer but
+    we still do not consider them embedded. In the future we'll expand the
+    the class of keys which we identify as embedded.
+
+  NOTES
+    The function returns FALSE if no key is used to join the records
+    from join_tab.
+
+  RETURN VALUE
+    TRUE    key values will be considered as embedded,
+    FALSE   otherwise.
+*/
+
+bool JOIN_CACHE::check_emb_key_usage()
+{
+
+  if (!is_key_access())
+    return FALSE;
+
+  uint i;
+  Item *item; 
+  KEY_PART_INFO *key_part;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uint len= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
+
+  /* 
+    If some of the key arguments are not from the local cache the key
+    is not considered as embedded.
+    TODO:
+    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
+  */  
+  if (external_key_arg_fields != 0)
+    return FALSE;
+  /* 
+    If the number of the local key arguments is not equal to the number
+    of key parts the key value cannot be read directly from the join buffer.   
+  */
+  if (local_key_arg_fields != ref->key_parts)
+    return FALSE;
+
+  /* 
+    A key is not considered embedded if one of the following is true:
+    - one of its key parts is not equal to a field
+    - it is a partial key
+    - definition of the argument field does not coincide with the
+      definition of the corresponding key component
+    - some of the key components are nullable
+  */  
+  for (i=0; i < ref->key_parts; i++)
+  {
+    item= ref->items[i]->real_item();
+    if (item->type() != Item::FIELD_ITEM)
+      return FALSE;
+    key_part= keyinfo->key_part+i;
+    if (key_part->key_part_flag & HA_PART_KEY_SEG)
+      return FALSE;
+    if (!key_part->field->eq_def(((Item_field *) item)->field))
+      return FALSE;
+    if (key_part->field->maybe_null())
+      return FALSE;
+  }
+  
+  copy= field_descr+flag_fields;
+  copy_end= copy+local_key_arg_fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    /* 
+      If some of the key arguments are of variable length the key
+      is not considered as embedded.
+    */
+    if (copy->type != 0)
+      return FALSE;
+    /* 
+      If some of the key arguments are bit fields whose bits are partially
+      stored with null bits the key is not considered as embedded.
+    */
+    if (copy->field->type() == MYSQL_TYPE_BIT &&
+	 ((Field_bit*) (copy->field))->bit_len)
+      return FALSE;
+    len+= copy->length;
+  }
+
+  emb_key_length= len;
+
+  /* 
+    Make sure that key fields follow the order of the corresponding
+    key components these fields are equal to. For this the descriptors
+    of the fields that comprise the key might be re-ordered.
+  */
+  for (i= 0; i < ref->key_parts; i++)
+  {
+    uint j;
+    Item *item= ref->items[i]->real_item();
+    Field *fld= ((Item_field *) item)->field;
+    CACHE_FIELD *init_copy= field_descr+flag_fields+i; 
+    for (j= i, copy= init_copy; i < local_key_arg_fields;  i++, copy++)
+    {
+      if (fld->eq(copy->field))
+      {
+        if (j != i)
+        {
+          CACHE_FIELD key_part_copy= *copy;
+          *copy= *init_copy;
+          *init_copy= key_part_copy;
+        }
+        break;
+      }
+    }
+  }
+
+  return TRUE;
+}    
+
+
+/* 
+  Write record fields and their required offsets into the join cache buffer
+
+  SYNOPSIS
+    write_record_data()
+      link        a reference to the associated info in the previous cache
+      is_full OUT true if it has been decided that no more records will be
+                  added to the join buffer
+
+  DESCRIPTION
+    This function put into the cache buffer the following info that it reads
+    from the join record buffers or computes somehow:
+    (1) the length of all fields written for the record (optional)
+    (2) an offset to the associated info in the previous cache (if there is any)
+        determined by the link parameter
+    (3) all flag fields of the tables whose data field are put into the cache:
+        - match flag (optional),
+        - null bitmaps for all tables,
+        - null row flags for all tables
+    (4) values of all data fields including
+        - full images of those fixed legth data fields that cannot have 
+          trailing spaces
+        - significant part of fixed length fields that can have trailing spaces
+          with the prepanded length 
+        - data of non-blob variable length fields with the prepanded data length  
+        - blob data from blob fields with the prepanded data length
+    (5) record offset values for the data fields that are referred to from 
+        other caches
+ 
+    The record is written at the current position stored in the field 'pos'.
+    At the end of the function 'pos' points at the position right after the 
+    written record data.
+    The function increments the number of records in the cache that is stored
+    in the 'records' field by 1. The function also modifies the values of
+    'curr_rec_pos' and 'last_rec_pos' to point to the written record.
+    The 'end_pos' cursor is modified accordingly.
+    The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data 
+    remains in the record buffers and not copied to the join buffer. It may
+    happen only to the blob data from the last record added into the cache.
+    If on_precond is attached to join_tab and it is not evaluated to TRUE
+    then MATCH_IMPOSSIBLE is placed in the match flag field of the record
+    written into the join buffer.
+       
+  RETURN VALUE
+    length of the written record data
+*/
+
+uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
+{
+  uint len;
+  bool last_record;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uchar *flags_pos;
+  uchar *cp= pos;
+  uchar *init_pos= cp;
+  uchar *rec_len_ptr= 0;
+  uint key_extra= extra_key_length();
+ 
+  records++;  /* Increment the counter of records in the cache */
+
+  len= pack_length + key_extra;
+
+  /* Make an adjustment for the size of the auxiliary buffer if there is any */
+  uint incr= aux_buffer_incr(records);
+  size_t rem= rem_space();
+  aux_buff_size+= len+incr < rem ? incr : rem;
+
+  /*
+    For each blob to be put into cache save its length and a pointer
+    to the value in the corresponding element of the blob_ptr array.
+    Blobs with null values are skipped.
+    Increment 'len' by the total length of all these blobs. 
+  */    
+  if (blobs)
+  {
+    CACHE_FIELD **copy_ptr= blob_ptr;
+    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
+    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
+    {
+      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
+      if (!blob_field->is_null())
+      {
+        uint blob_len= blob_field->get_length();
+        (*copy_ptr)->blob_length= blob_len;
+        len+= blob_len;
+        blob_field->get_ptr(&(*copy_ptr)->str);
+      }
+    }
+  }
+
+  /*
+    Check whether we won't be able to add any new record into the cache after
+    this one because the cache will be full. Set last_record to TRUE if it's so.
+    The assume that the cache will be full after the record has been written
+    into it if either the remaining space of the cache is not big enough for the 
+    record's blob values or if there is a chance that not all non-blob fields
+    of the next record can be placed there.
+    This function is called only in the case when there is enough space left in
+    the cache to store at least non-blob parts of the current record.
+  */
+  last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();
+  
+  /* 
+    Save the position for the length of the record in the cache if it's needed.
+    The length of the record will be inserted here when all fields of the record
+    are put into the cache.  
+  */
+  if (with_length)
+  {
+    rec_len_ptr= cp;   
+    DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size);
+    cp+= size_of_rec_len;
+  }
+
+  /*
+    Put a reference to the fields of the record that are stored in the previous
+    cache if there is any. This reference is passed by the 'link' parameter.     
+  */
+  if (prev_cache)
+  {
+    DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size);
+    cp+= prev_cache->get_size_of_rec_offset();
+    prev_cache->store_rec_ref(cp, link);
+  } 
+
+  curr_rec_pos= cp;
+  
+  /* If the there is a match flag set its value to 0 */
+  copy= field_descr;
+  if (with_match_flag)
+    *copy[0].str= 0;
+
+  /* First put into the cache the values of all flag fields */
+  copy_end= field_descr+flag_fields;
+  flags_pos= cp;
+  for ( ; copy < copy_end; copy++)
+  {
+    DBUG_ASSERT(cp + copy->length <= buff + buff_size);
+    memcpy(cp, copy->str, copy->length);
+    cp+= copy->length;
+  } 
+  
+  /* Now put the values of the remaining fields as soon as they are not nulls */ 
+  copy_end= field_descr+fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    Field *field= copy->field;
+    if (field && field->maybe_null() && field->is_null())
+    {    
+      if (copy->referenced_field_no)
+        copy->offset= 0;
+      continue;              
+    }
+    /* Save the offset of the field to put it later at the end of the record */ 
+    if (copy->referenced_field_no)
+      copy->offset= cp-curr_rec_pos;
+
+    if (copy->type == CACHE_BLOB)
+    {
+      Field_blob *blob_field= (Field_blob *) copy->field;
+      if (last_record)
+      {
+        last_rec_blob_data_is_in_rec_buff= 1;
+        /* Put down the length of the blob and the pointer to the data */  
+        DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size);
+	blob_field->get_image(cp, copy->length+sizeof(char*),
+                              blob_field->charset());
+	cp+= copy->length+sizeof(char*);
+      }
+      else
+      {
+        /* First put down the length of the blob and then copy the data */ 
+	blob_field->get_image(cp, copy->length, 
+			      blob_field->charset());
+        DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size);
+	memcpy(cp+copy->length, copy->str, copy->blob_length);               
+	cp+= copy->length+copy->blob_length;
+      }
+    }
+    else
+    {
+      switch (copy->type) {
+      case CACHE_VARSTR1:
+        /* Copy the significant part of the short varstring field */ 
+        len= (uint) copy->str[0] + 1;
+        DBUG_ASSERT(cp + len <= buff + buff_size);
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_VARSTR2:
+        /* Copy the significant part of the long varstring field */
+        len= uint2korr(copy->str) + 2;
+        DBUG_ASSERT(cp + len <= buff + buff_size);
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_STRIPPED:
+      {
+        /* 
+          Put down the field value stripping all trailing spaces off.
+          After this insert the length of the written sequence of bytes.
+        */ 
+	uchar *str, *end;
+	for (str= copy->str, end= str+copy->length;
+	     end > str && end[-1] == ' ';
+	     end--) ;
+	len=(uint) (end-str);
+        DBUG_ASSERT(cp + len + 2 <= buff + buff_size);
+        int2store(cp, len);
+	memcpy(cp+2, str, len);
+	cp+= len+2;
+        break;
+      }
+      case CACHE_ROWID:
+        if (!copy->length)
+	{
+          /*
+            This may happen only for ROWID fields of materialized
+            derived tables and views.
+	  */
+	  TABLE *table= (TABLE *) copy->str;
+          copy->str= table->file->ref;
+          copy->length= table->file->ref_length;
+          if (!copy->str)
+	  {
+            /* 
+              If table is an empty inner table of an outer join and it is
+              a materialized derived table then table->file->ref == NULL.
+	    */
+	    cp+= copy->length;
+            break;
+          }
+        }
+        /* fall through */
+      default:      
+        /* Copy the entire image of the field from the record buffer */
+        DBUG_ASSERT(cp + copy->length <= buff + buff_size);
+        if (copy->str)
+	  memcpy(cp, copy->str, copy->length);
+	cp+= copy->length;
+      }
+    }
+  }
+  
+  /* Add the offsets of the fields that are referenced from other caches */ 
+  if (referenced_fields)
+  {
+    uint cnt= 0;
+    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
+    {
+      if (copy->referenced_field_no)
+      {
+        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
+                         copy->offset);
+        cnt++;
+      }
+    }
+    DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size);
+    cp+= size_of_fld_ofs*cnt;
+  }
+
+  if (rec_len_ptr)
+    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
+  last_rec_pos= curr_rec_pos; 
+  end_pos= pos= cp;
+  *is_full= last_record;
+
+  last_written_is_null_compl= 0;   
+  if (!join_tab->first_unmatched && join_tab->on_precond)
+  { 
+    join_tab->found= 0;
+    join_tab->not_null_compl= 1;
+    if (!join_tab->on_precond->val_int())
+    {
+      flags_pos[0]= MATCH_IMPOSSIBLE;     
+      last_written_is_null_compl= 1;
+    }
+  } 
+      
+  return (uint) (cp-init_pos);
+}
+
+
+/* 
+  Reset the join buffer for reading/writing: default implementation
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function reset the buffer for writing
+
+  DESCRIPTION
+    This default implementation of the virtual function reset() resets 
+    the join buffer for reading or writing.
+    If the buffer is reset for reading only the 'pos' value is reset
+    to point to the very beginning of the join buffer. If the buffer is
+    reset for writing additionally: 
+    - the counter of the records in the buffer is set to 0,
+    - the the value of 'last_rec_pos' gets pointing at the position just
+      before the buffer, 
+    - 'end_pos' is set to point to the beginning of the join buffer,
+    - the size of the auxiliary buffer is reset to 0,
+    - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
+    
+  RETURN VALUE
+    none
+*/
+void JOIN_CACHE::reset(bool for_writing)
+{
+  pos= buff;
+  curr_rec_link= 0;
+  if (for_writing)
+  {
+    records= 0;
+    last_rec_pos= buff;
+    aux_buff_size= 0;
+    end_pos= pos;
+    last_rec_blob_data_is_in_rec_buff= 0;
+  }
+}
+
+
+/* 
+  Add a record into the join buffer: the default implementation
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function put_record writes
+    the next matching record into the join buffer.
+    It also links the record having been written into the join buffer with
+    the matched record in the previous cache if there is any.
+    The implementation assumes that the function get_curr_link() 
+    will return exactly the pointer to this matched record.
+
+  RETURN VALUE
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::put_record()
+{
+  bool is_full;
+  uchar *link= 0;
+  if (prev_cache)
+    link= prev_cache->get_curr_rec_link();
+  write_record_data(link, &is_full);
+  return is_full;
+}
+  
+
+/* 
+  Read the next record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record
+    reads fields of the next record from the join buffer of this cache.
+    The function also reads all other fields associated with this record
+    from the the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+    It is supposed that 'pos' points to the position in the buffer 
+    right after the previous record when the function is called.
+    When the function returns the 'pos' values is updated to point
+    to the position after the read record.
+    The value of 'curr_rec_pos' is also updated by the function to
+    point to the beginning of the first field of the record in the
+    join buffer.    
+
+  RETURN VALUE
+    TRUE    there are no more records to read from the join buffer
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::get_record()
+{ 
+  bool res;
+  uchar *prev_rec_ptr= 0;
+  if (with_length)
+    pos+= size_of_rec_len;
+  if (prev_cache)
+  {
+    pos+= prev_cache->get_size_of_rec_offset();
+    prev_rec_ptr= prev_cache->get_rec_ref(pos);
+  }
+  curr_rec_pos= pos;
+  if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER))
+  {
+    pos+= referenced_fields*size_of_fld_ofs;
+    if (prev_cache)
+      prev_cache->get_record_by_pos(prev_rec_ptr);
+  } 
+  return res; 
+}
+
+
+/* 
+  Read a positioned record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record_pos
+    reads the fields of the record positioned at 'rec_ptr' from the join buffer.
+    The function also reads all other fields associated with this record 
+    from the the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+
+  RETURN VALUE
+    none
+*/
+
+void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
+{
+  uchar *save_pos= pos;
+  pos= rec_ptr;
+  read_all_record_fields();
+  pos= save_pos;
+  if (prev_cache)
+  {
+    uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
+    prev_cache->get_record_by_pos(prev_rec_ptr);
+  }
+}
+
+
+/* 
+  Get the match flag from the referenced record: the default implementation
+
+  SYNOPSIS
+    get_match_flag_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_match_flag_by_pos
+    get the match flag for the record pointed by the reference at the position
+    rec_ptr. If the match flag is placed in one of the previous buffers the
+    function first reaches the linked record fields in this buffer.
+
+  RETURN VALUE
+    match flag for the record at the position rec_ptr
+*/
+
+enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
+{
+  Match_flag match_fl= MATCH_NOT_FOUND;
+  if (with_match_flag)
+  {
+    match_fl= (enum Match_flag) rec_ptr[0];
+    return match_fl;
+  }
+  if (prev_cache)
+  {
+    uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
+    return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
+  } 
+  DBUG_ASSERT(0);
+  return match_fl;
+}
+
+
+/* 
+  Calculate the increment of the auxiliary buffer for a record write
+
+  SYNOPSIS
+    aux_buffer_incr()
+      recno   the number of the record the increment to be calculated for
+
+  DESCRIPTION
+    This function calls the aux_buffer_incr the method of the
+    companion member join_tab_scan to calculate the growth of the
+    auxiliary buffer when the recno-th record is added to the
+    join_buffer of this cache.
+
+  RETURN VALUE
+    the number of bytes in the increment 
+*/
+
+uint JOIN_CACHE::aux_buffer_incr(ulong recno)
+{ 
+  return join_tab_scan->aux_buffer_incr(recno);
+}
+
+/* 
+  Read all flag and data fields of a record from the join buffer
+
+  SYNOPSIS
+    read_all_record_fields()
+
+  DESCRIPTION
+    The function reads all flag and data fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos' which is
+    supposed to point to the beginning og the first record field.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    (-1)   if there is no more records in the join buffer
+    length of the data read from the join buffer - otherwise
+*/
+
+uint JOIN_CACHE::read_all_record_fields()
+{
+  uchar *init_pos= pos;
+  
+  if (pos > last_rec_pos || !records)
+    return NO_MORE_RECORDS_IN_BUFFER;
+
+  /* First match flag, read null bitmaps and null_row flag for each table */
+  read_flag_fields();
+ 
+  /* Now read the remaining table fields if needed */
+  CACHE_FIELD *copy= field_descr+flag_fields;
+  CACHE_FIELD *copy_end= field_descr+fields;
+  bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
+  for ( ; copy < copy_end; copy++)
+    read_record_field(copy, blob_in_rec_buff);
+
+  return (uint) (pos-init_pos);
+}
+
+
+/* 
+  Read all flag fields of a record from the join buffer
+
+  SYNOPSIS
+    read_flag_fields()
+
+  DESCRIPTION
+    The function reads all flag fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos'.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_flag_fields()
+{
+  uchar *init_pos= pos;
+  CACHE_FIELD *copy= field_descr;
+  CACHE_FIELD *copy_end= copy+flag_fields;
+  if (with_match_flag)
+  {
+    copy->str[0]= test((Match_flag) pos[0] == MATCH_FOUND);
+    pos+= copy->length;
+    copy++;    
+  } 
+  for ( ; copy < copy_end; copy++)
+  {
+    memcpy(copy->str, pos, copy->length);
+    pos+= copy->length;
+  }
+  return (pos-init_pos);
+}
+
+
+/* 
+  Read a data record field from the join buffer
+
+  SYNOPSIS
+    read_record_field()
+      copy             the descriptor of the data field to be read
+      blob_in_rec_buff indicates whether this is the field from the record
+                       whose blob data are in record buffers
+
+  DESCRIPTION
+    The function reads the data field specified by the parameter copy
+    from the join buffer into the corresponding record buffer. 
+    The field is read starting from the position 'pos'.
+    The data of blob values is not copied from the join buffer.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN VALUE
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
+{
+  uint len;
+  /* Do not copy the field if its value is null */ 
+  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
+    return 0;           
+  if (copy->type == CACHE_BLOB)
+  {
+    Field_blob *blob_field= (Field_blob *) copy->field;
+    /* 
+      Copy the length and the pointer to data but not the blob data 
+      itself to the record buffer
+    */ 
+    if (blob_in_rec_buff)
+    {
+      blob_field->set_image(pos, copy->length+sizeof(char*),
+			    blob_field->charset());
+      len= copy->length+sizeof(char*);
+    }
+    else
+    {
+      blob_field->set_ptr(pos, pos+copy->length);
+      len= copy->length+blob_field->get_length();
+    }
+  }
+  else
+  {
+    switch (copy->type) {
+    case CACHE_VARSTR1:
+      /* Copy the significant part of the short varstring field */
+      len= (uint) pos[0] + 1;
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_VARSTR2:
+      /* Copy the significant part of the long varstring field */
+      len= uint2korr(pos) + 2;
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_STRIPPED:
+      /* Pad the value by spaces that has been stripped off */
+      len= uint2korr(pos);
+      memcpy(copy->str, pos+2, len);
+      memset(copy->str+len, ' ', copy->length-len);
+      len+= 2;
+      break;
+    case CACHE_ROWID:
+      if (!copy->str)
+      {
+        len= copy->length;
+        break;
+      }
+      /* fall through */ 
+    default:
+      /* Copy the entire image of the field from the record buffer */
+      len= copy->length;
+      memcpy(copy->str, pos, len);
+    }
+  }
+  pos+= len;
+  return len;
+}
+
+
+/* 
+  Read a referenced field from the join buffer
+
+  SYNOPSIS
+    read_referenced_field()
+      copy         pointer to the descriptor of the referenced field
+      rec_ptr      pointer to the record that may contain this field
+      len  IN/OUT  total length of the record fields 
+
+  DESCRIPTION
+    The function checks whether copy points to a data field descriptor
+    for this cache object. If it does not then the function returns
+    FALSE. Otherwise the function reads the field of the record in
+    the join buffer pointed by 'rec_ptr' into the corresponding record
+    buffer and returns TRUE.
+    If the value of *len is 0 then the function sets it to the total
+    length of the record fields including possible trailing offset
+    values. Otherwise *len is supposed to provide this value that
+    has been obtained earlier. 
+
+  NOTE
+    If the value of the referenced field is null then the offset
+    for the value is set to 0. If the value of a field can be null
+    then the value of flag_fields is always positive. So the offset
+    for any non-null value cannot be 0 in this case. 
+
+  RETURN VALUE
+    TRUE   'copy' points to a data descriptor of this join cache
+    FALSE  otherwise
+*/
+
+bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
+                                       uchar *rec_ptr, 
+                                       uint *len)
+{
+  uchar *ptr;
+  uint offset;
+  if (copy < field_descr || copy >= field_descr+fields)
+    return FALSE;
+  if (!*len)
+  {
+    /* Get the total length of the record fields */ 
+    uchar *len_ptr= rec_ptr;
+    if (prev_cache)
+      len_ptr-= prev_cache->get_size_of_rec_offset();
+    *len= get_rec_length(len_ptr-size_of_rec_len);
+  }
+  
+  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);  
+  offset= get_fld_offset(ptr+ *len - 
+                         size_of_fld_ofs*
+                         (referenced_fields+1-copy->referenced_field_no));  
+  bool is_null= FALSE;
+  Field *field= copy->field;
+  if (offset == 0 && flag_fields)
+    is_null= TRUE;
+  if (is_null)
+  {
+    field->set_null();
+    if (!field->real_maybe_null())
+      field->table->null_row= 1;
+  }
+  else
+  {
+    uchar *save_pos= pos;
+    field->set_notnull(); 
+    if (!field->real_maybe_null())
+      field->table->null_row= 0;
+    pos= rec_ptr+offset;
+    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
+    pos= save_pos;
+  }
+  return TRUE;
+}
+   
+
+/* 
+  Skip record from join buffer if's already matched: default implementation
+
+  SYNOPSIS
+    skip_if_matched()
+
+  DESCRIPTION
+    This default implementation of the virtual function skip_if_matched
+    skips the next record from the join buffer if its  match flag is set to 
+    MATCH_FOUND.
+    If the record is skipped the value of 'pos' is set to point to the position
+    right after the record.
+
+  RETURN VALUE
+    TRUE   the match flag is set to MATCH_FOUND and the record has been skipped
+    FALSE  otherwise
+*/
+
+bool JOIN_CACHE::skip_if_matched()
+{
+  DBUG_ASSERT(with_length);
+  uint offset= size_of_rec_len;
+  if (prev_cache)
+    offset+= prev_cache->get_size_of_rec_offset();
+  /* Check whether the match flag is MATCH_FOUND */
+  if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND)
+  {
+    pos+= size_of_rec_len + get_rec_length(pos);
+    return TRUE;
+  }
+  return FALSE;
+}      
+
+
+/* 
+  Skip record from join buffer if the match isn't needed: default implementation
+
+  SYNOPSIS
+    skip_if_not_needed_match()
+
+  DESCRIPTION
+    This default implementation of the virtual function skip_if_not_needed_match
+    skips the next record from the join buffer if its match flag is not 
+    MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the
+    first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE
+    and join_tab is the first inner table of an outer join.
+    If the record is skipped the value of 'pos' is set to point to the position
+    right after the record.
+
+  RETURN VALUE
+    TRUE    the record has to be skipped
+    FALSE   otherwise 
+*/
+
+bool JOIN_CACHE::skip_if_not_needed_match()
+{
+  DBUG_ASSERT(with_length);
+  enum Match_flag match_fl;
+  uint offset= size_of_rec_len;
+  if (prev_cache)
+    offset+= prev_cache->get_size_of_rec_offset();
+
+  if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND &&
+      (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) )
+  {
+    pos+= size_of_rec_len + get_rec_length(pos);
+    return TRUE;
+  }
+  return FALSE;
+}      
+
+
+/* 
+  Restore the fields of the last record from the join buffer
+ 
+  SYNOPSIS
+    restore_last_record()
+
+  DESCRIPTION
+    This function restore the values of the fields of the last record put
+    into join buffer in record buffers. The values most probably have been
+    overwritten by the field values from other records when they were read
+    from the join buffer into the record buffer in order to check pushdown
+    predicates.
+
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::restore_last_record()
+{
+  if (records)
+    get_record_by_pos(last_rec_pos);
+}
+
+
+/*
+  Join records from the join buffer with records from the next join table    
+
+  SYNOPSIS
+    join_records()
+      skip_last    do not find matches for the last record from the buffer
+
+  DESCRIPTION
+    The functions extends all records from the join buffer by the matched
+    records from join_tab. In the case of outer join operation it also
+    adds null complementing extensions for the records from the join buffer
+    that have no match. 
+    No extensions are generated for the last record from the buffer if
+    skip_last is true.  
+
+  NOTES
+    The function must make sure that if linked join buffers are used then
+    a join buffer cannot be refilled again until all extensions in the
+    buffers chained to this one are generated.
+    Currently an outer join operation with several inner tables always uses
+    at least two linked buffers with the match join flags placed in the
+    first buffer. Any record composed of rows of the inner tables that
+    matches a record in this buffer must refer to the position of the
+    corresponding match flag.
+
+  IMPLEMENTATION
+    When generating extensions for outer tables of an outer join operation
+    first we generate all extensions for those records from the join buffer
+    that have matches, after which null complementing extension for all
+    unmatched records from the join buffer are generated.  
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
+{
+  JOIN_TAB *tab;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
+  DBUG_ENTER("JOIN_CACHE::join_records");
+
+  if (outer_join_first_inner && !join_tab->first_unmatched)
+    join_tab->not_null_compl= TRUE;   
+
+  if (!join_tab->first_unmatched)
+  {
+    /* Find all records from join_tab that match records from join buffer */
+    rc= join_matching_records(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+    if (outer_join_first_inner)
+    {
+      if (next_cache)
+      {
+        /* 
+          Ensure that all matches for outer records from join buffer are to be
+          found. Now we ensure that all full records are found for records from
+          join buffer. Generally this is an overkill.
+          TODO: Ensure that only matches of the inner table records have to be
+          found for the records from join buffer.
+	*/ 
+        rc= next_cache->join_records(skip_last);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+          goto finish;
+      }
+      join_tab->not_null_compl= FALSE;
+      /* Prepare for generation of null complementing extensions */
+      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+        tab->first_unmatched= join_tab->first_inner;
+    }
+  }
+  if (join_tab->first_unmatched)
+  {
+    if (is_key_access())
+      restore_last_record();
+
+    /* 
+      Generate all null complementing extensions for the records from
+      join buffer that don't have any matching rows from the inner tables.
+    */
+    reset(FALSE);
+    rc= join_null_complements(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+  if(next_cache)
+  {
+    /* 
+      When using linked caches we must ensure the records in the next caches
+      that refer to the records in the join buffer are fully extended.
+      Otherwise we could have references to the records that have been
+      already erased from the join buffer and replaced for new records. 
+    */ 
+    rc= next_cache->join_records(skip_last);
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+ 
+  if (skip_last)
+  {
+    DBUG_ASSERT(!is_key_access());
+    /*
+       Restore the last record from the join buffer to generate
+       all extentions for it.
+    */
+    get_record();		               
+  }
+
+finish:
+  if (outer_join_first_inner)
+  {
+    /* 
+      All null complemented rows have been already generated for all
+      outer records from join buffer. Restore the state of the
+      first_unmatched values to 0 to avoid another null complementing.
+    */
+    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+      tab->first_unmatched= 0;
+  } 
+  restore_last_record();
+  reset(TRUE);
+  DBUG_PRINT("exit", ("rc: %d", rc));
+  DBUG_RETURN(rc);
+}
+
+
+/*   
+  Find matches from the next table for records from the join buffer 
+
+  SYNOPSIS
+    join_matching_records()
+      skip_last    do not look for matches for the last partial join record 
+
+  DESCRIPTION
+    The function retrieves rows of the join_tab table and checks whether they
+    match partial join records from the join buffer. If a match is found
+    the function will call the sub_select function trying to look for matches
+    for the remaining join operations.
+    This function currently is called only from the function join_records.    
+    If the value of skip_last is true the function writes the partial join
+    record from the record buffer into the join buffer to save its value for
+    the future processing in the caller function.
+
+  NOTES
+    If employed by BNL or BNLH join algorithms the function performs a full
+    scan of join_tab for each refill of the join buffer. If BKA or BKAH
+    algorithms are used then the function iterates only over those records
+    from join_tab that can be accessed by keys built over records in the join
+    buffer. To apply a proper method of iteration the function just calls
+    virtual iterator methods (open, next, close) of the member join_tab_scan.
+    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MMR type.
+    The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
+    algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
+    for BKA/BKAH join algorithms.
+    When the function looks for records from the join buffer that would
+    match a record from join_tab it iterates either over all records in
+    the buffer or only over selected records. If BNL join operation is
+    performed all records are checked for the match. If BNLH or BKAH
+    algorithm is employed to join join_tab then the function looks only
+    through the records with the same join key as the record from join_tab.
+    With the BKA join algorithm only one record from the join buffer is checked
+    for a match for any record from join_tab. To iterate over the candidates
+    for a match the virtual function get_next_candidate_for_match is used,
+    while the virtual function prepare_look_for_matches is called to prepare
+    for such iteration proccess.     
+
+  NOTES
+    The function produces all matching extensions for the records in the 
+    join buffer following the path of the employed blocked algorithm. 
+    When an outer join operation is performed all unmatched records from
+    the join buffer must be extended by null values. The function 
+    'join_null_complements' serves this purpose.  
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
+{
+  int error;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  join_tab->table->null_row= 0;
+  bool check_only_first_match= join_tab->check_only_first_match();
+  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
+  DBUG_ENTER("JOIN_CACHE::join_matching_records");
+
+  /* Return at once if there are no records in the join buffer */
+  if (!records)     
+    DBUG_RETURN(NESTED_LOOP_OK);
+ 
+  /* 
+    When joining we read records from the join buffer back into record buffers.
+    If matches for the last partial join record are found through a call to
+    the sub_select function then this partial join record must be saved in the
+    join buffer in order to be restored just before the sub_select call.
+  */             
+  if (skip_last)     
+    put_record();     
+ 
+  if (join_tab->use_quick == 2 && join_tab->select->quick)
+  { 
+    /* A dynamic range access was used last. Clean up after it */
+    delete join_tab->select->quick;
+    join_tab->select->quick= 0;
+  }
+
+  if ((rc= join_tab_execution_startup(join_tab)) < 0)
+    goto finish2;
+
+  /* Prepare to retrieve all records of the joined table */
+  if ((error= join_tab_scan->open()))
+  { 
+    /* 
+      TODO: if we get here, we will assert in net_send_statement(). Add test
+      coverage and fix.
+    */
+    goto finish;
+  }
+  
+  while (!(error= join_tab_scan->next()))   
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish; 
+    }
+
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    
+    /* Prepare to read matching candidates from the join buffer */
+    if (prepare_look_for_matches(skip_last))
+      continue;
+
+    uchar *rec_ptr;
+    /* Read each possible candidate from the buffer and look for matches */
+    while ((rec_ptr= get_next_candidate_for_match()))
+    { 
+      /* 
+        If only the first match is needed, and, it has been already found for
+        the next record read from the join buffer, then the record is skipped.
+        Also those records that must be null complemented are not considered
+        as candidates for matches.
+      */
+      if ((!check_only_first_match && !outer_join_first_inner) ||
+          !skip_next_candidate_for_match(rec_ptr))
+      {
+	read_next_candidate_for_match(rec_ptr);
+        rc= generate_full_extensions(rec_ptr);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+	  goto finish;   
+      }
+    }
+  }
+
+finish: 
+  if (error)                 
+    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+finish2:    
+  join_tab_scan->close();
+  DBUG_RETURN(rc);
+}
+
+
+/*
+  Set match flag for a record in join buffer if it has not been set yet    
+
+  SYNOPSIS
+    set_match_flag_if_none()
+      first_inner     the join table to which this flag is attached to
+      rec_ptr         pointer to the record in the join buffer 
+
+  DESCRIPTION
+    If the records of the table are accumulated in a join buffer the function
+    sets the match flag for the record in the buffer that is referred to by
+    the record from this cache positioned at 'rec_ptr'. 
+    The function also sets the match flag 'found' of the table first inner
+    if it has not been set before. 
+
+  NOTES
+    The function assumes that the match flag for any record in any cache
+    is placed in the first byte occupied by the record fields. 
+
+  RETURN VALUE
+    TRUE   the match flag is set by this call for the first time
+    FALSE  the match flag has been set before this call
+*/ 
+
+bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
+                                        uchar *rec_ptr)
+{
+  if (!first_inner->cache)
+  {
+    /* 
+      Records of the first inner table to which the flag is attached to
+      are not accumulated in a join buffer.
+    */
+    if (first_inner->found)
+      return FALSE;
+    else
+    {
+      first_inner->found= 1;
+      return TRUE;
+    }
+  }
+  JOIN_CACHE *cache= this;
+  while (cache->join_tab != first_inner)
+  {
+    cache= cache->prev_cache;
+    DBUG_ASSERT(cache);
+    rec_ptr= cache->get_rec_ref(rec_ptr);
+  } 
+  if ((Match_flag) rec_ptr[0] != MATCH_FOUND)
+  {
+    rec_ptr[0]= MATCH_FOUND;
+    first_inner->found= 1;
+    return TRUE;  
+  }
+  return FALSE;
+}
+
+
+/*
+  Generate all full extensions for a partial join record in the buffer    
+
+  SYNOPSIS
+    generate_full_extensions()
+      rec_ptr     pointer to the record from join buffer to generate extensions 
+
+  DESCRIPTION
+    The function first checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If it is the
+    case the function calls the join_tab->next_select method to generate
+    all full extension for this partial join match.
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
+{
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  DBUG_ENTER("JOIN_CACHE::generate_full_extensions");
+  
+  /*
+    Check whether the extended partial join record meets
+    the pushdown conditions. 
+  */
+  if (check_match(rec_ptr))
+  {    
+    int res= 0;
+
+    if (!join_tab->check_weed_out_table || 
+        !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
+    {
+      set_curr_rec_link(rec_ptr);
+      rc= (join_tab->next_select)(join, join_tab+1, 0);
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      {
+        reset(TRUE);
+        DBUG_RETURN(rc);
+      }
+    }
+    if (res == -1)
+    {
+      rc= NESTED_LOOP_ERROR;
+      DBUG_RETURN(rc);
+    }
+  }
+  else if (join->thd->is_error())
+    rc= NESTED_LOOP_ERROR;
+  DBUG_RETURN(rc);
+}
+
+
+/*
+  Check matching to a partial join record from the join buffer    
+
+  SYNOPSIS
+    check_match()
+      rec_ptr     pointer to the record from join buffer to check matching to 
+
+  DESCRIPTION
+    The function checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If this is
+    the case and 'join_tab' is the last inner table of a semi-join or an outer
+    join the function turns on the match flag for the 'rec_ptr' record unless
+    it has been already set.
+
+  NOTES
+    Setting the match flag on can trigger re-evaluation of pushdown conditions
+    for the record when join_tab is the last inner table of an outer join.
+      
+  RETURN VALUE
+    TRUE   there is a match
+    FALSE  there is no match
+           In this case the caller must also check thd->is_error() to see
+           if there was a fatal error for the query.
+*/ 
+
+inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
+{
+  /* Check whether pushdown conditions are satisfied */
+  DBUG_ENTER("JOIN_CACHE:check_match");
+
+  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
+    DBUG_RETURN(FALSE);
+
+  if (!join_tab->is_last_inner_table())
+    DBUG_RETURN(TRUE);
+
+  /* 
+     This is the last inner table of an outer join,
+     and maybe of other embedding outer joins, or
+     this is the last inner table of a semi-join.
+  */
+  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
+  do
+  {
+    set_match_flag_if_none(first_inner, rec_ptr);
+    if (first_inner->check_only_first_match() &&
+        !join_tab->first_inner)
+      DBUG_RETURN(TRUE);
+    /* 
+      This is the first match for the outer table row.
+      The function set_match_flag_if_none has turned the flag
+      first_inner->found on. The pushdown predicates for
+      inner tables must be re-evaluated with this flag on.
+      Note that, if first_inner is the first inner table 
+      of a semi-join, but is not an inner table of an outer join
+      such that 'not exists' optimization can  be applied to it, 
+      the re-evaluation of the pushdown predicates is not needed.
+    */      
+    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
+    {
+      if (tab->select && tab->select->skip_record(join->thd) <= 0)
+        DBUG_RETURN(FALSE);
+    }
+  }
+  while ((first_inner= first_inner->first_upper) &&
+         first_inner->last_inner == join_tab);
+    DBUG_RETURN(TRUE);
+} 
+
+
+/*
+  Add null complements for unmatched outer records from join buffer    
+
+  SYNOPSIS
+    join_null_complements()
+      skip_last    do not add null complements for the last record 
+
+  DESCRIPTION
+    This function is called only for inner tables of outer joins.
+    The function retrieves all rows from the join buffer and adds null
+    complements for those of them that do not have matches for outer
+    table records.
+    If the 'join_tab' is the last inner table of the embedding outer 
+    join and the null complemented record satisfies the outer join
+    condition then the the corresponding match flag is turned on
+    unless it has been set earlier. This setting may trigger
+    re-evaluation of pushdown conditions for the record. 
+
+  NOTES
+    The same implementation of the virtual method join_null_complements
+    is used for BNL/BNLH/BKA/BKA join algorthm.
+      
+  RETURN VALUE
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
+{
+  ulonglong cnt; 
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool is_first_inner= join_tab == join_tab->first_unmatched;
+  DBUG_ENTER("JOIN_CACHE::join_null_complements");
+ 
+  /* Return at once if there are no records in the join buffer */
+  if (!records)
+    DBUG_RETURN(NESTED_LOOP_OK);
+  
+  cnt= records - (is_key_access() ? 0 : test(skip_last));
+
+  /* This function may be called only for inner tables of outer joins */ 
+  DBUG_ASSERT(join_tab->first_inner);
+
+  for ( ; cnt; cnt--)
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish;
+    }
+    /* Just skip the whole record if a match for it has been already found */
+    if (!is_first_inner || !skip_if_matched())
+    {
+      get_record();
+      /* The outer row is complemented by nulls for each inner table */
+      restore_record(join_tab->table, s->default_values);
+      mark_as_null_row(join_tab->table);  
+      rc= generate_full_extensions(get_curr_rec());
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+        goto finish;
+    }
+  }
+
+finish:
+  DBUG_RETURN(rc);
+}
+
+
+/*
+  Add a comment on the join algorithm employed by the join cache 
+
+  SYNOPSIS
+    print_explain_comment()
+      str  string to add the comment on the employed join algorithm to
+
+  DESCRIPTION
+    This function adds info on the type of the used join buffer (flat or
+    incremental) and on the type of the the employed join algorithm (BNL,
+    BNLH, BKA or BKAH) to the the end of the sring str.
+
+  RETURN VALUE
+    none
+*/ 
+
+void JOIN_CACHE::print_explain_comment(String *str)
+{
+  str->append(STRING_WITH_LEN(" ("));
+  const char *buffer_type= prev_cache ? "incremental" : "flat";
+  str->append(buffer_type);
+  str->append(STRING_WITH_LEN(", "));
+  
+  const char *join_alg="";
+  switch (get_join_alg()) {
+  case BNL_JOIN_ALG:
+    join_alg= "BNL";
+    break;
+  case BNLH_JOIN_ALG:
+    join_alg= "BNLH";
+    break;
+  case BKA_JOIN_ALG:
+    join_alg= "BKA";
+    break;
+  case BKAH_JOIN_ALG:
+    join_alg= "BKAH";
+    break;
+  default:
+    DBUG_ASSERT(0);
+  }
+
+  str->append(join_alg);
+  str->append(STRING_WITH_LEN(" join"));
+  str->append(STRING_WITH_LEN(")"));
+}
+
+/**
+  get thread handle.
+*/
+
+THD *JOIN_CACHE::thd()
+{
+  return join->thd;
+}
+
+
+static void add_mrr_explain_info(String *str, uint mrr_mode, handler *file)
+{
+  char mrr_str_buf[128]={0};
+  int len;
+  len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf,
+                                           sizeof(mrr_str_buf));
+  if (len > 0)
+  {
+    str->append(STRING_WITH_LEN("; "));
+    str->append(mrr_str_buf, len);
+  }
+}
+
+
+void JOIN_CACHE_BKA::print_explain_comment(String *str)
+{
+  JOIN_CACHE::print_explain_comment(str); 
+  add_mrr_explain_info(str, mrr_mode, join_tab->table->file);
+}
+
+
+void JOIN_CACHE_BKAH::print_explain_comment(String *str)
+{
+  JOIN_CACHE::print_explain_comment(str); 
+  add_mrr_explain_info(str, mrr_mode, join_tab->table->file);
+}
+
+
+/* 
+  Initialize a hashed join cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the cache structure with a hash table in it.
+    The hash table will be used to store key values for the records from
+    the join buffer.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+    The function also initializes a hash table for record keys within the join
+    buffer space.
+
+  NOTES VALUE
+    The function is supposed to be called by the init methods of the classes 
+    derived from JOIN_CACHE_HASHED.
+  
+  RETURN VALUE
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_HASHED::init()
+{
+  int rc= 0;
+  TABLE_REF *ref= &join_tab->ref;
+
+  DBUG_ENTER("JOIN_CACHE_HASHED::init");
+
+  hash_table= 0;
+  key_entries= 0;
+
+  key_length= ref->key_length;
+
+  if ((rc= JOIN_CACHE::init()))
+    DBUG_RETURN (rc);
+
+  if (!(key_buff= (uchar*) sql_alloc(key_length)))
+    DBUG_RETURN(1);
+
+  /* Take into account a reference to the next record in the key chain */
+  pack_length+= get_size_of_rec_offset(); 
+  pack_length_with_blob_ptrs+= get_size_of_rec_offset();
+
+  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
+  ref_used_key_parts= join_tab->ref.key_parts;
+
+  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
+  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;
+
+  KEY_PART_INFO *key_part= ref_key_info->key_part;
+  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
+  for ( ; key_part < key_part_end; key_part++)
+  {
+    if (!key_part->field->eq_cmp_as_binary())
+    {
+      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
+      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
+      break;
+    }
+  }
+      
+  init_hash_table();
+
+  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
+                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+
+  data_fields_offset= 0;
+  if (use_emb_key)
+  {
+    CACHE_FIELD *copy= field_descr;
+    CACHE_FIELD *copy_end= copy+flag_fields;
+    for ( ; copy < copy_end; copy++)
+      data_fields_offset+= copy->length;
+  } 
+
+  DBUG_RETURN(rc);
+}
+
+
+/* 
+  Initialize the hash table of a hashed join cache 
+
+  SYNOPSIS
+    init_hash_table()
+
+  DESCRIPTION
+    The function estimates the number of hash table entries in the hash
+    table to be used and initializes this hash table within the join buffer
+    space.
+
+  RETURN VALUE
+    Currently the function always returns 0;
+*/
+
+int JOIN_CACHE_HASHED::init_hash_table()
+{
+  hash_table= 0;
+  key_entries= 0;
+
+  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
+  uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());  
+  for (size_of_key_ofs= 2;
+       size_of_key_ofs <= max_size_of_key_ofs;
+       size_of_key_ofs+= 2)
+  {    
+    key_entry_length= get_size_of_rec_offset() + // key chain header
+                      size_of_key_ofs +          // reference to the next key 
+                      (use_emb_key ?  get_size_of_rec_offset() : key_length);
+
+    ulong space_per_rec= avg_record_length +
+                         avg_aux_buffer_incr +
+                         key_entry_length+size_of_key_ofs;
+    uint n= buff_size / space_per_rec;
+
+    /*
+      TODO: Make a better estimate for this upper bound of
+            the number of records in in the join buffer.
+    */
+    uint max_n= buff_size / (pack_length-length+
+                             key_entry_length+size_of_key_ofs);
+
+    hash_entries= (uint) (n / 0.7);
+    set_if_bigger(hash_entries, 1);
+    
+    if (offset_size(max_n*key_entry_length) <=
+        size_of_key_ofs)
+      break;
+  }
+   
+  /* Initialize the hash table */ 
+  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
+  cleanup_hash_table();
+  curr_key_entry= hash_table;
+
+  return 0;
+}
+
+
+/*
+  Reallocate the join buffer of a hashed join cache
+ 
+  SYNOPSIS
+    realloc_buffer()
+
+  DESCRITION
+    The function reallocates the join buffer of the hashed join cache.
+    After this it initializes a hash table within the buffer space and
+    resets the join cache for writing.
+
+  NOTES
+    The function assumes that buff_size contains the new value for the join
+    buffer size.  
+
+  RETURN VALUE
+    0   if the buffer has been successfully reallocated
+    1   otherwise
+*/
+
+int JOIN_CACHE_HASHED::realloc_buffer()
+{
+  int rc;
+  free();
+  rc= test(!(buff= (uchar*) my_malloc(buff_size, MYF(0))));
+  init_hash_table();
+  reset(TRUE);
+  return rc;   	
+}
+
+
+/*
+  Get maximum size of the additional space per record used for record keys
+
+  SYNOPSYS
+    get_max_key_addon_space_per_record()
+  
+  DESCRIPTION
+    The function returns the size of the space occupied by one key entry
+    and one hash table entry.
+
+  RETURN VALUE
+    maximum size of the additional space per record that is used to store
+    record keys in the hash table
+*/
+
+uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
+{
+  ulong len;
+  TABLE_REF *ref= &join_tab->ref;
+  /* 
+    The total number of hash entries in the hash tables is bounded by
+    ceiling(N/0.7) where N is the maximum number of records in the buffer.
+    That's why the multiplier 2 is used in the formula below. 
+  */ 
+  len= (use_emb_key ?  get_size_of_rec_offset() : ref->key_length) +
+        size_of_rec_ofs +    // size of the key chain header
+        size_of_rec_ofs +    // >= size of the reference to the next key 
+        2*size_of_rec_ofs;   // >= 2*( size of hash table entry)
+  return len; 
+}    
+
+
+/* 
+  Reset the buffer of a hashed join cache for reading/writing
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function reset the buffer for writing
+
+  DESCRIPTION
+    This implementation of the virtual function reset() resets the join buffer
+    of the JOIN_CACHE_HASHED class for reading or writing.
+    Additionally to what the default implementation does this function
+    cleans up the hash table allocated within the buffer.  
+    
+  RETURN VALUE
+    none
+*/
+ 
+void JOIN_CACHE_HASHED::reset(bool for_writing)
+{
+  this->JOIN_CACHE::reset(for_writing);
+  if (for_writing && hash_table)
+    cleanup_hash_table();
+  curr_key_entry= hash_table;
+}
+
+
+/* 
+  Add a record into the buffer of a hashed join cache
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This implementation of the virtual function put_record writes the next
+    matching record into the join buffer of the JOIN_CACHE_HASHED class.
+    Additionally to what the default implementation does this function
+    performs the following. 
+    It extracts from the record the key value used in lookups for matching
+    records and searches for this key in the hash tables from the join cache.
+    If it finds the key in the hash table it joins the record to the chain
+    of records with this key. If the key is not found in the hash table the
+    key is placed into it and a chain containing only the newly added record 
+    is attached to the key entry. The key value is either placed in the hash 
+    element added for the key or, if the use_emb_key flag is set, remains in
+    the record from the partial join.
+    If the match flag field of a record contains MATCH_IMPOSSIBLE the key is
+    not created for this record. 
+    
+  RETURN VALUE
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::put_record()
+{
+  bool is_full;
+  uchar *key;
+  uint key_len= key_length;
+  uchar *key_ref_ptr;
+  uchar *link= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  uchar *next_ref_ptr= pos;
+
+  pos+= get_size_of_rec_offset();
+  /* Write the record into the join buffer */  
+  if (prev_cache)
+    link= prev_cache->get_curr_rec_link();
+  write_record_data(link, &is_full);
+
+  if (last_written_is_null_compl)
+    return is_full;    
+
+  if (use_emb_key)
+    key= get_curr_emb_key();
+  else
+  {
+    /* Build the key over the fields read into the record buffers */ 
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    key= ref->key_buff;
+  }
+
+  /* Look for the key in the hash table */
+  if (key_search(key, key_len, &key_ref_ptr))
+  {
+    uchar *last_next_ref_ptr;
+    /* 
+      The key is found in the hash table. 
+      Add the record to the circular list of the records attached to this key.
+      Below 'rec' is the record to be added into the record chain for the found
+      key, 'key_ref' points to a flatten representation of the st_key_entry 
+      structure that contains the key and the head of the record chain.
+    */
+    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
+    /* rec->next_rec= key_entry->last_rec->next_rec */
+    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
+    /* key_entry->last_rec->next_rec= rec */ 
+    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
+    /* key_entry->last_rec= rec */
+    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
+  }
+  else
+  {
+    /* 
+      The key is not found in the hash table.
+      Put the key into the join buffer linking it with the keys for the
+      corresponding hash entry. Create a circular list with one element
+      referencing the record and attach the list to the key in the buffer.
+    */
+    uchar *cp= last_key_entry;
+    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
+    store_next_key_ref(key_ref_ptr, cp);
+    store_null_key_ref(cp);
+    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
+    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
+    if (use_emb_key)
+    {
+      cp-= get_size_of_rec_offset();
+      store_emb_key_ref(cp, key);
+    }
+    else
+    {
+      cp-= key_len;
+      memcpy(cp, key, key_len);
+    }
+    last_key_entry= cp;
+    DBUG_ASSERT(last_key_entry >= end_pos);
+    /* Increment the counter of key_entries in the hash table */ 
+    key_entries++;
+  }  
+  return is_full;
+}
+
+
+/*
+  Read the next record from the buffer of a hashed join cache
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    Additionally to what the default implementation of the virtual 
+    function get_record does this implementation skips the link element
+    used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    there are no more records to read from the join buffer
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::get_record()
+{ 
+  pos+= get_size_of_rec_offset();
+  return this->JOIN_CACHE::get_record();
+}
+
+
+/* 
+  Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND
+
+  SYNOPSIS
+    skip_if_matched()
+
+  DESCRIPTION
+    This implementation of the virtual function skip_if_matched does
+    the same as the default implementation does, but it takes into account
+    the link element used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    the match flag is MATCH_FOUND  and the record has been skipped
+    FALSE   otherwise 
+*/
+
+bool JOIN_CACHE_HASHED::skip_if_matched()
+{
+  uchar *save_pos= pos;
+  pos+= get_size_of_rec_offset();
+  if (!this->JOIN_CACHE::skip_if_matched())
+  {
+    pos= save_pos;
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* 
+  Skip record from a hashed join buffer if its match flag dictates to do so
+
+  SYNOPSIS
+    skip_if_uneeded_match()
+
+  DESCRIPTION
+    This implementation of the virtual function skip_if_not_needed_match does
+    the same as the default implementation does, but it takes into account
+    the link element used to connect the records with the same key into a chain. 
+
+  RETURN VALUE
+    TRUE    the match flag dictates to skip the record
+    FALSE   the match flag is off 
+*/
+
+bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
+{
+  uchar *save_pos= pos;
+  pos+= get_size_of_rec_offset();
+  if (!this->JOIN_CACHE::skip_if_not_needed_match())
+  {
+    pos= save_pos;
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* 
+  Search for a key in the hash table of the join buffer
+
+  SYNOPSIS
+    key_search()
+      key             pointer to the key value
+      key_len         key value length
+      key_ref_ptr OUT position of the reference to the next key from 
+                      the hash element for the found key , or
+                      a position where the reference to the the hash 
+                      element for the key is to be added in the
+                      case when the key has not been found
+      
+  DESCRIPTION
+    The function looks for a key in the hash table of the join buffer.
+    If the key is found the functionreturns the position of the reference
+    to the next key from  to the hash element for the given key. 
+    Otherwise the function returns the position where the reference to the
+    newly created hash element for the given key is to be added.  
+
+  RETURN VALUE
+    TRUE    the key is found in the hash table
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
+                                   uchar **key_ref_ptr) 
+{
+  bool is_found= FALSE;
+  uint idx= (this->*hash_func)(key, key_length);
+  uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
+  while (!is_null_key_ref(ref_ptr))
+  {
+    uchar *next_key;
+    ref_ptr= get_next_key_ref(ref_ptr);
+    next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
+                            ref_ptr-key_length;
+
+    if ((this->*hash_cmp_func)(next_key, key, key_len))
+    {
+      is_found= TRUE;
+      break;
+    }
+  }
+  *key_ref_ptr= ref_ptr;
+  return is_found;
+} 
+
+
+/* 
+  Hash function that considers a key in the hash table as byte array
+
+  SYNOPSIS
+    get_hash_idx_simple()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function calculates an index of the hash entry in the hash table
+    of the join buffer for the given key. It considers the key just as
+    a sequence of bytes of the length key_len.
+
+  RETURN VALUE
+    the calculated index of the hash entry for the given key  
+*/
+
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len)
+{
+  ulong nr= 1;
+  ulong nr2= 4;
+  uchar *pos= key;
+  uchar *end= key+key_len;
+  for (; pos < end ; pos++)
+  {
+    nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
+    nr2+= 3;
+  }
+  return nr % hash_entries;
+}
+
+
+/* 
+  Hash function that takes into account collations of the components of the key  
+
+  SYNOPSIS
+    get_hash_idx_complex()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function calculates an index of the hash entry in the hash table
+    of the join buffer for the given key. It takes into account that the
+    components of the key may be of a varchar type with different collations.
+    The function guarantees that the same hash value for any two equal
+    keys that may differ as byte sequences.
+    The function takes the info about the components of the key, their
+    types and used collations from the class member ref_key_info containing
+    a pointer to the descriptor of the index that can be used for the join
+    operation.
+
+  RETURN VALUE
+    the calculated index of the hash entry for the given key  
+*/
+
+inline
+uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
+{
+  return 
+    (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
+}
+
+
+/* 
+  Compare two key entries in the hash table as sequence of bytes
+
+  SYNOPSIS
+    equal_keys_simple()
+      key1            pointer to the first key entry
+      key2            pointer to the second key entry 
+      key_len         the length of the key values
+      
+  DESCRIPTION
+    The function compares two key entries in the hash table key1 and key2
+    as two sequences bytes of the length key_len
+
+  RETURN VALUE
+    TRUE       key1 coincides with key2
+    FALSE      otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2,
+                                          uint key_len)
+{
+  return memcmp(key1, key2, key_len) == 0;
+}
+
+
+/* 
+  Compare two key entries taking into account the used collation
+
+  SYNOPSIS
+    equal_keys_complex()
+      key1            pointer to the first key entry
+      key2            pointer to the second key entry 
+      key_len         the length of the key values
+      
+  DESCRIPTION
+    The function checks whether two key entries in the hash table
+    key1 and key2 are equal as, possibly, compound keys of a certain
+    structure whose components may be of a varchar type and may
+    employ different collations.
+    The descriptor of the key structure is taken from the class
+    member ref_key_info.
+
+  RETURN VALUE
+    TRUE       key1 is equal tokey2
+    FALSE      otherwise
+*/
+
+inline
+bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
+                                          uint key_len)
+{
+  return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
+}
+
+
+/* 
+  Clean up the hash table of the join buffer
+
+  SYNOPSIS
+    cleanup_hash_table()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function cleans up the hash table in the join buffer removing all
+    hash elements from the table. 
+
+  RETURN VALUE
+    none  
+*/
+
+void JOIN_CACHE_HASHED:: cleanup_hash_table()
+{
+  last_key_entry= hash_table;
+  bzero(hash_table, (buff+buff_size)-hash_table);
+  key_entries= 0;
+}
+
+
+/*
+  Check whether all records in a key chain have their match flags set on   
+
+  SYNOPSIS
+    check_all_match_flags_for_key()
+      key_chain_ptr     
+
+  DESCRIPTION
+    This function retrieves records in the given circular chain and checks
+    whether their match flags are set on. The parameter key_chain_ptr shall
+    point to the position in the join buffer storing the reference to the
+    last element of this chain. 
+            
+  RETURN VALUE
+    TRUE   if each retrieved record has its match flag set to MATCH_FOUND
+    FALSE  otherwise 
+*/
+
+bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr)
+{
+  uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
+  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+  do
+  {
+    next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+    uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
+    if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND)
+      return FALSE;
+  }
+  while (next_rec_ref_ptr != last_rec_ref_ptr);
+  return TRUE;
+}
+  
+
+/* 
+  Get the next key built for the records from the buffer of a hashed join cache
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads the next key value stored in the hash table of the
+    join buffer. Depending on the value of the use_emb_key flag of the
+    join cache the value is read either from the table itself or from
+    the record field where it occurs. 
+
+  RETURN VALUE
+    length of the key value - if the starting value of 'cur_key_entry' refers
+    to the position after that referred by the the value of 'last_key_entry',    
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_HASHED::get_next_key(uchar ** key)
+{  
+  if (curr_key_entry == last_key_entry)
+    return 0;
+
+  curr_key_entry-= key_entry_length;
+
+  *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
+
+  DBUG_ASSERT(*key >= buff && *key < hash_table);
+
+  return key_length;
+}
+
+
+/* 
+  Initiate an iteration process over records in the joined table
+
+  SYNOPSIS
+    open()
+
+  DESCRIPTION
+    The function initiates the process of iteration over records from the 
+    joined table recurrently performed by the BNL/BKLH join algorithm.  
+
+  RETURN VALUE   
+    0            the initiation is a success 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN::open()
+{
+  save_or_restore_used_tabs(join_tab, FALSE);
+  is_first_record= TRUE;
+  return join_init_read_record(join_tab);
+}
+
+
+/* 
+  Read the next record that can match while scanning the joined table
+
+  SYNOPSIS
+    next()
+
+  DESCRIPTION
+    The function reads the next record from the joined table that can
+    match some records in the buffer of the join cache 'cache'. To do
+    this the function calls the function that scans table records and
+    looks for the next one that meets the condition pushed to the
+    joined table join_tab.
+
+  NOTES
+    The function catches the signal that kills the query.
+
+  RETURN VALUE   
+    0            the next record exists and has been successfully read 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN::next()
+{
+  int err= 0;
+  int skip_rc;
+  READ_RECORD *info= &join_tab->read_record;
+  SQL_SELECT *select= join_tab->cache_select;
+  if (is_first_record)
+    is_first_record= FALSE;
+  else
+    err= info->read_record(info);
+  if (!err)
+    update_virtual_fields(join->thd, join_tab->table);
+  while (!err && select && (skip_rc= select->skip_record(join->thd)) <= 0)
+  {
+    if (join->thd->killed || skip_rc < 0) 
+      return 1;
+    /* 
+      Move to the next record if the last retrieved record does not
+      meet the condition pushed to the table join_tab.
+    */
+    err= info->read_record(info);
+    if (!err)
+      update_virtual_fields(join->thd, join_tab->table);
+  } 
+  return err; 
+}
+
+
+/*
+  Walk back in join order from join_tab until we encounter a join tab with
+  tab->cache!=NULL, and save/restore tab->table->status along the way.
+
+  @param save TRUE   save 
+              FALSE  restore
+*/
+
+static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save)
+{
+  JOIN_TAB *first= join_tab->bush_root_tab?
+                     join_tab->bush_root_tab->bush_children->start :
+                     join_tab->join->join_tab + join_tab->join->const_tables;
+
+  for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--)
+  {
+    if (tab->bush_children)
+    {
+      for (JOIN_TAB *child= tab->bush_children->start;
+           child != tab->bush_children->end;
+           child++)
+      {
+        if (save)
+          child->table->status= child->status;
+        else
+        {
+          tab->status= tab->table->status;
+          tab->table->status= 0;
+        }
+      }
+    }
+
+    if (save)
+      tab->table->status= tab->status;
+    else
+    {
+      tab->status= tab->table->status;
+      tab->table->status= 0;
+    }
+  }
+}
+
+
+/* 
+  Perform finalizing actions for a scan over the table records
+
+  SYNOPSIS
+    close()
+
+  DESCRIPTION
+    The function performs the necessary restoring actions after
+    the table scan over the joined table has been finished.
+
+  RETURN VALUE   
+    none      
+*/
+
+void JOIN_TAB_SCAN::close()
+{
+  save_or_restore_used_tabs(join_tab, TRUE);
+}
+
+
+/*
+  Prepare to iterate over the BNL join cache buffer to look for matches 
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last   <-> ignore the last record in the buffer
+
+  DESCRIPTION
+    The function prepares the join cache for an iteration over the
+    records in the join buffer. The iteration is performed when looking
+    for matches for the record from the joined table join_tab that 
+    has been placed into the record buffer of the joined table.
+    If the value of the parameter skip_last is TRUE then the last
+    record from the join buffer is ignored.
+    The function initializes the counter of the records that have been
+    not iterated over yet.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last)
+{
+  if (!records)
+    return TRUE;
+  reset(FALSE);
+  rem_records= records-test(skip_last);
+  return rem_records == 0;
+}
+
+
+/*
+  Get next record from the BNL join cache buffer when looking for matches 
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The methods performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_recurrent_candidate_for_match.    
+    This implementation of the virtual method get_next_candidate_for_match
+    just  decrements the counter of the records that are to be iterated over
+    and returns the current value of the cursor 'pos' as the position of 
+    the record to be processed. 
+    
+  RETURN VALUE    
+    pointer to the position right after the prefix of the current record
+    in the join buffer if the there is another record to iterate over,
+    0 - otherwise.  
+*/
+
+uchar *JOIN_CACHE_BNL::get_next_candidate_for_match()
+{
+  if (!rem_records)
+    return 0;
+  rem_records--;
+  return pos+base_prefix_length;
+} 
+
+
+/*
+  Check whether the matching record from the BNL cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after the prefix 
+             of the current record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method skip_if_not_needed_match to check whether the record referenced by
+    ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the
+    first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and
+    join_tab is the first inner table of an outer join.
+    If so, the function just skips this record setting the value of the
+    cursor 'pos' to the position right after it.
+
+  RETURN VALUE    
+    TRUE   the record referenced by rec_ptr has been skipped
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+  pos= rec_ptr-base_prefix_length; 
+  return skip_if_not_needed_match();
+}
+
+
+/*
+  Read next record from the BNL join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after the prefix
+             the current record.
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record to read the record referenced by rec_ptr from
+    the join buffer into the record buffer. If this record refers to the
+    fields in the other join buffers the call of get_record ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  pos= rec_ptr-base_prefix_length;
+  get_record();
+} 
+
+
+/*
+  Initialize the BNL join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BNL.
+
+  NOTES
+    The function first constructs a companion object of the type JOIN_TAB_SCAN,
+    then it calls the init method of the parent class.
+    
+  RETURN VALUE  
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BNL::init()
+{
+  DBUG_ENTER("JOIN_CACHE_BNL::init");
+
+  if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
+    DBUG_RETURN(1);
+
+  DBUG_RETURN(JOIN_CACHE::init());
+}
+
+
+/*
+  Get the chain of records from buffer matching the current candidate for join
+
+  SYNOPSIS
+    get_matching_chain_by_join_key()
+
+  DESCRIPTION
+    This function first build a join key for the record of join_tab that
+    currently is in the join buffer for this table. Then it looks for
+    the key entry with this key in the hash table of the join cache.
+    If such a key entry is found the function returns the pointer to
+    the head of the chain of records in the join_buffer that match this
+    key.
+
+  RETURN VALUE
+    The pointer to the corresponding circular list of records if
+    the key entry with the join key is found, 0 - otherwise.
+*/  
+
+uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key()
+{
+  uchar *key_ref_ptr;
+  TABLE *table= join_tab->table;
+  TABLE_REF *ref= &join_tab->ref;
+  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
+  /* Build the join key value out of the record in the record buffer */
+  key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE);
+  /* Look for this key in the join buffer */
+  if (!key_search(key_buff, key_length, &key_ref_ptr))
+    return 0;
+  return key_ref_ptr+get_size_of_key_offset();
+}
+
+
+/*
+  Prepare to iterate over the BNLH join cache buffer to look for matches 
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last   <-> ignore the last record in the buffer
+
+  DESCRIPTION
+    The function prepares the join cache for an iteration over the
+    records in the join buffer. The iteration is performed when looking
+    for matches for the record from the joined table join_tab that 
+    has been placed into the record buffer of the joined table.
+    If the value of the parameter skip_last is TRUE then the last
+    record from the join buffer is ignored.
+    The function builds the hashed key from the join fields of join_tab
+    and uses this key to look in the hash table of the join cache for
+    the chain of matching records in in the join buffer. If it finds
+    such a chain it sets  the member last_rec_ref_ptr to point to the
+    last link of the chain while setting the member next_rec_ref_po 0.
+    
+  RETURN VALUE    
+    TRUE    there are no matching records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last)
+{
+  uchar *curr_matching_chain;
+  last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
+  if (!(curr_matching_chain= get_matching_chain_by_join_key()))
+    return 1;
+  last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); 
+  return 0;
+}
+
+
+/*
+  Get next record from the BNLH join cache buffer when looking for matches 
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The methods performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_next_candidate_for_match.    
+    This implementation of the virtual method moves to the next record
+    in the chain of all records from the join buffer that are to be
+    equi-joined with the current record from join_tab.
+    
+  RETURN VALUE   
+    pointer to the beginning of the record fields in the join buffer
+    if the there is another record to iterate over, 0 - otherwise.  
+*/
+
+uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match()
+{
+  if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr)
+    return 0;
+  next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ?
+                                                next_matching_rec_ref_ptr :
+                                                last_matching_rec_ref_ptr);
+  return next_matching_rec_ref_ptr+rec_fields_offset; 
+} 
+
+
+/*
+  Check whether the matching record from the BNLH cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method get_match_flag_by_pos to check whether the record referenced
+    by ref_ptr has its match flag set to MATCH_FOUND.
+
+  RETURN VALUE    
+    TRUE   the record referenced by rec_ptr has its match flag set to 
+           MATCH_FOUND
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+ return  join_tab->check_only_first_match() &&
+          (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
+}
+
+
+/*
+  Read next record from the BNLH join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record_by_pos to read the record referenced by rec_ptr
+    from the join buffer into the record buffer. If this record refers to
+    fields in the other join buffers the call of get_record_by_po ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  get_record_by_pos(rec_ptr);
+} 
+
+
+/*
+  Initialize the BNLH join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BNLH.
+
+  NOTES
+    The function first constructs a companion object of the type JOIN_TAB_SCAN,
+    then it calls the init method of the parent class.
+    
+  RETURN VALUE  
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BNLH::init()
+{
+  DBUG_ENTER("JOIN_CACHE_BNLH::init");
+
+  if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
+    DBUG_RETURN(1);
+
+  DBUG_RETURN(JOIN_CACHE_HASHED::init());
+}
+
+
+/* 
+  Calculate the increment of the MRR buffer for a record write       
+
+  SYNOPSIS
+    aux_buffer_incr()
+
+  DESCRIPTION
+    This implementation of the virtual function aux_buffer_incr determines
+    for how much the size of the MRR buffer should be increased when another
+    record is added to the cache.   
+
+  RETURN VALUE
+    the increment of the size of the MRR buffer for the next record
+*/
+
+uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
+{
+  uint incr= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  TABLE *tab= join_tab->table;
+  uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
+  set_if_bigger(rec_per_key, 1);
+  if (recno == 1)
+    incr=  ref->key_length + tab->file->ref_length;
+  incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
+  return incr; 
+}
+
+
+/* 
+  Initiate iteration over records returned by MRR for the current join buffer
+
+  SYNOPSIS
+    open()
+
+  DESCRIPTION
+    The function initiates the process of iteration over the records from 
+    join_tab returned by the MRR interface functions for records from
+    the join buffer. Such an iteration is performed by the BKA/BKAH join
+    algorithm for each new refill of the join buffer.
+    The function calls the MRR handler function multi_range_read_init to
+    initiate this process.
+
+  RETURN VALUE   
+    0            the initiation is a success 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN_MRR::open()
+{
+  handler *file= join_tab->table->file;
+
+  join_tab->table->null_row= 0;
+
+
+  /* Dynamic range access is never used with BKA */
+  DBUG_ASSERT(join_tab->use_quick != 2);
+
+  save_or_restore_used_tabs(join_tab, FALSE);
+
+  init_mrr_buff();
+
+  /* 
+    Prepare to iterate over keys from the join buffer and to get
+    matching candidates obtained with MMR handler functions.
+  */ 
+  if (!file->inited)
+    file->ha_index_init(join_tab->ref.key, 1);
+  ranges= cache->get_number_of_ranges_for_mrr();
+  if (!join_tab->cache_idx_cond)
+    range_seq_funcs.skip_index_tuple= 0;
+  return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
+                                     ranges, mrr_mode, &mrr_buff);
+}
+
+
+/* 
+  Read the next record returned by MRR for the current join buffer
+
+  SYNOPSIS
+    next()
+
+  DESCRIPTION
+    The function reads the next record from the joined table join_tab
+    returned by the MRR handler function multi_range_read_next for
+    the current refill of the join buffer. The record is read into
+    the record buffer used for join_tab records in join operations.
+
+  RETURN VALUE   
+    0            the next record exists and has been successfully read 
+    error code   otherwise     
+*/
+
+int JOIN_TAB_SCAN_MRR::next()
+{
+  char **ptr= (char **) cache->get_curr_association_ptr();
+
+  DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
+  int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
+  if (!rc)
+  {
+    /* 
+      If a record in in an incremental cache contains no fields then the
+      association for the last record in cache will be equal to cache->end_pos
+    */ 
+    DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
+                (uchar *) (*ptr) <= cache->end_pos);
+    update_virtual_fields(join->thd, join_tab->table);
+  }
+  return rc;
+}
+
+
+static 
+void bka_range_seq_key_info(void *init_params, uint *length, 
+                            key_part_map *map)
+{
+  TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref);
+  *length= ref->key_length;
+  *map= (key_part_map(1) << ref->key_parts) - 1;
+}
+
+
+/*
+  Initialize retrieval of range sequence for BKA join algorithm
+    
+  SYNOPSIS
+    bka_range_seq_init()
+     init_params   pointer to the BKA join cache object
+     n_ranges      the number of ranges obtained 
+     flags         combination of MRR flags
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKA
+    object. The function prepares for an iteration over the join keys
+    built for all records from the cache join buffer.
+
+  NOTE
+    This function are used only as a callback function.    
+
+  RETURN VALUE
+    init_param value that is to be used as a parameter of bka_range_seq_next()
+*/    
+
+static 
+range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  DBUG_ENTER("bka_range_seq_init");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
+  cache->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the next range/key over records from the join buffer used by a BKA cache
+    
+  SYNOPSIS
+    bka_range_seq_next()
+      seq        the value returned by  bka_range_seq_init
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA
+    object. The function returns a pointer to the range descriptor
+    for the key built over the next record from the join buffer.
+
+  NOTE
+    This function are used only as a callback function.
+   
+  RETURN VALUE
+    FALSE   ok, the range structure filled with info about the next range/key
+    TRUE    no more ranges
+*/    
+
+static 
+bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bka_range_seq_next");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+  {
+    start_key->keypart_map= (1 << ref->key_parts) - 1;
+    start_key->flag= HA_READ_KEY_EXACT;
+    range->end_key= *start_key;
+    range->end_key.flag= HA_READ_AFTER_KEY;
+    range->ptr= (char *) cache->get_curr_rec();
+    range->range_flag= EQ_RANGE;
+    DBUG_RETURN(0);
+  } 
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKA buffer
+
+  SYNOPSIS
+    bka_range_seq_skip_record()
+      seq              value returned by bka_range_seq_init()
+      range_info       information about the next range
+      rowid [NOT USED] rowid of the record to be checked 
+
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
+    The function returns TRUE if the record with this range_info 
+    is to be filtered out from the stream of records returned by 
+    multi_range_read_next(). 
+
+  NOTE
+    This function are used only as a callback function.
+
+  RETURN VALUE
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static 
+bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
+{
+  DBUG_ENTER("bka_range_seq_skip_record");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  bool res= cache->get_match_flag_by_pos((uchar *) range_info) ==
+            JOIN_CACHE::MATCH_FOUND;
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Check if the record combination from BKA cache matches the index condition
+
+  SYNOPSIS
+    bka_skip_index_tuple()
+      rseq             value returned by bka_range_seq_init()
+      range_info       record chain for the next range/key returned by MRR
+    
+  DESCRIPTION
+    This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN VALUE
+    0    The record combination satisfies the index condition
+    1    Otherwise
+*/
+
+static 
+bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
+{
+  DBUG_ENTER("bka_skip_index_tuple");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  THD *thd= cache->thd();
+  bool res;
+  status_var_increment(thd->status_var.ha_icp_attempts);
+  if (!(res= cache->skip_index_tuple(range_info)))
+    status_var_increment(thd->status_var.ha_icp_match);
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Prepare to read the record from BKA cache matching the current joined record   
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last <-> ignore the last record in the buffer (always unused here)
+
+  DESCRIPTION
+    The function prepares to iterate over records in the join cache buffer
+    matching the record loaded into the record buffer for join_tab when
+    performing join operation by BKA join algorithm. With BKA algorithms the
+    record loaded into the record buffer for join_tab always has a direct
+    reference to the matching records from the join buffer. When the regular
+    BKA join algorithm is employed the record from join_tab can refer to
+    only one such record.   
+    The function sets the counter of the remaining records from the cache 
+    buffer that would match the current join_tab record to 1.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last)
+{
+  if (!records)
+    return TRUE;
+  rem_records= 1;
+  return FALSE;
+}
+
+
+/*
+  Get the record from the BKA cache matching the current joined record   
+
+  SYNOPSIS
+    get_next_candidate_for_match
+
+  DESCRIPTION
+    This method is used for iterations over the records from the join
+    cache buffer when looking for matches for records from join_tab.
+    The method performs the necessary preparations to read the next record
+    from the join buffer into the record buffer by the method
+    read_next_candidate_for_match, or, to skip the next record from the join 
+    buffer by the method skip_if_not_needed_match.    
+    This implementation of the virtual method get_next_candidate_for_match
+    just  decrements the counter of the records that are to be iterated over
+    and returns the value of curr_association as a reference to the position
+    of the beginning of the record fields in the buffer.
+    
+  RETURN VALUE   
+    pointer to the start of the record fields in the join buffer
+    if the there is another record to iterate over, 0 - otherwise.  
+*/
+
+uchar *JOIN_CACHE_BKA::get_next_candidate_for_match()
+{
+  if (!rem_records)
+    return 0;
+  rem_records--;
+  return curr_association;
+} 
+
+
+/*
+  Check whether the matching record from the BKA cache is to be skipped 
+
+  SYNOPSIS
+    skip_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual function just calls the
+    method get_match_flag_by_pos to check whether the record referenced
+    by ref_ptr has its match flag set to MATCH_FOUND.
+
+  RETURN VALUE   
+    TRUE   the record referenced by rec_ptr has its match flag set to
+           MATCH_FOUND
+    FALSE  otherwise  
+*/
+
+bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr)
+{
+  return join_tab->check_only_first_match() && 
+         (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
+}
+
+
+/*
+  Read the next record from the BKA join cache buffer when looking for matches 
+
+  SYNOPSIS
+    read_next_candidate_for_match
+    rec_ptr  pointer to the position in the join buffer right after 
+             the previous record
+
+  DESCRIPTION
+    This implementation of the virtual method read_next_candidate_for_match
+    calls the method get_record_by_pos to read the record referenced by rec_ptr
+    from the join buffer into the record buffer. If this record refers to
+    fields in the other join buffers the call of get_record_by_po ensures that
+    these fields are read into the corresponding record buffers as well.
+    This function is supposed to be called after a successful call of
+    the method get_next_candidate_for_match.
+    
+  RETURN VALUE   
+    none
+*/
+
+void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
+{
+  get_record_by_pos(rec_ptr);
+} 
+
+
+/*
+  Initialize the BKA join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BKA.
+
+  NOTES
+    The function first constructs a companion object of the type 
+    JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+    
+  RETURN VALUE   
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKA::init()
+{
+  int res;
+  bool check_only_first_match= join_tab->check_only_first_match();
+
+  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
+                           bka_range_seq_init, 
+                           bka_range_seq_next,
+                           check_only_first_match ?
+                             bka_range_seq_skip_record : 0,
+                           bka_skip_index_tuple };
+
+  DBUG_ENTER("JOIN_CACHE_BKA::init");
+
+  JOIN_TAB_SCAN_MRR *jsm;
+  if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab, 
+                                                  mrr_mode, rs_funcs)))
+    DBUG_RETURN(1);
+
+  if ((res= JOIN_CACHE::init()))
+    DBUG_RETURN(res);
+
+  if (use_emb_key)
+    jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS;
+
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Get the key built over the next record from BKA join buffer
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads key fields from the current record in the join buffer.
+    and builds the key value out of these fields that will be used to access
+    the 'join_tab' table. Some of key fields may belong to previous caches.
+    They are accessed via record references to the record parts stored in the
+    previous join buffers. The other key fields always are placed right after
+    the flag fields of the record.
+    If the key is embedded, which means that its value can be read directly
+    from the join buffer, then *key is set to the beginning of the key in
+    this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
+    The function returns the length of the key if it succeeds ro read it.
+    If is assumed that the functions starts reading at the position of
+    the record length which is provided for each records in a BKA cache.
+    After the key is built the 'pos' value points to the first position after
+    the current record.
+    The function just skips the records with MATCH_IMPOSSIBLE in the
+    match flag field if there is any. 
+    The function returns 0 if the initial position is after the beginning
+    of the record fields for last record from the join buffer. 
+
+  RETURN VALUE
+    length of the key value - if the starting value of 'pos' points to
+    the position before the fields for the last record,
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
+{
+  uint len;
+  uint32 rec_len;
+  uchar *init_pos;
+  JOIN_CACHE *cache;
+  
+start:
+
+  /* Any record in a BKA cache is prepended with its length */
+  DBUG_ASSERT(with_length);
+   
+  if ((pos+size_of_rec_len) > last_rec_pos || !records)
+    return 0;
+
+  /* Read the length of the record */
+  rec_len= get_rec_length(pos);
+  pos+= size_of_rec_len; 
+  init_pos= pos;
+
+  /* Read a reference to the previous cache if any */
+  if (prev_cache)
+    pos+= prev_cache->get_size_of_rec_offset();
+
+  curr_rec_pos= pos;
+
+  /* Read all flag fields of the record */
+  read_flag_fields();
+
+  if (with_match_flag && 
+      (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
+  {
+    pos= init_pos+rec_len;
+    goto start;
+  }
+ 
+  if (use_emb_key)
+  {
+    /* An embedded key is taken directly from the join buffer */
+    *key= pos;
+    len= emb_key_length;
+  }
+  else
+  {
+    /* Read key arguments from previous caches if there are any such fields */
+    if (external_key_arg_fields)
+    {
+      uchar *rec_ptr= curr_rec_pos;
+      uint key_arg_count= external_key_arg_fields;
+      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
+      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
+      { 
+        uint len= 0;
+        DBUG_ASSERT(cache);
+        rec_ptr= cache->get_rec_ref(rec_ptr);
+        while (!cache->referenced_fields)
+        {
+          cache= cache->prev_cache;
+          DBUG_ASSERT(cache);
+          rec_ptr= cache->get_rec_ref(rec_ptr);
+        }
+        while (key_arg_count && 
+               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
+        {
+          copy_ptr++;
+          --key_arg_count;
+        }
+      }
+    }
+    
+    /* 
+      Read the other key arguments from the current record. The fields for
+      these arguments are always first in the sequence of the record's fields.
+    */     
+    CACHE_FIELD *copy= field_descr+flag_fields;
+    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
+    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
+    for ( ; copy < copy_end; copy++)
+      read_record_field(copy, blob_in_rec_buff);
+    
+    /* Build the key over the fields read into the record buffers */ 
+    TABLE_REF *ref= &join_tab->ref;
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    *key= ref->key_buff;
+    len= ref->key_length;
+  }
+
+  pos= init_pos+rec_len;
+
+  return len;
+} 
+
+
+/*
+  Check the index condition of the joined table for a record from the BKA cache
+
+  SYNOPSIS
+    skip_index_tuple()
+      range_info       pointer to the record returned by MRR 
+    
+  DESCRIPTION
+    This function is invoked from MRR implementation to check if an index
+    tuple matches the index condition. It is used in the case where the index
+    condition actually depends on both columns of the used index and columns
+    from previous tables.
+   
+  NOTES 
+    Accessing columns of the previous tables requires special handling with
+    BKA. The idea of BKA is to collect record combinations in a buffer and 
+    then do a batch of ref access lookups, i.e. by the time we're doing a
+    lookup its previous-records-combination is not in prev_table->record[0]
+    but somewhere in the join buffer.    
+    We need to get it from there back into prev_table(s)->record[0] before we
+    can evaluate the index condition, and that's why we need this function
+    instead of regular IndexConditionPushdown.
+
+  NOTES
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN VALUE
+    1    the record combination does not satisfies the index condition
+    0    otherwise
+*/
+
+bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info)
+{
+  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
+  get_record_by_pos((uchar*)range_info);
+  DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
+}
+
+
+
+/*
+  Initialize retrieval of range sequence for the BKAH join algorithm
+    
+  SYNOPSIS
+    bkah_range_seq_init()
+      init_params   pointer to the BKAH join cache object
+      n_ranges      the number of ranges obtained 
+      flags         combination of MRR flags
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKAH
+    object. The function prepares for an iteration over distinct join keys
+    built over the records from the cache join buffer.
+
+  NOTE
+    This function are used only as a callback function.    
+
+  RETURN VALUE
+    init_param    value that is to be used as a parameter of 
+                  bkah_range_seq_next()
+*/    
+
+static 
+range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  DBUG_ENTER("bkah_range_seq_init");
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param;
+  cache->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the next range/key over records from the join buffer of a BKAH cache  
+    
+  SYNOPSIS
+    bkah_range_seq_next()
+      seq        value returned by  bkah_range_seq_init()
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKAH 
+    object. The function returns a pointer to the range descriptor
+    for the next unique key built over records from the join buffer.
+
+  NOTE
+    This function are used only as a callback function.
+   
+  RETURN VALUE
+    FALSE  ok, the range structure filled with info about the next range/key
+    TRUE   no more ranges
+*/    
+
+static 
+bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bkah_range_seq_next");
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+  {
+    start_key->keypart_map= (1 << ref->key_parts) - 1;
+    start_key->flag= HA_READ_KEY_EXACT;
+    range->end_key= *start_key;
+    range->end_key.flag= HA_READ_AFTER_KEY;
+    range->ptr= (char *) cache->get_curr_key_chain();
+    range->range_flag= EQ_RANGE;
+    DBUG_RETURN(0);
+  } 
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKAH join buffer
+
+  SYNOPSIS
+    bkah_range_seq_skip_record()
+      seq              value returned by bkah_range_seq_init()
+      range_info       information about the next range/key returned by MRR
+      rowid [NOT USED] rowid of the record to be checked (not used)
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKAH
+    object. The function returns TRUE if the record with this range_info
+    is to be filtered out from the stream of records returned by
+    multi_range_read_next(). 
+
+  NOTE
+    This function are used only as a callback function.
+
+  RETURN VALUE
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static 
+bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info,
+                                uchar *rowid)
+{
+  DBUG_ENTER("bkah_range_seq_skip_record");
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+  bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
+  DBUG_RETURN(res);
+}
+
+ 
+/*
+  Check if the record combination from BKAH cache matches the index condition
+
+  SYNOPSIS
+    bkah_skip_index_tuple()
+      rseq             value returned by bka_range_seq_init()
+      range_info       record chain for the next range/key returned by MRR
+    
+  DESCRIPTION
+    This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN VALUE
+    0    some records from the chain satisfy the index condition
+    1    otherwise
+*/
+
+static 
+bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
+{
+  DBUG_ENTER("bka_unique_skip_index_tuple");
+  JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
+  THD *thd= cache->thd();
+  bool res;
+  status_var_increment(thd->status_var.ha_icp_attempts);
+  if (!(res= cache->skip_index_tuple(range_info)))
+    status_var_increment(thd->status_var.ha_icp_match);
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Prepare to read record from BKAH cache matching the current joined record   
+
+  SYNOPSIS
+    prepare_look_for_matches()
+      skip_last <-> ignore the last record in the buffer (always unused here)
+
+  DESCRIPTION
+    The function prepares to iterate over records in the join cache buffer
+    matching the record loaded into the record buffer for join_tab when
+    performing join operation by BKAH join algorithm. With BKAH algorithm, if
+    association labels are used, then record loaded into the record buffer 
+    for join_tab always has a direct reference to the chain of the mathing
+    records from the join buffer. If association labels are not used then
+    then the chain of the matching records is obtained by the call of the
+    get_key_chain_by_join_key function.
+    
+  RETURN VALUE   
+    TRUE    there are no records in the buffer to iterate over 
+    FALSE   otherwise
+*/
+    
+bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last)
+{
+  last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
+  if (no_association &&
+      (curr_matching_chain= get_matching_chain_by_join_key()))
+    return 1;
+  last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
+  return 0;
+}
+
+/*
+  Initialize the BKAH join cache 
+
+  SYNOPSIS
+    init
+
+  DESCRIPTION
+    The function initializes the cache structure. It is supposed to be called
+    right after a constructor for the JOIN_CACHE_BKAH.
+
+  NOTES
+    The function first constructs a companion object of the type 
+    JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
+    
+  RETURN VALUE   
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKAH::init()
+{
+  bool check_only_first_match= join_tab->check_only_first_match();
+
+  no_association= test(mrr_mode & HA_MRR_NO_ASSOCIATION);
+
+  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
+                           bkah_range_seq_init,
+                           bkah_range_seq_next,
+                           check_only_first_match && !no_association ?
+                             bkah_range_seq_skip_record : 0,
+                           bkah_skip_index_tuple };
+
+  DBUG_ENTER("JOIN_CACHE_BKAH::init");
+
+  if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, 
+                                             mrr_mode, rs_funcs)))
+    DBUG_RETURN(1);
+
+  DBUG_RETURN(JOIN_CACHE_HASHED::init());
+}
+
+
+/*
+  Check the index condition of the joined table for a record from the BKA cache
+
+  SYNOPSIS
+    skip_index_tuple()
+      range_info       record chain returned by MRR 
+    
+  DESCRIPTION
+    See JOIN_CACHE_BKA::skip_index_tuple().
+    This function is the variant for use with rhe class JOIN_CACHE_BKAH.
+    The difference from JOIN_CACHE_BKA case is that there may be multiple
+    previous table record combinations that share the same key(MRR range).
+    As a consequence, we need to loop through the chain of all table record
+    combinations that match the given MRR range key range_info until we find
+    one that satisfies the index condition.
+
+  NOTE
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN VALUE
+    1    any record combination from the chain referred by range_info
+         does not satisfy the index condition
+    0    otherwise
+
+
+*/
+
+bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info)
+{
+  uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info);
+  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+  DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple");
+  do
+  {
+    next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+    uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset;
+    get_record_by_pos(rec_ptr);
+    if (join_tab->cache_idx_cond->val_int())
+      DBUG_RETURN(FALSE);
+  } while(next_rec_ref_ptr != last_rec_ref_ptr);
+  DBUG_RETURN(TRUE);
+}
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
new file mode 100644
index 00000000000..ba8e4ba8e4a
--- /dev/null
+++ b/sql/sql_join_cache.h
@@ -0,0 +1,1414 @@
+/*
+  This file contains declarations for implementations
+  of block based join algorithms
+*/
+
+#define JOIN_CACHE_INCREMENTAL_BIT           1
+#define JOIN_CACHE_HASHED_BIT                2
+#define JOIN_CACHE_BKA_BIT                   4
+
+/* 
+  Categories of data fields of variable length written into join cache buffers.
+  The value of any of these fields is written into cache together with the
+  prepended length of the value.     
+*/
+#define CACHE_BLOB      1        /* blob field  */
+#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
+#define CACHE_VARSTR1   3        /* short string value (length takes 1 byte) */ 
+#define CACHE_VARSTR2   4        /* long string value (length takes 2 bytes) */
+#define CACHE_ROWID     5        /* ROWID field */
+
+/*
+  The CACHE_FIELD structure used to describe fields of records that
+  are written into a join cache buffer from record buffers and backward.
+*/
+typedef struct st_cache_field {
+  uchar *str;   /**< buffer from/to where the field is to be copied */ 
+  uint length;  /**< maximal number of bytes to be copied from/to str */
+  /* 
+    Field object for the moved field
+    (0 - for a flag field, see JOIN_CACHE::create_flag_fields).
+  */
+  Field *field;
+  uint type;    /**< category of the of the copied field (CACHE_BLOB et al.) */
+  /* 
+    The number of the record offset value for the field in the sequence
+    of offsets placed after the last field of the record. These
+    offset values are used to access fields referred to from other caches.
+    If the value is 0 then no offset for the field is saved in the
+    trailing sequence of offsets.
+  */ 
+  uint referenced_field_no; 
+  /* The remaining structure fields are used as containers for temp values */
+  uint blob_length; /**< length of the blob to be copied */
+  uint offset;      /**< field offset to be saved in cache buffer */
+} CACHE_FIELD;
+
+
+class JOIN_TAB_SCAN;
+
+
+/*
+  JOIN_CACHE is the base class to support the implementations of 
+  - Block Nested Loop (BNL) Join Algorithm,
+  - Block Nested Loop Hash (BNLH) Join Algorithm,
+  - Batched Key Access (BKA) Join Algorithm.
+  The first algorithm is supported by the derived class JOIN_CACHE_BNL,
+  the second algorithm is supported by the derived class JOIN_CACHE_BNLH,
+  while the third algorithm is implemented in two variant supported by
+  the classes JOIN_CACHE_BKA and JOIN_CACHE_BKAH.
+  These three algorithms have a lot in common. Each of them first accumulates
+  the records of the left join operand in a join buffer and then searches for
+  matching rows of the second operand for all accumulated records.
+  For the first two algorithms this strategy saves on logical I/O operations:
+  the entire set of records from the join buffer requires only one look-through
+  of the records provided by the second operand. 
+  For the third algorithm the accumulation of records allows to optimize
+  fetching rows of the second operand from disk for some engines (MyISAM, 
+  InnoDB), or to minimize the number of round-trips between the Server and
+  the engine nodes (NDB Cluster).        
+*/ 
+
+class JOIN_CACHE :public Sql_alloc
+{
+
+private:
+
+  /* Size of the offset of a record from the cache */   
+  uint size_of_rec_ofs;    
+  /* Size of the length of a record in the cache */
+  uint size_of_rec_len;
+  /* Size of the offset of a field within a record in the cache */   
+  uint size_of_fld_ofs;
+
+protected:
+       
+  /* 3 functions below actually do not use the hidden parameter 'this' */ 
+
+  /* Calculate the number of bytes used to store an offset value */
+  uint offset_size(uint len)
+  { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); }
+
+  /* Get the offset value that takes ofs_sz bytes at the position ptr */
+  ulong get_offset(uint ofs_sz, uchar *ptr)
+  {
+    switch (ofs_sz) {
+    case 1: return uint(*ptr);
+    case 2: return uint2korr(ptr);
+    case 4: return uint4korr(ptr);
+    }
+    return 0;
+  }
+
+  /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */ 
+  void store_offset(uint ofs_sz, uchar *ptr, ulong ofs)
+  {
+    switch (ofs_sz) {
+    case 1: *ptr= (uchar) ofs; return;
+    case 2: int2store(ptr, (uint16) ofs); return;
+    case 4: int4store(ptr, (uint32) ofs); return;
+    }
+  }
+  
+  /* 
+    The maximum total length of the fields stored for a record in the cache.
+    For blob fields only the sizes of the blob lengths are taken into account. 
+  */
+  uint length;
+
+  /* 
+    Representation of the executed multi-way join through which all needed
+    context can be accessed.  
+  */   
+  JOIN *join;  
+
+  /*
+    JOIN_TAB of the first table that can have it's fields in the join cache. 
+    That is, tables in the [start_tab, tab) range can have their fields in the
+    join cache. 
+    If a join tab in the range represents an SJM-nest, then all tables from the
+    nest can have their fields in the join cache, too.
+  */
+  JOIN_TAB *start_tab;
+
+  /* 
+    The total number of flag and data fields that can appear in a record
+    written into the cache. Fields with null values are always skipped 
+    to save space. 
+  */
+  uint fields;
+
+  /* 
+    The total number of flag fields in a record put into the cache. They are
+    used for table null bitmaps, table null row flags, and an optional match
+    flag. Flag fields go before other fields in a cache record with the match
+    flag field placed always at the very beginning of the record.
+  */
+  uint flag_fields;
+
+  /* The total number of blob fields that are written into the cache */ 
+  uint blobs;
+
+  /* 
+    The total number of fields referenced from field descriptors for other join
+    caches. These fields are used to construct key values.
+    When BKA join algorithm is employed the constructed key values serve to
+    access matching rows with index lookups.
+    The key values are put into a hash table when the BNLH join algorithm
+    is employed and when BKAH is used for the join operation. 
+  */   
+  uint referenced_fields;
+   
+  /* 
+    The current number of already created data field descriptors.
+    This number can be useful for implementations of the init methods.  
+  */
+  uint data_field_count; 
+
+  /* 
+    The current number of already created pointers to the data field
+    descriptors. This number can be useful for implementations of
+    the init methods.  
+  */
+  uint data_field_ptr_count;
+ 
+  /* 
+    Array of the descriptors of fields containing 'fields' elements.
+    These are all fields that are stored for a record in the cache. 
+  */
+  CACHE_FIELD *field_descr;
+
+  /* 
+    Array of pointers to the blob descriptors that contains 'blobs' elements.
+  */
+  CACHE_FIELD **blob_ptr;
+
+  /* 
+    This flag indicates that records written into the join buffer contain
+    a match flag field. The flag must be set by the init method. 
+  */
+  bool with_match_flag; 
+  /*
+    This flag indicates that any record is prepended with the length of the
+    record which allows us to skip the record or part of it without reading.
+  */
+  bool with_length;
+
+  /* 
+    The maximal number of bytes used for a record representation in
+    the cache excluding the space for blob data. 
+    For future derived classes this representation may contains some
+    redundant info such as a key value associated with the record.     
+  */
+  uint pack_length;
+  /* 
+    The value of pack_length incremented by the total size of all 
+    pointers of a record in the cache to the blob data. 
+  */
+  uint pack_length_with_blob_ptrs;
+
+  /* 
+    The total size of the record base prefix. The base prefix of record may
+    include the following components:
+     - the length of the record
+     - the link to a record in a previous buffer.
+    Each record in the buffer are supplied with the same set of the components.
+  */
+  uint base_prefix_length;
+
+  /*
+    The expected length of a record in the join buffer together with     
+    all prefixes and postfixes
+  */
+  size_t avg_record_length;
+
+  /* The expected size of the space per record in the auxiliary buffer */
+  size_t avg_aux_buffer_incr;
+
+  /* Expected join buffer space used for one record */
+  size_t space_per_record; 
+
+  /* Pointer to the beginning of the join buffer */
+  uchar *buff;         
+  /* 
+    Size of the entire memory allocated for the join buffer.
+    Part of this memory may be reserved for the auxiliary buffer.
+  */ 
+  size_t buff_size;
+  /* The minimal join buffer size when join buffer still makes sense to use */
+  size_t min_buff_size;
+  /* The maximum expected size if the join buffer to be used */
+  size_t max_buff_size;
+  /* Size of the auxiliary buffer */ 
+  size_t aux_buff_size;
+
+  /* The number of records put into the join buffer */ 
+  size_t records;
+  /* 
+    The number of records in the fully refilled join buffer of
+    the minimal size equal to min_buff_size
+  */
+  size_t min_records;
+  /*
+    The maximum expected number of records to be put in the join buffer
+    at one refill 
+  */
+  size_t max_records;
+
+  /* 
+    Pointer to the current position in the join buffer.
+    This member is used both when writing to buffer and
+    when reading from it.
+  */
+  uchar *pos;
+  /* 
+    Pointer to the first free position in the join buffer,
+    right after the last record into it.
+  */
+  uchar *end_pos; 
+
+  /* 
+    Pointer to the beginning of the first field of the current read/write
+    record from the join buffer. The value is adjusted by the 
+    get_record/put_record functions.
+  */
+  uchar *curr_rec_pos;
+  /* 
+    Pointer to the beginning of the first field of the last record
+    from the join buffer.
+  */
+  uchar *last_rec_pos;
+
+  /* 
+    Flag is set if the blob data for the last record in the join buffer
+    is in record buffers rather than in the join cache.
+  */
+  bool last_rec_blob_data_is_in_rec_buff;
+
+  /* 
+    Pointer to the position to the current record link. 
+    Record links are used only with linked caches. Record links allow to set
+    connections between parts of one join record that are stored in different
+    join buffers.
+    In the simplest case a record link is just a pointer to the beginning of
+    the record stored in the buffer.
+    In a more general case a link could be a reference to an array of pointers
+    to records in the buffer.
+  */
+  uchar *curr_rec_link;
+
+  /* 
+    This flag is set to TRUE if join_tab is the first inner table of an outer
+    join and  the latest record written to the join buffer is detected to be
+    null complemented after checking on conditions over the outer tables for
+    this outer join operation
+  */ 
+  bool last_written_is_null_compl;
+
+  /*
+    The number of fields put in the join buffer of the join cache that are
+    used in building keys to access the table join_tab
+  */
+  uint local_key_arg_fields;
+  /* 
+    The total number of the fields in the previous caches that are used
+    in building keys to access the table join_tab
+  */
+  uint external_key_arg_fields;
+
+  /* 
+    This flag indicates that the key values will be read directly from the join
+    buffer. It will save us building key values in the key buffer.
+  */
+  bool use_emb_key;
+  /* The length of an embedded key value */ 
+  uint emb_key_length;
+
+  /*
+    This object provides the methods to iterate over records of
+    the joined table join_tab when looking for join matches between
+    records from join buffer and records from join_tab.
+    BNL and BNLH join algorithms retrieve all records from join_tab,
+    while BKA/BKAH algorithm iterates only over those records from
+    join_tab that can be accessed by look-ups with join keys built
+    from records in join buffer.  
+  */
+  JOIN_TAB_SCAN *join_tab_scan;
+
+  void calc_record_fields();     
+  void collect_info_on_key_args();
+  int alloc_fields();
+  void create_flag_fields();
+  void create_key_arg_fields();
+  void create_remaining_fields();
+  void set_constants();
+  int alloc_buffer();
+
+  /* Shall reallocate the join buffer */
+  virtual int realloc_buffer();
+  
+  /* Check the possibility to read the access keys directly from join buffer */ 
+  bool check_emb_key_usage();
+
+  uint get_size_of_rec_offset() { return size_of_rec_ofs; }
+  uint get_size_of_rec_length() { return size_of_rec_len; }
+  uint get_size_of_fld_offset() { return size_of_fld_ofs; }
+
+  uchar *get_rec_ref(uchar *ptr)
+  {
+    return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs);
+  }
+  ulong get_rec_length(uchar *ptr)
+  { 
+    return (ulong) get_offset(size_of_rec_len, ptr);
+  }
+  ulong get_fld_offset(uchar *ptr)
+  { 
+    return (ulong) get_offset(size_of_fld_ofs, ptr);
+  }
+
+  void store_rec_ref(uchar *ptr, uchar* ref)
+  {
+    store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff));
+  }
+  void store_rec_length(uchar *ptr, ulong len)
+  {
+    store_offset(size_of_rec_len, ptr, len);
+  }
+  void store_fld_offset(uchar *ptr, ulong ofs)
+  {
+    store_offset(size_of_fld_ofs, ptr, ofs);
+  }
+
+  /* Write record fields and their required offsets into the join buffer */ 
+  uint write_record_data(uchar *link, bool *is_full);
+
+  /* Get the total length of all prefixes of a record in the join buffer */ 
+  virtual uint get_prefix_length() { return base_prefix_length; }
+  /* Get maximum total length of all affixes of a record in the join buffer */
+  virtual uint get_record_max_affix_length(); 
+
+  /* 
+    Shall get maximum size of the additional space per record used for
+    record keys
+  */
+  virtual uint get_max_key_addon_space_per_record() { return 0; }
+
+  /* 
+    This method must determine for how much the auxiliary buffer should be
+    incremented when a new record is added to the join buffer.
+    If no auxiliary buffer is needed the function should return 0.
+  */
+  virtual uint aux_buffer_incr(ulong recno);
+
+  /* Shall calculate how much space is remaining in the join buffer */ 
+  virtual size_t rem_space() 
+  { 
+    return max(buff_size-(end_pos-buff)-aux_buff_size,0);
+  }
+
+  /* 
+    Shall calculate how much space is taken by allocation of the key
+    for a record in the join buffer
+  */
+  virtual uint extra_key_length() { return 0; }
+
+  /*  Read all flag and data fields of a record from the join buffer */
+  uint read_all_record_fields();
+  
+  /* Read all flag fields of a record from the join buffer */
+  uint read_flag_fields();
+
+  /* Read a data record field from the join buffer */
+  uint read_record_field(CACHE_FIELD *copy, bool last_record);
+
+  /* Read a referenced field from the join buffer */
+  bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
+
+  /* 
+    Shall skip record from the join buffer if its match flag
+    is set to MATCH_FOUND
+ */
+  virtual bool skip_if_matched();
+
+  /* 
+    Shall skip record from the join buffer if its match flag
+    commands to do so
+  */
+  virtual bool skip_if_not_needed_match();
+
+  /* 
+    True if rec_ptr points to the record whose blob data stay in
+    record buffers
+  */
+  bool blob_data_is_in_rec_buff(uchar *rec_ptr)
+  {
+    return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff;
+  }
+
+  /* Find matches from the next table for records from the join buffer */
+  virtual enum_nested_loop_state join_matching_records(bool skip_last);
+
+  /* Shall set an auxiliary buffer up (currently used only by BKA joins) */
+  virtual int setup_aux_buffer(HANDLER_BUFFER &aux_buff) 
+  {
+    DBUG_ASSERT(0);
+    return 0;
+  }
+
+  /*
+    Shall get the number of ranges in the cache buffer passed
+    to the MRR interface
+  */  
+  virtual uint get_number_of_ranges_for_mrr() { return 0; };
+
+  /* 
+    Shall prepare to look for records from the join cache buffer that would
+    match the record of the joined table read into the record buffer
+  */ 
+  virtual bool prepare_look_for_matches(bool skip_last)= 0;
+  /* 
+    Shall return a pointer to the record from join buffer that is checked
+    as the next candidate for a match with the current record from join_tab.
+    Each implementation of this virtual function should bare in mind
+    that the record position it returns shall be exactly the position
+    passed as the parameter to the implementations of the virtual functions 
+    skip_next_candidate_for_match and read_next_candidate_for_match.
+  */   
+  virtual uchar *get_next_candidate_for_match()= 0;
+  /*
+    Shall check whether the given record from the join buffer has its match
+    flag settings commands to skip the record in the buffer.
+  */
+  virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0;
+  /*
+    Shall read the given record from the join buffer into the
+    the corresponding record buffer
+  */
+  virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0;
+
+  /* 
+    Shall return the location of the association label returned by 
+    the multi_read_range_next function for the current record loaded
+    into join_tab's record buffer
+  */
+  virtual uchar **get_curr_association_ptr() { return 0; };
+
+  /* Add null complements for unmatched outer records from the join buffer */
+  virtual enum_nested_loop_state join_null_complements(bool skip_last);
+
+  /* Restore the fields of the last record from the join buffer */
+  virtual void restore_last_record();
+
+  /* Set match flag for a record in join buffer if it has not been set yet */
+  bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr);
+
+  enum_nested_loop_state generate_full_extensions(uchar *rec_ptr);
+
+  /* Check matching to a partial join record from the join buffer */
+  bool check_match(uchar *rec_ptr);
+
+  /* 
+    This constructor creates an unlinked join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE(JOIN *j, JOIN_TAB *tab)
+  {
+    join= j;
+    join_tab= tab;
+    prev_cache= next_cache= 0;
+    buff= 0;
+  }
+
+  /* 
+    This constructor creates a linked join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)   
+  {  
+    join= j;
+    join_tab= tab;
+    next_cache= 0;
+    prev_cache= prev;
+    buff= 0;
+    if (prev)
+      prev->next_cache= this;
+  }
+
+public:
+ 
+  /*
+    The enumeration type Join_algorithm includes a mnemonic constant for
+    each join algorithm that employs join buffers
+  */
+
+  enum Join_algorithm
+  { 
+    BNL_JOIN_ALG,     /* Block Nested Loop Join algorithm                  */
+    BNLH_JOIN_ALG,    /* Block Nested Loop Hash Join algorithm             */
+    BKA_JOIN_ALG,     /* Batched Key Access Join algorithm                 */
+    BKAH_JOIN_ALG,    /* Batched Key Access with Hash Table Join Algorithm */
+  };
+
+  /* 
+    The enumeration type Match_flag describes possible states of the match flag
+    field  stored for the records of the first inner tables of outer joins and
+    semi-joins in the cases when the first match strategy is used for them.
+    When a record with match flag field is written into the join buffer the
+    state of the field usually is MATCH_NOT_FOUND unless this is a record of the
+    first inner table of the outer join for which the on precondition (the
+    condition from on expression over outer tables)  has turned out not to be 
+    true. In the last case the state of the match flag is MATCH_IMPOSSIBLE.
+    The state of the match flag field is changed to MATCH_FOUND as soon as
+    the first full matching combination of inner tables of the outer join or
+    the semi-join is discovered. 
+  */
+  enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE };
+
+  /* Table to be joined with the partial join records from the cache */ 
+  JOIN_TAB *join_tab;
+
+  /* Pointer to the previous join cache if there is any */
+  JOIN_CACHE *prev_cache;
+  /* Pointer to the next join cache if there is any */
+  JOIN_CACHE *next_cache;
+
+  /* Shall initialize the join cache structure */ 
+  virtual int init();
+
+  /* Get the current size of the cache join buffer */ 
+  size_t get_join_buffer_size() { return buff_size; }
+  /* Set the size of the cache join buffer to a new value */
+  void set_join_buffer_size(size_t sz) { buff_size= sz; }
+
+  /* Get the minimum possible size of the cache join buffer */
+  virtual ulong get_min_join_buffer_size();
+  /* Get the maximum possible size of the cache join buffer */ 
+  virtual ulong get_max_join_buffer_size(bool optimize_buff_size);
+
+  /* Shrink the size if the cache join buffer in a given ratio */
+  bool shrink_join_buffer_in_ratio(ulonglong n, ulonglong d);
+
+  /*  Shall return the type of the employed join algorithm */
+  virtual enum Join_algorithm get_join_alg()= 0;
+
+  /* 
+    The function shall return TRUE only when there is a key access
+    to the join table
+  */
+  virtual bool is_key_access()= 0;
+
+  /* Shall reset the join buffer for reading/writing */
+  virtual void reset(bool for_writing);
+
+  /* 
+    This function shall add a record into the join buffer and return TRUE
+    if it has been decided that it should be the last record in the buffer.
+  */ 
+  virtual bool put_record();
+
+  /* 
+    This function shall read the next record into the join buffer and return
+    TRUE if there is no more next records.
+  */ 
+  virtual bool get_record();
+
+  /* 
+    This function shall read the record at the position rec_ptr
+    in the join buffer
+  */ 
+  virtual void get_record_by_pos(uchar *rec_ptr);
+
+  /* Shall return the value of the match flag for the positioned record */
+  virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
+
+  /* Shall return the position of the current record */
+  virtual uchar *get_curr_rec() { return curr_rec_pos; }
+
+  /* Shall set the current record link */
+  virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; }
+
+  /* Shall return the current record link */
+  virtual uchar *get_curr_rec_link()
+  { 
+    return (curr_rec_link ? curr_rec_link : get_curr_rec());
+  }
+     
+  /* Join records from the join buffer with records from the next join table */ 
+  enum_nested_loop_state join_records(bool skip_last);
+
+  /* Add a comment on the join algorithm employed by the join cache */
+  virtual void print_explain_comment(String *str);
+
+  THD *thd();
+
+  virtual ~JOIN_CACHE() {}
+  void reset_join(JOIN *j) { join= j; }
+  void free()
+  { 
+    x_free(buff);
+    buff= 0;
+  }   
+  
+  friend class JOIN_CACHE_HASHED;
+  friend class JOIN_CACHE_BNL;
+  friend class JOIN_CACHE_BKA;
+  friend class JOIN_TAB_SCAN;
+  friend class JOIN_TAB_SCAN_MRR;
+
+};
+
+
+/*
+  The class JOIN_CACHE_HASHED is the base class for the classes
+  JOIN_CACHE_HASHED_BNL and JOIN_CACHE_HASHED_BKA. The first of them supports
+  an implementation of Block Nested Loop Hash (BNLH) Join Algorithm,
+  while the second is used for a variant of the BKA Join algorithm that performs
+  only one lookup for any records from join buffer with the same key value. 
+  For a join cache of this class the records from the join buffer that have
+  the same access key are linked into a chain attached to a key entry structure
+  that either itself contains the key value, or, in the case when the keys are
+  embedded, refers to its occurrence in one of the records from the chain.
+  To build the chains with the same keys a hash table is employed. It is placed
+  at the very end of the join buffer. The array of hash entries is allocated
+  first at the very bottom of the join buffer, while key entries are placed
+  before this array.
+  A hash entry contains a header of the list of the key entries with the same
+  hash value. 
+  Each key entry is a structure of the following type:
+    struct st_join_cache_key_entry {
+      union { 
+        uchar[] value;
+        cache_ref *value_ref; // offset from the beginning of the buffer
+      } hash_table_key;
+      key_ref next_key; // offset backward from the beginning of hash table
+      cache_ref *last_rec // offset from the beginning of the buffer
+    }
+  The references linking the records in a chain are always placed at the very
+  beginning of the record info stored in the join buffer. The records are 
+  linked in a circular list. A new record is always added to the end of this 
+  list.
+
+  The following picture represents a typical layout for the info stored in the
+  join buffer of a join cache object of the JOIN_CACHE_HASHED class.
+    
+  buff
+  V
+  +----------------------------------------------------------------------------+
+  |     |[*]record_1_1|                                                        |
+  |     ^ |                                                                    |
+  |     | +--------------------------------------------------+                 |
+  |     |                           |[*]record_2_1|          |                 |
+  |     |                           ^ |                      V                 |
+  |     |                           | +------------------+   |[*]record_1_2|   |
+  |     |                           +--------------------+-+   |               |
+  |+--+ +---------------------+                          | |   +-------------+ |
+  ||  |                       |                          V |                 | |
+  |||[*]record_3_1|         |[*]record_1_3|              |[*]record_2_2|     | |
+  ||^                       ^                            ^                   | |
+  ||+----------+            |                            |                   | |
+  ||^          |            |<---------------------------+-------------------+ |
+  |++          | | ... mrr  |   buffer ...           ... |     |               |
+  |            |            |                            |                     |
+  |      +-----+--------+   |                      +-----|-------+             |
+  |      V     |        |   |                      V     |       |             |
+  ||key_3|[/]|[*]|      |   |                |key_2|[/]|[*]|     |             |
+  |                   +-+---|-----------------------+            |             |
+  |                   V |   |                       |            |             |
+  |             |key_1|[*]|[*]|         |   | ... |[*]|   ...  |[*]|  ...  |   |
+  +----------------------------------------------------------------------------+
+                                        ^           ^            ^
+                                        |           i-th entry   j-th entry
+                                        hash table
+
+  i-th hash entry:
+    circular record chain for key_1:
+      record_1_1
+      record_1_2
+      record_1_3 (points to record_1_1)
+    circular record chain for key_3:
+      record_3_1 (points to itself)
+
+  j-th hash entry:
+    circular record chain for key_2:
+      record_2_1
+      record_2_2 (points to record_2_1)
+
+*/
+
+class JOIN_CACHE_HASHED: public JOIN_CACHE
+{
+
+  typedef uint (JOIN_CACHE_HASHED::*Hash_func) (uchar *key, uint key_len);
+  typedef bool (JOIN_CACHE_HASHED::*Hash_cmp_func) (uchar *key1, uchar *key2,
+                                                    uint key_len);
+  
+private:
+
+  /* Size of the offset of a key entry in the hash table */
+  uint size_of_key_ofs;
+
+  /* 
+    Length of the key entry in the hash table.
+    A key entry either contains the key value, or it contains a reference
+    to the key value if use_emb_key flag is set for the cache.
+  */ 
+  uint key_entry_length;
+ 
+  /* The beginning of the hash table in the join buffer */
+  uchar *hash_table;
+  /* Number of hash entries in the hash table */
+  uint hash_entries;
+
+
+  /* The position of the currently retrieved key entry in the hash table */
+  uchar *curr_key_entry;
+
+  /* The offset of the data fields from the beginning of the record fields */
+  uint data_fields_offset;
+
+  inline uint get_hash_idx_simple(uchar *key, uint key_len);
+  inline uint get_hash_idx_complex(uchar *key, uint key_len);
+
+  inline bool equal_keys_simple(uchar *key1, uchar *key2, uint key_len);
+  inline bool equal_keys_complex(uchar *key1, uchar *key2, uint key_len);
+
+  int init_hash_table();
+  void cleanup_hash_table();
+  
+protected:
+
+  /* 
+    Index info on the TABLE_REF object used by the hash join
+    to look for matching records
+  */    
+  KEY *ref_key_info;
+  /* 
+    Number of the key parts the TABLE_REF object used by the hash join
+    to look for matching records
+  */    
+  uint ref_used_key_parts;
+
+  /*
+    The hash function used in the hash table,
+    usually set by the init() method
+  */ 
+  Hash_func hash_func;
+  /*
+    The function to check whether two key entries in the hash table
+    are equal or not, usually set by the init() method
+  */ 
+  Hash_cmp_func hash_cmp_func;
+
+  /* 
+    Length of a key value.
+    It is assumed that all key values have the same length.
+  */
+  uint key_length;
+  /* Buffer to store key values for probing */
+  uchar *key_buff;
+
+  /* Number of key entries in the hash table (number of distinct keys) */
+  uint key_entries;
+
+  /* The position of the last key entry in the hash table */
+  uchar *last_key_entry;
+
+  /* 
+    The offset of the record fields from the beginning of the record
+    representation. The record representation starts with a reference to
+    the next record in the key record chain followed by the length of
+    the trailing record data followed by a reference to the record segment
+    in the previous cache, if any, followed by the record fields.
+  */ 
+  uint rec_fields_offset;
+
+  uint get_size_of_key_offset() { return size_of_key_ofs; }
+
+  /* 
+    Get the position of the next_key_ptr field pointed to by 
+    a linking reference stored at the position key_ref_ptr. 
+    This reference is actually the offset backward from the
+    beginning of hash table.
+  */  
+  uchar *get_next_key_ref(uchar *key_ref_ptr)
+  {
+    return hash_table-get_offset(size_of_key_ofs, key_ref_ptr);
+  }
+
+  /* 
+    Store the linking reference to the next_key_ptr field at 
+    the position key_ref_ptr. The position of the next_key_ptr
+    field is pointed to by ref. The stored reference is actually
+    the offset backward from the beginning of the hash table.
+  */  
+  void store_next_key_ref(uchar *key_ref_ptr, uchar *ref)
+  {
+    store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref));
+  }     
+  
+  /* 
+    Check whether the reference to the next_key_ptr field at the position
+    key_ref_ptr contains  a nil value.
+  */
+  bool is_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0;
+  } 
+
+  /* 
+    Set the reference to the next_key_ptr field at the position
+    key_ref_ptr equal to nil.
+  */
+  void store_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    store_offset(size_of_key_ofs, key_ref_ptr, nil);
+  } 
+
+  uchar *get_next_rec_ref(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  void store_next_rec_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  } 
+
+  /*
+    Get the position of the embedded key value for the current
+    record pointed to by get_curr_rec().
+  */ 
+  uchar *get_curr_emb_key()
+  {
+    return get_curr_rec()+data_fields_offset;
+  }
+
+  /*
+    Get the position of the embedded key value pointed to by a reference
+    stored at ref_ptr. The stored reference is actually the offset from
+    the beginning of the join buffer.
+  */  
+  uchar *get_emb_key(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  /* 
+    Store the reference to an embedded key at the position key_ref_ptr.
+    The position of the embedded key is pointed to by ref. The stored
+    reference is actually the offset from the beginning of the join buffer.
+  */  
+  void store_emb_key_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  }
+  
+  /* Get the total length of all prefixes of a record in hashed join buffer */ 
+  uint get_prefix_length() 
+  { 
+    return base_prefix_length + get_size_of_rec_offset();
+  }
+
+  /* 
+    Get maximum size of the additional space per record used for
+    the hash table with record keys
+  */
+  uint get_max_key_addon_space_per_record();
+
+  /* 
+    Calculate how much space in the buffer would not be occupied by
+    records, key entries and additional memory for the MMR buffer.
+  */ 
+  size_t rem_space() 
+  { 
+    return max(last_key_entry-end_pos-aux_buff_size,0);
+  }
+
+  /* 
+    Calculate how much space is taken by allocation of the key
+    entry for a record in the join buffer
+  */
+  uint extra_key_length() { return key_entry_length; }
+
+  /* 
+    Skip record from a hashed join buffer if its match flag
+    is set to MATCH_FOUND
+  */
+  bool skip_if_matched();
+
+  /*
+    Skip record from a hashed join buffer if its match flag setting 
+    commands to do so
+  */
+  bool skip_if_not_needed_match();
+
+  /* Search for a key in the hash table of the join buffer */
+  bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr);
+
+  /* Reallocate the join buffer of a hashed join cache */
+  int realloc_buffer();
+
+  /* 
+    This constructor creates an unlinked hashed join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+  /* 
+    This constructor creates a linked hashed join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_HASHED(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+		    :JOIN_CACHE(j, tab, prev) {}
+
+public:
+
+  /* Initialize a hashed join cache */       
+  int init();
+
+  /* Reset the buffer of a hashed join cache for reading/writing */
+  void reset(bool for_writing);
+
+  /* Add a record into the buffer of a hashed join cache */
+  bool put_record();
+
+  /* Read the next record from the buffer of a hashed join cache */
+  bool get_record();
+
+  /*
+    Shall check whether all records in a key chain have 
+    their match flags set on
+  */   
+  virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr);
+
+  uint get_next_key(uchar **key); 
+  
+  /* Get the head of the record chain attached to the current key entry */ 
+  uchar *get_curr_key_chain()
+  {
+    return get_next_rec_ref(curr_key_entry+key_entry_length-
+                            get_size_of_rec_offset());
+  }
+  
+};
+
+
+/*
+  The class JOIN_TAB_SCAN is a companion class for the classes JOIN_CACHE_BNL
+  and JOIN_CACHE_BNLH. Actually the class implements the iterator over the
+  table joinded by BNL/BNLH join algorithm.
+  The virtual functions open, next and close are called for any iteration over
+  the table. The function open is called to initiate the process of the 
+  iteration. The function next shall read the next record from the joined
+  table. The record is read into the record buffer of the joined table.
+  The record is to be matched with records from the join cache buffer. 
+  The function close shall perform the finalizing actions for the iteration.
+*/
+   
+class JOIN_TAB_SCAN: public Sql_alloc
+{
+
+private:
+  /* TRUE if this is the first record from the joined table to iterate over */
+  bool is_first_record;
+
+protected:
+
+  /* The joined table to be iterated over */
+  JOIN_TAB *join_tab;
+  /* The join cache used to join the table join_tab */ 
+  JOIN_CACHE *cache;
+  /* 
+    Representation of the executed multi-way join through which
+    all needed context can be accessed.  
+  */   
+  JOIN *join;
+
+public:
+  
+  JOIN_TAB_SCAN(JOIN *j, JOIN_TAB *tab)
+  {
+    join= j;
+    join_tab= tab;
+    cache= join_tab->cache;
+  }
+
+  virtual ~JOIN_TAB_SCAN() {}
+ 
+  /* 
+    Shall calculate the increment of the auxiliary buffer for a record
+    write if such a buffer is used by the table scan object 
+  */
+  virtual uint aux_buffer_incr(ulong recno) { return 0; }
+
+  /* Initiate the process of iteration over the joined table */
+  virtual int open();
+  /* 
+    Shall read the next candidate for matches with records from 
+    the join buffer.
+  */
+  virtual int next();
+  /* 
+    Perform the finalizing actions for the process of iteration
+    over the joined_table.
+  */ 
+  virtual void close();
+
+};
+
+/*
+  The class JOIN_CACHE_BNL is used when the BNL join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BNL :public JOIN_CACHE
+{
+private:
+  /* 
+    The number of the records in the join buffer that have to be
+    checked yet for a match with the current record of join_tab 
+    read into the record buffer.
+  */
+  uint rem_records;
+
+protected:
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BNL join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab) :JOIN_CACHE(j, tab) {}
+
+  /* 
+    This constructor creates a linked BNL join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+    :JOIN_CACHE(j, tab, prev) {}
+
+  /* Initialize the BNL cache */       
+  int init();
+
+  enum Join_algorithm get_join_alg() { return BNL_JOIN_ALG; }
+
+  bool is_key_access() { return FALSE; }
+
+};
+
+
+/*
+  The class JOIN_CACHE_BNLH is used when the BNLH join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BNLH :public JOIN_CACHE_HASHED
+{
+
+protected:
+
+  /* 
+    The pointer to the last record from the circular list of the records
+    that  match the join key built out of the record in the join buffer for
+    the join_tab table
+  */
+  uchar *last_matching_rec_ref_ptr;
+  /*
+    The pointer to the next current  record from the circular list of the
+    records that match the join key built out of the record in the join buffer
+    for the join_tab table. This pointer is used by the class method 
+    get_next_candidate_for_match to iterate over records from the circular
+    list.
+  */
+  uchar *next_matching_rec_ref_ptr;
+
+  /*
+    Get the chain of records from buffer matching the current candidate
+    record for join
+  */
+  uchar *get_matching_chain_by_join_key();
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BNLH join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab) : JOIN_CACHE_HASHED(j, tab) {}
+
+  /* 
+    This constructor creates a linked BNLH join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_BNLH(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev) 
+    : JOIN_CACHE_HASHED(j, tab, prev) {}
+
+  /* Initialize the BNLH cache */       
+  int init();
+
+  enum Join_algorithm get_join_alg() { return BNLH_JOIN_ALG; }
+
+  bool is_key_access() { return TRUE; }
+
+};
+
+
+/*
+  The class JOIN_TAB_SCAN_MRR is a companion class for the classes
+  JOIN_CACHE_BKA and JOIN_CACHE_BKAH. Actually the class implements the
+  iterator over the records from join_tab selected by BKA/BKAH join
+  algorithm as the candidates to be joined. 
+  The virtual functions open, next and close are called for any iteration over
+  join_tab record candidates. The function open is called to initiate the
+  process of the iteration. The function next shall read the next record from
+  the set of the record candidates. The record is read into the record buffer
+  of the joined table. The function close shall perform the finalizing actions
+  for the iteration.
+*/
+   
+class JOIN_TAB_SCAN_MRR: public JOIN_TAB_SCAN
+{
+  /* Interface object to generate key ranges for MRR */
+  RANGE_SEQ_IF range_seq_funcs;
+
+  /* Number of ranges to be processed by the MRR interface */
+  uint ranges;
+
+  /* Flag to to be passed to the MRR interface */ 
+  uint mrr_mode;
+
+  /* MRR buffer assotiated with this join cache */
+  HANDLER_BUFFER mrr_buff;
+
+  /* Shall initialize the MRR buffer */
+  virtual void init_mrr_buff()
+  {
+    cache->setup_aux_buffer(mrr_buff);
+  }
+
+public:
+
+  JOIN_TAB_SCAN_MRR(JOIN *j, JOIN_TAB *tab, uint flags, RANGE_SEQ_IF rs_funcs)
+    :JOIN_TAB_SCAN(j, tab), range_seq_funcs(rs_funcs), mrr_mode(flags) {}
+
+  uint aux_buffer_incr(ulong recno);
+
+  int open();
+ 
+  int next();
+
+  friend class JOIN_CACHE_BKA; /* it needs to add an mrr_mode flag after JOIN_CACHE::init() call */
+};
+
+/*
+  The class JOIN_CACHE_BKA is used when the BKA join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BKA :public JOIN_CACHE
+{
+private:
+
+  /* Flag to to be passed to the companion JOIN_TAB_SCAN_MRR object */
+  uint mrr_mode;
+
+  /* 
+    This value is set to 1 by the class prepare_look_for_matches method
+    and back to 0 by the class get_next_candidate_for_match method
+  */
+  uint rem_records;
+
+  /*
+    This field contains the current association label set by a call of
+    the multi_range_read_next handler function.
+    See the function JOIN_CACHE_BKA::get_curr_key_association()
+  */
+  uchar *curr_association;
+
+protected:
+
+  /* 
+    Get the number of ranges in the cache buffer passed to the MRR
+    interface. For each record its own range is passed.
+  */
+  uint get_number_of_ranges_for_mrr() { return (uint)records; }
+
+ /*
+   Setup the MRR buffer as the space between the last record put
+   into the join buffer and the very end of the join buffer 
+ */
+  int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+  {
+    aux_buff.buffer= end_pos;
+    aux_buff.buffer_end= buff+buff_size;
+    return 0;
+  }
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  uchar *get_next_candidate_for_match();
+
+  bool skip_next_candidate_for_match(uchar *rec_ptr);
+
+  void read_next_candidate_for_match(uchar *rec_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BKA join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags)
+    :JOIN_CACHE(j, tab), mrr_mode(flags) {}
+  /* 
+    This constructor creates a linked BKA join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+    :JOIN_CACHE(j, tab, prev), mrr_mode(flags) {}
+  
+  uchar **get_curr_association_ptr() { return &curr_association; }
+
+  /* Initialize the BKA cache */       
+  int init();
+
+  enum Join_algorithm get_join_alg() { return BKA_JOIN_ALG; }
+
+  bool is_key_access() { return TRUE; }
+
+  /* Get the key built over the next record from the join buffer */
+  uint get_next_key(uchar **key);
+
+  /* Check index condition of the joined table for a record from BKA cache */
+  bool skip_index_tuple(range_id_t range_info);
+
+  void print_explain_comment(String *str);
+};
+
+
+
+/*
+  The class JOIN_CACHE_BKAH is used when the BKAH join algorithm is
+  employed to perform a join operation   
+*/
+
+class JOIN_CACHE_BKAH :public JOIN_CACHE_BNLH
+{
+
+private:
+  /* Flag to to be passed to the companion JOIN_TAB_SCAN_MRR object */
+  uint mrr_mode;
+
+  /* 
+    This flag is set to TRUE if the implementation of the MRR interface cannot
+    handle range association labels and does not return them to the caller of
+    the multi_range_read_next handler function. E.g. the implementation of
+    the MRR inteface for the Falcon engine could not return association
+    labels to the caller of multi_range_read_next.
+    The flag is set by JOIN_CACHE_BKA::init() and is not ever changed.
+  */       
+  bool no_association;
+
+  /* 
+    This field contains the association label returned by the 
+    multi_range_read_next function.
+    See the function JOIN_CACHE_BKAH::get_curr_key_association()
+  */
+  uchar *curr_matching_chain;
+
+protected:
+
+  uint get_number_of_ranges_for_mrr() { return key_entries; }
+
+  /* 
+    Initialize the MRR buffer allocating some space within the join buffer.
+    The entire space between the last record put into the join buffer and the
+    last key entry added to the hash table is used for the MRR buffer.
+  */
+  int setup_aux_buffer(HANDLER_BUFFER &aux_buff)
+  {
+    aux_buff.buffer= end_pos;
+    aux_buff.buffer_end= last_key_entry;
+    return 0;
+  }
+
+  bool prepare_look_for_matches(bool skip_last);
+
+  /*
+    The implementations of the methods
+    - get_next_candidate_for_match
+    - skip_recurrent_candidate_for_match
+    - read_next_candidate_for_match
+    are inherited from the JOIN_CACHE_BNLH class
+  */
+
+public:
+
+  /* 
+    This constructor creates an unlinked BKAH join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags) 
+    :JOIN_CACHE_BNLH(j, tab), mrr_mode(flags) {}
+
+  /* 
+    This constructor creates a linked BKAH join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKAH(JOIN *j, JOIN_TAB *tab, uint flags, JOIN_CACHE *prev)
+    :JOIN_CACHE_BNLH(j, tab, prev), mrr_mode(flags)  {}
+
+  uchar **get_curr_association_ptr() { return &curr_matching_chain; }
+
+  /* Initialize the BKAH cache */       
+  int init();
+
+  enum Join_algorithm get_join_alg() { return BKAH_JOIN_ALG; }
+
+  /* Check index condition of the joined table for a record from BKAH cache */
+  bool skip_index_tuple(range_id_t range_info);
+
+  void print_explain_comment(String *str);
+};
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 30b53f49b5a..4a69cd3b1fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -25,6 +25,7 @@
 #include <hash.h>
 #include "sp_head.h"
 #include "sp.h"
+#include "sql_select.h"
 
 /*
   We are using pointer to this variable for distinguishing between assignment
@@ -311,7 +312,6 @@ void lex_start(THD *thd)
   lex->derived_tables= 0;
   lex->lock_option= TL_READ;
   lex->safe_to_cache_query= 1;
-  lex->leaf_tables_insert= 0;
   lex->parsing_options.reset();
   lex->empty_field_list_on_rset= 0;
   lex->select_lex.select_number= 1;
@@ -341,6 +341,7 @@ void lex_start(THD *thd)
   lex->event_parse_data= NULL;
   lex->profile_options= PROFILE_NONE;
   lex->nest_level=0 ;
+  lex->select_lex.nest_level_base= &lex->unit;
   lex->allow_sum_func= 0;
   lex->in_sum_func= NULL;
   lex->protect_against_global_read_lock= FALSE;
@@ -821,45 +822,48 @@ int MYSQLlex(void *arg, void *yythd)
 	yylval->lex_str.length=2;
 	return NULL_SYM;
       }
+      /* Fall through */
     case MY_LEX_CHAR:			// Unknown or single char token
     case MY_LEX_SKIP:			// This should not happen
-      if (c == '-' && lip->yyPeek() == '-' &&
+      if (c != ')')
+	lip->next_state= MY_LEX_START;	// Allow signed numbers
+      return((int) c);
+
+    case MY_LEX_MINUS_OR_COMMENT:
+      if (lip->yyPeek() == '-' &&
           (my_isspace(cs,lip->yyPeekn(1)) ||
            my_iscntrl(cs,lip->yyPeekn(1))))
       {
         state=MY_LEX_COMMENT;
         break;
       }
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      return((int) c);
 
-      if (c != ')')
-	lip->next_state= MY_LEX_START;	// Allow signed numbers
-
-      if (c == ',')
-      {
-        /*
-          Warning:
-          This is a work around, to make the "remember_name" rule in
-          sql/sql_yacc.yy work properly.
-          The problem is that, when parsing "select expr1, expr2",
-          the code generated by bison executes the *pre* action
-          remember_name (see select_item) *before* actually parsing the
-          first token of expr2.
-        */
-        lip->restart_token();
-      }
-      else
-      {
-        /*
-          Check for a placeholder: it should not precede a possible identifier
-          because of binlogging: when a placeholder is replaced with
-          its value in a query for the binlog, the query must stay
-          grammatically correct.
-        */
-        if (c == '?' && lip->stmt_prepare_mode &&
-            !ident_map[(uchar) lip->yyPeek()])
+    case MY_LEX_PLACEHOLDER:
+      /*
+        Check for a placeholder: it should not precede a possible identifier
+        because of binlogging: when a placeholder is replaced with
+        its value in a query for the binlog, the query must stay
+        grammatically correct.
+      */
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      if (lip->stmt_prepare_mode && !ident_map[(uchar) lip->yyPeek()])
         return(PARAM_MARKER);
-      }
+      return((int) c);
 
+    case MY_LEX_COMMA:
+      lip->next_state= MY_LEX_START;	// Allow signed numbers
+      /*
+        Warning:
+        This is a work around, to make the "remember_name" rule in
+        sql/sql_yacc.yy work properly.
+        The problem is that, when parsing "select expr1, expr2",
+        the code generated by bison executes the *pre* action
+        remember_name (see select_item) *before* actually parsing the
+        first token of expr2.
+      */
+      lip->restart_token();
       return((int) c);
 
     case MY_LEX_IDENT_OR_NCHAR:
@@ -1272,39 +1276,39 @@ int MYSQLlex(void *arg, void *yythd)
 
       lip->save_in_comment_state();
 
+      if (lip->yyPeekn(2) == 'M' && lip->yyPeekn(3) == '!')
+      {
+        /* Skip MariaDB unique marker */
+        lip->set_echo(FALSE);
+        lip->yySkip();
+        /* The following if will be true */
+      }
       if (lip->yyPeekn(2) == '!')
       {
         lip->in_comment= DISCARD_COMMENT;
         /* Accept '/' '*' '!', but do not keep this marker. */
         lip->set_echo(FALSE);
-        lip->yySkip();
-        lip->yySkip();
-        lip->yySkip();
+        lip->yySkipn(3);
 
         /*
           The special comment format is very strict:
-          '/' '*' '!', followed by exactly
+          '/' '*' '!', followed by an optional 'M' and exactly
           1 digit (major), 2 digits (minor), then 2 digits (dot).
           32302 -> 3.23.02
           50032 -> 5.0.32
           50114 -> 5.1.14
         */
-        char version_str[6];
-        version_str[0]= lip->yyPeekn(0);
-        version_str[1]= lip->yyPeekn(1);
-        version_str[2]= lip->yyPeekn(2);
-        version_str[3]= lip->yyPeekn(3);
-        version_str[4]= lip->yyPeekn(4);
-        version_str[5]= 0;
-        if (  my_isdigit(cs, version_str[0])
-           && my_isdigit(cs, version_str[1])
-           && my_isdigit(cs, version_str[2])
-           && my_isdigit(cs, version_str[3])
-           && my_isdigit(cs, version_str[4])
+        if (  my_isdigit(cs, lip->yyPeekn(0))
+           && my_isdigit(cs, lip->yyPeekn(1))
+           && my_isdigit(cs, lip->yyPeekn(2))
+           && my_isdigit(cs, lip->yyPeekn(3))
+           && my_isdigit(cs, lip->yyPeekn(4))
            )
         {
           ulong version;
-          version=strtol(version_str, NULL, 10);
+          char *end_ptr= (char*) lip->get_ptr()+5;
+          int error;
+          version= (ulong) my_strtoll10(lip->get_ptr(), &end_ptr, &error);
 
           if (version <= MYSQL_VERSION_ID)
           {
@@ -1568,7 +1572,7 @@ void st_select_lex_node::init_query()
   options= 0;
   sql_cache= SQL_CACHE_UNSPECIFIED;
   linkage= UNSPECIFIED_TYPE;
-  no_error= no_table_names_allowed= 0;
+  no_table_names_allowed= 0;
   uncacheable= 0;
 }
 
@@ -1594,6 +1598,7 @@ void st_select_lex_unit::init_query()
   describe= 0;
   found_rows_for_union= 0;
   insert_table_with_stored_vcol= 0;
+  derived= 0;
 }
 
 void st_select_lex::init_query()
@@ -1602,7 +1607,9 @@ void st_select_lex::init_query()
   table_list.empty();
   top_join_list.empty();
   join_list= &top_join_list;
-  embedding= leaf_tables= 0;
+  embedding= 0;
+  leaf_tables_prep.empty();
+  leaf_tables.empty();
   item_list.empty();
   join= 0;
   having= prep_having= where= prep_where= 0;
@@ -1636,6 +1643,9 @@ void st_select_lex::init_query()
   nest_level= 0;
   link_next= 0;
   lock_option= TL_READ_DEFAULT;
+  is_prep_leaf_list_saved= FALSE;
+
+  bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used));
   m_non_agg_field_used= false;
   m_agg_func_used= false;
 }
@@ -1643,6 +1653,8 @@ void st_select_lex::init_query()
 void st_select_lex::init_select()
 {
   st_select_lex_node::init_select();
+  sj_nests.empty();
+  sj_subselects.empty();
   group_list.empty();
   type= db= 0;
   having= 0;
@@ -1668,6 +1680,8 @@ void st_select_lex::init_select()
   non_agg_fields.empty();
   cond_value= having_value= Item::COND_UNDEF;
   inner_refs_list.empty();
+  insert_tables= 0;
+  merged_into= 0;
   m_non_agg_field_used= false;
   m_agg_func_used= false;
 }
@@ -1687,6 +1701,31 @@ void st_select_lex_node::include_down(st_select_lex_node *upper)
   slave= 0;
 }
 
+
+void st_select_lex_node::add_slave(st_select_lex_node *slave_arg)
+{
+  for (; slave; slave= slave->next)
+    if (slave == slave_arg)
+      return;
+
+  if (slave)
+  {
+    st_select_lex_node *slave_arg_slave= slave_arg->slave;
+    /* Insert in the front of list of slaves if any. */
+    slave_arg->include_neighbour(slave);
+    /* include_neighbour() sets slave_arg->slave=0, restore it. */
+    slave_arg->slave= slave_arg_slave;
+    /* Count on include_neighbour() setting the master. */
+    DBUG_ASSERT(slave_arg->master == this);
+  }
+  else
+  {
+    slave= slave_arg;
+    slave_arg->master= this;
+  }
+}
+
+
 /*
   include on level down (but do not link)
 
@@ -1738,17 +1777,29 @@ void st_select_lex_node::fast_exclude()
   
 }
 
+
+/*
+  Exclude a node from the tree lex structure, but leave it in the global
+  list of nodes.
+*/
+
+void st_select_lex_node::exclude_from_tree()
+{
+  if ((*prev= next))
+    next->prev= prev;
+}
+
+
 /*
-  excluding select_lex structure (except first (first select can't be
+  Exclude select_lex structure (except first (first select can't be
   deleted, because it is most upper select))
 */
 void st_select_lex_node::exclude()
 {
-  //exclude from global list
+  /* exclude from global list */
   fast_exclude();
-  //exclude from other structures
-  if ((*prev= next))
-    next->prev= prev;
+  /* exclude from other structures */
+  exclude_from_tree();
   /* 
      We do not need following statements, because prev pointer of first 
      list element point to master->slave
@@ -1847,9 +1898,8 @@ void st_select_lex_unit::exclude_tree()
     'last' should be reachable from this st_select_lex_node
 */
 
-void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency)
+bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency)
 {
-  SELECT_LEX *next_to_last;
 
   DBUG_ASSERT(this != last);
 
@@ -1860,28 +1910,31 @@ void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency)
   SELECT_LEX *s= this;
   do
   {
-    if (!(s->uncacheable & UNCACHEABLE_DEPENDENT))
+    if (!(s->uncacheable & UNCACHEABLE_DEPENDENT_GENERATED))
     {
       // Select is dependent of outer select
       s->uncacheable= (s->uncacheable & ~UNCACHEABLE_UNITED) |
-                       UNCACHEABLE_DEPENDENT;
+                       UNCACHEABLE_DEPENDENT_GENERATED;
       SELECT_LEX_UNIT *munit= s->master_unit();
       munit->uncacheable= (munit->uncacheable & ~UNCACHEABLE_UNITED) |
-                       UNCACHEABLE_DEPENDENT;
+                       UNCACHEABLE_DEPENDENT_GENERATED;
       for (SELECT_LEX *sl= munit->first_select(); sl ; sl= sl->next_select())
       {
         if (sl != s &&
-            !(sl->uncacheable & (UNCACHEABLE_DEPENDENT | UNCACHEABLE_UNITED)))
+            !(sl->uncacheable & (UNCACHEABLE_DEPENDENT_GENERATED |
+                                 UNCACHEABLE_UNITED)))
           sl->uncacheable|= UNCACHEABLE_UNITED;
       }
     }
-    next_to_last= s;
-  } while ((s= s->outer_select()) != last && s != 0);
 
+    Item_subselect *subquery_expr= s->master_unit()->item;
+    if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, 
+                                                          dependency))
+      return TRUE;
+  } while ((s= s->outer_select()) != last && s != 0);
   is_correlated= TRUE;
   this->master_unit()->item->is_correlated= TRUE;
-  if (dependency)
-    next_to_last->master_unit()->item->refers_to.push_back(dependency);
+  return FALSE;
 }
 
 bool st_select_lex_node::set_braces(bool value)      { return 1; }
@@ -2006,23 +2059,25 @@ ulong st_select_lex::get_table_join_options()
 
 bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num)
 {
+  DBUG_ENTER("st_select_lex::setup_ref_array");
+
   if (ref_pointer_array)
-    return 0;
+    DBUG_RETURN(0);
 
   // find_order_in_list() may need some extra space, so multiply by two.
   order_group_num*= 2;
 
   /*
-    We have to create array in prepared statement memory if it is
+    We have to create array in prepared statement memory if it is a
     prepared statement
   */
-  Query_arena *arena= thd->stmt_arena;
-  return (ref_pointer_array=
-          (Item **)arena->alloc(sizeof(Item*) * (n_child_sum_items +
-                                                 item_list.elements +
-                                                 select_n_having_items +
-                                                 select_n_where_fields +
-                                                 order_group_num)*5)) == 0;
+  ref_pointer_array=
+    (Item **)thd->stmt_arena->alloc(sizeof(Item*) * (n_child_sum_items +
+                                                     item_list.elements +
+                                                     select_n_having_items +
+                                                     select_n_where_fields +
+                                                     order_group_num)*5);
+  DBUG_RETURN(ref_pointer_array == 0);
 }
 
 
@@ -2067,9 +2122,27 @@ void st_select_lex::print_order(String *str,
   {
     if (order->counter_used)
     {
-      char buffer[20];
-      size_t length= my_snprintf(buffer, 20, "%d", order->counter);
-      str->append(buffer, (uint) length);
+      if (query_type != QT_VIEW_INTERNAL)
+      {
+        char buffer[20];
+        size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+        str->append(buffer, (uint) length);
+      }
+      else
+      {
+        /* replace numeric reference with expression */
+        if (order->item[0]->type() == Item::INT_ITEM &&
+            order->item[0]->basic_const_item())
+        {
+          char buffer[20];
+          size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+          str->append(buffer, (uint) length);
+          /* make it expression instead of integer constant */
+          str->append(STRING_WITH_LEN("+0"));
+        }
+        else
+          (*order->item)->print(str, query_type);
+      }
     }
     else
       (*order->item)->print(str, query_type);
@@ -2087,16 +2160,17 @@ void st_select_lex::print_limit(THD *thd,
 {
   SELECT_LEX_UNIT *unit= master_unit();
   Item_subselect *item= unit->item;
-  if (item && unit->global_parameters == this &&
-      (item->substype() == Item_subselect::EXISTS_SUBS ||
-       item->substype() == Item_subselect::IN_SUBS ||
-       item->substype() == Item_subselect::ALL_SUBS))
+
+  if (item && unit->global_parameters == this)
   {
-    DBUG_ASSERT(!item->fixed ||
-                (select_limit->val_int() == LL(1) && offset_limit == 0));
-    return;
+    Item_subselect::subs_type subs_type= item->substype();
+    if (subs_type == Item_subselect::EXISTS_SUBS ||
+        subs_type == Item_subselect::IN_SUBS ||
+        subs_type == Item_subselect::ALL_SUBS)
+    {
+      return;
+    }
   }
-
   if (explicit_limit)
   {
     str->append(STRING_WITH_LEN(" limit "));
@@ -2109,6 +2183,7 @@ void st_select_lex::print_limit(THD *thd,
   }
 }
 
+
 /**
   @brief Restore the LEX and THD in case of a parse error.
 
@@ -2810,6 +2885,7 @@ void st_lex::cleanup_after_one_table_open()
   if (all_selects_list != &select_lex)
   {
     derived_tables= 0;
+    select_lex.exclude_from_table_unique_test= false;
     /* cleunup underlying units (units of VIEW) */
     for (SELECT_LEX_UNIT *un= select_lex.first_inner_unit();
          un;
@@ -2904,7 +2980,11 @@ static void fix_prepare_info_in_table_list(THD *thd, TABLE_LIST *tbl)
       thd->check_and_register_item_tree(&tbl->prep_on_expr, &tbl->on_expr);
       tbl->on_expr= tbl->on_expr->copy_andor_structure(thd);
     }
-    fix_prepare_info_in_table_list(thd, tbl->merge_underlying_list);
+    if (tbl->is_view_or_derived() && tbl->is_merged_derived())
+    {
+      SELECT_LEX *sel= tbl->get_single_select();
+      fix_prepare_info_in_table_list(thd, sel->get_table_list());
+    }
   }
 }
 
@@ -3019,6 +3099,643 @@ bool st_select_lex::add_index_hint (THD *thd, char *str, uint length)
                                             str, length));
 }
 
+
+bool st_select_lex::optimize_unflattened_subqueries()
+{
+  for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
+  {
+    Item_subselect *subquery_predicate= un->item;
+    
+    if (subquery_predicate)
+    {
+      if (subquery_predicate->substype() == Item_subselect::IN_SUBS)
+      {
+        Item_in_subselect *in_subs=(Item_in_subselect*)subquery_predicate;
+        if (in_subs->is_jtbm_merged)
+          continue;
+      }
+
+      for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+      {
+        JOIN *inner_join= sl->join;
+        if (!inner_join)
+          continue;
+        SELECT_LEX *save_select= un->thd->lex->current_select;
+        ulonglong save_options;
+        int res;
+        /* We need only 1 row to determine existence */
+        un->set_limit(un->global_parameters);
+        un->thd->lex->current_select= sl;
+        save_options= inner_join->select_options;
+        if (options & SELECT_DESCRIBE)
+        {
+          /* Optimize the subquery in the context of EXPLAIN. */
+          sl->set_explain_type();
+          sl->options|= SELECT_DESCRIBE;
+          inner_join->select_options|= SELECT_DESCRIBE;
+        }
+        res= inner_join->optimize();
+        inner_join->select_options= save_options;
+        un->thd->lex->current_select= save_select;
+        if (res)
+          return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+
+/**
+  @brief Process all derived tables/views of the SELECT.
+
+  @param lex    LEX of this thread
+  @param phase  phases to run derived tables/views through
+
+  @details
+  This function runs specified 'phases' on all tables from the
+  table_list of this select.
+
+  @return FALSE ok.
+  @return TRUE an error occur.
+*/
+
+bool st_select_lex::handle_derived(struct st_lex *lex, uint phases)
+{
+  for (TABLE_LIST *cursor= (TABLE_LIST*) table_list.first;
+       cursor;
+       cursor= cursor->next_local)
+  {
+    if (cursor->is_view_or_derived() && cursor->handle_derived(lex, phases))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Returns first unoccupied table map and table number
+
+  @param map     [out] return found map
+  @param tablenr [out] return found tablenr
+
+  @details
+  Returns first unoccupied table map and table number in this select.
+  Map and table are returned in *'map' and *'tablenr' accordingly.
+
+  @retrun TRUE  no free table map/table number
+  @return FALSE found free table map/table number
+*/
+
+bool st_select_lex::get_free_table_map(table_map *map, uint *tablenr)
+{
+  *map= 0;
+  *tablenr= 0;
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    if (tl->table->map > *map)
+      *map= tl->table->map;
+    if (tl->table->tablenr > *tablenr)
+      *tablenr= tl->table->tablenr;
+  }
+  (*map)<<= 1;
+  (*tablenr)++;
+  if (*tablenr >= MAX_TABLES)
+    return TRUE;
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Append given table to the leaf_tables list.
+
+  @param link  Offset to which list in table structure to use
+  @param table Table to append
+
+  @details
+  Append given 'table' to the leaf_tables list using the 'link' offset.
+  If the 'table' is linked with other tables through next_leaf/next_local
+  chains then whole list will be appended.
+*/
+
+void st_select_lex::append_table_to_list(TABLE_LIST *TABLE_LIST::*link,
+                                         TABLE_LIST *table)
+{
+  TABLE_LIST *tl;
+  for (tl= leaf_tables.head(); tl->*link; tl= tl->*link) ;
+  tl->*link= table;
+}
+
+
+/*
+  @brief
+  Replace given table from the leaf_tables list for a list of tables 
+
+  @param table Table to replace
+  @param list  List to substititute the table for
+
+  @details
+  Replace 'table' from the leaf_tables list for a list of tables 'tbl_list'.
+*/
+
+void st_select_lex::replace_leaf_table(TABLE_LIST *table, List<TABLE_LIST> &tbl_list)
+{
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    if (tl == table)
+    {
+      ti.replace(tbl_list);
+      break;
+    }
+  }
+}
+
+
+/**
+  @brief
+  Assigns new table maps to tables in the leaf_tables list
+
+  @param derived    Derived table to take initial table map from
+  @param map        table map to begin with
+  @param tablenr    table number to begin with
+  @param parent_lex new parent select_lex
+
+  @details
+  Assign new table maps/table numbers to all tables in the leaf_tables list.
+  'map'/'tablenr' are used for the first table and shifted to left/
+  increased for each consequent table in the leaf_tables list.
+  If the 'derived' table is given then it's table map/number is used for the
+  first table in the list and 'map'/'tablenr' are used for the second and
+  all consequent tables.
+  The 'parent_lex' is set as the new parent select_lex for all tables in the
+  list.
+*/
+
+void st_select_lex::remap_tables(TABLE_LIST *derived, table_map map,
+                                 uint tablenr, SELECT_LEX *parent_lex)
+{
+  bool first_table= TRUE;
+  TABLE_LIST *tl;
+  table_map first_map;
+  uint first_tablenr;
+
+  if (derived && derived->table)
+  {
+    first_map= derived->table->map;
+    first_tablenr= derived->table->tablenr;
+  }
+  else
+  {
+    first_map= map;
+    map<<= 1;
+    first_tablenr= tablenr++;
+  }
+  /*
+    Assign table bit/table number.
+    To the first table of the subselect the table bit/tablenr of the
+    derived table is assigned. The rest of tables are getting bits
+    sequentially, starting from the provided table map/tablenr.
+  */
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    if (first_table)
+    {
+      first_table= FALSE;
+      tl->table->set_table_map(first_map, first_tablenr);
+    }
+    else
+    {
+      tl->table->set_table_map(map, tablenr);
+      tablenr++;
+      map<<= 1;
+    }
+    SELECT_LEX *old_sl= tl->select_lex;
+    tl->select_lex= parent_lex;
+    for(TABLE_LIST *emb= tl->embedding;
+        emb && emb->select_lex == old_sl;
+        emb= emb->embedding)
+      emb->select_lex= parent_lex;
+  }
+}
+
+/**
+  @brief
+  Merge a subquery into this select.
+
+  @param derived     derived table of the subquery to be merged
+  @param subq_select select_lex of the subquery
+  @param map         table map for assigning to merged tables from subquery
+  @param table_no    table number for assigning to merged tables from subquery
+
+  @details
+  This function merges a subquery into its parent select. In short the
+  merge operation appends the subquery FROM table list to the parent's
+  FROM table list. In more details:
+    .) the top_join_list of the subquery is wrapped into a join_nest
+       and attached to 'derived'
+    .) subquery's leaf_tables list  is merged with the leaf_tables
+       list of this select_lex
+    .) the table maps and table numbers of the tables merged from
+       the subquery are adjusted to reflect their new binding to
+       this select
+
+  @return TRUE  an error occur
+  @return FALSE ok
+*/
+
+bool SELECT_LEX::merge_subquery(THD *thd, TABLE_LIST *derived,
+                                SELECT_LEX *subq_select,
+                                uint table_no, table_map map)
+{
+  derived->wrap_into_nested_join(subq_select->top_join_list);
+
+  ftfunc_list->concat(subq_select->ftfunc_list);
+  if (join ||
+      thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+      thd->lex->sql_command == SQLCOM_DELETE_MULTI)
+  {
+    List_iterator_fast<Item_in_subselect> li(subq_select->sj_subselects);
+    Item_in_subselect *in_subq;
+    while ((in_subq= li++))
+    {
+      sj_subselects.push_back(in_subq);
+      if (in_subq->emb_on_expr_nest == NO_JOIN_NEST)
+         in_subq->emb_on_expr_nest= derived;
+    }
+  }
+
+  /* Walk through child's tables and adjust table map, tablenr,
+   * parent_lex */
+  subq_select->remap_tables(derived, map, table_no, this);
+  subq_select->merged_into= this;
+
+  replace_leaf_table(derived, subq_select->leaf_tables);
+
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Mark tables from the leaf_tables list as belong to a derived table.
+
+  @param derived   tables will be marked as belonging to this derived
+
+  @details
+  Run through the leaf_list and mark all tables as belonging to the 'derived'.
+*/
+
+void SELECT_LEX::mark_as_belong_to_derived(TABLE_LIST *derived)
+{
+  /* Mark tables as belonging to this DT */
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+  while ((tl= ti++))
+  {
+    tl->skip_temporary= 1;
+    tl->belong_to_derived= derived;
+  }
+}
+
+
+/**
+  @brief
+  Update used_tables cache for this select
+
+  @details
+  This function updates used_tables cache of ON expressions of all tables
+  in the leaf_tables list and of the conds expression (if any).
+*/
+
+void SELECT_LEX::update_used_tables()
+{
+  TABLE_LIST *tl;
+  List_iterator<TABLE_LIST> ti(leaf_tables);
+
+  while ((tl= ti++))
+  {
+    if (tl->table && !tl->is_view_or_derived())
+    {
+      TABLE_LIST *embedding= tl->embedding;
+      for (embedding= tl->embedding; embedding; embedding=embedding->embedding)
+      {
+        if (embedding->is_view_or_derived())
+	{
+          DBUG_ASSERT(embedding->is_merged_derived());
+          TABLE *tab= tl->table;
+          tab->covering_keys= tab->s->keys_for_keyread;
+          tab->covering_keys.intersect(tab->keys_in_use_for_query);
+          tab->merge_keys.clear_all();
+          bitmap_clear_all(tab->read_set);
+          bitmap_clear_all(tab->vcol_set);
+          break;
+        }
+      }
+    }
+  }
+
+  ti.rewind();
+  while ((tl= ti++))
+  {
+    TABLE_LIST *embedding= tl;
+    do
+    {
+      bool maybe_null;
+      if ((maybe_null= test(embedding->outer_join)))
+      {
+	tl->table->maybe_null= maybe_null;
+        break;
+      }
+    }
+    while ((embedding= embedding->embedding));
+    if (tl->on_expr)
+    {
+      tl->on_expr->update_used_tables();
+      tl->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+    }
+    embedding= tl->embedding;
+    while (embedding)
+    {
+      if (embedding->on_expr && 
+          embedding->nested_join->join_list.head() == tl)
+      {
+        embedding->on_expr->update_used_tables();
+        embedding->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+      }
+      tl= embedding;
+      embedding= tl->embedding;
+    }
+  }
+
+  if (join->conds)
+  {
+    join->conds->update_used_tables();
+    join->conds->walk(&Item::eval_not_null_tables, 0, NULL);
+  }
+  if (join->having)
+  {
+    join->having->update_used_tables();
+  }
+
+  Item *item;
+  List_iterator_fast<Item> it(join->fields_list);
+  while ((item= it++))
+  {
+    item->update_used_tables();
+  }
+  Item_outer_ref *ref;
+  List_iterator_fast<Item_outer_ref> ref_it(inner_refs_list);
+  while ((ref= ref_it++))
+  {
+    item= ref->outer_ref;
+    item->update_used_tables();
+  }
+  for (ORDER *order= group_list.first; order; order= order->next)
+    (*order->item)->update_used_tables();
+  if (!master_unit()->is_union())
+  {
+    for (ORDER *order= order_list.first; order; order= order->next)
+      (*order->item)->update_used_tables();
+  }      
+}
+
+
+/**
+  Set the EXPLAIN type for this subquery.
+*/
+
+void st_select_lex::set_explain_type()
+{
+  bool is_primary= FALSE;
+  if (next_select())
+    is_primary= TRUE;
+
+  if (!is_primary && first_inner_unit())
+  {
+    /*
+      If there is at least one materialized derived|view then it's a PRIMARY select.
+      Otherwise, all derived tables/views were merged and this select is a SIMPLE one.
+    */
+    for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
+    {
+      if ((!un->derived || un->derived->is_materialized_derived()))
+      {
+        is_primary= TRUE;
+        break;
+      }
+    }
+  }
+
+  SELECT_LEX *first= master_unit()->first_select();
+  /* drop UNCACHEABLE_EXPLAIN, because it is for internal usage only */
+  uint8 is_uncacheable= (uncacheable & ~UNCACHEABLE_EXPLAIN);
+  
+  bool using_materialization= FALSE;
+  Item_subselect *parent_item;
+  if ((parent_item= master_unit()->item) &&
+      parent_item->substype() == Item_subselect::IN_SUBS)
+  {
+    Item_in_subselect *in_subs= (Item_in_subselect*)parent_item;
+    /*
+      Surprisingly, in_subs->is_set_strategy() can return FALSE here,
+      even for the last invocation of this function for the select.
+    */
+    if (in_subs->test_strategy(SUBS_MATERIALIZATION))
+      using_materialization= TRUE;
+  }
+
+  if (&master_unit()->thd->lex->select_lex == this)
+  {
+     type= is_primary ? "PRIMARY" : "SIMPLE";
+  }
+  else
+  {
+    if (this == first)
+    {
+      /* If we're a direct child of a UNION, we're the first sibling there */
+      if (linkage == DERIVED_TABLE_TYPE)
+        type= "DERIVED";
+      else if (using_materialization)
+        type= "MATERIALIZED";
+      else
+      {
+         if (is_uncacheable & UNCACHEABLE_DEPENDENT)
+           type= "DEPENDENT SUBQUERY";
+         else
+         {
+           type= is_uncacheable? "UNCACHEABLE SUBQUERY" :
+                                 "SUBQUERY";
+         }
+      }
+    }
+    else
+    {
+      /* This a non-first sibling in UNION */
+      if (is_uncacheable & UNCACHEABLE_DEPENDENT)
+        type= "DEPENDENT UNION";
+      else if (using_materialization)
+        type= "MATERIALIZED UNION";
+      else
+      {
+        type= is_uncacheable ? "UNCACHEABLE UNION": "UNION";
+      }
+    }
+  }
+  options|= SELECT_DESCRIBE;
+}
+
+
+/**
+  @brief
+  Increase estimated number of records for a derived table/view
+
+  @param records  number of records to increase estimate by
+
+  @details
+  This function increases estimated number of records by the 'records'
+  for the derived table to which this select belongs to.
+*/
+
+void SELECT_LEX::increase_derived_records(ha_rows records)
+{
+  SELECT_LEX_UNIT *unit= master_unit();
+  DBUG_ASSERT(unit->derived);
+
+  select_union *result= (select_union*)unit->result;
+  result->records+= records;
+}
+
+
+/**
+  @brief
+  Mark select's derived table as a const one.
+
+  @param empty Whether select has an empty result set
+
+  @details
+  Mark derived table/view of this select as a constant one (to
+  materialize it at the optimization phase) unless this select belongs to a
+  union. Estimated number of rows is incremented if this select has non empty
+  result set.
+*/
+
+void SELECT_LEX::mark_const_derived(bool empty)
+{
+  TABLE_LIST *derived= master_unit()->derived;
+  if (!join->thd->lex->describe && derived)
+  {
+    if (!empty)
+      increase_derived_records(1);
+    if (!master_unit()->is_union() && !derived->is_merged_derived())
+      derived->fill_me= TRUE;
+  }
+}
+
+
+bool st_select_lex::save_leaf_tables(THD *thd)
+{
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                  
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  List_iterator_fast<TABLE_LIST> li(leaf_tables);
+  TABLE_LIST *table;
+  while ((table= li++))
+  {
+    if (leaf_tables_exec.push_back(table))
+      return 1;
+    table->tablenr_exec= table->get_tablenr();
+    table->map_exec= table->get_map();
+    if (join && (join->select_options & SELECT_DESCRIBE))
+      table->maybe_null_exec= 0;
+    else
+      table->maybe_null_exec= table->table?  table->table->maybe_null: 0;
+  }
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return 0;
+}
+
+
+bool st_select_lex::save_prep_leaf_tables(THD *thd)
+{
+  if (!thd->save_prep_leaf_list)
+    return 0;
+
+  Query_arena *arena= thd->stmt_arena, backup;
+  if (arena->is_conventional())
+    arena= 0;                                  
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  List_iterator_fast<TABLE_LIST> li(leaf_tables);
+  TABLE_LIST *table;
+  while ((table= li++))
+  {
+    if (leaf_tables_prep.push_back(table))
+      return 1;
+  }
+  thd->lex->select_lex.is_prep_leaf_list_saved= TRUE; 
+  thd->save_prep_leaf_list= FALSE;
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return 0;
+}
+
+
+/*
+  Return true if this select_lex has been converted into a semi-join nest
+  within 'ancestor'.
+
+  We need a loop to check this because there could be several nested
+  subselects, like
+
+    SELECT ... FROM grand_parent 
+      WHERE expr1 IN (SELECT ... FROM parent 
+                        WHERE expr2 IN ( SELECT ... FROM child)
+
+  which were converted into:
+  
+    SELECT ... 
+    FROM grand_parent SEMI_JOIN (parent JOIN child) 
+    WHERE 
+      expr1 AND expr2
+
+  In this case, both parent and child selects were merged into the parent.
+*/
+
+bool st_select_lex::is_merged_child_of(st_select_lex *ancestor)
+{
+  bool all_merged= TRUE;
+  for (SELECT_LEX *sl= this; sl && sl!=ancestor;
+       sl=sl->outer_select())
+  {
+    Item *subs= sl->master_unit()->item;
+    if (subs && subs->type() == Item::SUBSELECT_ITEM && 
+        ((Item_subselect*)subs)->substype() == Item_subselect::IN_SUBS &&
+        ((Item_in_subselect*)subs)->test_strategy(SUBS_SEMI_JOIN))
+    {
+      continue;
+    }
+    all_merged= FALSE;
+    break;
+  }
+  return all_merged;
+}
+
+
 /**
   A routine used by the parser to decide whether we are specifying a full
   partitioning or if only partitions to add or to split.
@@ -3036,4 +3753,3 @@ bool st_lex::is_partition_management() const
           (alter_info.flags == ALTER_ADD_PARTITION ||
            alter_info.flags == ALTER_REORGANIZE_PARTITION));
 }
-
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index acdbab62076..cefa092a874 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -419,7 +419,8 @@ public:
 
   /*
     result of this query can't be cached, bit field, can be :
-      UNCACHEABLE_DEPENDENT
+      UNCACHEABLE_DEPENDENT_GENERATED
+      UNCACHEABLE_DEPENDENT_INJECTED
       UNCACHEABLE_RAND
       UNCACHEABLE_SIDEEFFECT
       UNCACHEABLE_EXPLAIN
@@ -428,7 +429,6 @@ public:
   uint8 uncacheable;
   enum sub_select_type linkage;
   bool no_table_names_allowed; /* used for global order by */
-  bool no_error; /* suppress error message (convert it to warnings) */
 
   static void *operator new(size_t size) throw ()
   {
@@ -444,10 +444,12 @@ public:
   virtual void init_query();
   virtual void init_select();
   void include_down(st_select_lex_node *upper);
+  void add_slave(st_select_lex_node *slave_arg);
   void include_neighbour(st_select_lex_node *before);
   void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref);
   void include_global(st_select_lex_node **plink);
   void exclude();
+  void exclude_from_tree();
 
   virtual st_select_lex_unit* master_unit()= 0;
   virtual st_select_lex* outer_select()= 0;
@@ -471,6 +473,11 @@ public:
   friend bool mysql_new_select(struct st_lex *lex, bool move_down);
   friend bool mysql_make_view(THD *thd, File_parser *parser,
                               TABLE_LIST *table, uint flags);
+  friend bool mysql_derived_prepare(THD *thd, st_lex *lex,
+                                  TABLE_LIST *orig_table_list);
+  friend bool mysql_derived_merge(THD *thd, st_lex *lex,
+                                  TABLE_LIST *orig_table_list);
+  friend bool TABLE_LIST::init_derived(THD *thd, bool init_view);
 private:
   void fast_exclude();
 };
@@ -489,13 +496,12 @@ class st_select_lex_unit: public st_select_lex_node {
 protected:
   TABLE_LIST result_table_list;
   select_union *union_result;
-  TABLE *table; /* temporary table using for appending UNION results */
-
-  select_result *result;
   ulonglong found_rows_for_union;
   bool saved_error;
 
 public:
+  TABLE *table; /* temporary table using for appending UNION results */
+  select_result *result;
   bool  prepared, // prepare phase already performed for UNION (unit)
     optimized, // optimize phase already performed for UNION (unit)
     executed, // already executed
@@ -522,6 +528,11 @@ public:
   ha_rows select_limit_cnt, offset_limit_cnt;
   /* not NULL if unit used in subselect, point to subselect item */
   Item_subselect *item;
+  /*
+    TABLE_LIST representing this union in the embedding select. Used for
+    derived tables/views handling.
+  */
+  TABLE_LIST *derived;
   /* thread handler */
   THD *thd;
   /*
@@ -558,6 +569,7 @@ public:
 
   /* UNION methods */
   bool prepare(THD *thd, select_result *result, ulong additional_options);
+  bool optimize();
   bool exec();
   bool cleanup();
   inline void unclean() { cleaned= 0; }
@@ -568,11 +580,14 @@ public:
   bool add_fake_select_lex(THD *thd);
   void init_prepare_fake_select_lex(THD *thd);
   inline bool is_prepared() { return prepared; }
-  bool change_result(select_subselect *result, select_subselect *old_result);
+  bool change_result(select_result_interceptor *result,
+                     select_result_interceptor *old_result);
   void set_limit(st_select_lex *values);
   void set_thd(THD *thd_arg) { thd= thd_arg; }
   inline bool is_union (); 
 
+  void set_unique_exclude();
+
   friend void lex_start(THD *thd);
   friend int subselect_union_engine::exec();
 
@@ -613,12 +628,30 @@ public:
   List<TABLE_LIST> top_join_list; /* join list of the top level          */
   List<TABLE_LIST> *join_list;    /* list for the currently parsed join  */
   TABLE_LIST *embedding;          /* table embedding to the above list   */
+  List<TABLE_LIST> sj_nests;      /* Semi-join nests within this join */
   /*
     Beginning of the list of leaves in a FROM clause, where the leaves
     inlcude all base tables including view tables. The tables are connected
     by TABLE_LIST::next_leaf, so leaf_tables points to the left-most leaf.
+
+    List of all base tables local to a subquery including all view
+    tables. Unlike 'next_local', this in this list views are *not*
+    leaves. Created in setup_tables() -> make_leaves_list().
+  */
+  /* 
+    Subqueries that will need to be converted to semi-join nests, including
+    those converted to jtbm nests. The list is emptied when conversion is done.
   */
-  TABLE_LIST *leaf_tables;
+  List<Item_in_subselect> sj_subselects;
+
+  List<TABLE_LIST> leaf_tables;
+  List<TABLE_LIST> leaf_tables_exec;
+  List<TABLE_LIST> leaf_tables_prep;
+  bool is_prep_leaf_list_saved;
+  uint insert_tables;
+  st_select_lex *merged_into; /* select which this select is merged into */
+                              /* (not 0 only for views/derived tables)   */
+
   const char *type;               /* type of select for EXPLAIN          */
 
   SQL_I_List<ORDER> order_list;   /* ORDER clause */
@@ -652,6 +685,13 @@ public:
   ulong table_join_options;
   uint in_sum_expr;
   uint select_number; /* number of select (used for EXPLAIN) */
+
+  /*
+    nest_levels are local to the query or VIEW,
+    and that view merge procedure does not re-calculate them.
+    So we also have to remember unit against which we count levels.
+  */
+  SELECT_LEX_UNIT *nest_level_base;
   int nest_level;     /* nesting level of select */
   Item_sum *inner_sum_func_list; /* list of sum func in nested selects */ 
   uint with_wild; /* item list contain '*' */
@@ -668,6 +708,11 @@ public:
   /* explicit LIMIT clause was used */
   bool explicit_limit;
   /*
+    This array is used to note  whether we have any candidates for
+    expression caching in the corresponding clauses
+  */
+  bool expr_cache_may_be_used[PARSING_PLACE_SIZE];
+  /*
     there are subquery in HAVING clause => we can't close tables before
     query processing end even if we use temporary table
   */
@@ -746,7 +791,7 @@ public:
   }
   inline bool is_subquery_function() { return master_unit()->item != 0; }
 
-  void mark_as_dependent(st_select_lex *last, Item *dependency);
+  bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency);
 
   bool set_braces(bool value);
   bool inc_in_sum_expr();
@@ -830,6 +875,38 @@ public:
   }
 
   void clear_index_hints(void) { index_hints= NULL; }
+  bool is_part_of_union() { return master_unit()->is_union(); }
+  /*
+    Optimize all subqueries that have not been flattened into semi-joins.
+    This functionality is a method of SELECT_LEX instead of JOIN because
+    some SQL statements as DELETE do not have a corresponding JOIN object.
+  */
+  bool optimize_unflattened_subqueries();
+  /* Set the EXPLAIN type for this subquery. */
+  void set_explain_type();
+  bool handle_derived(struct st_lex *lex, uint phases);
+  void append_table_to_list(TABLE_LIST *TABLE_LIST::*link, TABLE_LIST *table);
+  bool get_free_table_map(table_map *map, uint *tablenr);
+  void replace_leaf_table(TABLE_LIST *table, List<TABLE_LIST> &tbl_list);
+  void remap_tables(TABLE_LIST *derived, table_map map,
+                    uint tablenr, st_select_lex *parent_lex);
+  bool merge_subquery(THD *thd, TABLE_LIST *derived, st_select_lex *subq_lex,
+                      uint tablenr, table_map map);
+  inline bool is_mergeable()
+  {
+    return (next_select() == 0 && group_list.elements == 0 &&
+            having == 0 && with_sum_func == 0 &&
+            table_list.elements >= 1 && !(options & SELECT_DISTINCT) &&
+            select_limit == 0);
+  }
+  void mark_as_belong_to_derived(TABLE_LIST *derived);
+  void increase_derived_records(ha_rows records);
+  void update_used_tables();
+  void mark_const_derived(bool empty);
+
+  bool save_leaf_tables(THD *thd);
+  bool save_prep_leaf_tables(THD *thd);
+  bool is_merged_child_of(st_select_lex *ancestor);
 
   /*
     For MODE_ONLY_FULL_GROUP_BY we need to maintain two flags:
@@ -1617,8 +1694,6 @@ typedef struct st_lex : public Query_tables_list
 
   CHARSET_INFO *charset;
   bool text_string_is_7bit;
-  /* store original leaf_tables for INSERT SELECT and PS/SP */
-  TABLE_LIST *leaf_tables_insert;
 
   /** SELECT of CREATE VIEW statement */
   LEX_STRING create_view_select;
@@ -1675,6 +1750,9 @@ typedef struct st_lex : public Query_tables_list
   LEX_SERVER_OPTIONS server_options;
   USER_RESOURCES mqh;
   ulong type;
+  /* The following is used by KILL */
+  killed_state kill_signal;
+  killed_type  kill_type;
   /*
     This variable is used in post-parse stage to declare that sum-functions,
     or functions which have sense only if GROUP BY is present, are allowed.
@@ -1735,7 +1813,7 @@ typedef struct st_lex : public Query_tables_list
     DERIVED_SUBQUERY and DERIVED_VIEW).
   */
   uint8 derived_tables;
-  uint8 create_view_algorithm;
+  uint16 create_view_algorithm;
   uint8 create_view_check;
   bool drop_if_exists, drop_temporary, local_file, one_shot_set;
   bool autocommit;
@@ -1745,7 +1823,7 @@ typedef struct st_lex : public Query_tables_list
 
   uint8 context_analysis_only;
   bool safe_to_cache_query;
-  bool subqueries, ignore;
+  bool subqueries, ignore, online;
   st_parsing_options parsing_options;
   Alter_info alter_info;
   /* Prepared statements SQL syntax:*/
@@ -1883,6 +1961,7 @@ typedef struct st_lex : public Query_tables_list
   {
     return (context_analysis_only &
             (CONTEXT_ANALYSIS_ONLY_PREPARE |
+             CONTEXT_ANALYSIS_ONLY_VCOL_EXPR |
              CONTEXT_ANALYSIS_ONLY_VIEW));
   }
 
@@ -1932,6 +2011,8 @@ typedef struct st_lex : public Query_tables_list
     switch (sql_command) {
     case SQLCOM_UPDATE:
     case SQLCOM_UPDATE_MULTI:
+    case SQLCOM_DELETE:
+    case SQLCOM_DELETE_MULTI:
     case SQLCOM_INSERT:
     case SQLCOM_INSERT_SELECT:
     case SQLCOM_REPLACE:
diff --git a/sql/sql_lifo_buffer.h b/sql/sql_lifo_buffer.h
new file mode 100644
index 00000000000..34f9624436d
--- /dev/null
+++ b/sql/sql_lifo_buffer.h
@@ -0,0 +1,342 @@
+/**
+  @defgroup Bi-directional LIFO buffers used by DS-MRR implementation
+  @{
+*/
+
+class Forward_lifo_buffer;
+class Backward_lifo_buffer;
+
+
+/*
+  A base class for in-memory buffer used by DS-MRR implementation. Common
+  properties:
+  - The buffer is last-in-first-out, i.e. elements that are written last are
+    read first.
+  - The buffer contains fixed-size elements. The elements are either atomic
+    byte sequences or pairs of them.
+  - The buffer resides in the memory provided by the user. It is possible to
+     = dynamically (ie. between write operations) add ajacent memory space to
+       the buffer
+     = dynamically remove unused space from the buffer.
+    The intent of this is to allow to have two buffers on adjacent memory
+    space, one is being read from (and so its space shrinks), while the other 
+    is being written to (and so it needs more and more space).
+
+  There are two concrete classes, Forward_lifo_buffer and Backward_lifo_buffer.
+*/
+
+class Lifo_buffer 
+{
+protected:
+  size_t size1;
+  size_t size2;
+
+public:
+  /**
+    write() will put into buffer size1 bytes pointed by write_ptr1. If
+    size2!=0, then they will be accompanied by size2 bytes pointed by
+    write_ptr2.
+  */
+  uchar *write_ptr1;
+  uchar *write_ptr2;
+
+  /**
+    read() will do reading by storing pointers to read data into read_ptr1 or
+    into (read_ptr1, read_ptr2), depending on whether the buffer was set to
+    store single objects or pairs.
+  */
+  uchar *read_ptr1;
+  uchar *read_ptr2;
+
+protected:
+  uchar *start; /**< points to start of buffer space */
+  uchar *end;   /**< points to just beyond the end of buffer space */
+public:
+
+  enum enum_direction {
+    BACKWARD=-1, /**< buffer is filled/read from bigger to smaller memory addresses */
+    FORWARD=1  /**< buffer is filled/read from smaller to bigger memory addresses */
+  };
+
+  virtual enum_direction type() = 0;
+
+  /* Buffer space control functions */
+
+  /** Let the buffer store data in the given space. */
+  void set_buffer_space(uchar *start_arg, uchar *end_arg) 
+  {
+    start= start_arg;
+    end= end_arg;
+    TRASH(start, end - start);
+    reset();
+  }
+  
+  /** 
+    Specify where write() should get the source data from, as well as source
+    data size.
+  */
+  void setup_writing(size_t len1, size_t len2)
+  {
+    size1= len1;
+    size2= len2;
+  }
+
+  /** 
+    Specify where read() should store pointers to read data, as well as read
+    data size. The sizes must match those passed to setup_writing().
+  */
+  void setup_reading(size_t len1, size_t len2)
+  {
+    DBUG_ASSERT(len1 == size1);
+    DBUG_ASSERT(len2 == size2);
+  }
+  
+  bool can_write()
+  {
+    return have_space_for(size1 + size2);
+  }
+  virtual void write() = 0;
+
+  bool is_empty() { return used_size() == 0; }
+  virtual bool read() = 0;
+  
+  void sort(qsort2_cmp cmp_func, void *cmp_func_arg)
+  {
+    size_t elem_size= size1 + size2;
+    size_t n_elements= used_size() / elem_size;
+    my_qsort2(used_area(), n_elements, elem_size, cmp_func, cmp_func_arg);
+  }
+
+  virtual void reset() = 0;
+  virtual uchar *end_of_space() = 0;
+protected:
+  virtual size_t used_size() = 0;
+  
+  /* To be used only by iterator class: */
+  virtual uchar *get_pos()= 0;
+  virtual bool read(uchar **position, uchar **ptr1, uchar **ptr2)= 0;
+  friend class Lifo_buffer_iterator;
+public:
+  virtual bool have_space_for(size_t bytes) = 0;
+
+  virtual void remove_unused_space(uchar **unused_start, uchar **unused_end)=0;
+  virtual uchar *used_area() = 0; 
+  virtual ~Lifo_buffer() {};
+};
+
+
+/**
+  Forward LIFO buffer
+
+  The buffer that is being written to from start to end and read in the
+  reverse.  'pos' points to just beyond the end of used space.
+
+  It is possible to grow/shink the buffer at the end bound
+
+     used space      unused space  
+   *==============*-----------------*
+   ^              ^                 ^
+   |              |                 +--- end
+   |              +---- pos              
+   +--- start           
+*/
+
+class Forward_lifo_buffer: public Lifo_buffer
+{
+  uchar *pos;
+public:
+  enum_direction type() { return FORWARD; }
+  size_t used_size()
+  {
+    return (size_t)(pos - start);
+  }
+  void reset()
+  {
+    pos= start;
+  }
+  uchar *end_of_space() { return pos; }
+  bool have_space_for(size_t bytes)
+  {
+    return (pos + bytes < end);
+  }
+
+  void write()
+  {
+    write_bytes(write_ptr1, size1);
+    if (size2)
+      write_bytes(write_ptr2, size2);
+  }
+  void write_bytes(const uchar *data, size_t bytes)
+  {
+    DBUG_ASSERT(have_space_for(bytes));
+    memcpy(pos, data, bytes);
+    pos += bytes;
+  }
+  bool have_data(uchar *position, size_t bytes)
+  {
+    return ((position - start) >= (ptrdiff_t)bytes);
+  }
+  uchar *read_bytes(uchar **position, size_t bytes)
+  {
+    DBUG_ASSERT(have_data(*position, bytes));
+    *position= (*position) - bytes;
+    return *position;
+  }
+  bool read() { return read(&pos, &read_ptr1, &read_ptr2); }
+  bool read(uchar **position, uchar **ptr1, uchar **ptr2)
+  {
+    if (!have_data(*position, size1 + size2))
+      return TRUE;
+    if (size2)
+      *ptr2= read_bytes(position, size2);
+    *ptr1= read_bytes(position, size1);
+    return FALSE;
+  }
+  void remove_unused_space(uchar **unused_start, uchar **unused_end)
+  {
+    DBUG_ASSERT(0); /* Don't need this yet */
+  }
+  /**
+    Add more space to the buffer. The caller is responsible that the space
+    being added is adjacent to the end of the buffer.
+
+    @param unused_start Start of space
+    @param unused_end   End of space
+  */
+  void grow(uchar *unused_start, uchar *unused_end)
+  {
+    DBUG_ASSERT(unused_end >= unused_start);
+    DBUG_ASSERT(end == unused_start);
+    TRASH(unused_start, unused_end - unused_start);
+    end= unused_end;
+  }
+  /* Return pointer to start of the memory area that is occupied by the data */
+  uchar *used_area() { return start; }
+  friend class Lifo_buffer_iterator;
+  uchar *get_pos() { return pos; }
+};
+
+
+
+/**
+  Backward LIFO buffer
+
+  The buffer that is being written to from start to end and read in the
+  reverse.  'pos' points to the start of used space.
+
+  It is possible to grow/shink the buffer at the start.
+
+     unused space      used space  
+   *--------------*=================*
+   ^              ^                 ^
+   |              |                 +--- end
+   |              +---- pos              
+   +--- start           
+*/
+class Backward_lifo_buffer: public Lifo_buffer
+{
+  uchar *pos;
+public:
+  enum_direction type() { return BACKWARD; }
+ 
+  size_t used_size()
+  {
+    return (size_t)(end - pos);
+  }
+  void reset()
+  {
+    pos= end;
+  }
+  uchar *end_of_space() { return end; }
+  bool have_space_for(size_t bytes)
+  {
+    return (pos - bytes >= start);
+  }
+  void write()
+  {
+    if (write_ptr2)
+      write_bytes(write_ptr2, size2);
+    write_bytes(write_ptr1, size1);
+  }
+  void write_bytes(const uchar *data, size_t bytes)
+  {
+    DBUG_ASSERT(have_space_for(bytes));
+    pos -= bytes;
+    memcpy(pos, data, bytes);
+  }
+  bool read()
+  {
+    return read(&pos, &read_ptr1, &read_ptr2);
+  }
+  bool read(uchar **position, uchar **ptr1, uchar **ptr2)
+  {
+    if (!have_data(*position, size1 + size2))
+      return TRUE;
+    *ptr1= read_bytes(position, size1);
+    if (size2)
+      *ptr2= read_bytes(position, size2);
+    return FALSE;
+  }
+  bool have_data(uchar *position, size_t bytes)
+  {
+    return ((end - position) >= (ptrdiff_t)bytes);
+  }
+  uchar *read_bytes(uchar **position, size_t bytes)
+  {
+    DBUG_ASSERT(have_data(*position, bytes));
+    uchar *ret= *position;
+    *position= *position + bytes;
+    return ret;
+  }
+  /**
+    Stop using/return the unused part of the space
+    @param unused_start  OUT Start of the unused space
+    @param unused_end    OUT End of the unused space
+  */
+  void remove_unused_space(uchar **unused_start, uchar **unused_end)
+  {
+    *unused_start= start;
+    *unused_end= pos;
+    start= pos;
+  }
+  void grow(uchar *unused_start, uchar *unused_end)
+  {
+    DBUG_ASSERT(0); /* Not used for backward buffers */
+  }
+  /* Return pointer to start of the memory area that is occupied by the data */
+  uchar *used_area() { return pos; }
+  friend class Lifo_buffer_iterator;
+  uchar *get_pos() { return pos; }
+};
+
+
+/** Iterator to walk over contents of the buffer without reading from it */
+class Lifo_buffer_iterator
+{
+  uchar *pos;
+  Lifo_buffer *buf;
+  
+public:
+  /* The data is read to here */
+  uchar *read_ptr1;
+  uchar *read_ptr2;
+
+  void init(Lifo_buffer *buf_arg)
+  {
+    buf= buf_arg;
+    pos= buf->get_pos();
+  }
+  /*
+    Read the next value. The calling convention is the same as buf->read()
+    has.
+
+    @retval FALSE - ok
+    @retval TRUE  - EOF, reached the end of the buffer
+  */
+  bool read() 
+  {
+    return buf->read(&pos, &read_ptr1, &read_ptr2);
+  }
+};
+
+
diff --git a/sql/sql_list.h b/sql/sql_list.h
index 27882d057b8..adedd9a3a4d 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -154,6 +154,7 @@ struct list_node :public Sql_alloc
   }
 };
 
+typedef bool List_eq(void *a, void *b);
 
 extern MYSQL_PLUGIN_IMPORT list_node end_of_list;
 
@@ -237,6 +238,11 @@ public:
   {
     if (!list->is_empty())
     {
+      if (is_empty())
+      {
+        *this= *list;
+        return;
+      }
       *last= list->first;
       last= list->last;
       elements+= list->elements;
@@ -251,17 +257,24 @@ public:
       last= &first;
     return tmp->info;
   }
-  inline void disjoin(base_list *list)
+  /*
+    Remove from this list elements that are contained in the passed list. 
+    We assume that the passed list is a tail of this list (that is, the whole 
+    list_node* elements are shared).
+  */
+  inline void disjoin(const base_list *list)
   {
     list_node **prev= &first;
     list_node *node= first;
     list_node *list_first= list->first;
     elements=0;
-    while (node && node != list_first)
+    while (node != &end_of_list && node != list_first)
     {
       prev= &node->next;
       node= node->next;
       elements++;
+      if (node == &end_of_list)
+        return;
     }
     *prev= *last;
     last= prev;
@@ -290,10 +303,40 @@ public:
   inline void **head_ref() { return first != &end_of_list ? &first->info : 0; }
   inline bool is_empty() { return first == &end_of_list ; }
   inline list_node *last_ref() { return &end_of_list; }
+  inline bool add_unique(void *info, List_eq *eq)
+  {
+    list_node *node= first;
+    for (;
+         node != &end_of_list && (!(*eq)(node->info, info));
+         node= node->next) ;
+    if (node == &end_of_list)
+      return push_back(info);
+    return 1;
+  }
   friend class base_list_iterator;
   friend class error_list;
   friend class error_list_iterator;
 
+  /*
+    Debugging help: return N-th element in the list, or NULL if the list has
+    less than N elements.
+  */
+  inline void *nth_element(int n)
+  {
+    list_node *node= first;
+    void *data= NULL;
+    for (int i=0; i <= n; i++)
+    {
+      if (node == &end_of_list)
+      {
+        data= NULL;
+        break;
+      }
+      data= node->info;
+      node= node->next;
+    }
+    return data;
+  }
 #ifdef LIST_EXTRA_DEBUG
   /*
     Check list invariants and print results into trace. Invariants are:
@@ -460,6 +503,8 @@ public:
   inline void concat(List<T> *list) { base_list::concat(list); }
   inline void disjoin(List<T> *list) { base_list::disjoin(list); }
   inline void prepand(List<T> *list) { base_list::prepand(list); }
+  inline bool add_unique(T *a, bool (*eq)(T *a, T *b))
+  { return base_list::add_unique(a, (List_eq *)eq); }
   void delete_elements(void)
   {
     list_node *element,*next;
@@ -470,6 +515,7 @@ public:
     }
     empty();
   }
+  inline T *nth_element(int n) { return (T*)base_list::nth_element(n); }
 };
 
 
@@ -512,36 +558,40 @@ public:
 
 
 /*
-  Exchange sort algorithm for List<T>.
+  Bubble sort algorithm for List<T>.
+  This sort function is supposed to be used only for very short list.
+  Currently it is used for the lists of Item_equal objects and
+  for some lists in the table elimination algorithms. In both
+  cases the sorted lists are very short.
 */
+
 template <class T> 
-inline void exchange_sort(List<T> *list_to_sort,
-                          int (*sort_func)(T *a, T *b, void *arg), void *arg)
+inline void bubble_sort(List<T> *list_to_sort,
+                        int (*sort_func)(T *a, T *b, void *arg), void *arg)
 {
   bool swap;
+  T **ref1= 0;
+  T **ref2= 0;
   List_iterator<T> it(*list_to_sort);
   do
   {
+    T **last_ref= ref1;
     T *item1= it++;
-    T **ref1= it.ref();
+    ref1= it.ref();
     T *item2;
 
     swap= FALSE;
-    while ((item2= it++))
+    while ((item2= it++) && (ref2= it.ref()) != last_ref)
     {
-      T **ref2= it.ref();
       if (sort_func(item1, item2, arg) < 0)
       {
-        T *item= *ref1;
-        *ref1= *ref2;
-        *ref2= item;
+        *ref1= item2;
+        *ref2= item1;
         swap= TRUE;
       }
       else
-      {
         item1= item2;
-        ref1= ref2;
-      }
+      ref1= ref2;
     }
     it.rewind();
   } while (swap);
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index e4ba53ac53c..7fcb52a7b9c 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -65,6 +65,8 @@ public:
     ::end_io_cache(&cache);
     need_end_io_cache = 0;
   }
+  my_off_t file_length() { return cache.end_of_file; }
+  my_off_t position()    { return my_b_tell(&cache); }
 
   /*
     Either this method, or we need to make cache public
@@ -130,7 +132,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
   bool is_fifo=0;
 #ifndef EMBEDDED_LIBRARY
   LOAD_FILE_INFO lf_info;
-  THD::killed_state killed_status;
+  killed_state killed_status;
 #endif
   char *db = table_list->db;			// This is never null
   /*
@@ -174,15 +176,18 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
 
   if (open_and_lock_tables(thd, table_list))
     DBUG_RETURN(TRUE);
+  if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+      mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE))
+    DBUG_RETURN(TRUE);
   if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
                                     &thd->lex->select_lex.top_join_list,
                                     table_list,
-                                    &thd->lex->select_lex.leaf_tables, FALSE,
+                                    thd->lex->select_lex.leaf_tables, FALSE,
                                     INSERT_ACL | UPDATE_ACL,
-                                    INSERT_ACL | UPDATE_ACL))
+                                    INSERT_ACL | UPDATE_ACL, FALSE))
      DBUG_RETURN(-1);
   if (!table_list->table ||               // do not suport join view
-      !table_list->updatable ||           // and derived tables
+      !table_list->single_table_updatable() || // and derived tables
       check_key_in_view(thd, table_list))
   {
     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "LOAD");
@@ -415,9 +420,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
     }
   }
 
+  thd_proc_info(thd, "reading file");
   if (!(error=test(read_info.error)))
   {
-
     table->next_number_field=table->found_next_number_field;
     if (ignore ||
 	handle_duplicates == DUP_REPLACE)
@@ -435,6 +440,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
                              (MODE_STRICT_TRANS_TABLES |
                               MODE_STRICT_ALL_TABLES)));
 
+    thd_progress_init(thd, 2);
     if (!field_term->length() && !enclosed->length())
       error= read_fixed_length(thd, info, table_list, fields_vars,
                                set_fields, set_values, read_info,
@@ -443,6 +449,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
       error= read_sep_field(thd, info, table_list, fields_vars,
                             set_fields, set_values, read_info,
 			    *enclosed, skip_lines, ignore);
+    
+    thd_proc_info(thd, "End bulk insert");
+    thd_progress_next_stage(thd);
     if (!thd->prelocked_mode && table->file->ha_end_bulk_insert() && !error)
     {
       table->file->print_error(my_errno, MYF(0));
@@ -464,11 +473,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
   DBUG_EXECUTE_IF("simulate_kill_bug27571",
                   {
                     error=1;
-                    thd->killed= THD::KILL_QUERY;
+                    thd->killed= KILL_QUERY;
                   };);
 
 #ifndef EMBEDDED_LIBRARY
-  killed_status= (error == 0) ? THD::NOT_KILLED : thd->killed;
+  killed_status= (error == 0) ? NOT_KILLED : thd->killed;
 #endif
 
   /*
@@ -512,7 +521,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
 	/* If the file was not empty, wrote_create_file is true */
 	if (lf_info.wrote_create_file)
 	{
-          int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+          int errcode= query_error_code(thd, killed_status == NOT_KILLED);
           
           /* since there is already an error, the possible error of
              writing binary log will be ignored */
@@ -563,7 +572,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
       read_info.end_io_cache();
       if (lf_info.wrote_create_file)
       {
-        int errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+        int errcode= query_error_code(thd, killed_status == NOT_KILLED);
         error= write_execute_load_query_log_event(thd, ex,
                                                   table_list->db, table_list->table_name,
                                                   handle_duplicates, ignore,
@@ -735,9 +744,16 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
   List_iterator_fast<Item> it(fields_vars);
   Item_field *sql_field;
   TABLE *table= table_list->table;
-  bool err;
+  bool err, progress_reports;
+  ulonglong counter, time_to_report_progress;
   DBUG_ENTER("read_fixed_length");
 
+  counter= 0;
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+  progress_reports= 1;
+  if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
+    progress_reports= 0;
+
   while (!read_info.read_fixed_length())
   {
     if (thd->killed)
@@ -745,6 +761,16 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       thd->send_kill_message();
       DBUG_RETURN(1);
     }
+    if (progress_reports)
+    {
+      thd->progress.counter= read_info.position();
+      if (++counter >= time_to_report_progress)
+      {
+        time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+        thd_progress_report(thd, thd->progress.counter,
+                            thd->progress.max_counter);
+      }
+    }
     if (skip_lines)
     {
       /*
@@ -863,11 +889,18 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
   Item *item;
   TABLE *table= table_list->table;
   uint enclosed_length;
-  bool err;
+  bool err, progress_reports;
+  ulonglong counter, time_to_report_progress;
   DBUG_ENTER("read_sep_field");
 
   enclosed_length=enclosed.length();
 
+  counter= 0;
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+  progress_reports= 1;
+  if ((thd->progress.max_counter= read_info.file_length()) == ~(my_off_t) 0)
+    progress_reports= 0;
+
   for (;;it.rewind())
   {
     if (thd->killed)
@@ -876,6 +909,16 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       DBUG_RETURN(1);
     }
 
+    if (progress_reports)
+    {
+      thd->progress.counter= read_info.position();
+      if (++counter >= time_to_report_progress)
+      {
+        time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+        thd_progress_report(thd, thd->progress.counter,
+                            thd->progress.max_counter);
+      }
+    }
     restore_record(table, s->default_values);
 
     while ((item= it++))
@@ -985,7 +1028,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
           if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
               ((Field_timestamp*) field)->set_time();
           /*
-            QQ: We probably should not throw warning for each field.
+            TODO: We probably should not throw warning for each field.
             But how about intention to always have the same number
             of warnings in THD::cuted_fields (and get rid of cuted_fields
             in the end ?)
diff --git a/sql/sql_olap.cc b/sql/sql_olap.cc
index ddc287c1b6d..a7a847cf286 100644
--- a/sql/sql_olap.cc
+++ b/sql/sql_olap.cc
@@ -155,8 +155,7 @@ int handle_olaps(LEX *lex, SELECT_LEX *select_lex)
 
 
   if (setup_tables(lex->thd, &select_lex->context, &select_lex->top_join_list,
-                   select_lex->table_list.first
-                   &select_lex->leaf_tables, FALSE) ||
+                   FALSE, FALSE) ||
       setup_fields(lex->thd, 0, select_lex->item_list, MARK_COLUMNS_READ,
                    &all_fields,1) ||
       setup_fields(lex->thd, 0, item_list_copy, MARK_COLUMNS_READ,
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 1510ea7bf10..e84750369f9 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -31,6 +31,7 @@
 #include "events.h"
 #include "sql_trigger.h"
 #include "debug_sync.h"
+#include "sql_handler.h"
 #include <rpl_mi.h>
 
 #ifdef WITH_ARIA_STORAGE_ENGINE
@@ -269,17 +270,17 @@ void init_update_queries(void)
   bzero((uchar*) &sql_command_flags, sizeof(sql_command_flags));
 
   sql_command_flags[SQLCOM_CREATE_TABLE]=   CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE;
-  sql_command_flags[SQLCOM_CREATE_INDEX]=   CF_CHANGES_DATA;
-  sql_command_flags[SQLCOM_ALTER_TABLE]=    CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND;
+  sql_command_flags[SQLCOM_CREATE_INDEX]=   CF_CHANGES_DATA | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_ALTER_TABLE]=    CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND | CF_REPORT_PROGRESS;
   sql_command_flags[SQLCOM_TRUNCATE]=       CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND;
   sql_command_flags[SQLCOM_DROP_TABLE]=     CF_CHANGES_DATA;
-  sql_command_flags[SQLCOM_LOAD]=           CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE;
+  sql_command_flags[SQLCOM_LOAD]=           CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE | CF_REPORT_PROGRESS;
   sql_command_flags[SQLCOM_CREATE_DB]=      CF_CHANGES_DATA;
   sql_command_flags[SQLCOM_DROP_DB]=        CF_CHANGES_DATA;
   sql_command_flags[SQLCOM_RENAME_TABLE]=   CF_CHANGES_DATA;
   sql_command_flags[SQLCOM_BACKUP_TABLE]=   CF_CHANGES_DATA;
   sql_command_flags[SQLCOM_RESTORE_TABLE]=  CF_CHANGES_DATA;
-  sql_command_flags[SQLCOM_DROP_INDEX]=     CF_CHANGES_DATA;
+  sql_command_flags[SQLCOM_DROP_INDEX]=     CF_CHANGES_DATA | CF_REPORT_PROGRESS;
   sql_command_flags[SQLCOM_CREATE_VIEW]=    CF_CHANGES_DATA | CF_REEXECUTION_FRAGILE;
   sql_command_flags[SQLCOM_DROP_VIEW]=      CF_CHANGES_DATA;
   sql_command_flags[SQLCOM_CREATE_EVENT]=   CF_CHANGES_DATA;
@@ -372,9 +373,11 @@ void init_update_queries(void)
     The following admin table operations are allowed
     on log tables.
   */
-  sql_command_flags[SQLCOM_REPAIR]=           CF_WRITE_LOGS_COMMAND;
-  sql_command_flags[SQLCOM_OPTIMIZE]=         CF_WRITE_LOGS_COMMAND;
-  sql_command_flags[SQLCOM_ANALYZE]=          CF_WRITE_LOGS_COMMAND;
+  sql_command_flags[SQLCOM_REPAIR]=           CF_WRITE_LOGS_COMMAND | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_OPTIMIZE]=         CF_WRITE_LOGS_COMMAND | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_ANALYZE]=          CF_WRITE_LOGS_COMMAND | CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_CHECK]=            CF_REPORT_PROGRESS;
+  sql_command_flags[SQLCOM_CHECKSUM]=         CF_REPORT_PROGRESS;
 }
 
 
@@ -506,10 +509,10 @@ static void handle_bootstrap_impl(THD *thd)
 
     query= (char *) thd->memdup_w_gap(buff, length + 1,
                                       thd->db_length + 1 +
+                                      QUERY_CACHE_DB_LENGTH_SIZE +
                                       QUERY_CACHE_FLAGS_SIZE);
-    size_t db_len= 0;
-    memcpy(query + length + 1, (char *) &db_len, sizeof(size_t));
     thd->set_query(query, length);
+    int2store(query + length + 1, 0);           // No db in bootstrap
     DBUG_PRINT("query",("%-.4096s", thd->query()));
 #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
     thd->profiling.start_new_query();
@@ -797,7 +800,7 @@ int end_trans(THD *thd, enum enum_mysql_completiontype completion)
     my_error(thd->killed_errno(), MYF(0));
   else if ((res == 0) && do_release)
   {
-    thd->killed= THD::KILL_CONNECTION;
+    thd->killed= KILL_CONNECTION;
     if (global_system_variables.log_warnings > 3)
     {
       Security_context *sctx= &thd->main_security_ctx;
@@ -1024,6 +1027,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   DBUG_ENTER("dispatch_command");
   DBUG_PRINT("info", ("command: %d", command));
 
+  DBUG_EXECUTE_IF("crash_dispatch_command_before",
+                  { DBUG_PRINT("crash_dispatch_command_before", ("now"));
+                    DBUG_ABORT(); });
+
   thd->command=command;
   /*
     Commands which always take a long time are logged into
@@ -1033,18 +1040,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   thd->query_plan_flags= QPLAN_INIT;
   thd->lex->sql_command= SQLCOM_END; /* to avoid confusing VIEW detectors */
   thd->set_time();
-  if (!thd->is_valid_time())
-  {
-    /*
-     If the time has got past 2038 we need to shut this server down
-     We do this by making sure every command is a shutdown and we 
-     have enough privileges to shut the server down
-
-     TODO: remove this when we have full 64 bit my_time_t support
-    */
-    thd->security_ctx->master_access|= SHUTDOWN_ACL;
-    command= COM_SHUTDOWN;
-  }
 
   VOID(pthread_mutex_lock(&LOCK_thread_count));
   thd->query_id= global_query_id;
@@ -1067,7 +1062,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   }
 
   thread_running++;
-  /* TODO: set thd->lex->sql_command to SQLCOM_END here */
   VOID(pthread_mutex_unlock(&LOCK_thread_count));
 
   /**
@@ -1223,6 +1217,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 
     general_log_write(thd, command, thd->query(), thd->query_length());
     DBUG_PRINT("query",("%-.4096s",thd->query()));
+    
 #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
     thd->profiling.set_query_source(thd->query(), thd->query_length());
 #endif
@@ -1267,15 +1262,15 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 #endif
 
       thd->set_query(beginning_of_next_stmt, length);
-      VOID(pthread_mutex_lock(&LOCK_thread_count));
       /*
         Count each statement from the client.
       */
       statistic_increment(thd->status_var.questions, &LOCK_status);
+      thd->set_time(); /* Reset the query start time for next query. */
+      VOID(pthread_mutex_lock(&LOCK_thread_count));
       thd->query_id= next_query_id();
-      thd->set_time(); /* Reset the query start time. */
-      /* TODO: set thd->lex->sql_command to SQLCOM_END here */
       VOID(pthread_mutex_unlock(&LOCK_thread_count));
+      /* TODO: set thd->lex->sql_command to SQLCOM_END here */
       mysql_parse(thd, beginning_of_next_stmt, length, &end_of_stmt);
     }
 
@@ -1375,54 +1370,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     error=TRUE;					// End server
     break;
 
-#ifdef REMOVED
-  case COM_CREATE_DB:				// QQ: To be removed
-    {
-      LEX_STRING db, alias;
-      HA_CREATE_INFO create_info;
-
-      status_var_increment(thd->status_var.com_stat[SQLCOM_CREATE_DB]);
-      if (thd->make_lex_string(&db, packet, packet_length, FALSE) ||
-          thd->make_lex_string(&alias, db.str, db.length, FALSE) ||
-          check_db_name(&db))
-      {
-	my_error(ER_WRONG_DB_NAME, MYF(0), db.str ? db.str : "NULL");
-	break;
-      }
-      if (check_access(thd, CREATE_ACL, db.str , 0, 1, 0,
-                       is_schema_db(db.str, db.length)))
-	break;
-      general_log_print(thd, command, "%.*s", db.length, db.str);
-      bzero(&create_info, sizeof(create_info));
-      mysql_create_db(thd, (lower_case_table_names == 2 ? alias.str : db.str),
-                      &create_info, 0);
-      break;
-    }
-  case COM_DROP_DB:				// QQ: To be removed
-    {
-      status_var_increment(thd->status_var.com_stat[SQLCOM_DROP_DB]);
-      LEX_STRING db;
-
-      if (thd->make_lex_string(&db, packet, packet_length, FALSE) ||
-          check_db_name(&db))
-      {
-	my_error(ER_WRONG_DB_NAME, MYF(0), db.str ? db.str : "NULL");
-	break;
-      }
-      if (check_access(thd, DROP_ACL, db.str, 0, 1, 0,
-                            is_schema_db(db.str, db.length)))
-	break;
-      if (thd->locked_tables || thd->active_transaction())
-      {
-	my_message(ER_LOCK_OR_ACTIVE_TRANSACTION,
-                   ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
-	break;
-      }
-      general_log_write(thd, command, "%.*s", db.length, db.str);
-      mysql_rm_db(thd, db.str, 0, 0);
-      break;
-    }
-#endif
 #ifndef EMBEDDED_LIBRARY
   case COM_BINLOG_DUMP:
     {
@@ -1507,10 +1454,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       packet[0].
     */
     enum mysql_enum_shutdown_level level;
-    if (!thd->is_valid_time())
-      level= SHUTDOWN_DEFAULT;
-    else
-      level= (enum mysql_enum_shutdown_level) (uchar) packet[0];
+    level= (enum mysql_enum_shutdown_level) (uchar) packet[0];
     if (level == SHUTDOWN_DEFAULT)
       level= SHUTDOWN_WAIT_ALL_BUFFERS; // soon default will be configurable
     else if (level != SHUTDOWN_WAIT_ALL_BUFFERS)
@@ -1602,7 +1546,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   {
     status_var_increment(thd->status_var.com_stat[SQLCOM_KILL]);
     ulong id=(ulong) uint4korr(packet);
-    sql_kill(thd,id,false);
+    sql_kill(thd,id, KILL_CONNECTION_HARD);
     break;
   }
   case COM_SET_OPTION:
@@ -1644,15 +1588,18 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   }
 
   /* report error issued during command execution */
-  if (thd->killed_errno())
+  if (thd->killed)
   {
-    if (! thd->main_da.is_set())
-      thd->send_kill_message();
-  }
-  if (thd->killed == THD::KILL_QUERY || thd->killed == THD::KILL_BAD_DATA)
-  {
-    thd->killed= THD::NOT_KILLED;
-    thd->mysys_var->abort= 0;
+    if  (thd->killed_errno())
+    {
+      if (! thd->main_da.is_set())
+        thd->send_kill_message();
+    }
+    if (thd->killed < KILL_CONNECTION)
+    {
+      thd->killed= NOT_KILLED;
+      thd->mysys_var->abort= 0;
+    }
   }
 
   /* If commit fails, we should be able to reset the OK status. */
@@ -1936,7 +1883,8 @@ bool alloc_query(THD *thd, const char *packet, uint packet_length)
   */
   if (! (query= (char*) thd->memdup_w_gap(packet,
                                           packet_length,
-                                          1 + sizeof(size_t) + thd->db_length +
+                                          1 + thd->db_length +
+                                          QUERY_CACHE_DB_LENGTH_SIZE +
                                           QUERY_CACHE_FLAGS_SIZE)))
       return TRUE;
   query[packet_length]= '\0';
@@ -1945,8 +1893,7 @@ bool alloc_query(THD *thd, const char *packet, uint packet_length)
     also store this length, in case current database is changed during
     execution.  We might need to reallocate the 'query' buffer
   */
-  char *len_pos = (query + packet_length + 1);
-  memcpy(len_pos, (char *) &thd->db_length, sizeof(size_t));
+  int2store(query + packet_length + 1, thd->db_length);
     
   thd->set_query(query, packet_length);
 
@@ -2274,6 +2221,8 @@ mysql_execute_command(THD *thd)
   } /* endif unlikely slave */
 #endif
   status_var_increment(thd->status_var.com_stat[lex->sql_command]);
+  thd->progress.report_to_client= test(sql_command_flags[lex->sql_command] &
+                                       CF_REPORT_PROGRESS);
 
   DBUG_ASSERT(thd->transaction.stmt.modified_non_trans_table == FALSE);
   
@@ -2389,11 +2338,7 @@ mysql_execute_command(THD *thd)
       goto error;
     }
     it= new Item_func_unix_timestamp(it);
-    /*
-      it is OK only emulate fix_fieds, because we need only
-      value of constant
-    */
-    it->quick_fix_field();
+    it->fix_fields(thd, &it);
     res = purge_master_logs_before_date(thd, (ulong)it->val_int());
     break;
   }
@@ -2903,7 +2848,7 @@ end_with_restore_list:
 
     res= mysql_alter_table(thd, first_table->db, first_table->table_name,
                            &create_info, first_table, &alter_info,
-                           0, (ORDER*) 0, 0);
+                           0, (ORDER*) 0, 0, 0);
     break;
   }
 #ifdef HAVE_REPLICATION
@@ -3020,7 +2965,7 @@ end_with_restore_list:
                              &alter_info,
                              select_lex->order_list.elements,
                              (ORDER *) select_lex->order_list.first,
-                             lex->ignore);
+                             lex->ignore, lex->online);
       break;
     }
   case SQLCOM_RENAME_TABLE:
@@ -3113,6 +3058,7 @@ end_with_restore_list:
     thd->enable_slow_log= opt_log_slow_admin_statements;
     thd->query_plan_flags|= QPLAN_ADMIN;
     res= mysql_analyze_table(thd, first_table, &lex->check_opt);
+
     /* ! we write after unlocking the table */
     if (!res && !lex->no_write_to_binlog)
     {
@@ -3337,6 +3283,10 @@ end_with_restore_list:
 
     if (!(res= open_and_lock_tables(thd, all_tables)))
     {
+      /*
+        Only the INSERT table should be merged. Other will be handled by
+        select.
+      */
       /* Skip first table, which is the table we are inserting in */
       TABLE_LIST *second_table= first_table->next_local;
       select_lex->table_list.first= second_table;
@@ -3650,7 +3600,7 @@ end_with_restore_list:
     {
 #ifdef HAVE_QUERY_CACHE
       if (thd->variables.query_cache_wlock_invalidate)
-	query_cache.invalidate_locked_for_write(first_table);
+	query_cache.invalidate_locked_for_write(thd, first_table);
 #endif /*HAVE_QUERY_CACHE*/
       thd->locked_tables=thd->lock;
       thd->lock=0;
@@ -4130,8 +4080,6 @@ end_with_restore_list:
   }
   case SQLCOM_KILL:
   {
-    Item *it= (Item *)lex->value_list.head();
-
     if (lex->table_or_sp_used())
     {
       my_error(ER_NOT_SUPPORTED_YET, MYF(0), "Usage of subqueries or stored "
@@ -4139,13 +4087,20 @@ end_with_restore_list:
       break;
     }
 
-    if ((!it->fixed && it->fix_fields(lex->thd, &it)) || it->check_cols(1))
+    if (lex->kill_type == KILL_TYPE_ID)
     {
-      my_message(ER_SET_CONSTANTS_ONLY, ER(ER_SET_CONSTANTS_ONLY),
-		 MYF(0));
-      goto error;
+      Item *it= (Item *)lex->value_list.head();
+      if ((!it->fixed && it->fix_fields(lex->thd, &it)) || it->check_cols(1))
+      {
+        my_message(ER_SET_CONSTANTS_ONLY, ER(ER_SET_CONSTANTS_ONLY),
+                   MYF(0));
+        goto error;
+      }
+      sql_kill(thd, (ulong) it->val_int(), lex->kill_signal);
     }
-    sql_kill(thd, (ulong)it->val_int(), lex->type & ONLY_KILL_QUERY);
+    else
+      sql_kill_user(thd, get_current_user(thd, lex->users_list.head()),
+                    lex->kill_signal);
     break;
   }
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -5891,6 +5846,7 @@ void mysql_reset_thd_for_next_command(THD *thd, my_bool calculate_userstat)
   thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
 
   thd->query_start_used= 0;
+  thd->query_start_sec_part_used= 0;
   thd->is_fatal_error= thd->time_zone_used= 0;
   /*
     Clear the status flag that are expected to be cleared at the
@@ -6000,6 +5956,7 @@ mysql_new_select(LEX *lex, bool move_down)
     DBUG_RETURN(1);
   }
   select_lex->nest_level= lex->nest_level;
+  select_lex->nest_level_base= &thd->lex->unit;
   if (move_down)
   {
     SELECT_LEX_UNIT *unit;
@@ -6349,17 +6306,6 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
     DBUG_RETURN(1);
   }
 
-  if (type == MYSQL_TYPE_TIMESTAMP && length)
-  {
-    /* Display widths are no longer supported for TIMSTAMP as of MySQL 4.1.
-       In other words, for declarations such as TIMESTAMP(2), TIMESTAMP(4),
-       and so on, the display width is ignored.
-    */
-    char buf[32];
-    my_snprintf(buf, sizeof(buf), "TIMESTAMP(%s)", length);
-    WARN_DEPRECATED(thd, "6.0", buf, "'TIMESTAMP'");
-  }
-
   if (!(new_field= new Create_field()) ||
       new_field->init(thd, field_name->str, type, length, decimals, type_modifier,
                       default_value, on_update_value, comment, change,
@@ -7137,7 +7083,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables,
 #ifdef HAVE_QUERY_CACHE
   if (options & REFRESH_QUERY_CACHE_FREE)
   {
-    query_cache.pack();				// FLUSH QUERY CACHE
+    query_cache.pack(thd);				// FLUSH QUERY CACHE
     options &= ~REFRESH_QUERY_CACHE;    // Don't flush cache, just free memory
   }
   if (options & (REFRESH_TABLES | REFRESH_QUERY_CACHE))
@@ -7195,6 +7141,8 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables,
         unlock_global_read_lock(thd);
         return 1;
       }
+      if (options & REFRESH_CHECKPOINT)
+        disable_checkpoints(thd);
     }
     else
     {
@@ -7302,12 +7250,13 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables,
     This is written such that we have a short lock on LOCK_thread_count
 */
 
-uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
+uint kill_one_thread(THD *thd, ulong id, killed_state kill_signal)
 {
   THD *tmp;
   uint error=ER_NO_SUCH_THREAD;
   DBUG_ENTER("kill_one_thread");
-  DBUG_PRINT("enter", ("id=%lu only_kill=%d", id, only_kill_query));
+  DBUG_PRINT("enter", ("id: %lu  signal: %u", id, (uint) kill_signal));
+
   VOID(pthread_mutex_lock(&LOCK_thread_count)); // For unlink from list
   I_List_iterator<THD> it(threads);
   while ((tmp=it++))
@@ -7347,7 +7296,7 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
     if ((thd->security_ctx->master_access & SUPER_ACL) ||
         thd->security_ctx->user_matches(tmp->security_ctx))
     {
-      tmp->awake(only_kill_query ? THD::KILL_QUERY : THD::KILL_CONNECTION);
+      tmp->awake(kill_signal);
       error=0;
     }
     else
@@ -7359,6 +7308,86 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
 }
 
 
+/**
+  kill all threads from one user
+
+  @param thd			Thread class
+  @param user_name		User name for threads we should kill
+  @param only_kill_query        Should it kill the query or the connection
+
+  @note
+    This is written such that we have a short lock on LOCK_thread_count
+
+    If we can't kill all threads because of security issues, no threads
+    are killed.
+*/
+
+static uint kill_threads_for_user(THD *thd, LEX_USER *user,
+                                  killed_state kill_signal, ha_rows *rows)
+{
+  THD *tmp;
+  List<THD> threads_to_kill;
+  DBUG_ENTER("kill_threads_for_user");
+
+  *rows= 0;
+
+  if (thd->is_fatal_error)                       // If we run out of memory
+    DBUG_RETURN(ER_OUT_OF_RESOURCES);
+
+  DBUG_PRINT("enter", ("user: %s  signal: %u", user->user.str,
+                       (uint) kill_signal));
+
+  VOID(pthread_mutex_lock(&LOCK_thread_count)); // For unlink from list
+  I_List_iterator<THD> it(threads);
+  while ((tmp=it++))
+  {
+    if (tmp->command == COM_DAEMON)
+      continue;
+    /*
+      Check that hostname (if given) and user name matches.
+
+      host.str[0] == '%' means that host name was not given. See sql_yacc.yy
+    */
+    if (((user->host.str[0] == '%' && !user->host.str[1]) ||
+         !strcmp(tmp->security_ctx->host, user->host.str)) &&
+        !strcmp(tmp->security_ctx->user, user->user.str))
+    {
+      if (!(thd->security_ctx->master_access & SUPER_ACL) &&
+          !thd->security_ctx->user_matches(tmp->security_ctx))
+      {
+        VOID(pthread_mutex_unlock(&LOCK_thread_count));
+        DBUG_RETURN(ER_KILL_DENIED_ERROR);
+      }
+      if (!threads_to_kill.push_back(tmp, tmp->mem_root))
+        pthread_mutex_lock(&tmp->LOCK_thd_data); // Lock from delete
+    }
+  }
+  VOID(pthread_mutex_unlock(&LOCK_thread_count));
+  if (!threads_to_kill.is_empty())
+  {
+    List_iterator_fast<THD> it(threads_to_kill);
+    THD *next_ptr;
+    THD *ptr= it++;
+    do
+    {
+      ptr->awake(kill_signal);
+      /*
+        Careful here: The list nodes are allocated on the memroots of the
+        THDs to be awakened.
+        But those THDs may be terminated and deleted as soon as we release
+        LOCK_thd_data, which will make the list nodes invalid.
+        Since the operation "it++" dereferences the "next" pointer of the
+        previous list node, we need to do this while holding LOCK_thd_data.
+      */
+      next_ptr= it++;
+      pthread_mutex_unlock(&ptr->LOCK_thd_data);
+      (*rows)++;
+    } while ((ptr= next_ptr));
+  }
+  DBUG_RETURN(0);
+}
+
+
 /*
   kills a thread and sends response
 
@@ -7369,16 +7398,33 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
     only_kill_query     Should it kill the query or the connection
 */
 
-void sql_kill(THD *thd, ulong id, bool only_kill_query)
+void sql_kill(THD *thd, ulong id, killed_state state)
 {
   uint error;
-  if (!(error= kill_one_thread(thd, id, only_kill_query)))
+  if (!(error= kill_one_thread(thd, id, state)))
     my_ok(thd);
   else
     my_error(error, MYF(0), id);
 }
 
 
+void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
+{
+  uint error;
+  ha_rows rows;
+  if (!(error= kill_threads_for_user(thd, user, state, &rows)))
+    my_ok(thd, rows);
+  else
+  {
+    /*
+      This is probably ER_OUT_OF_RESOURCES, but in the future we may
+      want to write the name of the user we tried to kill
+    */
+    my_error(error, MYF(0), user->host.str, user->user.str);
+  }
+}
+
+
 /** If pointer is not a null pointer, append filename to it. */
 
 bool append_file_to_dir(THD *thd, const char **filename_ptr,
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 67c59322a55..5b7c2d285e6 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -1648,8 +1648,11 @@ static void plugin_load(MEM_ROOT *tmp_root, int *argc, char **argv)
   if (simple_open_n_lock_tables(new_thd, &tables))
   {
     DBUG_PRINT("error",("Can't open plugin table"));
-    sql_print_error("Can't open the mysql.plugin table. Please "
-                    "run mysql_upgrade to create it.");
+    if (!opt_help)
+      sql_print_error("Can't open the mysql.plugin table. Please "
+                      "run mysql_upgrade to create it.");
+    else
+      sql_print_warning("Could not open mysql.plugin table. Some options may be missing from the help text");
     goto end;
   }
   table= tables.table;
diff --git a/sql/sql_plugin_services.h b/sql/sql_plugin_services.h
index 14a2a16561a..497e2c8d6bc 100644
--- a/sql/sql_plugin_services.h
+++ b/sql/sql_plugin_services.h
@@ -22,6 +22,8 @@ struct st_service_ref {
   void *service;
 };
 
+#ifdef HAVE_DLOPEN
+
 static struct my_snprintf_service_st my_snprintf_handler = {
   my_snprintf,
   my_vsnprintf
@@ -36,9 +38,18 @@ static struct thd_alloc_service_st thd_alloc_handler= {
   thd_make_lex_string
 };
 
+static struct progress_report_service_st progress_report_handler= {
+  thd_progress_init,
+  thd_progress_report,
+  thd_progress_next_stage,
+  thd_progress_end,
+  set_thd_proc_info
+};
+
 static struct st_service_ref list_of_services[] __attribute__((unused)) =
 {
   { "my_snprintf_service", VERSION_my_snprintf, &my_snprintf_handler },
-  { "thd_alloc_service",   VERSION_thd_alloc,   &thd_alloc_handler }
+  { "thd_alloc_service",   VERSION_thd_alloc,   &thd_alloc_handler },
+  { "progress_report_service", VERSION_progress_report, &progress_report_handler }
 };
-
+#endif
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 2c062deacd0..32ce8dc6fc4 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -96,6 +96,7 @@ When one supplies long data for a placeholder:
 #else
 #include <mysql_com.h>
 #endif
+#include "sql_handler.h"
 
 /**
   A result class used to send cursor rows using the binary protocol.
@@ -244,6 +245,8 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns)
   int error;
   THD *thd= stmt->thd;
   DBUG_ENTER("send_prep_stmt");
+  DBUG_PRINT("enter",("stmt->id: %lu  columns: %d  param_count: %d",
+                      stmt->id, columns, stmt->param_count));
 
   buff[0]= 0;                                   /* OK packet indicator */
   int4store(buff+1, stmt->id);
@@ -488,8 +491,7 @@ static void set_param_time(Item_param *param, uchar **pos, ulong len)
   }
   else
     set_zero_time(&tm, MYSQL_TIMESTAMP_TIME);
-  param->set_time(&tm, MYSQL_TIMESTAMP_TIME,
-                  MAX_TIME_WIDTH * MY_CHARSET_BIN_MB_MAXLEN);
+  param->set_time(&tm, MYSQL_TIMESTAMP_TIME, MAX_TIME_FULL_WIDTH);
   *pos+= length;
 }
 
@@ -1166,7 +1168,7 @@ static bool mysql_test_insert(Prepared_statement *stmt,
     If we would use locks, then we have to ensure we are not using
     TL_WRITE_DELAYED as having two such locks can cause table corruption.
   */
-  if (open_normal_and_derived_tables(thd, table_list, 0))
+  if (open_normal_and_derived_tables(thd, table_list, 0, DT_INIT)) 
     goto error;
 
   if ((values= its++))
@@ -1250,7 +1252,10 @@ static int mysql_test_update(Prepared_statement *stmt,
       open_tables(thd, &table_list, &table_count, 0))
     goto error;
 
-  if (table_list->multitable_view)
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    goto error;
+
+  if (table_list->is_multitable())
   {
     DBUG_ASSERT(table_list->view != 0);
     DBUG_PRINT("info", ("Switch to multi-update"));
@@ -1264,8 +1269,16 @@ static int mysql_test_update(Prepared_statement *stmt,
     thd->fill_derived_tables() is false here for sure (because it is
     preparation of PS, so we even do not check it).
   */
-  if (mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    goto error;
+  if (table_list->handle_derived(thd->lex, DT_PREPARE))
+    goto error;
+
+  if (!table_list->single_table_updatable())
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
     goto error;
+  }
 
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   /* Force privilege re-checking for views after they have been opened. */
@@ -1319,15 +1332,28 @@ error:
 static bool mysql_test_delete(Prepared_statement *stmt,
                               TABLE_LIST *table_list)
 {
+  uint table_count= 0;
   THD *thd= stmt->thd;
   LEX *lex= stmt->lex;
   DBUG_ENTER("mysql_test_delete");
 
   if (delete_precheck(thd, table_list) ||
-      open_normal_and_derived_tables(thd, table_list, 0))
+      open_tables(thd, &table_list, &table_count, 0))
+    goto error;
+
+  if (mysql_handle_derived(thd->lex, DT_INIT))
+    goto error;
+  if (mysql_handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    goto error;
+  if (mysql_handle_derived(thd->lex, DT_PREPARE))
     goto error;
 
-  if (!table_list->table)
+  if (!table_list->single_table_updatable())
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
+    goto error;
+  }
+  if (!table_list->table || !table_list->table->created)
   {
     my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
              table_list->view_db.str, table_list->view_name.str);
@@ -1382,7 +1408,8 @@ static int mysql_test_select(Prepared_statement *stmt,
     goto error;
   }
 
-  if (open_normal_and_derived_tables(thd, tables, 0))
+  if (open_normal_and_derived_tables(thd, tables, 0,
+                                     DT_PREPARE | DT_CREATE))
     goto error;
 
   thd->lex->used_tables= 0;                        // Updated by setup_fields
@@ -1442,7 +1469,8 @@ static bool mysql_test_do_fields(Prepared_statement *stmt,
   if (tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE))
     DBUG_RETURN(TRUE);
 
-  if (open_normal_and_derived_tables(thd, tables, 0))
+  if (open_normal_and_derived_tables(thd, tables, 0,
+                                     DT_PREPARE | DT_CREATE))
     DBUG_RETURN(TRUE);
   DBUG_RETURN(setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0));
 }
@@ -1472,7 +1500,8 @@ static bool mysql_test_set_fields(Prepared_statement *stmt,
 
   if ((tables &&
        check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) ||
-      open_normal_and_derived_tables(thd, tables, 0))
+      open_normal_and_derived_tables(thd, tables, 0,
+                                     DT_PREPARE | DT_CREATE))
     goto error;
 
   while ((var= it++))
@@ -1509,7 +1538,7 @@ static bool mysql_test_call_fields(Prepared_statement *stmt,
 
   if ((tables &&
        check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) ||
-      open_normal_and_derived_tables(thd, tables, 0))
+      open_normal_and_derived_tables(thd, tables, 0, DT_PREPARE))
     goto err;
 
   while ((item= it++))
@@ -1584,6 +1613,7 @@ select_like_stmt_test_with_open(Prepared_statement *stmt,
                                 int (*specific_prepare)(THD *thd),
                                 ulong setup_tables_done_option)
 {
+  uint table_count= 0;
   DBUG_ENTER("select_like_stmt_test_with_open");
 
   /*
@@ -1592,7 +1622,8 @@ select_like_stmt_test_with_open(Prepared_statement *stmt,
     prepared EXPLAIN yet so derived tables will clean up after
     themself.
   */
-  if (open_normal_and_derived_tables(stmt->thd, tables, 0))
+  THD *thd= stmt->thd;
+  if (open_tables(thd, &tables, &table_count, 0))
     DBUG_RETURN(TRUE);
 
   DBUG_RETURN(select_like_stmt_test(stmt, specific_prepare,
@@ -1637,7 +1668,8 @@ static bool mysql_test_create_table(Prepared_statement *stmt)
       create_table->skip_temporary= true;
     }
 
-    if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0))
+    if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0,
+                                       DT_PREPARE | DT_CREATE))
       DBUG_RETURN(TRUE);
 
     if (!(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE))
@@ -1655,7 +1687,8 @@ static bool mysql_test_create_table(Prepared_statement *stmt)
       we validate metadata of all CREATE TABLE statements,
       which keeps metadata validation code simple.
     */
-    if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0))
+    if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0,
+                                       DT_PREPARE))
       DBUG_RETURN(TRUE);
   }
 
@@ -1690,7 +1723,7 @@ static bool mysql_test_create_view(Prepared_statement *stmt)
   if (create_view_precheck(thd, tables, view, lex->create_view_mode))
     goto err;
 
-  if (open_normal_and_derived_tables(thd, tables, 0))
+  if (open_normal_and_derived_tables(thd, tables, 0, DT_PREPARE))
     goto err;
 
   lex->context_analysis_only|=  CONTEXT_ANALYSIS_ONLY_VIEW;
@@ -1835,6 +1868,56 @@ static bool mysql_test_insert_select(Prepared_statement *stmt,
   return res;
 }
 
+/**
+  Validate SELECT statement.
+
+    In case of success, if this query is not EXPLAIN, send column list info
+    back to the client.
+
+  @param stmt               prepared statement
+  @param tables             list of tables used in the query
+
+  @retval 0 success
+  @retval 1 error, error message is set in THD
+  @retval 2 success, and statement metadata has been sent
+*/
+
+static int mysql_test_handler_read(Prepared_statement *stmt,
+                                   TABLE_LIST *tables)
+{
+  THD *thd= stmt->thd;
+  LEX *lex= stmt->lex;
+  SQL_HANDLER *ha_table;
+  DBUG_ENTER("mysql_test_select");
+
+  lex->select_lex.context.resolve_in_select_list= TRUE;
+
+  /*
+    We don't have to test for permissions as this is already done during
+    HANDLER OPEN
+  */
+  if (!(ha_table= mysql_ha_read_prepare(thd, tables, lex->ha_read_mode,
+                                        lex->ident.str,
+                                        lex->insert_list,
+                                        lex->select_lex.where)))
+    DBUG_RETURN(1);
+
+  if (!stmt->is_sql_prepare())
+  {
+    if (!lex->result && !(lex->result= new (stmt->mem_root) select_send))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), sizeof(select_send));
+      DBUG_RETURN(1);
+    }
+    if (send_prep_stmt(stmt, ha_table->fields.elements) ||
+        lex->result->send_fields(ha_table->fields, Protocol::SEND_EOF) ||
+        thd->protocol->flush())
+      DBUG_RETURN(1);
+    DBUG_RETURN(2);
+  }
+  DBUG_RETURN(0);
+}
+
 
 /**
   Perform semantic analysis of the parsed tree and send a response packet
@@ -1949,6 +2032,11 @@ static bool check_prepared_statement(Prepared_statement *stmt)
     res= mysql_test_insert_select(stmt, tables);
     break;
 
+  case SQLCOM_HA_READ:
+    res= mysql_test_handler_read(stmt, tables);
+    /* Statement and field info has already been sent */
+    DBUG_RETURN(res == 1 ? TRUE : FALSE);
+
     /*
       Note that we don't need to have cases in this list if they are
       marked with CF_STATUS_COMMAND in sql_command_flags
@@ -2371,9 +2459,7 @@ void reinit_stmt_before_use(THD *thd, LEX *lex)
       /* Fix ORDER list */
       for (order= sl->order_list.first; order; order= order->next)
         order->item= &order->item_ptr;
-
-      /* clear the no_error flag for INSERT/UPDATE IGNORE */
-      sl->no_error= FALSE;
+      sl->handle_derived(lex, DT_REINIT);
     }
     {
       SELECT_LEX_UNIT *unit= sl->master_unit();
@@ -2414,9 +2500,6 @@ void reinit_stmt_before_use(THD *thd, LEX *lex)
   }
   lex->current_select= &lex->select_lex;
 
-  /* restore original list used in INSERT ... SELECT */
-  if (lex->leaf_tables_insert)
-    lex->select_lex.leaf_tables= lex->leaf_tables_insert;
 
   if (lex->result)
   {
diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc
index 116cdb91ef3..9e45a34f794 100644
--- a/sql/sql_profile.cc
+++ b/sql/sql_profile.cc
@@ -238,7 +238,7 @@ void PROF_MEASUREMENT::set_label(const char *status_arg,
 */
 void PROF_MEASUREMENT::collect()
 {
-  time_usecs= (double) my_getsystime() / 10.0;  /* 1 sec was 1e7, now is 1e6 */
+  time_usecs= my_interval_timer() / 1e3;  /* ns to us */
 #ifdef HAVE_GETRUSAGE
   getrusage(RUSAGE_SELF, &rusage);
 #elif defined(__WIN__)
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index cab41b1df07..2cf4eca447c 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -21,7 +21,7 @@
 
 #include "mysql_priv.h"
 #include "sql_trigger.h"
-
+#include "sql_handler.h"
 
 static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list,
 				 bool skip_error);
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 7a09074f554..8245bbe48f1 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -32,6 +32,9 @@ my_bool opt_sporadic_binlog_dump_fail = 0;
 static int binlog_dump_count = 0;
 #endif
 
+extern TYPELIB binlog_checksum_typelib;
+
+
 /*
     fake_rotate_event() builds a fake (=which does not exist physically in any
     binlog) Rotate event, which contains the name of the binlog we are going to
@@ -51,10 +54,21 @@ static int binlog_dump_count = 0;
 */
 
 static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
-                             ulonglong position, const char** errmsg)
+                             ulonglong position, const char** errmsg,
+                             uint8 checksum_alg_arg)
 {
   DBUG_ENTER("fake_rotate_event");
   char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN+100];
+
+  /*
+    this Rotate is to be sent with checksum if and only if
+    slave's get_master_version_and_clock time handshake value 
+    of master's @@global.binlog_checksum was TRUE
+  */
+
+  my_bool do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF &&
+    checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF;
+
   /*
     'when' (the timestamp) is set to 0 so that slave could distinguish between
     real and fake Rotate events (if necessary)
@@ -64,7 +78,8 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
 
   char* p = log_file_name+dirname_length(log_file_name);
   uint ident_len = (uint) strlen(p);
-  ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
+  ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN +
+    (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
   int4store(header + SERVER_ID_OFFSET, server_id);
   int4store(header + EVENT_LEN_OFFSET, event_len);
   int2store(header + FLAGS_OFFSET, LOG_EVENT_ARTIFICIAL_F);
@@ -75,7 +90,19 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
   packet->append(header, sizeof(header));
   int8store(buf+R_POS_OFFSET,position);
   packet->append(buf, ROTATE_HEADER_LEN);
-  packet->append(p,ident_len);
+  packet->append(p, ident_len);
+
+  if (do_checksum)
+  {
+    char b[BINLOG_CHECKSUM_LEN];
+    ha_checksum crc= my_checksum(0L, NULL, 0);
+    crc= my_checksum(crc, (uchar*)header, sizeof(header));
+    crc= my_checksum(crc, (uchar*)buf, ROTATE_HEADER_LEN);
+    crc= my_checksum(crc, (uchar*)p, ident_len);
+    int4store(b, crc);
+    packet->append(b, sizeof(b));
+  }
+
   if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
   {
     *errmsg = "failed on my_net_write()";
@@ -157,6 +184,86 @@ static int send_file(THD *thd)
 }
 
 
+/**
+   Internal to mysql_binlog_send() routine that recalculates checksum for
+   a FD event (asserted) that needs additional arranment prior sending to slave.
+*/
+inline void fix_checksum(String *packet, ulong ev_offset)
+{
+  /* recalculate the crc for this event */
+  uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
+  ha_checksum crc= my_checksum(0L, NULL, 0);
+  DBUG_ASSERT(data_len == 
+              LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
+              BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+  crc= my_checksum(crc, (uchar *)packet->ptr() + ev_offset, data_len -
+                   BINLOG_CHECKSUM_LEN);
+  int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
+}
+
+
+static user_var_entry * get_binlog_checksum_uservar(THD * thd)
+{
+  LEX_STRING name=  { C_STRING_WITH_LEN("master_binlog_checksum")};
+  user_var_entry *entry= 
+    (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name.str,
+                                  name.length);
+  return entry;
+}
+
+/**
+  Function for calling in mysql_binlog_send
+  to check if slave initiated checksum-handshake.
+
+  @param[in]    thd  THD to access a user variable
+
+  @return        TRUE if handshake took place, FALSE otherwise
+*/
+
+static bool is_slave_checksum_aware(THD * thd)
+{
+  DBUG_ENTER("is_slave_checksum_aware");
+  user_var_entry *entry= get_binlog_checksum_uservar(thd);
+  DBUG_RETURN(entry? true  : false);
+}
+
+/**
+  Function for calling in mysql_binlog_send
+  to get the value of @@binlog_checksum of the master at
+  time of checksum-handshake.
+
+  The value tells the master whether to compute or not, and the slave
+  to verify or not the first artificial Rotate event's checksum.
+
+  @param[in]    thd  THD to access a user variable
+
+  @return       value of @@binlog_checksum alg according to
+                @c enum enum_binlog_checksum_alg
+*/
+
+static uint8 get_binlog_checksum_value_at_connect(THD * thd)
+{
+  uint8 ret;
+
+  DBUG_ENTER("get_binlog_checksum_value_at_connect");
+  user_var_entry *entry= get_binlog_checksum_uservar(thd);
+  if (!entry)
+  {
+    ret= BINLOG_CHECKSUM_ALG_UNDEF;
+  }
+  else
+  {
+    DBUG_ASSERT(entry->type == STRING_RESULT);
+    String str;
+    uint dummy_errors;
+    str.copy(entry->value, entry->length, &my_charset_bin, &my_charset_bin,
+             &dummy_errors);
+    ret= (uint8) find_type ((char*) str.ptr(), &binlog_checksum_typelib, 1) - 1;
+    DBUG_ASSERT(ret <= BINLOG_CHECKSUM_ALG_CRC32); // while it's just on CRC32 alg
+  }
+  DBUG_RETURN(ret);
+}
+
 /*
   Adjust the position pointer in the binary log file for all running slaves
 
@@ -331,6 +438,9 @@ Increase max_allowed_packet on master";
   case LOG_READ_TRUNC:
     *errmsg = "binlog truncated in the middle of event";
     break;
+  case LOG_READ_CHECKSUM_FAILURE:
+    *errmsg = "event read from binlog did not pass crc check";
+    break;
   default:
     *errmsg = "unknown error reading log event on the master";
     break;
@@ -357,10 +467,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
   NET* net = &thd->net;
   pthread_mutex_t *log_lock;
   bool binlog_can_be_corrupted= FALSE;
+  uint8 current_checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF;
+  int old_max_allowed_packet= thd->variables.max_allowed_packet;
 #ifndef DBUG_OFF
   int left_events = max_binlog_dump_events;
 #endif
-  int old_max_allowed_packet= thd->variables.max_allowed_packet;
   DBUG_ENTER("mysql_binlog_send");
   DBUG_PRINT("enter",("log_ident: '%s'  pos: %ld", log_ident, (long) pos));
 
@@ -454,7 +565,8 @@ impossible position";
     given that we want minimum modification of 4.0, we send the normal
     and fake Rotates.
   */
-  if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg))
+  if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg,
+                        get_binlog_checksum_value_at_connect(thd)))
   {
     /*
        This error code is not perfect, as fake_rotate_event() does not
@@ -484,8 +596,8 @@ impossible position";
        Try to find a Format_description_log_event at the beginning of
        the binlog
      */
-     if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
-     {
+    if (!(error = Log_event::read_log_event(&log, packet, log_lock, 0)))
+    { 
        /*
          The packet has offsets equal to the normal offsets in a binlog
          event +1 (the first character is \0).
@@ -493,8 +605,25 @@ impossible position";
        DBUG_PRINT("info",
                   ("Looked for a Format_description_log_event, found event type %d",
                    (*packet)[EVENT_TYPE_OFFSET+1]));
-       if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
+       if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
        {
+         current_checksum_alg= get_checksum_alg(packet->ptr() + 1,
+                                                packet->length() - 1);
+         DBUG_ASSERT(current_checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+                     current_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
+                     current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
+         if (!is_slave_checksum_aware(thd) &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+         {
+           my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
+           errmsg= "Slave can not handle replication events with the checksum "
+             "that master is configured to log";
+           sql_print_warning("Master is configured to log replication events "
+                             "with checksum, but will not send such events to "
+                             "slaves that cannot process them");
+           goto err;
+         }
          binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
                                        LOG_EVENT_BINLOG_IN_USE_F);
          (*packet)[FLAGS_OFFSET+1] &= ~LOG_EVENT_BINLOG_IN_USE_F;
@@ -510,6 +639,12 @@ impossible position";
           */
          int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
                    ST_CREATED_OFFSET+1, (ulong) 0);
+
+	 /* fix the checksum due to latest changes in header */
+	 if (current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+             current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+           fix_checksum(packet, 1);
+
          /* send it */
          if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
          {
@@ -549,7 +684,8 @@ impossible position";
   while (!net->error && net->vio != 0 && !thd->killed)
   {
     my_off_t prev_pos= pos;
-    while (!(error = Log_event::read_log_event(&log, packet, log_lock)))
+    while (!(error = Log_event::read_log_event(&log, packet, log_lock,
+                                               current_checksum_alg)))
     {
       prev_pos= my_b_tell(&log);
 #ifndef DBUG_OFF
@@ -561,7 +697,6 @@ impossible position";
 	goto err;
       }
 #endif
-
       DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
                       {
                         if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
@@ -571,7 +706,7 @@ impossible position";
                             "now "
                             "wait_for signal.continue";
                           DBUG_ASSERT(opt_debug_sync_timeout > 0);
-                          DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                          DBUG_ASSERT(!debug_sync_set_action(thd,
                                                              STRING_WITH_LEN(act)));
                           const char act2[]=
                             "now "
@@ -581,40 +716,61 @@ impossible position";
                         }
                       });
 
-      if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
+      if ((uchar) (*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
       {
+        current_checksum_alg= get_checksum_alg(packet->ptr() + 1,
+                                               packet->length() - 1);
+        DBUG_ASSERT(current_checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+                    current_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
+                    current_checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
+        if (!is_slave_checksum_aware(thd) &&
+            current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
+            current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
+        {
+          my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
+          errmsg= "Slave can not handle replication events with the checksum "
+            "that master is configured to log";
+          sql_print_warning("Master is configured to log replication events "
+                            "with checksum, but will not send such events to "
+                            "slaves that cannot process them");
+          goto err;
+        }
         binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
                                       LOG_EVENT_BINLOG_IN_USE_F);
         (*packet)[FLAGS_OFFSET+1] &= ~LOG_EVENT_BINLOG_IN_USE_F;
       }
-      else if ((*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
+      else if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
         binlog_can_be_corrupted= FALSE;
 
-      if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
+      if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] != ANNOTATE_ROWS_EVENT ||
+          (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT))
       {
-	errmsg = "Failed on my_net_write()";
-	my_errno= ER_UNKNOWN_ERROR;
-	goto err;
-      }
+        if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
+        {
+          errmsg = "Failed on my_net_write()";
+          my_errno= ER_UNKNOWN_ERROR;
+          goto err;
+        }
 
-      DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
-                      {
-                        if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+        DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
                         {
-                          net_flush(net);
-                        }
-                      });
-
-      DBUG_PRINT("info", ("log event code %d",
-			  (*packet)[LOG_EVENT_OFFSET+1] ));
-      if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
-      {
-	if (send_file(thd))
-	{
-	  errmsg = "failed in send_file()";
-	  my_errno= ER_UNKNOWN_ERROR;
-	  goto err;
-	}
+                          if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+                          {
+                            net_flush(net);
+                          }
+                        });
+
+        DBUG_PRINT("info", ("log event code %d",
+                            (*packet)[LOG_EVENT_OFFSET+1] ));
+        if ((uchar) (*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
+        {
+          if (send_file(thd))
+          {
+            errmsg = "failed in send_file()";
+            my_errno= ER_UNKNOWN_ERROR;
+            goto err;
+          }
+        }
       }
       packet->set("\0", 1, &my_charset_bin);
     }
@@ -624,7 +780,8 @@ impossible position";
       of a crash ?). treat any corruption as EOF
     */
     if (binlog_can_be_corrupted &&
-        error != LOG_READ_MEM && error != LOG_READ_EOF)
+        (error != LOG_READ_MEM && error != LOG_READ_CHECKSUM_FAILURE &&
+         error != LOG_READ_EOF))
     {
       my_b_seek(&log, prev_pos);
       error=LOG_READ_EOF;
@@ -683,7 +840,8 @@ impossible position";
 	*/
 
 	pthread_mutex_lock(log_lock);
-	switch (error= Log_event::read_log_event(&log, packet, (pthread_mutex_t*) 0)) {
+	switch (error= Log_event::read_log_event(&log, packet, (pthread_mutex_t*) 0,
+                                                 current_checksum_alg)) {
 	case 0:
 	  /* we read successfully, so we'll need to send it to the slave */
 	  pthread_mutex_unlock(log_lock);
@@ -715,23 +873,27 @@ impossible position";
 
 	if (read_packet)
 	{
-	  thd_proc_info(thd, "Sending binlog event to slave");
-	  if (my_net_write(net, (uchar*) packet->ptr(), packet->length()) )
-	  {
-	    errmsg = "Failed on my_net_write()";
-	    my_errno= ER_UNKNOWN_ERROR;
-	    goto err;
-	  }
-
-	  if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
-	  {
-	    if (send_file(thd))
+          if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] != ANNOTATE_ROWS_EVENT ||
+              (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT))
+          {
+            thd_proc_info(thd, "Sending binlog event to slave");
+            if (my_net_write(net, (uchar*) packet->ptr(), packet->length()) )
 	    {
-	      errmsg = "failed in send_file()";
+              errmsg = "Failed on my_net_write()";
 	      my_errno= ER_UNKNOWN_ERROR;
 	      goto err;
 	    }
-	  }
+
+            if ((uchar)(*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
+            {
+              if (send_file(thd))
+              {
+                errmsg = "failed in send_file()";
+                my_errno= ER_UNKNOWN_ERROR;
+                goto err;
+              }
+            }
+          }
 	  packet->set("\0", 1, &my_charset_bin);
 	  /*
 	    No need to net_flush because we will get to flush later when
@@ -780,7 +942,7 @@ impossible position";
       */
       if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
 	  fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE,
-                            &errmsg))
+                            &errmsg, current_checksum_alg))
       {
 	my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
 	goto err;
@@ -1117,9 +1279,9 @@ err:
   idle, then this could last long, and if the slave reconnects, we could have 2
   Binlog_dump threads in SHOW PROCESSLIST, until a query is written to the
   binlog. To avoid this, when the slave reconnects and sends COM_BINLOG_DUMP,
-  the master kills any existing thread with the slave's server id (if this id is
-  not zero; it will be true for real slaves, but false for mysqlbinlog when it
-  sends COM_BINLOG_DUMP to get a remote binlog dump).
+  the master kills any existing thread with the slave's server id (if this id
+  is not zero; it will be true for real slaves, but false for mysqlbinlog when
+  it sends COM_BINLOG_DUMP to get a remote binlog dump).
 
   SYNOPSIS
     kill_zombie_dump_threads()
@@ -1151,7 +1313,7 @@ void kill_zombie_dump_threads(uint32 slave_server_id)
       it will be slow because it will iterate through the list
       again. We just to do kill the thread ourselves.
     */
-    tmp->awake(THD::KILL_QUERY);
+    tmp->awake(KILL_QUERY);
     pthread_mutex_unlock(&tmp->LOCK_thd_data);
   }
 }
@@ -1529,7 +1691,8 @@ bool mysql_show_binlog_events(THD* thd)
       This code will fail on a mixed relay log (one which has Format_desc then
       Rotate then Format_desc).
     */
-    ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event);
+    ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event,
+                                   opt_master_verify_checksum);
     if (ev)
     {
       if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
@@ -1551,8 +1714,12 @@ bool mysql_show_binlog_events(THD* thd)
 
     for (event_count = 0;
 	 (ev = Log_event::read_log_event(&log,(pthread_mutex_t*) 0,
-                                         description_event)); )
+                                         description_event,
+                                         opt_master_verify_checksum)); )
     {
+      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
+        description_event->checksum_alg= ev->checksum_alg;
+
       if (event_count >= limit_start &&
 	  ev->net_send(protocol, linfo.log_file_name, pos))
       {
@@ -1812,6 +1979,11 @@ static sys_var_chain vars = { NULL, NULL };
 static sys_var_const    sys_log_slave_updates(&vars, "log_slave_updates",
                                               OPT_GLOBAL, SHOW_MY_BOOL,
                                               (uchar*) &opt_log_slave_updates);
+static sys_var_const
+sys_replicate_annotate_row_events(&vars,
+                                "replicate_annotate_row_events",
+                                OPT_GLOBAL, SHOW_MY_BOOL,
+                                (uchar*) &opt_replicate_annotate_row_events);
 static sys_var_const    sys_relay_log(&vars, "relay_log",
                                       OPT_GLOBAL, SHOW_CHAR_PTR,
                                       (uchar*) &opt_relay_logname);
@@ -1840,6 +2012,12 @@ static sys_var_long_ptr	sys_slave_trans_retries(&vars, "slave_transaction_retrie
 						&slave_trans_retries);
 static sys_var_sync_binlog_period sys_sync_binlog_period(&vars, "sync_binlog", &sync_binlog_period);
 static sys_var_slave_skip_counter sys_slave_skip_counter(&vars, "sql_slave_skip_counter");
+static sys_var_bool_ptr	sys_master_verify_checksum(&vars,
+                                                   "master_verify_checksum",
+                                                   &opt_master_verify_checksum);
+static sys_var_bool_ptr	sys_slave_sql_verify_checksum(&vars,
+                                                      "slave_sql_verify_checksum",
+                                                      &opt_slave_sql_verify_checksum);
 
 
 bool sys_var_slave_skip_counter::check(THD *thd, set_var *var)
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 49f1c11c4d8..f05e8ad34b2 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -32,54 +32,53 @@
 #include "mysql_priv.h"
 #include "sql_select.h"
 #include "sql_cursor.h"
+#include "opt_subselect.h"
 
 #include <m_ctype.h>
 #include <my_bit.h>
 #include <hash.h>
 #include <ft_global.h>
-#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
-#include "../storage/maria/ha_maria.h"
-#define TMP_ENGINE_HTON maria_hton
-#else
-#define TMP_ENGINE_HTON myisam_hton
-#endif
 
 const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
 			      "MAYBE_REF","ALL","range","index","fulltext",
 			      "ref_or_null","unique_subquery","index_subquery",
-                              "index_merge"
-};
+                              "index_merge", "hash_ALL", "hash_range",
+                              "hash_index", "hash_index_merge" };
+
+const char *copy_to_tmp_table= "Copying to tmp table";
 
 struct st_sargable_param;
 
 static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
-static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
-				 DYNAMIC_ARRAY *keyuse);
+static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves, 
+                                 COND *conds, DYNAMIC_ARRAY *keyuse);
 static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 JOIN_TAB *join_tab,
                                 uint tables, COND *conds,
-                                COND_EQUAL *cond_equal,
                                 table_map table_map, SELECT_LEX *select_lex,
                                 st_sargable_param **sargables);
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
+                                   bool skip_unprefixed_keyparts);
 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
+static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
-			       table_map used_tables);
-static bool choose_plan(JOIN *join,table_map join_tables);
-
-static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd,
-                             table_map remaining_tables, uint idx,
-                             double record_count, double read_time);
+			       bool allow_full_scan, table_map used_tables);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
 static void optimize_straight_join(JOIN *join, table_map join_tables);
 static bool greedy_search(JOIN *join, table_map remaining_tables,
-                             uint depth, uint prune_level);
+                          uint depth, uint prune_level);
 static bool best_extension_by_limited_search(JOIN *join,
                                              table_map remaining_tables,
                                              uint idx, double record_count,
                                              double read_time, uint depth,
                                              uint prune_level);
 static uint determine_search_depth(JOIN* join);
-static int join_tab_cmp(const void* ptr1, const void* ptr2);
-static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
+static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
 /*
   TODO: 'find_best' is here only temporarily until 'greedy_search' is
   tested and approved.
@@ -87,21 +86,24 @@ static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
 static bool find_best(JOIN *join,table_map rest_tables,uint index,
 		      double record_count,double read_time);
 static uint cache_record_length(JOIN *join,uint index);
-static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
-static bool get_best_combination(JOIN *join);
+bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
 				KEY_PART_INFO *key_part, uchar *key_buff,
 				uint maybe_null);
-static void make_outerjoin_info(JOIN *join);
+static bool make_outerjoin_info(JOIN *join);
+static Item*
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables);
 static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
-static void make_join_readinfo(JOIN *join, ulonglong options);
+static void revise_cache_usage(JOIN_TAB *join_tab);
+static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
 static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
 static void update_depend_map(JOIN *join);
-static void update_depend_map(JOIN *join, ORDER *order);
+static void update_depend_map_for_order(JOIN *join, ORDER *order);
 static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
 			   bool change_list, bool *simple_order);
-static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables,
+static int return_zero_rows(JOIN *join, select_result *res, 
+                            List<TABLE_LIST> &tables,
                             List<Item> &fields, bool send_row,
                             ulonglong select_options, const char *info,
                             Item *having, List<Item> &all_fields);
@@ -109,11 +111,12 @@ static COND *build_equal_items(THD *thd, COND *cond,
                                COND_EQUAL *inherited,
                                List<TABLE_LIST> *join_list,
                                COND_EQUAL **cond_equal_ref);
-static COND* substitute_for_best_equal_field(COND *cond,
+static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab,
+                                             COND *cond,
                                              COND_EQUAL *cond_equal,
                                              void *table_join_idx);
 static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
-                            COND *conds, bool top);
+                            COND *conds, bool top, bool in_sj);
 static bool check_interleaving_with_nj(JOIN_TAB *next);
 static void restore_prev_nj_state(JOIN_TAB *last);
 static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
@@ -122,16 +125,12 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
 
 static COND *optimize_cond(JOIN *join, COND *conds,
                            List<TABLE_LIST> *join_list,
-			   Item::cond_result *cond_value);
+			   Item::cond_result *cond_value, 
+                           COND_EQUAL **cond_equal);
 static bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
-static bool open_tmp_table(TABLE *table);
-static bool create_internal_tmp_table(TABLE *,TMP_TABLE_PARAM *, ulonglong);
-static bool create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
-                                                 TMP_TABLE_PARAM *param,
-                                                 int error,
-                                                 bool ignore_last_dupp,
-                                                 handlerton *hton,
-                                                 const char *proc_info);
+static bool create_internal_tmp_table_from_heap2(THD *, TABLE *,
+                                     ENGINE_COLUMNDEF *, ENGINE_COLUMNDEF **, 
+                                     int, bool, handlerton *, const char *);
 static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
 		     Procedure *proc);
 
@@ -141,10 +140,8 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
 static enum_nested_loop_state
 evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
 static enum_nested_loop_state
-flush_cached_records(JOIN *join, JOIN_TAB *join_tab, bool skip_last);
-static enum_nested_loop_state
 end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
+enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
@@ -152,7 +149,7 @@ static enum_nested_loop_state
 end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
+enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 
 static int test_if_group_changed(List<Cached_item> &list);
@@ -167,7 +164,7 @@ static int join_no_more_records(READ_RECORD *info);
 static int join_read_next(READ_RECORD *info);
 static int join_init_quick_read_record(JOIN_TAB *tab);
 static int test_if_quick_select(JOIN_TAB *tab);
-static int join_init_read_record(JOIN_TAB *tab);
+static bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
 static int join_read_first(JOIN_TAB *tab);
 static int join_read_next(READ_RECORD *info);
 static int join_read_next_same(READ_RECORD *info);
@@ -178,13 +175,24 @@ static int join_ft_read_first(JOIN_TAB *tab);
 static int join_ft_read_next(READ_RECORD *info);
 int join_read_always_key_or_null(JOIN_TAB *tab);
 int join_read_next_same_or_null(READ_RECORD *info);
-static COND *make_cond_for_table(COND *cond,table_map table,
-				 table_map used_table);
+static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
+                                 table_map used_table,
+                                 int join_tab_idx_arg,
+                                 bool exclude_expensive_cond,
+                                 bool retain_ref_cond);
+static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
+                                           Item *cond,
+                                           table_map tables,
+                                           table_map used_table,
+                                           int join_tab_idx_arg,
+                                           bool exclude_expensive_cond,
+                                           bool retain_ref_cond);
+
 static Item* part_of_refkey(TABLE *form,Field *field);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
 				    ha_rows select_limit, bool no_changes,
-                                    key_map *map);
+                                    const key_map *map);
 static bool list_contains_unique_index(TABLE *table,
                           bool (*find_func) (Field *, void *), void *data);
 static bool find_field_in_item_list (Field *field, void *data);
@@ -198,15 +206,8 @@ static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
 				   ulong offset,Item *having);
 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
 				      uint field_count, Field **first_field,
-
 				      ulong key_length,Item *having);
-static int join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count);
-static ulong used_blob_length(CACHE_FIELD **ptr);
-static bool store_record_in_cache(JOIN_CACHE *cache);
-static void reset_cache_read(JOIN_CACHE *cache);
-static void reset_cache_write(JOIN_CACHE *cache);
-static void read_cached_record(JOIN_TAB *tab);
-static bool cmp_buffer_with_ref(JOIN_TAB *tab);
+static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
 static bool setup_new_fields(THD *thd, List<Item> &fields,
 			     List<Item> &all_fields, ORDER *new_order);
 static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
@@ -214,7 +215,7 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
                                     List<Item> &all_fields,
 				    bool *all_order_by_fields_used);
 static bool test_if_subpart(ORDER *a,ORDER *b);
-static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
+static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables);
 static void calc_group_buffer(JOIN *join,ORDER *group);
 static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
 static bool alloc_group_fields(JOIN *join,ORDER *group);
@@ -237,10 +238,14 @@ static bool init_sum_functions(Item_sum **func, Item_sum **end);
 static bool update_sum_func(Item_sum **func);
 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
 			    bool distinct, const char *message=NullS);
-static Item *remove_additional_cond(Item* conds);
 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
-static bool test_if_ref(Item_field *left_item,Item *right_item);
+static uint make_join_orderinfo(JOIN *join);
+static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
 
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+JOIN_TAB *first_depth_first_tab(JOIN* join);
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
 
 /**
   This handles SELECT with and without UNION.
@@ -422,11 +427,78 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
 }
 
 /**
+   The following clauses are redundant for subqueries:
+
+   DISTINCT
+   GROUP BY   if there are no aggregate functions and no HAVING
+              clause
+
+   Because redundant clauses are removed both from JOIN and
+   select_lex, the removal is permanent. Thus, it only makes sense to
+   call this function for normal queries and on first execution of
+   SP/PS
+
+   @param subq_select_lex   select_lex that is part of a subquery 
+                            predicate. This object and the associated 
+                            join is modified.
+*/
+
+static
+void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
+{
+  Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
+  /*
+    The removal should happen for IN, ALL, ANY and EXISTS subqueries,
+    which means all but single row subqueries. Example single row
+    subqueries: 
+       a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>) 
+       b) SELECT a, (<single row subquery) FROM t1
+   */
+  if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
+    return;
+
+  /* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. */
+  DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
+               subq_predicate->is_in_predicate());
+
+  if (subq_select_lex->options & SELECT_DISTINCT)
+  {
+    subq_select_lex->join->select_distinct= false;
+    subq_select_lex->options&= ~SELECT_DISTINCT;
+  }
+
+  /*
+    Remove GROUP BY if there are no aggregate functions and no HAVING
+    clause
+  */
+  if (subq_select_lex->group_list.elements &&
+      !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
+  {
+    subq_select_lex->join->group_list= NULL;
+    subq_select_lex->group_list.empty();
+  }
+
+  /*
+    TODO: This would prevent processing quries with ORDER BY ... LIMIT
+    therefore we disable this optimization for now.
+    Remove GROUP BY if there are no aggregate functions and no HAVING
+    clause
+  if (subq_select_lex->group_list.elements &&
+      !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
+  {
+    subq_select_lex->join->group_list= NULL;
+    subq_select_lex->group_list.empty();
+  }
+  */
+}
+
+
+/**
   Function to setup clauses without sum functions.
 */
 inline int setup_without_group(THD *thd, Item **ref_pointer_array,
 			       TABLE_LIST *tables,
-			       TABLE_LIST *leaves,
+			       List<TABLE_LIST> &leaves,
 			       List<Item> &fields,
 			       List<Item> &all_fields,
 			       COND **conds,
@@ -464,6 +536,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
   mysql_select assumes that all tables are already opened
 *****************************************************************************/
 
+
 /**
   Prepare of whole select (including sub queries in future).
 
@@ -502,29 +575,89 @@ JOIN::prepare(Item ***rref_pointer_array,
   join_list= &select_lex->top_join_list;
   union_part= unit_arg->is_union();
 
+  if (select_lex->handle_derived(thd->lex, DT_PREPARE))
+    DBUG_RETURN(1);
+
   thd->lex->current_select->is_item_list_lookup= 1;
   /*
     If we have already executed SELECT, then it have not sense to prevent
     its table from update (see unique_table())
+    Affects only materialized derived tables.
   */
-  if (thd->derived_tables_processing)
-    select_lex->exclude_from_table_unique_test= TRUE;
-
   /* Check that all tables, fields, conds and order are ok */
-
   if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
       setup_tables_and_check_access(thd, &select_lex->context, join_list,
-                                    tables_list, &select_lex->leaf_tables,
-                                    FALSE, SELECT_ACL, SELECT_ACL))
+                                    tables_list, select_lex->leaf_tables,
+                                    FALSE, SELECT_ACL, SELECT_ACL, FALSE))
       DBUG_RETURN(-1);
+
+  /*
+    Permanently remove redundant parts from the query if
+      1) This is a subquery
+      2) This is the first time this query is optimized (since the
+         transformation is permanent
+      3) Not normalizing a view. Removal should take place when a
+         query involving a view is optimized, not when the view
+         is created
+  */
+  if (select_lex->master_unit()->item &&                               // 1)
+      select_lex->first_cond_optimization &&                           // 2)
+      !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW)) // 3)
+  {
+    remove_redundant_subquery_clauses(select_lex);
+  }
+  
+  /*
+    TRUE if the SELECT list mixes elements with and without grouping,
+    and there is no GROUP BY clause. Mixing non-aggregated fields with
+    aggregate functions in the SELECT list is a MySQL exptenstion that
+    is allowed only if the ONLY_FULL_GROUP_BY sql mode is not set.
+  */
+  bool mixed_implicit_grouping= false;
+  if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
+      select_lex->with_sum_func && !group_list)
+  {
+    List_iterator_fast <Item> select_it(fields_list);
+    Item *select_el; /* Element of the SELECT clause, can be an expression. */
+    bool found_field_elem= false;
+    bool found_sum_func_elem= false;
+
+    while ((select_el= select_it++))
+    {
+      if (select_el->with_sum_func)
+        found_sum_func_elem= true;
+      if (select_el->with_field)
+        found_field_elem= true;
+      if (found_sum_func_elem && found_field_elem)
+      {
+        mixed_implicit_grouping= true;
+        break;
+      }
+    }
+  }
+
+  table_count= select_lex->leaf_tables.elements;
  
-  TABLE_LIST *table_ptr;
-  for (table_ptr= select_lex->leaf_tables;
-       table_ptr;
-       table_ptr= table_ptr->next_leaf)
-    tables++;
+  TABLE_LIST *tbl;
+  List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+  while ((tbl= li++))
+  {
+    //table_count++; /* Count the number of tables in the join. */
+    /*
+      If the query uses implicit grouping where the select list contains both
+      aggregate functions and non-aggregate fields, any non-aggregated field
+      may produce a NULL value. Set all fields of each table as nullable before
+      semantic analysis to take into account this change of nullability.
+
+      Note: this loop doesn't touch tables inside merged semi-joins, because
+      subquery-to-semijoin conversion has not been done yet. This is intended.
+    */
+    if (mixed_implicit_grouping)
+      tbl->table->maybe_null= 1;
+  }
 
-  if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num) ||
+  if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
+                              wild_num)) ||
       select_lex->setup_ref_array(thd, og_num) ||
       setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ,
 		   &all_fields, 1) ||
@@ -542,6 +675,13 @@ JOIN::prepare(Item ***rref_pointer_array,
     thd->where="having clause";
     thd->lex->allow_sum_func|= 1 << select_lex_arg->nest_level;
     select_lex->having_fix_field= 1;
+    /*
+      Wrap alone field in HAVING clause in case it will be outer field of subquery
+      which need persistent pointer on it, but having could be changed by optimizer
+    */
+    if (having->type() == Item::REF_ITEM &&
+        ((Item_ref *)having)->ref_type() == Item_ref::REF)
+      wrap_ident(thd, &having);
     bool having_fix_rc= (!having->fixed &&
 			 (having->fix_fields(thd, &having) ||
 			  having->check_cols(1)));
@@ -550,25 +690,13 @@ JOIN::prepare(Item ***rref_pointer_array,
       DBUG_RETURN(-1);				/* purecov: inspected */
     thd->lex->allow_sum_func= save_allow_sum_func;
   }
-
-  if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
-      !(select_options & SELECT_DESCRIBE))
-  {
-    Item_subselect *subselect;
-    /* Is it subselect? */
-    if ((subselect= select_lex->master_unit()->item))
-    {
-      Item_subselect::trans_res res;
-      if ((res= subselect->select_transformer(this)) !=
-	  Item_subselect::RES_OK)
-      {
-        select_lex->fix_prepare_information(thd, &conds, &having);
-	DBUG_RETURN((res == Item_subselect::RES_ERROR));
-      }
-    }
-  }
+  
+  int res= check_and_do_in_subquery_rewrites(this);
 
   select_lex->fix_prepare_information(thd, &conds, &having);
+  
+  if (res)
+    DBUG_RETURN(res);
 
   if (order)
   {
@@ -643,15 +771,15 @@ JOIN::prepare(Item ***rref_pointer_array,
     }
   }
 
-  if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
-    DBUG_RETURN(-1);
-  
-
   /*
     Check if there are references to un-aggregated columns when computing 
     aggregate functions with implicit grouping (there is no GROUP BY).
   */
   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
+      !(select_lex->master_unit()->item &&
+        select_lex->master_unit()->item->is_in_predicate() &&
+        ((Item_in_subselect*)select_lex->master_unit()->item)->
+        test_set_strategy(SUBS_MAXMIN_INJECTED)) &&
       select_lex->non_agg_field_used() &&
       select_lex->agg_func_used())
   {
@@ -706,11 +834,36 @@ JOIN::prepare(Item ***rref_pointer_array,
   if (!procedure && result && result->prepare(fields_list, unit_arg))
     goto err;					/* purecov: inspected */
 
+  unit= unit_arg;
+  if (prepare_stage2())
+    goto err;
+
+  DBUG_RETURN(0); // All OK
+
+err:
+  delete procedure;                /* purecov: inspected */
+  procedure= 0;
+  DBUG_RETURN(-1);                /* purecov: inspected */
+}
+
+
+/**
+  Second phase of prepare where we collect some statistic.
+
+  @details
+  We made this part separate to be able recalculate some statistic after
+  transforming subquery on optimization phase.
+*/
+
+bool JOIN::prepare_stage2()
+{
+  bool res= TRUE;
+  DBUG_ENTER("JOIN::prepare_stage2");
+
   /* Init join struct */
   count_field_types(select_lex, &tmp_table_param, all_fields, 0);
   ref_pointer_array_size= all_fields.elements*sizeof(Item*);
   this->group= group_list != 0;
-  unit= unit_arg;
 
   if (tmp_table_param.sum_func_count && !group_list)
     implicit_grouping= TRUE;
@@ -727,94 +880,9 @@ JOIN::prepare(Item ***rref_pointer_array,
   if (alloc_func_list())
     goto err;
 
-  DBUG_RETURN(0); // All OK
-
+  res= FALSE;
 err:
-  delete procedure;				/* purecov: inspected */
-  procedure= 0;
-  DBUG_RETURN(-1);				/* purecov: inspected */
-}
-
-
-/*
-  Remove the predicates pushed down into the subquery
-
-  SYNOPSIS
-    JOIN::remove_subq_pushed_predicates()
-      where   IN  Must be NULL
-              OUT The remaining WHERE condition, or NULL
-
-  DESCRIPTION
-    Given that this join will be executed using (unique|index)_subquery,
-    without "checking NULL", remove the predicates that were pushed down
-    into the subquery.
-
-    If the subquery compares scalar values, we can remove the condition that
-    was wrapped into trig_cond (it will be checked when needed by the subquery
-    engine)
-
-    If the subquery compares row values, we need to keep the wrapped
-    equalities in the WHERE clause: when the left (outer) tuple has both NULL
-    and non-NULL values, we'll do a full table scan and will rely on the
-    equalities corresponding to non-NULL parts of left tuple to filter out
-    non-matching records.
-
-    TODO: We can remove the equalities that will be guaranteed to be true by the
-    fact that subquery engine will be using index lookup. This must be done only
-    for cases where there are no conversion errors of significance, e.g. 257
-    that is searched in a byte. But this requires homogenization of the return 
-    codes of all Field*::store() methods.
-*/
-
-void JOIN::remove_subq_pushed_predicates(Item **where)
-{
-  if (conds->type() == Item::FUNC_ITEM &&
-      ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
-      ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
-      ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
-      test_if_ref ((Item_field *)((Item_func *)conds)->arguments()[1],
-                   ((Item_func *)conds)->arguments()[0]))
-  {
-    *where= 0;
-    return;
-  }
-}
-
-
-/*
-  Index lookup-based subquery: save some flags for EXPLAIN output
-
-  SYNOPSIS
-    save_index_subquery_explain_info()
-      join_tab  Subquery's join tab (there is only one as index lookup is
-                only used for subqueries that are single-table SELECTs)
-      where     Subquery's WHERE clause
-
-  DESCRIPTION
-    For index lookup-based subquery (i.e. one executed with
-    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
-    check its EXPLAIN output row should contain 
-      "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) 
-      "Using Where" (TAB_INFO_USING_WHERE)
-      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
-    and set appropriate flags in join_tab->packed_info.
-*/
-
-static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
-{
-  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
-  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
-    join_tab->packed_info |= TAB_INFO_USING_INDEX;
-  if (where)
-    join_tab->packed_info |= TAB_INFO_USING_WHERE;
-  for (uint i = 0; i < join_tab->ref.key_parts; i++)
-  {
-    if (join_tab->ref.cond_guards[i])
-    {
-      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
-      break;
-    }
-  }
+  DBUG_RETURN(res);				/* purecov: inspected */
 }
 
 
@@ -833,23 +901,59 @@ static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
 int
 JOIN::optimize()
 {
+  ulonglong select_opts_for_readinfo;
+  uint no_jbuf_after;
+
   DBUG_ENTER("JOIN::optimize");
+  do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
   // to prevent double initialization on EXPLAIN
   if (optimized)
     DBUG_RETURN(0);
   optimized= 1;
-
   thd_proc_info(thd, "optimizing");
+
+  set_allowed_join_cache_types();
+
+  /* Run optimize phase for all derived tables/views used in this SELECT. */
+  if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
+    DBUG_RETURN(1);
+
+  if (select_lex->first_cond_optimization)
+  {
+    //Do it only for the first execution
+    /* Merge all mergeable derived tables/views in this SELECT. */
+    if (select_lex->handle_derived(thd->lex, DT_MERGE))
+      DBUG_RETURN(TRUE);  
+    table_count= select_lex->leaf_tables.elements;
+    select_lex->update_used_tables();
+  }
+
+  if (transform_max_min_subquery())
+    DBUG_RETURN(1); /* purecov: inspected */
+
+  if (select_lex->first_cond_optimization)
+  {
+    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+    if (convert_join_subqueries_to_semijoins(this))
+      DBUG_RETURN(1); /* purecov: inspected */
+    /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+    select_lex->update_used_tables();
+
+  }
+  
+  eval_select_list_used_tables();
+  
+  table_count= select_lex->leaf_tables.elements;
+
+  if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
+    DBUG_RETURN(-1);
+
   row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
 	      unit->select_limit_cnt);
   /* select_limit is used to decide if we are likely to scan the whole table */
   select_limit= unit->select_limit_cnt;
   if (having || (select_options & OPTION_FOUND_ROWS))
     select_limit= HA_POS_ERROR;
-  do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
-  // Ignore errors of execution if option IGNORE present
-  if (thd->lex->ignore)
-    thd->lex->current_select->no_error= 1;
 #ifdef HAVE_REF_TO_FIELDS			// Not done yet
   /* Add HAVING to WHERE if possible */
   if (having && !group_list && !sum_func_count)
@@ -872,7 +976,8 @@ JOIN::optimize()
     }
   }
 #endif
-  SELECT_LEX *sel= thd->lex->current_select;
+
+  SELECT_LEX *sel= select_lex;
   if (sel->first_cond_optimization)
   {
     /*
@@ -889,16 +994,24 @@ JOIN::optimize()
     sel->first_cond_optimization= 0;
 
     /* Convert all outer joins to inner joins if possible */
-    conds= simplify_joins(this, join_list, conds, TRUE);
+    conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
+    if (select_lex->save_leaf_tables(thd))
+      DBUG_RETURN(1);
     build_bitmap_for_nested_joins(join_list, 0);
 
     sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
 
+    sel->where= conds;
+
     if (arena)
       thd->restore_active_arena(arena, &backup);
   }
+  
+  if (setup_jtbm_semi_joins(this, join_list, &conds))
+    DBUG_RETURN(1);
 
-  conds= optimize_cond(this, conds, join_list, &cond_value);   
+  conds= optimize_cond(this, conds, join_list, &cond_value, &cond_equal);
+     
   if (thd->is_error())
   {
     error= 1;
@@ -907,7 +1020,7 @@ JOIN::optimize()
   }
 
   {
-    having= optimize_cond(this, having, join_list, &having_value);
+    having= optimize_cond(this, having, join_list, &having_value, &having_equal);
     if (thd->is_error())
     {
       error= 1;
@@ -915,10 +1028,17 @@ JOIN::optimize()
       DBUG_RETURN(1);
     }
     if (select_lex->where)
+    {
       select_lex->cond_value= cond_value;
+      if (sel->where != conds && cond_value == Item::COND_OK)
+        thd->change_item_tree(&sel->where, conds);
+    }  
     if (select_lex->having)
+    {
       select_lex->having_value= having_value;
-
+      if (sel->having != having && having_value == Item::COND_OK)
+        thd->change_item_tree(&sel->having, having);    
+    }
     if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE || 
         (!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
     {						/* Impossible cond */
@@ -926,16 +1046,17 @@ JOIN::optimize()
                             "Impossible HAVING" : "Impossible WHERE"));
       zero_result_cause=  having_value == Item::COND_FALSE ?
                            "Impossible HAVING" : "Impossible WHERE";
-      tables= 0;
+      table_count= top_join_tab_count= 0;
       error= 0;
-      DBUG_RETURN(0);
+      goto setup_subq_exit;
     }
   }
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   {
     TABLE_LIST *tbl;
-    for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
+    List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+    while ((tbl= li++))
     {
       /* 
         If tbl->embedding!=NULL that means that this table is in the inner
@@ -972,13 +1093,14 @@ JOIN::optimize()
     */
     if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
     {
+      DBUG_ASSERT(res >= 0);
       if (res == HA_ERR_KEY_NOT_FOUND)
       {
         DBUG_PRINT("info",("No matching min/max row"));
 	zero_result_cause= "No matching min/max row";
-        tables= 0;
+        table_count= top_join_tab_count= 0;
 	error=0;
-	DBUG_RETURN(0);
+        goto setup_subq_exit;
       }
       if (res > 1)
       {
@@ -986,18 +1108,11 @@ JOIN::optimize()
         DBUG_PRINT("error",("Error from opt_sum_query"));
         DBUG_RETURN(1);
       }
-      if (res < 0)
-      {
-        DBUG_PRINT("info",("No matching min/max row"));
-        zero_result_cause= "No matching min/max row";
-        tables= 0;
-        error=0;
-        DBUG_RETURN(0);
-      }
+
       DBUG_PRINT("info",("Select tables optimized away"));
       zero_result_cause= "Select tables optimized away";
       tables_list= 0;				// All tables resolved
-      const_tables= tables;
+      const_tables= top_join_tab_count= table_count;
       /*
         Extract all table-independent conditions and replace the WHERE
         clause with them. All other conditions were computed by opt_sum_query
@@ -1011,7 +1126,8 @@ JOIN::optimize()
       if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
       {
         COND *table_independent_conds=
-          make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0);
+          make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
+                              FALSE, FALSE);
         DBUG_EXECUTE("where",
                      print_where(table_independent_conds,
                                  "where after opt_sum_query()",
@@ -1024,7 +1140,7 @@ JOIN::optimize()
   {
     DBUG_PRINT("info",("No tables"));
     error= 0;
-    DBUG_RETURN(0);
+    goto setup_subq_exit;
   }
   error= -1;					// Error is sent to client
   sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
@@ -1038,6 +1154,9 @@ JOIN::optimize()
     DBUG_RETURN(1);
   }
 
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+    drop_unused_derived_keys();
+
   if (rollup.state != ROLLUP::STATE_NONE)
   {
     if (rollup_process_const_fields())
@@ -1049,7 +1168,7 @@ JOIN::optimize()
   else
   {
     /* Remove distinct if only const tables */
-    select_distinct= select_distinct && (const_tables != tables);
+    select_distinct= select_distinct && (const_tables != table_count);
   }
 
   thd_proc_info(thd, "preparing");
@@ -1067,7 +1186,7 @@ JOIN::optimize()
     zero_result_cause= "no matching row in const table";
     DBUG_PRINT("error",("Error: %s", zero_result_cause));
     error= 0;
-    DBUG_RETURN(0);
+    goto setup_subq_exit;
   }
   if (!(thd->options & OPTION_BIG_SELECTS) &&
       best_read > (double) thd->variables.max_join_size &&
@@ -1095,7 +1214,10 @@ JOIN::optimize()
   }
   
   reset_nj_counters(this, join_list);
-  make_outerjoin_info(this);
+  if (make_outerjoin_info(this))
+  {
+    DBUG_RETURN(1);
+  }
 
   /*
     Among the equal fields belonging to the same multiple equality
@@ -1105,7 +1227,8 @@ JOIN::optimize()
   */
   if (conds)
   {
-    conds= substitute_for_best_equal_field(conds, cond_equal, map2table);
+    conds= substitute_for_best_equal_field(NO_PARTICULAR_TAB, conds, 
+                                           cond_equal, map2table);
     conds->update_used_tables();
     DBUG_EXECUTE("where",
                  print_where(conds,
@@ -1114,20 +1237,66 @@ JOIN::optimize()
   }
 
   /*
-    Permorm the the optimization on fields evaluation mentioned above
+    Perform the optimization on fields evaluation mentioned above
     for all on expressions.
-  */ 
-  for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
+  */
+  for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab;
+       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
   {
     if (*tab->on_expr_ref)
     {
-      *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref,
+      *tab->on_expr_ref= substitute_for_best_equal_field(NO_PARTICULAR_TAB,
+                                                         *tab->on_expr_ref,
                                                          tab->cond_equal,
                                                          map2table);
       (*tab->on_expr_ref)->update_used_tables();
     }
   }
 
+  /*
+    Perform the optimization on fields evaliation mentioned above
+    for all used ref items.
+  */
+  for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); tab;
+       tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+  {
+    uint key_copy_index=0;
+    for (uint i=0; i < tab->ref.key_parts; i++)
+    {
+      Item **ref_item_ptr= tab->ref.items+i;
+      Item *ref_item= *ref_item_ptr;
+      if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
+        continue;
+      COND_EQUAL *equals= cond_equal;
+      JOIN_TAB *first_inner= tab->first_inner;
+      while (equals)
+      {
+        ref_item= substitute_for_best_equal_field(tab, ref_item,
+                                                  equals, map2table);
+        if (first_inner)
+	{
+          equals= first_inner->cond_equal;
+          first_inner= first_inner->first_upper;
+        }
+        else
+          equals= 0;
+      }  
+      ref_item->update_used_tables();
+      if (*ref_item_ptr != ref_item)
+      {
+        *ref_item_ptr= ref_item;
+        Item *item= ref_item->real_item();
+        store_key *key_copy= tab->ref.key_copy[key_copy_index];
+        if (key_copy->type() == store_key::FIELD_STORE_KEY)
+        {
+          store_key_field *field_copy= ((store_key_field *)key_copy);
+          field_copy->change_source_field((Item_field *) item);
+        }
+      }
+      key_copy_index++;
+    }
+  }
+
   if (conds && const_table_map != found_const_table_map &&
       (select_options & SELECT_DESCRIBE))
   {
@@ -1138,7 +1307,8 @@ JOIN::optimize()
   {
     zero_result_cause=
       "Impossible WHERE noticed after reading const tables";
-    DBUG_RETURN(0);				// error == 0
+    select_lex->mark_const_derived(zero_result_cause);
+    goto setup_subq_exit;
   }
 
   error= -1;					/* if goto err */
@@ -1173,7 +1343,7 @@ JOIN::optimize()
 
      The FROM clause must contain a single non-constant table.
   */
-  if (tables - const_tables == 1 && (group_list || select_distinct) &&
+  if (table_count - const_tables == 1 && (group_list || select_distinct) &&
       !tmp_table_param.sum_func_count &&
       (!join_tab[const_tables].select ||
        !join_tab[const_tables].select->quick ||
@@ -1224,7 +1394,7 @@ JOIN::optimize()
     if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE)
       select_distinct=0;
   }
-  else if (select_distinct && tables - const_tables == 1 &&
+  else if (select_distinct && table_count - const_tables == 1 &&
            rollup.state == ROLLUP::STATE_NONE)
   {
     /*
@@ -1329,7 +1499,7 @@ JOIN::optimize()
     order=0;
 
   // Can't use sort on head table if using join buffering
-  if (full_join)
+  if (full_join || hash_join)
   {
     TABLE *stable= (sort_by_table == (TABLE *) 1 ? 
       join_tab[const_tables].table : sort_by_table);
@@ -1346,93 +1516,39 @@ JOIN::optimize()
     }
   }
 
+  need_tmp= test_if_need_tmp_table();
+
   /*
-    Check if we need to create a temporary table.
-    This has to be done if all tables are not already read (const tables)
-    and one of the following conditions holds:
-    - We are using DISTINCT (simple distinct's are already optimized away)
-    - We are using an ORDER BY or GROUP BY on fields not in the first table
-    - We are using different ORDER BY and GROUP BY orders
-    - The user wants us to buffer the result.
-    When the WITH ROLLUP modifier is present, we cannot skip temporary table
-    creation for the DISTINCT clause just because there are only const tables.
+    If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
+    whose columns are required to be returned in a sorted order, then
+    the proper value for no_jbuf_after should be yielded by a call to
+    the make_join_orderinfo function.
+    Yet the current implementation of FORCE INDEX hints does not
+    allow us to do it in a clean manner.
   */
-  need_tmp= ((const_tables != tables &&
-	     ((select_distinct || !simple_order || !simple_group) ||
-	      (group_list && order) ||
-	      test(select_options & OPTION_BUFFER_RESULT))) ||
-             (rollup.state != ROLLUP::STATE_NONE && select_distinct));
-
-  // No cache for MATCH
-  make_join_readinfo(this,
-		     (select_options & (SELECT_DESCRIBE |
-					SELECT_NO_JOIN_CACHE)) |
-		     (select_lex->ftfunc_list->elements ?
-		      SELECT_NO_JOIN_CACHE : 0));
+  no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
+
+  select_opts_for_readinfo=
+    (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
+    (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);
+
+  // No cache for MATCH == 'Don't use join buffering when we use MATCH'.
+  if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
+    DBUG_RETURN(1);
 
   /* Perform FULLTEXT search before all regular searches */
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, test(order));
 
-  /*
-    is this simple IN subquery?
-  */
-  if (!group_list && !order &&
-      unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
-      tables == 1 && conds &&
-      !unit->is_union())
-  {
-    if (!having)
-    {
-      Item *where= conds;
-      if (join_tab[0].type == JT_EQ_REF &&
-	  join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-        remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_UNIQUE_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_uniquesubquery_engine(thd,
-                                                                  join_tab,
-                                                                  unit->item,
-                                                                  where)));
-      }
-      else if (join_tab[0].type == JT_REF &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-	remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_INDEX_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_indexsubquery_engine(thd,
-                                                                 join_tab,
-                                                                 unit->item,
-                                                                 where,
-                                                                 NULL,
-                                                                 0)));
-      }
-    } else if (join_tab[0].type == JT_REF_OR_NULL &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
-               having->name == in_having_cond)
-    {
-      join_tab[0].type= JT_INDEX_SUBQUERY;
-      error= 0;
-      conds= remove_additional_cond(conds);
-      save_index_subquery_explain_info(join_tab, conds);
-      DBUG_RETURN(unit->item->
-		  change_engine(new subselect_indexsubquery_engine(thd,
-								   join_tab,
-								   unit->item,
-								   conds,
-                                                                   having,
-								   1)));
-    }
+  if (optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+  
+  int res;
+  if ((res= rewrite_to_index_subquery_engine(this)) != -1)
+    DBUG_RETURN(res);
+  if (setup_subquery_caches())
+    DBUG_RETURN(-1);
 
-  }
   /*
     Need to tell handlers that to play it safe, it should fetch all
     columns of the primary key of the tables: this is because MySQL may
@@ -1441,13 +1557,16 @@ JOIN::optimize()
   */
   if (need_tmp || select_distinct || group_list || order)
   {
-    for (uint i = const_tables; i < tables; i++)
-      join_tab[i].table->prepare_for_position();
+    for (uint i= 0; i < table_count; i++)
+    {
+      if (!(table[i]->map & const_table_map))
+        table[i]->prepare_for_position();
+    }
   }
 
   DBUG_EXECUTE("info",TEST_join(this););
 
-  if (const_tables != tables)
+  if (const_tables != table_count)
   {
     /*
       Because filesort always does a full table scan or a quick range scan
@@ -1496,7 +1615,7 @@ JOIN::optimize()
       for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
       {
         Item *item= *tmp_order->item;
-        if (item->walk(&Item::is_expensive_processor, 0, (uchar*)0))
+        if (item->is_expensive())
         {
           /* Force tmp table without sort */
           need_tmp=1; simple_order=simple_group=0;
@@ -1510,7 +1629,7 @@ JOIN::optimize()
   if (select_options & SELECT_DESCRIBE)
   {
     error= 0;
-    DBUG_RETURN(0);
+    goto derived_exit;
   }
   having= 0;
 
@@ -1528,6 +1647,41 @@ JOIN::optimize()
   if (join_tab->is_using_loose_index_scan())
     tmp_table_param.precomputed_group_by= TRUE;
 
+  error= 0;
+
+  DBUG_RETURN(0);
+
+setup_subq_exit:
+  /* Choose an execution strategy for this JOIN. */
+  if (!tables_list || !table_count)
+    choose_tableless_subquery_plan();
+  /*
+    Even with zero matching rows, subqueries in the HAVING clause may
+    need to be evaluated if there are aggregate functions in the query.
+  */
+  if (optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+  error= 0;
+
+derived_exit:
+  select_lex->mark_const_derived(zero_result_cause);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Create and initialize objects neeed for the execution of a query plan.
+  Evaluate constant expressions not evaluated during optimization.
+*/
+
+int JOIN::init_execution()
+{
+  DBUG_ENTER("JOIN::init_execution");
+
+  DBUG_ASSERT(optimized);
+  DBUG_ASSERT(!(select_options & SELECT_DESCRIBE));
+  initialized= true;
+
   /* Create a tmp table if distinct or if the sort is too complicated */
   if (need_tmp)
   {
@@ -1560,7 +1714,7 @@ JOIN::optimize()
 			   select_options,
                            tmp_rows_limit,
 			   (char *) "")))
-		{
+    {
       DBUG_RETURN(1);
     }
 
@@ -1621,8 +1775,8 @@ JOIN::optimize()
 
     if (exec_tmp_table1->distinct)
     {
-      table_map used_tables= thd->lex->used_tables;
-      JOIN_TAB *last_join_tab= join_tab+tables-1;
+      table_map used_tables= select_list_used_tables;
+      JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1;
       do
       {
 	if (used_tables & last_join_tab->table->map)
@@ -1646,12 +1800,101 @@ JOIN::optimize()
       DBUG_RETURN(-1);                         /* purecov: inspected */
   }
 
-  error= 0;
   DBUG_RETURN(0);
 }
 
 
 /**
+  Setup expression caches for subqueries that need them
+
+  @details
+  The function wraps correlated subquery expressions that return one value
+  into objects of the class Item_cache_wrapper setting up an expression
+  cache for each of them. The result values of the subqueries are to be
+  cached together with the corresponding sets of the parameters - outer
+  references of the subqueries.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool JOIN::setup_subquery_caches()
+{
+  DBUG_ENTER("JOIN::setup_subquery_caches");
+
+  /*
+    We have to check all this condition together because items created in
+    one of this clauses can be moved to another one by optimizer
+  */
+  if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
+      select_lex->expr_cache_may_be_used[IN_HAVING] ||
+      select_lex->expr_cache_may_be_used[IN_ON] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    if (conds)
+      conds= conds->transform(&Item::expr_cache_insert_transformer,
+                              (uchar*) thd);
+    for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_CONST_TABLES); 
+         tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+    {
+      if (tab->select_cond)
+        tab->select_cond=
+          tab->select_cond->transform(&Item::expr_cache_insert_transformer,
+                                      (uchar*) thd);
+      if (tab->cache_select && tab->cache_select->cond)
+        tab->cache_select->cond=
+          tab->cache_select->
+          cond->transform(&Item::expr_cache_insert_transformer,
+                          (uchar*) thd);
+
+    }
+
+    if (having)
+      having= having->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    if (tmp_having)
+    {
+      DBUG_ASSERT(having == NULL);
+      tmp_having= tmp_having->transform(&Item::expr_cache_insert_transformer,
+                                        (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
+      select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    List_iterator<Item> li(all_fields);
+    Item *item;
+    while ((item= li++))
+    {
+      Item *new_item=
+        item->transform(&Item::expr_cache_insert_transformer, (uchar*) thd);
+      if (new_item != item)
+      {
+        thd->change_item_tree(li.ref(), new_item);
+      }
+    }
+    for (ORDER *group= group_list; group ; group= group->next)
+    {
+      *group->item=
+        (*group->item)->transform(&Item::expr_cache_insert_transformer,
+                                  (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    for (ORDER *ord= order; ord; ord= ord->next)
+    {
+      *ord->item=
+        (*ord->item)->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
   Restore values in temporary join.
 */
 void JOIN::restore_tmp()
@@ -1660,6 +1903,62 @@ void JOIN::restore_tmp()
 }
 
 
+/*
+  Shrink join buffers used for preceding tables to reduce the occupied space
+
+  SYNOPSIS
+    shrink_join_buffers()
+      jt           table up to which the buffers are to be shrunk
+      curr_space   the size of the space used by the buffers for tables 1..jt
+      needed_space the size of the space that has to be used by these buffers
+
+  DESCRIPTION
+    The function makes an attempt to shrink all join buffers used for the
+    tables starting from the first up to jt to reduce the total size of the
+    space occupied by the buffers used for tables 1,...,jt  from curr_space
+    to needed_space.
+    The function assumes that the buffer for the table jt has not been
+    allocated yet.
+
+  RETURN
+    FALSE     if all buffer have been successfully shrunk
+    TRUE      otherwise
+*/
+  
+bool JOIN::shrink_join_buffers(JOIN_TAB *jt, 
+                               ulonglong curr_space,
+                               ulonglong needed_space)
+{
+  JOIN_CACHE *cache;
+  for (JOIN_TAB *tab= join_tab+const_tables; tab < jt; tab++)
+  {
+    cache= tab->cache;
+    if (cache)
+    { 
+      size_t buff_size;
+      if (needed_space < cache->get_min_join_buffer_size())
+        return TRUE;
+      if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
+      { 
+        revise_cache_usage(tab);
+        return TRUE;
+      }
+      buff_size= cache->get_join_buffer_size();
+      curr_space-= buff_size;
+      needed_space-= buff_size;
+    }
+  }
+
+  cache= jt->cache;
+  DBUG_ASSERT(cache);
+  if (needed_space < cache->get_min_join_buffer_size())
+    return TRUE;
+  cache->set_join_buffer_size((size_t)needed_space);
+  
+  return FALSE;
+}
+
+
 int
 JOIN::reinit()
 {
@@ -1685,16 +1984,22 @@ JOIN::reinit()
     free_io_cache(exec_tmp_table2);
     filesort_free_buffers(exec_tmp_table2,0);
   }
+  clear_sj_tmp_tables(this);
   if (items0)
     set_items_ref_array(items0);
 
   if (join_tab_save)
-    memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * tables);
+    memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count);
 
   /* need to reset ref access state (see join_read_key) */
   if (join_tab)
-    for (uint i= 0; i < tables; i++)
-      join_tab[i].ref.key_err= TRUE;
+  {
+    for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+         tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+    {
+      tab->ref.key_err= TRUE;
+    }
+  }
 
   if (tmp_join)
     restore_tmp();
@@ -1751,7 +2056,7 @@ JOIN::save_join_tab()
   if (!join_tab_save && select_lex->master_unit()->uncacheable)
   {
     if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab,
-						sizeof(JOIN_TAB) * tables)))
+						sizeof(JOIN_TAB) * table_count)))
       return 1;
   }
   return 0;
@@ -1791,7 +2096,7 @@ JOIN::exec()
   }
   (void) result->prepare2(); // Currently, this cannot fail.
 
-  if (!tables_list && (tables || !select_lex->with_sum_func))
+  if (!tables_list && (table_count || !select_lex->with_sum_func))
   {                                           // Only test of functions
     if (select_options & SELECT_DESCRIBE)
       select_describe(this, FALSE, FALSE, FALSE,
@@ -1844,9 +2149,28 @@ JOIN::exec()
     FOUND_ROWS() may be called. Never reset the examined row count here.
     It must be accumulated from all join iterations of all join parts.
   */
-  if (tables)
+  if (table_count)
     thd->limit_found_rows= 0;
 
+  /*
+    Evaluate expensive constant conditions that were not evaluated during
+    optimization. Do not evaluate them for EXPLAIN statements as these
+    condtions may be arbitrarily costly, and because the optimize phase
+    might not have produced a complete executable plan for EXPLAINs.
+  */
+  if (exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
+      !exec_const_cond->val_int())
+    zero_result_cause= "Impossible WHERE noticed after reading const tables";
+
+  /* 
+    We've called exec_const_cond->val_int(). This may have caused an error.
+  */
+  if (thd->is_error())
+  {
+    error= thd->is_error();
+    DBUG_VOID_RETURN;
+  }
+
   if (zero_result_cause)
   {
     (void) return_zero_rows(this, result, select_lex->leaf_tables,
@@ -1854,10 +2178,31 @@ JOIN::exec()
 			    send_row_on_empty_set(),
 			    select_options,
 			    zero_result_cause,
-			    having, all_fields);
+			    having ? having : tmp_having, all_fields);
     DBUG_VOID_RETURN;
   }
 
+  /*
+    Evaluate all constant expressions with subqueries in the ORDER/GROUP clauses
+    to make sure that all subqueries return a single row. The evaluation itself
+    will trigger an error if that is not the case.
+  */
+  if (exec_const_order_group_cond.elements &&
+      !(select_options & SELECT_DESCRIBE))
+  {
+    List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
+    Item *cur_const_item;
+    while ((cur_const_item= const_item_it++))
+    {
+      cur_const_item->val_str(&cur_const_item->str_value);
+      if (thd->is_error())
+      {
+        error= thd->is_error();
+        DBUG_VOID_RETURN;
+      }
+    }
+  }
+
   if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
     DBUG_VOID_RETURN;
@@ -1881,7 +2226,7 @@ JOIN::exec()
     }
     if (order && 
         (order != group_list || !(select_options & SELECT_BIG_RESULT)) &&
-	(const_tables == tables ||
+	(const_tables == table_count ||
  	 ((simple_order || skip_sort_order) &&
 	  test_if_skip_sort_order(&join_tab[const_tables], order,
 				  select_limit, 0, 
@@ -1892,9 +2237,17 @@ JOIN::exec()
     select_describe(this, need_tmp,
 		    order != 0 && !skip_sort_order,
 		    select_distinct,
-                    !tables ? "No tables used" : NullS);
+                    !table_count ? "No tables used" : NullS);
     DBUG_VOID_RETURN;
   }
+  else
+  {
+    /* it's a const select, materialize it. */
+    select_lex->mark_const_derived(zero_result_cause);
+  }
+
+  if (!initialized && init_execution())
+    DBUG_VOID_RETURN;
 
   JOIN *curr_join= this;
   List<Item> *curr_all_fields= &all_fields;
@@ -1923,11 +2276,14 @@ JOIN::exec()
     curr_tmp_table= exec_tmp_table1;
 
     /* Copy data to the temporary table */
-    thd_proc_info(thd, "Copying to tmp table");
+    thd_proc_info(thd, copy_to_tmp_table);
     DBUG_PRINT("info", ("%s", thd->proc_info));
     if (!curr_join->sort_and_group &&
-        curr_join->const_tables != curr_join->tables)
-      curr_join->join_tab[curr_join->const_tables].sorted= 0;
+        curr_join->const_tables != curr_join->table_count)
+    {
+      JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+      first_tab->sorted= test(first_tab->loosescan_match_tab);
+    }
 
     Procedure *save_proc= curr_join->procedure;
     tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0);
@@ -2078,7 +2434,6 @@ JOIN::exec()
       
       thd_proc_info(thd, "Copying to group table");
       DBUG_PRINT("info", ("%s", thd->proc_info));
-      tmp_error= -1;
       if (curr_join != this)
       {
 	if (sum_funcs2)
@@ -2098,8 +2453,12 @@ JOIN::exec()
         DBUG_VOID_RETURN;
       curr_join->group_list= 0;
       if (!curr_join->sort_and_group &&
-          curr_join->const_tables != curr_join->tables)
-        curr_join->join_tab[curr_join->const_tables].sorted= 0;
+          curr_join->const_tables != curr_join->table_count)
+      {
+        JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+        first_tab->sorted= test(first_tab->loosescan_match_tab);
+      }
+      tmp_error= -1;
       if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
 	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
 				0)))
@@ -2108,7 +2467,7 @@ JOIN::exec()
 	DBUG_VOID_RETURN;
       }
       end_read_record(&curr_join->join_tab->read_record);
-      curr_join->const_tables= curr_join->tables; // Mark free for cleanup()
+      curr_join->const_tables= curr_join->table_count; // Mark free for cleanup()
       curr_join->join_tab[0].table= 0;           // Table is freed
       
       // No sum funcs anymore
@@ -2224,9 +2583,10 @@ JOIN::exec()
       table_map used_tables= (curr_join->const_table_map |
 			      curr_table->table->map);
 
-      Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
+      Item* sort_table_cond= make_cond_for_table(thd, curr_join->tmp_having,
 						 used_tables,
-						 (table_map) 0);
+						 (table_map)0, -1,
+						 FALSE, FALSE);
       if (sort_table_cond)
       {
 	if (!curr_table->select)
@@ -2240,16 +2600,33 @@ JOIN::exec()
 		new Item_cond_and(curr_table->select->cond,
 				  sort_table_cond)))
 	    DBUG_VOID_RETURN;
-	  curr_table->select->cond->fix_fields(thd, 0);
 	}
-	curr_table->select_cond= curr_table->select->cond;
+        if (curr_table->pre_idx_push_select_cond)
+	{
+          if (sort_table_cond->type() == Item::COND_ITEM)
+            sort_table_cond= sort_table_cond->copy_andor_structure(thd);           
+          if (!(curr_table->pre_idx_push_select_cond= 
+                new Item_cond_and(curr_table->pre_idx_push_select_cond,
+                                  sort_table_cond)))
+            DBUG_VOID_RETURN;            
+        }
+        if (curr_table->select->cond && !curr_table->select->cond->fixed)
+	  curr_table->select->cond->fix_fields(thd, 0);
+        if (curr_table->pre_idx_push_select_cond &&
+            !curr_table->pre_idx_push_select_cond->fixed)
+          curr_table->pre_idx_push_select_cond->fix_fields(thd, 0);
+
+        curr_table->select->pre_idx_push_select_cond=
+          curr_table->pre_idx_push_select_cond;
+        curr_table->set_select_cond(curr_table->select->cond, __LINE__);
 	curr_table->select_cond->top_level_item();
 	DBUG_EXECUTE("where",print_where(curr_table->select->cond,
 					 "select and having",
                                          QT_ORDINARY););
-	curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having,
+	curr_join->tmp_having= make_cond_for_table(thd, curr_join->tmp_having,
 						   ~ (table_map) 0,
-						   ~used_tables);
+						   ~used_tables, -1,
+						   FALSE, FALSE);
 	DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
                                          "having after sort",
                                          QT_ORDINARY););
@@ -2265,7 +2642,7 @@ JOIN::exec()
 	  WHERE clause for any tables after the sorted one.
 	*/
 	JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1];
-	JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables];
+	JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count];
 	for (; curr_table < end_table ; curr_table++)
 	{
 	  /*
@@ -2303,7 +2680,7 @@ JOIN::exec()
                             curr_join->group_list ? TRUE : FALSE))
 	DBUG_VOID_RETURN;
       sortorder= curr_join->sortorder;
-      if (curr_join->const_tables != curr_join->tables &&
+      if (curr_join->const_tables != curr_join->table_count &&
           !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache)
       {
         /*
@@ -2325,7 +2702,7 @@ JOIN::exec()
   curr_join->fields= curr_fields_list;
   curr_join->procedure= procedure;
 
-  if (is_top_level_join() && thd->cursor && tables != const_tables)
+  if (is_top_level_join() && thd->cursor && table_count != const_tables)
   {
     /*
       We are here if this is JOIN::exec for the last select of the main unit
@@ -2392,9 +2769,11 @@ JOIN::destroy()
   {
     if (join_tab != tmp_join->join_tab)
     {
-      JOIN_TAB *tab, *end;
-      for (tab= join_tab, end= tab+tables ; tab != end ; tab++)
+      for (JOIN_TAB *tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+      {
 	tab->cleanup();
+      }
     }
     tmp_join->tmp_join= 0;
     /*
@@ -2407,6 +2786,7 @@ JOIN::destroy()
     DBUG_RETURN(tmp_join->destroy());
   }
   cond_equal= 0;
+  having_equal= 0;
 
   cleanup(1);
  /* Cleanup items referencing temporary table columns */
@@ -2417,6 +2797,7 @@ JOIN::destroy()
   if (exec_tmp_table2)
     free_tmp_table(thd, exec_tmp_table2);
   delete select;
+  destroy_sj_tmp_tables(this);
   delete_dynamic(&keyuse);
   delete procedure;
   DBUG_RETURN(error);
@@ -2425,6 +2806,7 @@ JOIN::destroy()
 
 void JOIN::cleanup_item_list(List<Item> &items) const
 {
+  DBUG_ENTER("JOIN::cleanup_item_list");
   if (!items.is_empty())
   {
     List_iterator_fast<Item> it(items);
@@ -2432,6 +2814,7 @@ void JOIN::cleanup_item_list(List<Item> &items) const
     while ((item= it++))
       item->cleanup();
   }
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2594,6 +2977,7 @@ err:
   DBUG_RETURN(join->error);
 }
 
+
 /*****************************************************************************
   Create JOIN_TABS, make a guess about the table types,
   Approximate how many records will be used in each table
@@ -2615,7 +2999,7 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
     select->head=table;
     table->reginfo.impossible_range=0;
     if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0,
-                                          limit, 0)) == 1)
+                                          limit, 0, FALSE)) == 1)
       DBUG_RETURN(select->quick->records);
     if (error == -1)
     {
@@ -2641,6 +3025,7 @@ typedef struct st_sargable_param
   uint num_values;           /* number of values in the above array      */
 } SARGABLE_PARAM;  
 
+
 /**
   Calculate the best possible join and initialize the join structure.
 
@@ -2651,15 +3036,15 @@ typedef struct st_sargable_param
 */
 
 static bool
-make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
-		     DYNAMIC_ARRAY *keyuse_array)
+make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
+                     COND *conds, DYNAMIC_ARRAY *keyuse_array)
 {
-  int error;
+  int error= 0;
   TABLE *table;
-  TABLE_LIST *tables= tables_arg;
   uint i,table_count,const_count,key;
   table_map found_const_table_map, all_table_map, found_ref, refs;
   key_map const_ref, eq_part;
+  bool has_expensive_keyparts;
   TABLE **table_vector;
   JOIN_TAB *stat,*stat_end,*s,**stat_ref;
   KEYUSE *keyuse,*start_keyuse;
@@ -2667,10 +3052,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   table_map no_rows_const_tables= 0;
   SARGABLE_PARAM *sargables= 0;
   JOIN_TAB *stat_vector[MAX_TABLES+1];
+  List_iterator<TABLE_LIST> ti(tables_list);
+  TABLE_LIST *tables;
   DBUG_ENTER("make_join_statistics");
 
-  table_count=join->tables;
-  stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count);
+  LINT_INIT(table); /* inited in all loops */
+  table_count=join->table_count;
+
+  stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count));
   stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES);
   table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2));
   if (!stat || !stat_ref || !table_vector)
@@ -2682,9 +3071,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   found_const_table_map= all_table_map=0;
   const_count=0;
 
-  for (s= stat, i= 0;
-       tables;
-       s++, tables= tables->next_leaf, i++)
+  for (s= stat, i= 0; (tables= ti++); s++, i++)
   {
     TABLE_LIST *embedding= tables->embedding;
     stat_vector[i]=s;
@@ -2694,8 +3081,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     s->needed_reg.init();
     table_vector[i]=s->table=table=tables->table;
     table->pos_in_table_list= tables;
-    error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
-
+    error= tables->fetch_number_of_rows();
     DBUG_EXECUTE_IF("bug11747970_raise_error",
                     {
                       if (!error)
@@ -2704,22 +3090,21 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
                         goto error;
                       }
                     });
-
     if (error)
     {
       table->file->print_error(error, MYF(0));
       goto error;
     }
     table->quick_keys.clear_all();
+    table->intersect_keys.clear_all();
     table->reginfo.join_tab=s;
     table->reginfo.not_exists_optimize=0;
     bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
     all_table_map|= table->map;
+    s->preread_init_done= FALSE;
     s->join=join;
-    s->info=0;					// For describe
 
     s->dependent= tables->dep_tables;
-    s->key_dependent= 0;
     if (tables->schema_table)
       table->file->stats.records= 2;
     table->quick_condition_rows= table->file->stats.records;
@@ -2729,9 +3114,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       /* s is the only inner table of an outer join */
 #ifdef WITH_PARTITION_STORAGE_ENGINE
-      if ((!table->file->stats.records || table->no_partitions_used) && !embedding)
+      if (!table->is_filled_at_execution() &&
+           (!table->file->stats.records || table->no_partitions_used) && !embedding)
 #else
-      if (!table->file->stats.records && !embedding)
+      if (!table->is_filled_at_execution() &&
+          !table->file->stats.records && !embedding)
 #endif
       {						// Empty table
         s->dependent= 0;                        // Ignore LEFT JOIN depend.
@@ -2749,8 +3136,19 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       /* s belongs to a nested join, maybe to several embedded joins */
       s->embedding_map= 0;
+      bool inside_an_outer_join= FALSE;
       do
       {
+        /* 
+          If this is a semi-join nest, skip it, and proceed upwards. Maybe
+          we're in some outer join nest
+        */
+        if (embedding->sj_on_expr)
+        {
+          embedding= embedding->embedding;
+          continue;
+        }
+        inside_an_outer_join= TRUE;
         NESTED_JOIN *nested_join= embedding->nested_join;
         s->embedding_map|=nested_join->nj_map;
         s->dependent|= embedding->dep_tables;
@@ -2758,14 +3156,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
         outer_join|= nested_join->used_tables;
       }
       while (embedding);
-      continue;
+      if (inside_an_outer_join)
+        continue;
     }
 #ifdef WITH_PARTITION_STORAGE_ENGINE
     const bool no_partitions_used= table->no_partitions_used;
 #else
     const bool no_partitions_used= FALSE;
 #endif
-    if ((table->s->system || table->file->stats.records <= 1 ||
+    if (!table->is_filled_at_execution() && 
+        (table->s->system || table->file->stats.records <= 1 ||
          no_partitions_used) &&
 	!s->dependent &&
 	(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
@@ -2774,7 +3174,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       set_position(join,const_count++,s,(KEYUSE*) 0);
       no_rows_const_tables |= table->map;
     }
+    
+    /* SJ-Materialization handling: */
+    if (table->pos_in_table_list->jtbm_subselect &&
+        table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+    {
+      set_position(join,const_count++,s,(KEYUSE*) 0);
+      no_rows_const_tables |= table->map;
+    }
   }
+
   stat_vector[i]=0;
   join->outer_join=outer_join;
 
@@ -2798,6 +3207,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
              if (bitmap_is_set(stat[j].dependent, i) &&
                  bitmap_is_set(stat[i].dependent, k))
                bitmap_set_bit(stat[j].dependent, k);
+           }
          }
        }  
     */
@@ -2818,7 +3228,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       if (s->dependent & s->table->map)
       {
-        join->tables=0;			// Don't use join->table
+        join->table_count=0;			// Don't use join->table
         my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
         goto error;
       }
@@ -2827,10 +3237,24 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   }
 
   if (conds || outer_join)
-    if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
-                            conds, join->cond_equal,
-                            ~outer_join, join->select_lex, &sargables))
+  {
+    if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count,
+                            conds, ~outer_join, join->select_lex, &sargables))
+      goto error;
+    /*
+      Keyparts without prefixes may be useful if this JOIN is a subquery, and
+      if the subquery may be executed via the IN-EXISTS strategy.
+    */
+    bool skip_unprefixed_keyparts=
+      !(join->is_in_subquery() &&
+        ((Item_in_subselect*)join->unit->item)->test_strategy(SUBS_IN_TO_EXISTS));
+
+    if (keyuse_array->elements &&
+        sort_and_filter_keyuse(join->thd, keyuse_array,
+                               skip_unprefixed_keyparts))
       goto error;
+    DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+  }
 
   join->const_table_map= no_rows_const_tables;
   join->const_tables= const_count;
@@ -2877,6 +3301,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     {
       table=s->table;
 
+      if (table->is_filled_at_execution())
+        continue;
+
       /* 
         If equi-join condition by a key is null rejecting and after a
         substitution of a const table the key value happens to be null
@@ -2897,7 +3324,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	*/              
         while (keyuse->table == table)
         {
-          if (!(keyuse->val->used_tables() & ~join->const_table_map) &&
+          if (!keyuse->is_for_hash_join() && 
+              !(keyuse->val->used_tables() & ~join->const_table_map) &&
               keyuse->val->is_null() && keyuse->null_rejecting)
           {
             s->type= JT_CONST;
@@ -2918,7 +3346,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	  continue;
 	if (table->file->stats.records <= 1L &&
 	    (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
-            !table->pos_in_table_list->embedding)
+            !table->pos_in_table_list->embedding &&
+	      !((outer_join & table->map) && 
+		(*s->on_expr_ref)->is_expensive()))
 	{					// system table
 	  int tmp= 0;
 	  s->type=JT_SYSTEM;
@@ -2940,19 +3370,29 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	s->type= JT_REF;
 	while (keyuse->table == table)
 	{
+          if (keyuse->is_for_hash_join())
+	  {
+            keyuse++;
+            continue;
+          }
 	  start_keyuse=keyuse;
 	  key=keyuse->key;
-	  s->keys.set_bit(key);               // QQ: remove this ?
+	  s->keys.set_bit(key);               // TODO: remove this ?
 
 	  refs=0;
           const_ref.clear_all();
 	  eq_part.clear_all();
+          has_expensive_keyparts= false;
 	  do
 	  {
 	    if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
 	    {
 	      if (!((~found_const_table_map) & keyuse->used_tables))
+              {
 		const_ref.set_bit(keyuse->keypart);
+                if (keyuse->val->is_expensive())
+                  has_expensive_keyparts= true;
+              }
 	      else
 		refs|=keyuse->used_tables;
 	      eq_part.set_bit(keyuse->keypart);
@@ -2960,20 +3400,30 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	    keyuse++;
 	  } while (keyuse->table == table && keyuse->key == key);
 
+          TABLE_LIST *embedding= table->pos_in_table_list->embedding;
+          /*
+            TODO (low priority): currently we ignore the const tables that
+            are within a semi-join nest which is within an outer join nest.
+            The effect of this is that we don't do const substitution for
+            such tables.
+          */
 	  if (eq_part.is_prefix(table->key_info[key].key_parts) &&
               !table->fulltext_searched && 
-              !table->pos_in_table_list->embedding)
+              (!embedding || (embedding->sj_on_expr && !embedding->embedding)))
 	  {
             if (table->key_info[key].flags & HA_NOSAME)
             {
-	      if (const_ref == eq_part)
+	      if (const_ref == eq_part &&
+                  !has_expensive_keyparts &&
+                  !((outer_join & table->map) &&
+                    (*s->on_expr_ref)->is_expensive()))
 	      {					// Found everything for ref.
 	        int tmp;
 	        ref_changed = 1;
 	        s->type= JT_CONST;
 	        join->const_table_map|=table->map;
 	        set_position(join,const_count++,s,start_keyuse);
-	        if (create_ref_for_key(join, s, start_keyuse,
+	        if (create_ref_for_key(join, s, start_keyuse, FALSE,
 				       found_const_table_map))
                   goto error;
 	        if ((tmp=join_read_const_table(s,
@@ -2996,7 +3446,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       }
     }
   } while (join->const_table_map & found_ref && ref_changed);
-
+ 
   /* 
     Update info on indexes that can be used for search lookups as
     reading const tables may has added new sargable predicates. 
@@ -3021,15 +3471,27 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 
   for (s=stat ; s < stat_end ; s++)
   {
+    s->startup_cost= 0;
     if (s->type == JT_SYSTEM || s->type == JT_CONST)
     {
       /* Only one matching row */
-      s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0;
+      s->found_records= s->records= 1;
+      s->read_time=1.0; 
+      s->worst_seeks=1.0;
       continue;
     }
     /* Approximate found rows and time to read them */
-    s->found_records=s->records=s->table->file->stats.records;
-    s->read_time=(ha_rows) s->table->file->scan_time();
+    if (s->table->is_filled_at_execution())
+    {
+      get_delayed_table_estimates(s->table, &s->records, &s->read_time,
+                                  &s->startup_cost);
+      s->found_records= s->records;
+      table->quick_condition_rows=s->records;
+    }
+    else
+    {
+       s->scan_time();
+    }
 
     /*
       Set a max range of how many seeks we can expect when using keys
@@ -3046,9 +3508,21 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       all select distinct fields participate in one index.
     */
     add_group_and_distinct_keys(join, s);
-
-    if (!s->const_keys.is_clear_all() &&
-        !s->table->pos_in_table_list->embedding)
+    
+    /*
+      Perform range analysis if there are keys it could use (1). 
+      Don't do range analysis if we're on the inner side of an outer join (2).
+      Do range analysis if we're on the inner side of a semi-join (3).
+      Don't do range analysis for materialized subqueries (4).
+      Don't do range analysis for materialized derived tables (5)
+    */
+    if (!s->const_keys.is_clear_all() &&                            // (1)
+        (!s->table->pos_in_table_list->embedding ||                 // (2)
+         (s->table->pos_in_table_list->embedding &&                 // (3)
+          s->table->pos_in_table_list->embedding->sj_on_expr)) &&   // (3)
+        !s->table->is_filled_at_execution() &&                      // (4)
+        !(s->table->pos_in_table_list->derived &&                   // (5)
+          s->table->pos_in_table_list->is_materialized_derived()))  // (5)
     {
       ha_rows records;
       SQL_SELECT *select;
@@ -3086,31 +3560,63 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       if (records != HA_POS_ERROR)
       {
 	s->found_records=records;
-	s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
+	s->read_time= s->quick ? s->quick->read_time : 0.0;
       }
       delete select;
     }
   }
 
+  if (pull_out_semijoin_tables(join))
+    DBUG_RETURN(TRUE);
+
   join->join_tab=stat;
   join->map2table=stat_ref;
-  join->table= join->all_tables=table_vector;
+  join->table= table_vector;
   join->const_tables=const_count;
   join->found_const_table_map=found_const_table_map;
 
-  /* Find an optimal join order of the non-constant tables. */
-  if (join->const_tables != join->tables)
-  {
+  if (join->const_tables != join->table_count)
     optimize_keyuse(join, keyuse_array);
-    if (choose_plan(join, all_table_map & ~join->const_table_map))
-      goto error;
-  }
-  else
+   
+  if (optimize_semijoin_nests(join, all_table_map))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+
   {
-    memcpy((uchar*) join->best_positions,(uchar*) join->positions,
-	   sizeof(POSITION)*join->const_tables);
-    join->best_read=1.0;
+    ha_rows records= 1;
+    SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
+
+    /* Find an optimal join order of the non-constant tables. */
+    if (join->const_tables != join->table_count)
+    {
+      if (choose_plan(join, all_table_map & ~join->const_table_map))
+        goto error;
+    }
+    else
+    {
+      memcpy((uchar*) join->best_positions,(uchar*) join->positions,
+	     sizeof(POSITION)*join->const_tables);
+      join->record_count= 1.0;
+      join->best_read=1.0;
+    }
+  
+    if (!(join->select_options & SELECT_DESCRIBE) &&
+        unit->derived && unit->derived->is_materialized_derived())
+    {
+      /*
+        Calculate estimated number of rows for materialized derived
+        table/view.
+      */
+      for (i= 0; i < join->table_count ; i++)
+        records*= join->best_positions[i].records_read ?
+                  (ha_rows)join->best_positions[i].records_read : 1;
+      set_if_smaller(records, unit->select_limit_cnt);
+      join->select_lex->increase_derived_records(records);
+    }
   }
+
+  if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
+    goto error;
+
   /* Generate an execution plan from the found optimal join order. */
   DBUG_RETURN(join->thd->killed || get_best_combination(join));
 
@@ -3121,8 +3627,12 @@ error:
     may not be assigned yet by this function (which is building join_tab).
     Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke. 
   */
-  for (tables= tables_arg; tables; tables= tables->next_leaf)
-    tables->table->reginfo.join_tab= NULL;
+  {    
+    TABLE_LIST *table;
+    List_iterator<TABLE_LIST> ti(tables_list);
+    while ((table= ti++))
+      table->table->reginfo.join_tab= NULL;
+  }
   DBUG_RETURN (1);
 }
 
@@ -3148,12 +3658,9 @@ typedef struct key_field_t {
   */
   bool          null_rejecting; 
   bool         *cond_guard; /* See KEYUSE::cond_guard */
+  uint          sj_pred_no; /* See KEYUSE::sj_pred_no */
 } KEY_FIELD;
 
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS		1
-#define KEY_OPTIMIZE_REF_OR_NULL	2
-
 /**
   Merge new key definitions to old ones, remove those not used in both.
 
@@ -3276,15 +3783,18 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 	{
 	  /* field = expression OR field IS NULL */
 	  old->level= and_level;
-	  old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
+          if (old->field->maybe_null())
+	  {
+	    old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
+            /* The referred expression can be NULL: */ 
+            old->null_rejecting= 0;
+	  }
 	  /*
             Remember the NOT NULL value unless the value does not depend
             on other tables.
           */
 	  if (!old->val->used_tables() && old->val->is_null())
 	    old->val= new_fields->val;
-          /* The referred expression can be NULL: */ 
-          old->null_rejecting= 0;
 	}
 	else
 	{
@@ -3317,6 +3827,52 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 }
 
 
+/*
+  Given a field, return its index in semi-join's select list, or UINT_MAX
+
+  DESCRIPTION
+    Given a field, we find its table; then see if the table is within a
+    semi-join nest and if the field was in select list of the subselect.
+    If it was, we return field's index in the select list. The value is used
+    by LooseScan strategy.
+*/
+
+static uint get_semi_join_select_list_index(Field *field)
+{
+  uint res= UINT_MAX;
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
+      emb_sj_nest->sj_on_expr)
+  {
+    Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
+    st_select_lex *subq_lex= subq_pred->unit->first_select();
+    if (subq_pred->left_expr->cols() == 1)
+    {
+      Item *sel_item= subq_lex->ref_pointer_array[0];
+      if (sel_item->type() == Item::FIELD_ITEM &&
+          ((Item_field*)sel_item)->field->eq(field))
+      {
+        res= 0;
+      }
+    }
+    else
+    {
+      for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+      {
+        Item *sel_item= subq_lex->ref_pointer_array[i];
+        if (sel_item->type() == Item::FIELD_ITEM &&
+            ((Item_field*)sel_item)->field->eq(field))
+        {
+          res= i;
+          break;
+        }
+      }
+    }
+  }
+  return res;
+}
+
+
 /**
   Add a possible key to array of possible keys if it's usable as a key
 
@@ -3339,21 +3895,32 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 */
 
 static void
-add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
+add_key_field(JOIN *join,
+              KEY_FIELD **key_fields,uint and_level, Item_func *cond,
               Field *field, bool eq_func, Item **value, uint num_values,
               table_map usable_tables, SARGABLE_PARAM **sargables)
 {
-  uint exists_optimize= 0;
-  if (!(field->flags & PART_KEY_FLAG))
+  uint optimize= 0;  
+  if (eq_func &&
+      ((join->is_allowed_hash_join_access() &&
+        field->hash_join_is_possible() && 
+        !(field->table->pos_in_table_list->is_materialized_derived() &&
+          field->table->created)) ||
+       (field->table->pos_in_table_list->is_materialized_derived() &&
+        !field->table->created)))
+  {
+    optimize= KEY_OPTIMIZE_EQ;
+  }   
+  else if (!(field->flags & PART_KEY_FLAG))
   {
     // Don't remove column IS NULL on a LEFT JOIN table
     if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
         !field->table->maybe_null || field->null_ptr)
       return;					// Not a key. Skip it
-    exists_optimize= KEY_OPTIMIZE_EXISTS;
+    optimize= KEY_OPTIMIZE_EXISTS;
     DBUG_ASSERT(num_values == 1);
   }
-  else
+  if (optimize != KEY_OPTIMIZE_EXISTS)
   {
     table_map used_tables=0;
     bool optimizable=0;
@@ -3370,12 +3937,12 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
       if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
           !field->table->maybe_null || field->null_ptr)
 	return;					// Can't use left join optimize
-      exists_optimize= KEY_OPTIMIZE_EXISTS;
+      optimize= KEY_OPTIMIZE_EXISTS;
     }
     else
     {
       JOIN_TAB *stat=field->table->reginfo.join_tab;
-      key_map possible_keys=field->key_start;
+      key_map possible_keys=field->get_possible_keys();
       possible_keys.intersect(field->table->keys_in_use_for_query);
       stat[0].keys.merge(possible_keys);             // Add possible keys
 
@@ -3390,7 +3957,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
          Field BETWEEN ...
          Field IN ...
       */
-      stat[0].key_dependent|=used_tables;
+      if (field->flags & PART_KEY_FLAG)
+        stat[0].key_dependent|=used_tables;
 
       bool is_const=1;
       for (uint i=0; i<num_values; i++)
@@ -3413,30 +3981,21 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
         (*sargables)->arg_value= value;
         (*sargables)->num_values= num_values;
       }
+
       /*
-	We can't always use indexes when comparing a string index to a
-	number. cmp_type() is checked to allow compare of dates to numbers.
-        eq_func is NEVER true when num_values > 1
+	We can't use indexes when comparing a string index to a
+	number or two strings if the effective collation
+        of the operation differ from the field collation.
        */
       if (!eq_func)
         return;
-      if (field->result_type() == STRING_RESULT)
+
+      if (field->cmp_type() == STRING_RESULT)
       {
-        if ((*value)->result_type() != STRING_RESULT)
-        {
-          if (field->cmp_type() != (*value)->result_type())
-            return;
-        }
-        else
-        {
-          /*
-            We can't use indexes if the effective collation
-            of the operation differ from the field collation.
-          */
-          if (field->cmp_type() == STRING_RESULT &&
-              ((Field_str*)field)->charset() != cond->compare_collation())
+        if ((*value)->cmp_type() != STRING_RESULT)
             return;
-        }
+        if (((Field_str*)field)->charset() != cond->compare_collation())
+          return;
       }
     }
   }
@@ -3449,8 +4008,8 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
   (*key_fields)->field=		field;
   (*key_fields)->eq_func=	eq_func;
   (*key_fields)->val=		*value;
-  (*key_fields)->level=		and_level;
-  (*key_fields)->optimize=	exists_optimize;
+  (*key_fields)->level=         and_level;
+  (*key_fields)->optimize=      optimize;
   /*
     If the condition has form "tbl.keypart = othertbl.field" and 
     othertbl.field can be NULL, there will be no matches if othertbl.field 
@@ -3458,11 +4017,19 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
     We use null_rejecting in add_not_null_conds() to add
     'othertbl.field IS NOT NULL' to tab->select_cond.
   */
-  (*key_fields)->null_rejecting= ((cond->functype() == Item_func::EQ_FUNC ||
-                                   cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
-                                  ((*value)->type() == Item::FIELD_ITEM) &&
-                                  ((Item_field*)*value)->field->maybe_null());
+  {
+    Item *real= (*value)->real_item();
+    if (((cond->functype() == Item_func::EQ_FUNC) ||
+         (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
+        (real->type() == Item::FIELD_ITEM) &&
+        ((Item_field*)real)->field->maybe_null())
+      (*key_fields)->null_rejecting= true;
+    else
+      (*key_fields)->null_rejecting= false;
+  }
   (*key_fields)->cond_guard= NULL;
+
+  (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
   (*key_fields)++;
 }
 
@@ -3489,29 +4056,29 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
 */
 
 static void
-add_key_equal_fields(KEY_FIELD **key_fields, uint and_level,
-                     Item_func *cond, Item_field *field_item,
+add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
+                     Item_func *cond, Item *field_item,
                      bool eq_func, Item **val,
                      uint num_values, table_map usable_tables,
                      SARGABLE_PARAM **sargables)
 {
-  Field *field= field_item->field;
-  add_key_field(key_fields, and_level, cond, field,
+  Field *field= ((Item_field *) (field_item->real_item()))->field;
+  add_key_field(join, key_fields, and_level, cond, field,
                 eq_func, val, num_values, usable_tables, sargables);
-  Item_equal *item_equal= field_item->item_equal;
+  Item_equal *item_equal= field_item->get_item_equal();
   if (item_equal)
   { 
     /*
       Add to the set of possible key values every substitution of
       the field for an equal field included into item_equal
     */
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
-    while ((item= it++))
+    Item_equal_fields_iterator it(*item_equal);
+    while (it++)
     {
-      if (!field->eq(item->field))
+      Field *equal_field= it.get_curr_field();
+      if (!field->eq(equal_field))
       {
-        add_key_field(key_fields, and_level, cond, item->field,
+        add_key_field(join, key_fields, and_level, cond, equal_field,
                       eq_func, val, num_values, usable_tables,
                       sargables);
       }
@@ -3536,8 +4103,8 @@ static bool
 is_local_field (Item *field)
 {
   return field->real_item()->type() == Item::FIELD_ITEM
-    && !(field->used_tables() & OUTER_REF_TABLE_BIT)
-    && !((Item_field *)field->real_item())->depended_from;
+     && !(field->used_tables() & OUTER_REF_TABLE_BIT)
+    && !((Item_field *)field->real_item())->get_depended_from();
 }
 
 
@@ -3557,7 +4124,6 @@ is_local_field (Item *field)
   The primary reason for having and_level attribute is the OR operation which 
   uses and_level to mark KEY_FIELDs that should get into the result of the OR
   operation
-
 */
 
 static void
@@ -3669,7 +4235,7 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
       if (is_local_field(values[0]))
       {
         field_item= (Item_field *) (values[0]->real_item());
-        add_key_equal_fields(key_fields, *and_level, cond_func,
+        add_key_equal_fields(join, key_fields, *and_level, cond_func,
                              field_item, equal_func, &values[1],
                              num_values, usable_tables, sargables);
       }
@@ -3683,7 +4249,7 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         if (is_local_field(values[i]))
         {
           field_item= (Item_field *) (values[i]->real_item());
-          add_key_equal_fields(key_fields, *and_level, cond_func,
+          add_key_equal_fields(join, key_fields, *and_level, cond_func,
                                field_item, equal_func, values,
                                1, usable_tables, sargables);
         }
@@ -3700,7 +4266,7 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         values--;
       DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC ||
                   cond_func->argument_count() != 2);
-      add_key_equal_fields(key_fields, *and_level, cond_func,
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
                            (Item_field*) (cond_func->key_item()->real_item()),
                            0, values, 
                            cond_func->argument_count()-1,
@@ -3715,8 +4281,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
 
     if (is_local_field (cond_func->arguments()[0]))
     {
-      add_key_equal_fields(key_fields, *and_level, cond_func,
-	                (Item_field*) (cond_func->arguments()[0])->real_item(),
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
+                           (Item_field*) (cond_func->arguments()[0])->
+                           real_item(),
 		           equal_func,
                            cond_func->arguments()+1, 1, usable_tables,
                            sargables);
@@ -3724,8 +4291,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
     if (is_local_field (cond_func->arguments()[1]) &&
 	cond_func->functype() != Item_func::LIKE_FUNC)
     {
-      add_key_equal_fields(key_fields, *and_level, cond_func, 
-                       (Item_field*) (cond_func->arguments()[1])->real_item(),
+      add_key_equal_fields(join, key_fields, *and_level, cond_func, 
+                           (Item_field*) (cond_func->arguments()[1])->
+                           real_item(),
 		           equal_func,
                            cond_func->arguments(),1,usable_tables,
                            sargables);
@@ -3740,17 +4308,17 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
       Item *tmp=new Item_null;
       if (unlikely(!tmp))                       // Should never be true
 	return;
-      add_key_equal_fields(key_fields, *and_level, cond_func,
-		    (Item_field*) (cond_func->arguments()[0])->real_item(),
-		    cond_func->functype() == Item_func::ISNULL_FUNC,
+      add_key_equal_fields(join, key_fields, *and_level, cond_func,
+                           (Item_field*) (cond_func->arguments()[0])->
+                           real_item(),
+                           cond_func->functype() == Item_func::ISNULL_FUNC,
 			   &tmp, 1, usable_tables, sargables);
     }
     break;
   case Item_func::OPTIMIZE_EQUAL:
     Item_equal *item_equal= (Item_equal *) cond;
     Item *const_item= item_equal->get_const();
-    Item_equal_iterator it(*item_equal);
-    Item_field *item;
+    Item_equal_fields_iterator it(*item_equal);
     if (const_item)
     {
       /*
@@ -3758,9 +4326,10 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         field1=const_item as a condition allowing an index access of the table
         with field1 by the keys value of field1.
       */   
-      while ((item= it++))
+      while (it++)
       {
-        add_key_field(key_fields, *and_level, cond_func, item->field,
+        Field *equal_field= it.get_curr_field();
+        add_key_field(join, key_fields, *and_level, cond_func, equal_field,
                       TRUE, &const_item, 1, usable_tables, sargables);
       }
     }
@@ -3772,17 +4341,18 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         field1=field2 as a condition allowing an index access of the table
         with field1 by the keys value of field2.
       */   
-      Item_equal_iterator fi(*item_equal);
-      while ((item= fi++))
+      Item_equal_fields_iterator fi(*item_equal);
+      while (fi++)
       {
-        Field *field= item->field;
+        Field *field= fi.get_curr_field();
+        Item *item;
         while ((item= it++))
         {
-          if (!field->eq(item->field))
+          Field *equal_field= it.get_curr_field();
+          if (!field->eq(equal_field))
           {
-            Item *tmp_item= item;
-            add_key_field(key_fields, *and_level, cond_func, field,
-                          TRUE, &tmp_item, 1, usable_tables,
+            add_key_field(join, key_fields, *and_level, cond_func, field,
+                          TRUE, &item, 1, usable_tables,
                           sargables);
           }
         }
@@ -3802,6 +4372,55 @@ max_part_bit(key_part_map bits)
   return found;
 }
 
+
+/**
+  Add a new keuse to the specified array of KEYUSE objects
+
+  @param[in,out]  keyuse_array  array of keyuses to be extended 
+  @param[in]      key_field     info on the key use occurrence
+  @param[in]      key           key number for the keyse to be added
+  @param[in]      part          key part for the keyuse to be added
+
+  @note
+  The function builds a new KEYUSE object for a key use utilizing the info
+  on the left and right parts of the given key use  extracted from the 
+  structure key_field, the key number and key part for this key use. 
+  The built object is added to the dynamic array keyuse_array.
+
+  @retval         0             the built object is succesfully added 
+  @retval         1             otherwise
+*/
+
+static bool
+add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
+          uint key, uint part)
+{
+  KEYUSE keyuse;
+  Field *field= key_field->field;
+
+  keyuse.table= field->table;
+  keyuse.val= key_field->val;
+  keyuse.key= key;
+  if (!is_hash_join_key_no(key))
+  {
+    keyuse.keypart=part;
+    keyuse.keypart_map= (key_part_map) 1 << part;
+  }
+  else
+  {
+    keyuse.keypart= field->field_index;
+    keyuse.keypart_map= (key_part_map) 0;
+  }
+  keyuse.used_tables= key_field->val->used_tables();
+  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
+  keyuse.ref_table_rows= 0;
+  keyuse.null_rejecting= key_field->null_rejecting;
+  keyuse.cond_guard= key_field->cond_guard;
+  keyuse.sj_pred_no= key_field->sj_pred_no;
+  return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
+}
+
+
 /*
   Add all keys with uses 'field' for some keypart
   If field->and_level != and_level then only mark key_part as const_part
@@ -3812,11 +4431,10 @@ max_part_bit(key_part_map bits)
 */
 
 static bool
-add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
+add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
 {
   Field *field=key_field->field;
   TABLE *form= field->table;
-  KEYUSE keyuse;
 
   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
   {
@@ -3832,20 +4450,24 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
       {
 	if (field->eq(form->key_info[key].key_part[part].field))
 	{
-	  keyuse.table= field->table;
-	  keyuse.val =  key_field->val;
-	  keyuse.key =  key;
-	  keyuse.keypart=part;
-	  keyuse.keypart_map= (key_part_map) 1 << part;
-	  keyuse.used_tables=key_field->val->used_tables();
-	  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
-          keyuse.null_rejecting= key_field->null_rejecting;
-          keyuse.cond_guard= key_field->cond_guard;
-	  if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
+          if (add_keyuse(keyuse_array, key_field, key, part))
             return TRUE;
 	}
       }
     }
+    if (field->hash_join_is_possible() &&
+        (key_field->optimize & KEY_OPTIMIZE_EQ) &&
+        key_field->val->used_tables())
+    {
+      /* 
+        If a key use is extracted from an equi-join predicate then it is
+        added not only as a key use for every index whose component can
+        be evalusted utilizing this key use, but also as a key use for
+        hash join. Such key uses are marked with a special key number. 
+      */    
+      if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
+        return TRUE;
+    }
   }
   return FALSE;
 }
@@ -3913,6 +4535,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
   keyuse.used_tables=cond_func->key_item()->used_tables();
   keyuse.optimize= 0;
   keyuse.keypart_map= 0;
+  keyuse.sj_pred_no= UINT_MAX;
   return insert_dynamic(keyuse_array,(uchar*) &keyuse);
 }
 
@@ -3925,6 +4548,9 @@ sort_keyuse(KEYUSE *a,KEYUSE *b)
     return (int) (a->table->tablenr - b->table->tablenr);
   if (a->key != b->key)
     return (int) (a->key - b->key);
+  if (a->key == MAX_KEY && b->key == MAX_KEY && 
+      a->used_tables != b->used_tables)
+    return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
   if (a->keypart != b->keypart)
     return (int) (a->keypart - b->keypart);
   // Place const values before other ones
@@ -3977,20 +4603,34 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
                                   SARGABLE_PARAM **sargables)
 {
   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
+  List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
+  bool have_another = FALSE;
   table_map tables= 0;
   TABLE_LIST *table;
   DBUG_ASSERT(nested_join_table->nested_join);
 
-  while ((table= li++))
+  while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
+                                            (table= li++))))
   {
     if (table->nested_join)
-      add_key_fields_for_nj(join, table, end, and_level, sargables);
+    {
+      if (!table->on_expr)
+      {
+        /* It's a semi-join nest. Walk into it as if it wasn't a nest */
+        have_another= TRUE;
+        li2= li;
+        li= List_iterator<TABLE_LIST>(table->nested_join->join_list); 
+      }
+      else
+        add_key_fields_for_nj(join, table, end, and_level, sargables);
+    }
     else
       if (!table->on_expr)
         tables |= table->table->map;
   }
-  add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
-                 sargables);
+  if (nested_join_table->on_expr)
+    add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
+                   sargables);
 }
 
 
@@ -4017,11 +4657,10 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
 
 static bool
 update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
-                    uint tables, COND *cond, COND_EQUAL *cond_equal,
-                    table_map normal_tables, SELECT_LEX *select_lex,
-                    SARGABLE_PARAM **sargables)
+                    uint tables, COND *cond, table_map normal_tables,
+                    SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
 {
-  uint	and_level,i,found_eq_constant;
+  uint	and_level,i;
   KEY_FIELD *key_fields, *end, *field;
   uint sz;
   uint m= max(select_lex->max_equal_elems,1);
@@ -4061,19 +4700,21 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
 
   if (my_init_dynamic_array(keyuse,sizeof(KEYUSE),20,64))
     return TRUE;
+
   if (cond)
   {
+    KEY_FIELD *saved_field= field;
     add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
                    sargables);
     for (; field != end ; field++)
     {
-      if (add_key_part(keyuse,field))
-        return TRUE;
+
       /* Mark that we can optimize LEFT JOIN */
       if (field->val->type() == Item::NULL_ITEM &&
 	  !field->field->real_maybe_null())
 	field->field->table->reginfo.not_exists_optimize=1;
     }
+    field= saved_field;
   }
   for (i=0 ; i < tables ; i++)
   {
@@ -4117,69 +4758,85 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
       return TRUE;
   }
 
-  /*
-    Sort the array of possible keys and remove the following key parts:
-    - ref if there is a keypart which is a ref and a const.
-      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
-      then we skip the key part corresponding to b=t2.d)
-    - keyparts without previous keyparts
-      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
-      used in the query, we drop the partial key parts from consideration).
-    Special treatment for ft-keys.
-  */
-  if (keyuse->elements)
-  {
-    KEYUSE key_end,*prev,*save_pos,*use;
+  return FALSE;
+}
 
-    my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
-	  (qsort_cmp) sort_keyuse);
 
-    bzero((char*) &key_end,sizeof(key_end));    /* Add for easy testing */
-    if (insert_dynamic(keyuse,(uchar*) &key_end))
-      return TRUE;
+/**
+  Sort the array of possible keys and remove the following key parts:
+  - ref if there is a keypart which is a ref and a const.
+    (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
+    then we skip the key part corresponding to b=t2.d)
+  - keyparts without previous keyparts
+    (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
+    used in the query, we drop the partial key parts from consideration).
+  Special treatment for ft-keys.
+*/
+
+static bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse, 
+                                   bool skip_unprefixed_keyparts)
+{
+  KEYUSE key_end, *prev, *save_pos, *use;
+  uint found_eq_constant, i;
 
-    use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
-    prev= &key_end;
-    found_eq_constant=0;
-    for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+  DBUG_ASSERT(keyuse->elements);
+
+  my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
+           (qsort_cmp) sort_keyuse);
+
+  bzero((char*) &key_end, sizeof(key_end));    /* Add for easy testing */
+  if (insert_dynamic(keyuse, (uchar*) &key_end))
+    return TRUE;
+
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
+    generate_derived_keys(keyuse);
+
+  use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
+  prev= &key_end;
+  found_eq_constant= 0;
+  for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+  {
+    if (!use->is_for_hash_join())
     {
       if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
-	use->table->const_key_parts[use->key]|= use->keypart_map;
+        use->table->const_key_parts[use->key]|= use->keypart_map;
       if (use->keypart != FT_KEYPART)
       {
-	if (use->key == prev->key && use->table == prev->table)
-	{
-	  if (prev->keypart+1 < use->keypart ||
-	      (prev->keypart == use->keypart && found_eq_constant))
-	    continue;				/* remove */
-	}
-	else if (use->keypart != 0)		// First found must be 0
-	  continue;
+        if (use->key == prev->key && use->table == prev->table)
+        {
+          if ((prev->keypart+1 < use->keypart && skip_unprefixed_keyparts) ||
+              (prev->keypart == use->keypart && found_eq_constant))
+            continue;				/* remove */
+        }
+        else if (use->keypart != 0 && skip_unprefixed_keyparts)
+          continue; /* remove - first found must be 0 */
       }
 
-      /*
-        Old gcc used a memcpy(), which is undefined if save_pos==use:
-        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
-        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
-        This also disables a valgrind warning, so better to have the test.
-      */
-      if (save_pos != use)
-        *save_pos= *use;
-      prev=use;
+      prev= use;
       found_eq_constant= !use->used_tables;
-      /* Save ptr to first use */
-      if (!use->table->reginfo.join_tab->keyuse)
-	use->table->reginfo.join_tab->keyuse=save_pos;
       use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
-      save_pos++;
     }
-    i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
-    VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
-    keyuse->elements=i;
+    /*
+      Old gcc used a memcpy(), which is undefined if save_pos==use:
+      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
+      This also disables a valgrind warning, so better to have the test.
+    */
+    if (save_pos != use)
+      *save_pos= *use;
+    /* Save ptr to first use */
+    if (!use->table->reginfo.join_tab->keyuse)
+      use->table->reginfo.join_tab->keyuse= save_pos;
+    save_pos++;
   }
+  i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
+  VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
+  keyuse->elements= i;
+
   return FALSE;
 }
 
+
 /**
   Update some values in keyuse for faster choose_plan() loop.
 */
@@ -4204,12 +4861,15 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
 	(map= (keyuse->used_tables & ~join->const_table_map &
 	       ~OUTER_REF_TABLE_BIT)))
     {
-      uint tablenr;
-      for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
-      if (map == 1)			// Only one table
+      uint n_tables= my_count_bits(map);
+      if (n_tables == 1)			// Only one table
       {
-	TABLE *tmp_table=join->all_tables[tablenr];
-	keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
+        Table_map_iterator it(map);
+        int tablenr= it.next_bit();
+        DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
+	TABLE *tmp_table=join->table[tablenr];
+        if (tmp_table) // already created
+          keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
       }
     }
     /*
@@ -4297,6 +4957,10 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   join->positions[idx].records_read=1.0;	/* This is a const table */
   join->positions[idx].ref_depend_map= 0;
 
+//  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+  join->positions[idx].sj_strategy= SJ_OPT_NONE;
+  join->positions[idx].use_join_buffer= FALSE;
+
   /* Move the const table as down as possible in best_ref */
   JOIN_TAB **pos=join->best_ref+idx+1;
   JOIN_TAB *next=join->best_ref[idx];
@@ -4310,6 +4974,35 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
 }
 
 
+/* Estimate of the number matching candidates in the joined table */
+
+inline
+ha_rows matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint)
+{
+  ha_rows records= s->found_records;
+  /*
+    If there is a filtering condition on the table (i.e. ref analyzer found
+    at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
+    preceding this table in the join order we're now considering), then 
+    assume that 25% of the rows will be filtered out by this condition.
+
+    This heuristic is supposed to force tables used in exprZ to be before
+    this table in join order.
+  */
+  if (with_found_constraint)
+    records-= records/4;
+
+    /*
+      If applicable, get a more accurate estimate. Don't use the two
+      heuristics at once.
+    */
+  if (s->table->quick_condition_rows != s->found_records)
+    records= s->table->quick_condition_rows;
+
+  return records;
+}
+
+
 /**
   Find the best access path for an extension of a partial execution
   plan and add this path to the plan.
@@ -4327,23 +5020,28 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   @param thd              thread for the connection that submitted the query
   @param remaining_tables set of tables not included into the partial plan yet
   @param idx              the length of the partial plan
+  @param disable_jbuf     TRUE<=> Don't use join buffering
   @param record_count     estimate for the number of records returned by the
                           partial plan
-  @param read_time        the cost of the partial plan
+  @param pos              OUT Table access plan
+  @param loose_scan_pos   OUT Table plan that uses loosescan, or set cost to 
+                              DBL_MAX if not possible.
 
   @return
     None
 */
 
-static void
+void
 best_access_path(JOIN      *join,
                  JOIN_TAB  *s,
-                 THD       *thd,
                  table_map remaining_tables,
                  uint      idx,
+                 bool      disable_jbuf,
                  double    record_count,
-                 double    read_time)
+                 POSITION *pos,
+                 POSITION *loose_scan_pos)
 {
+  THD *thd= join->thd;
   KEYUSE *best_key=         0;
   uint best_max_key_part=   0;
   my_bool found_constraint= 0;
@@ -4353,12 +5051,24 @@ best_access_path(JOIN      *join,
   table_map best_ref_depends_map= 0;
   double tmp;
   ha_rows rec;
+  bool best_uses_jbuf= FALSE;
+  MY_BITMAP *eq_join_set= &s->table->eq_join_set;
+  KEYUSE *hj_start_key= 0;
+
+  disable_jbuf= disable_jbuf || idx == join->const_tables;  
+
+  Loose_scan_opt loose_scan_opt;
   DBUG_ENTER("best_access_path");
+  
+  bitmap_clear_all(eq_join_set);
 
+  loose_scan_opt.init(join, s, remaining_tables);
+  
   if (s->keyuse)
   {                                            /* Use key if possible */
+    KEYUSE *keyuse;
+    KEYUSE *start_key=0;
     TABLE *table= s->table;
-    KEYUSE *keyuse,*start_key=0;
     double best_records= DBL_MAX;
     uint max_key_part=0;
 
@@ -4366,19 +5076,41 @@ best_access_path(JOIN      *join,
     rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE;  // Assumed records/key
     for (keyuse=s->keyuse ; keyuse->table == table ;)
     {
+      KEY *keyinfo;
       key_part_map found_part= 0;
       table_map found_ref= 0;
       uint key= keyuse->key;
-      KEY *keyinfo= table->key_info+key;
       bool ft_key=  (keyuse->keypart == FT_KEYPART);
       /* Bitmap of keyparts where the ref access is over 'keypart=const': */
       key_part_map const_part= 0;
       /* The or-null keypart in ref-or-null access: */
       key_part_map ref_or_null_part= 0;
+      if (is_hash_join_key_no(key))
+      {
+        /* 
+          Hash join as any join employing join buffer can be used to join
+          only those tables that are joined after the first non const table
+	*/  
+        if (!(remaining_tables & keyuse->used_tables) &&
+            idx > join->const_tables)
+        {
+          if (!hj_start_key)
+            hj_start_key= keyuse;
+          bitmap_set_bit(eq_join_set, keyuse->keypart);
+        }
+        keyuse++;
+        continue;
+      }
+
+      keyinfo= table->key_info+key;
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
 
+      loose_scan_opt.next_ref_key();
+      DBUG_PRINT("info", ("Considering ref access on key %s",
+                          keyuse->table->key_info[keyuse->key].name));
+
       do /* For each keypart */
       {
         uint keypart= keyuse->keypart;
@@ -4387,7 +5119,6 @@ best_access_path(JOIN      *join,
         
         do /* For each way to access the keypart */
         {
-
           /*
             if 1. expression doesn't refer to forward tables
                2. we won't get two ref-or-null's
@@ -4400,8 +5131,8 @@ best_access_path(JOIN      *join,
             if (!(keyuse->used_tables & ~join->const_table_map))
               const_part|= keyuse->keypart_map;
 
-            double tmp2= prev_record_reads(join, idx, (found_ref |
-                                                      keyuse->used_tables));
+            double tmp2= prev_record_reads(join->positions, idx,
+                                           (found_ref | keyuse->used_tables));
             if (tmp2 < best_prev_record_reads)
             {
               best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
@@ -4416,6 +5147,7 @@ best_access_path(JOIN      *join,
             if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
               ref_or_null_part |= keyuse->keypart_map;
           }
+          loose_scan_opt.add_keyuse(remaining_tables, keyuse);
           keyuse++;
         } while (keyuse->table == table && keyuse->key == key &&
                  keyuse->keypart == keypart);
@@ -4425,7 +5157,7 @@ best_access_path(JOIN      *join,
       /*
         Assume that that each key matches a proportional part of table.
       */
-      if (!found_part && !ft_key)
+      if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
         continue;                               // Nothing usable found
 
       if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
@@ -4440,22 +5172,22 @@ best_access_path(JOIN      *join,
           Really, there should be records=0.0 (yes!)
           but 1.0 would be probably safer
         */
-        tmp= prev_record_reads(join, idx, found_ref);
+        tmp= prev_record_reads(join->positions, idx, found_ref);
         records= 1.0;
       }
       else
       {
-        found_constraint= 1;
-        /*
-          Check if we found full key
-        */
+        found_constraint= test(found_part);
+        loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
+
+        /* Check if we found full key */
         if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
             !ref_or_null_part)
         {                                         /* use eq key */
           max_key_part= (uint) ~0;
           if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
           {
-            tmp = prev_record_reads(join, idx, found_ref);
+            tmp = prev_record_reads(join->positions, idx, found_ref);
             records=1.0;
           }
           else
@@ -4510,7 +5242,8 @@ best_access_path(JOIN      *join,
                 in ReuseRangeEstimateForRef-3.
               */
               if (table->quick_keys.is_set(key) &&
-                  const_part & (1 << table->quick_key_parts[key]) &&
+                  (const_part & ((1 << table->quick_key_parts[key])-1)) ==
+                  (((key_part_map)1 << table->quick_key_parts[key])-1) &&
                   table->quick_n_ranges[key] == 1 &&
                   records > (double) table->quick_rows[key])
               {
@@ -4521,14 +5254,11 @@ best_access_path(JOIN      *join,
             tmp= records;
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1,
+                                          (ha_rows) min(tmp,s->worst_seeks));
+            tmp*= record_count;
           }
         }
         else
@@ -4688,20 +5418,21 @@ best_access_path(JOIN      *join,
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1,
+                                          (ha_rows) min(tmp,s->worst_seeks));
+            tmp*= record_count;
           }
           else
             tmp= best_time;                    // Do nothing
         }
+
+        DBUG_ASSERT(tmp > 0 || record_count == 0);
+        tmp += s->startup_cost;
+        loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
       } /* not ft_key */
-      if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+      if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
       {
         best_time= tmp + records/(double) TIME_FOR_COMPARE;
         best= tmp;
@@ -4710,10 +5441,47 @@ best_access_path(JOIN      *join,
         best_max_key_part= max_key_part;
         best_ref_depends_map= found_ref;
       }
-    }
+    } /* for each key */
     records= best_records;
   }
 
+  /* 
+    If there is no key to access the table, but there is an equi-join
+    predicate connecting the table with the privious tables then we
+    consider the possibility of using hash join.
+    We need also to check that:
+    (1) s is inner table of semi-join -> join cache is allowed for semijoins
+    (2) s is inner table of outer join -> join cache is allowed for outer joins
+  */  
+  if (idx > join->const_tables && best_key == 0 &&
+      (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+      join->max_allowed_join_cache_level > 2 &&
+     !bitmap_is_clear_all(eq_join_set) &&  !disable_jbuf &&
+      (!s->emb_sj_nest ||                     
+       join->allowed_semijoin_with_cache) &&    // (1)
+      (!(s->table->map & join->outer_join) ||
+       join->allowed_outer_join_with_cache))    // (2)
+  {
+    double join_sel= 0.1;
+    /* Estimate the cost of  the hash join access to the table */
+    ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
+
+    tmp= s->quick ? s->quick->read_time : s->scan_time();
+    tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+
+    /* We read the table as many times as join buffer becomes full. */
+    tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
+                          record_count /
+                          (double) thd->variables.join_buff_size));
+    best_time= tmp + 
+               (record_count*join_sel) / TIME_FOR_COMPARE * rnd_records;
+    best= tmp;
+    records= rows2double(rnd_records);
+    best_key= hj_start_key;
+    best_ref_depends_map= 0;
+    best_uses_jbuf= TRUE;
+   }
+
   /*
     Don't test table scan if it can't be better.
     Prefer key lookup if we would use the same key for scanning.
@@ -4723,7 +5491,7 @@ best_access_path(JOIN      *join,
     This is because table scans uses index and we would not win
     anything by using a table scan.
 
-    A word for word translation of the below if-statement in psergey's
+    A word for word translation of the below if-statement in sergefp's
     understanding: we check if we should use table scan if:
     (1) The found 'ref' access produces more records than a table scan
         (or index scan, or quick select), or 'ref' is more expensive than
@@ -4741,39 +5509,28 @@ best_access_path(JOIN      *join,
         Since we have a 'ref' access path, and FORCE INDEX instructs us to
         choose it over ALL/index, there is no need to consider a full table
         scan.
+    (5) Non-flattenable semi-joins: don't consider doing a scan of temporary
+        table if we had an option to make lookups into it. In real-world cases,
+        lookups are cheaper than full scans, but when the table is small, they
+        can be [considered to be] more expensive, which causes lookups not to 
+        be used for cases with small datasets, which is annoying.
   */
   if ((records >= s->found_records || best > s->read_time) &&            // (1)
       !(s->quick && best_key && s->quick->index == best_key->key &&      // (2)
         best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2)
       !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) &&   // (3)
         ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
-      !(s->table->force_index && best_key && !s->quick))                 // (4)
+      !(s->table->force_index && best_key && !s->quick) &&               // (4)
+      !(best_key && s->table->pos_in_table_list->jtbm_subselect))        // (5)
   {                                             // Check full join
-    ha_rows rnd_records= s->found_records;
-    /*
-      If there is a filtering condition on the table (i.e. ref analyzer found
-      at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
-      preceding this table in the join order we're now considering), then 
-      assume that 25% of the rows will be filtered out by this condition.
-
-      This heuristic is supposed to force tables used in exprZ to be before
-      this table in join order.
-    */
-    if (found_constraint)
-      rnd_records-= rnd_records/4;
-
-    /*
-      If applicable, get a more accurate estimate. Don't use the two
-      heuristics at once.
-    */
-    if (s->table->quick_condition_rows != s->found_records)
-      rnd_records= s->table->quick_condition_rows;
+    ha_rows rnd_records= matching_candidates_in_table(s, found_constraint);
 
     /*
       Range optimizer never proposes a RANGE if it isn't better
       than FULL: so if RANGE is present, it's always preferred to FULL.
       Here we estimate its cost.
     */
+
     if (s->quick)
     {
       /*
@@ -4788,12 +5545,14 @@ best_access_path(JOIN      *join,
       tmp= record_count *
         (s->quick->read_time +
          (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+
+      loose_scan_opt.check_range_access(join, idx, s->quick);
     }
     else
     {
       /* Estimate cost of reading table. */
-      tmp= s->table->file->scan_time();
-      if (s->table->map & join->outer_join)     // Can't use join cache
+      tmp= s->scan_time();
+      if ((s->table->map & join->outer_join) || disable_jbuf)     // Can't use join cache
       {
         /*
           For each record we have to:
@@ -4821,6 +5580,7 @@ best_access_path(JOIN      *join,
       }
     }
 
+    tmp += s->startup_cost;
     /*
       We estimate the cost of evaluating WHERE clause for found records
       as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
@@ -4828,7 +5588,8 @@ best_access_path(JOIN      *join,
     */
     if (best == DBL_MAX ||
         (tmp  + record_count/(double) TIME_FOR_COMPARE*rnd_records <
-         best + record_count/(double) TIME_FOR_COMPARE*records))
+         (best_key->is_for_hash_join() ? best_time :
+          best + record_count/(double) TIME_FOR_COMPARE*records)))
     {
       /*
         If the table has a range (s->quick is set) make_join_select()
@@ -4839,15 +5600,21 @@ best_access_path(JOIN      *join,
       best_key= 0;
       /* range/index_merge/ALL/index access method are "independent", so: */
       best_ref_depends_map= 0;
+      best_uses_jbuf= test(!disable_jbuf && !((s->table->map & 
+                                               join->outer_join)));
     }
   }
 
   /* Update the cost information for the current partial plan */
-  join->positions[idx].records_read= records;
-  join->positions[idx].read_time=    best;
-  join->positions[idx].key=          best_key;
-  join->positions[idx].table=        s;
-  join->positions[idx].ref_depend_map= best_ref_depends_map;
+  pos->records_read= records;
+  pos->read_time=    best;
+  pos->key=          best_key;
+  pos->table=        s;
+  pos->ref_depend_map= best_ref_depends_map;
+  pos->loosescan_picker.loosescan_key= MAX_KEY;
+  pos->use_join_buffer= best_uses_jbuf;
+   
+  loose_scan_opt.save_to_position(s, loose_scan_pos);
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -4882,7 +5649,7 @@ best_access_path(JOIN      *join,
     TRUE        Fatal error
 */
 
-static bool
+bool
 choose_plan(JOIN *join, table_map join_tables)
 {
   uint search_depth= join->thd->variables.optimizer_search_depth;
@@ -4891,19 +5658,34 @@ choose_plan(JOIN *join, table_map join_tables)
   DBUG_ENTER("choose_plan");
 
   join->cur_embedding_map= 0;
+  join->cur_dups_producing_tables= 0;
   reset_nj_counters(join, join->join_list);
-  /*
-    if (SELECT_STRAIGHT_JOIN option is set)
-      reorder tables so dependent tables come after tables they depend 
-      on, otherwise keep tables in the order they were specified in the query 
-    else
-      Apply heuristic: pre-sort all access plans with respect to the number of
-      records accessed.
-  */
-  my_qsort(join->best_ref + join->const_tables,
-           join->tables - join->const_tables, sizeof(JOIN_TAB*),
-           straight_join ? join_tab_cmp_straight : join_tab_cmp);
-  
+  qsort2_cmp jtab_sort_func;
+
+  if (join->emb_sjm_nest)
+  {
+    /* We're optimizing semi-join materialization nest, so put the 
+       tables from this semi-join as first
+    */
+    jtab_sort_func= join_tab_cmp_embedded_first;
+  }
+  else
+  {
+    /*
+      if (SELECT_STRAIGHT_JOIN option is set)
+        reorder tables so dependent tables come after tables they depend 
+        on, otherwise keep tables in the order they were specified in the query 
+      else
+        Apply heuristic: pre-sort all access plans with respect to the number of
+        records accessed.
+    */
+    jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+  }
+  my_qsort2(join->best_ref + join->const_tables,
+            join->table_count - join->const_tables, sizeof(JOIN_TAB*),
+            jtab_sort_func, (void*)join->emb_sjm_nest);
+  join->cur_sj_inner_tables= 0;
+
   if (straight_join)
   {
     optimize_straight_join(join, join_tables);
@@ -4967,7 +5749,7 @@ choose_plan(JOIN *join, table_map join_tables)
 */
 
 static int
-join_tab_cmp(const void* ptr1, const void* ptr2)
+join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
 {
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
@@ -4989,11 +5771,19 @@ join_tab_cmp(const void* ptr1, const void* ptr2)
 */
 
 static int
-join_tab_cmp_straight(const void* ptr1, const void* ptr2)
+join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
 {
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
 
+  /*
+    We don't do subquery flattening if the parent or child select has
+    STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
+    is hardly useful.
+  */
+  DBUG_ASSERT(!jt1->emb_sj_nest);
+  DBUG_ASSERT(!jt2->emb_sj_nest);
+
   if (jt1->dependent & jt2->table->map)
     return 1;
   if (jt2->dependent & jt1->table->map)
@@ -5001,6 +5791,38 @@ join_tab_cmp_straight(const void* ptr1, const void* ptr2)
   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
 }
 
+
+/*
+  Same as join_tab_cmp but tables from within the given semi-join nest go 
+  first. Used when the optimizing semi-join materialization nests.
+*/
+
+static int
+join_tab_cmp_embedded_first(const void *emb,  const void* ptr1, const void* ptr2)
+{
+  const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
+  JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
+  JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+
+  if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
+    return -1;
+  if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
+    return 1;
+
+  if (jt1->dependent & jt2->table->map)
+    return 1;
+  if (jt2->dependent & jt1->table->map)
+    return -1;
+
+  if (jt1->found_records > jt2->found_records)
+    return 1;
+  if (jt1->found_records < jt2->found_records)
+    return -1; 
+  
+  return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
+}
+
+
 /**
   Heuristic procedure to automatically guess a reasonable degree of
   exhaustiveness for the greedy search procedure.
@@ -5036,7 +5858,7 @@ join_tab_cmp_straight(const void* ptr1, const void* ptr2)
 static uint
 determine_search_depth(JOIN *join)
 {
-  uint table_count=  join->tables - join->const_tables;
+  uint table_count=  join->table_count - join->const_tables;
   uint search_depth;
   /* TODO: this value should be determined dynamically, based on statistics: */
   uint max_tables_for_exhaustive_opt= 7;
@@ -5082,28 +5904,35 @@ optimize_straight_join(JOIN *join, table_map join_tables)
 {
   JOIN_TAB *s;
   uint idx= join->const_tables;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
   double    record_count= 1.0;
   double    read_time=    0.0;
- 
+  POSITION  loose_scan_pos;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     /* Find the best access method from 's' to the current partial plan */
-    best_access_path(join, s, join->thd, join_tables, idx,
-                     record_count, read_time);
+    best_access_path(join, s, join_tables, idx, disable_jbuf, record_count,
+                     join->positions + idx, &loose_scan_pos);
+
     /* compute the cost of the new plan extended with 's' */
     record_count*= join->positions[idx].records_read;
-    read_time+=    join->positions[idx].read_time;
+    read_time+= join->positions[idx].read_time +
+                record_count / (double) TIME_FOR_COMPARE;
+    advance_sj_state(join, join_tables, idx, &record_count, &read_time,
+                     &loose_scan_pos);
+
     join_tables&= ~(s->table->map);
     ++idx;
   }
 
-  read_time+= record_count / (double) TIME_FOR_COMPARE;
   if (join->sort_by_table &&
       join->sort_by_table != join->positions[join->const_tables].table->table)
     read_time+= record_count;  // We have to make a temp table
   memcpy((uchar*) join->best_positions, (uchar*) join->positions,
          sizeof(POSITION)*idx);
-  join->best_read= read_time;
+  join->record_count= record_count;
+  join->best_read= read_time - 0.001;
 }
 
 
@@ -5201,11 +6030,17 @@ greedy_search(JOIN      *join,
   uint      size_remain;    // cardinality of remaining_tables
   POSITION  best_pos;
   JOIN_TAB  *best_table; // the next plan node to be added to the curr QEP
+  uint      n_tables; // ==join->tables or # tables in the sj-mat nest we're optimizing
 
   DBUG_ENTER("greedy_search");
 
   /* number of tables that remain to be optimized */
-  size_remain= my_count_bits(remaining_tables);
+  n_tables= size_remain= my_count_bits(remaining_tables &
+                                       (join->emb_sjm_nest? 
+                                         (join->emb_sjm_nest->sj_inner_tables &
+                                          ~join->const_table_map)
+                                         :
+                                         ~(table_map)0));
 
   do {
     /* Find the extension of the current QEP with the lowest cost */
@@ -5225,7 +6060,7 @@ greedy_search(JOIN      *join,
         'join->best_positions' contains a complete optimal extension of the
         current partial QEP.
       */
-      DBUG_EXECUTE("opt", print_plan(join, join->tables,
+      DBUG_EXECUTE("opt", print_plan(join, n_tables,
                                      record_count, read_time, read_time,
                                      "optimal"););
       DBUG_RETURN(FALSE);
@@ -5248,11 +6083,12 @@ greedy_search(JOIN      *join,
       the interleaving state to the one of the non-extended partial plan 
       on exit.
     */
-    IF_DBUG(bool is_interleave_error= )
-    check_interleaving_with_nj (best_table);
+    bool is_interleave_error __attribute__((unused))= 
+      check_interleaving_with_nj(best_table);
     /* This has been already checked by best_extension_by_limited_search */
     DBUG_ASSERT(!is_interleave_error);
 
+
     /* find the position of 'best_table' in 'join->best_ref' */
     best_idx= idx;
     JOIN_TAB *pos= join->best_ref[best_idx];
@@ -5264,7 +6100,8 @@ greedy_search(JOIN      *join,
 
     /* compute the cost of the new plan extended with 'best_table' */
     record_count*= join->positions[idx].records_read;
-    read_time+=    join->positions[idx].read_time;
+    read_time+= join->positions[idx].read_time + 
+                record_count / (double) TIME_FOR_COMPARE;
 
     remaining_tables&= ~(best_table->table->map);
     --size_remain;
@@ -5278,6 +6115,151 @@ greedy_search(JOIN      *join,
 
 
 /**
+  Get cost of execution and fanout produced by selected tables in the join
+  prefix (where prefix is defined as prefix in depth-first traversal)
+ 
+  @param end_tab_idx               The number of last tab to be taken into
+                                   account (in depth-first traversal prefix)
+  @param filter_map                Bitmap of tables whose cost/fanout are to 
+                                   be taken into account.
+  @param read_time_arg     [out]   store read time here 
+  @param record_count_arg  [out]   store record count here
+
+  @note
+
+  @returns
+    read_time_arg and record_count_arg contain the computed cost and fanout
+*/
+
+void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
+                                       table_map filter_map,
+                                       double *read_time_arg, 
+                                       double *record_count_arg)
+{
+  double record_count= 1;
+  double read_time= 0.0;
+  double sj_inner_fanout= 1.0;
+  JOIN_TAB *end_tab= NULL;
+  JOIN_TAB *tab;
+  int i;
+  int last_sj_table= MAX_TABLES;
+
+  /* 
+    Handle a special case where the join is degenerate, and produces no
+    records
+  */
+  if (table_count == const_tables)
+  {
+    *read_time_arg= 0.0;
+    /*
+      We return 1, because 
+       - it is the pessimistic estimate (there might be grouping)
+       - it's safer, as we're less likely to hit the edge cases in
+         calculations.
+    */
+    *record_count_arg=1.0;
+    return;
+  }
+
+  for (tab= first_depth_first_tab(this), i= const_tables;
+       tab;
+       tab= next_depth_first_tab(this, tab), i++)
+  {
+    end_tab= tab;
+    if (i == end_tab_idx)
+      break;
+  }
+
+  for (tab= first_depth_first_tab(this), i= const_tables;
+       ;
+       tab= next_depth_first_tab(this, tab), i++)
+  {
+    if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
+    {
+      /* 
+        We've entered the SJM nest that contains the end_tab. The caller is
+        - interested in fanout inside the nest (because that's how many times 
+          we'll invoke the attached WHERE conditions)
+        - not interested in cost
+      */
+      record_count= 1.0;
+      read_time= 0.0;
+    }
+    
+    /* 
+      Ignore fanout (but not cost) from sj-inner tables, as long as 
+      the range that processes them finishes before the end_tab
+    */
+    if (tab->sj_strategy != SJ_OPT_NONE)
+    {
+      sj_inner_fanout= 1.0;
+      last_sj_table= i + tab->n_sj_tables;
+    }
+    
+    table_map cur_table_map;
+    if (tab->table)
+      cur_table_map= tab->table->map;
+    else
+    {
+      /* This is a SJ-Materialization nest. Check all of its tables */
+      TABLE *first_child= tab->bush_children->start->table;
+      TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
+      cur_table_map= sjm_nest->nested_join->used_tables;
+    }
+    if (tab->records_read && (cur_table_map & filter_map))
+    {
+      record_count *= tab->records_read;
+      read_time += tab->read_time + record_count / (double) TIME_FOR_COMPARE;
+      if (tab->emb_sj_nest)
+        sj_inner_fanout *= tab->records_read;
+    }
+
+    if (i == last_sj_table)
+    {
+      record_count /= sj_inner_fanout;
+      sj_inner_fanout= 1.0;
+      last_sj_table= MAX_TABLES;
+    }
+
+    if (tab == end_tab)
+      break;
+  }
+  *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+  *record_count_arg= record_count;
+}
+
+
+/*
+  Get prefix cost and fanout. This function is different from
+  get_partial_cost_and_fanout:
+   - it operates on a JOIN that haven't yet finished its optimization phase (in
+     particular, fix_semijoin_strategies_for_picked_join_order() and
+     get_best_combination() haven't been called)
+   - it assumes the the join prefix doesn't have any semi-join plans
+
+  These assumptions are met by the caller of the function.
+*/
+
+void JOIN::get_prefix_cost_and_fanout(uint n_tables, 
+                                      double *read_time_arg,
+                                      double *record_count_arg)
+{
+  double record_count= 1;
+  double read_time= 0.0;
+  for (uint i= const_tables; i < n_tables + const_tables ; i++)
+  {
+    if (best_positions[i].records_read)
+    {
+      record_count *= best_positions[i].records_read;
+      read_time += best_positions[i].read_time;
+    }
+  }
+  *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+  *record_count_arg= record_count;
+}
+
+
+/**
   Find a good, possibly optimal, query execution plan (QEP) by a possibly
   exhaustive search.
 
@@ -5419,38 +6401,54 @@ best_extension_by_limited_search(JOIN      *join,
   JOIN_TAB *s;
   double best_record_count= DBL_MAX;
   double best_read_time=    DBL_MAX;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
 
   DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
 
+  /* 
+    If we are searching for the execution plan of a materialized semi-join nest
+    then allowed_tables contains bits only for the tables from this nest.
+  */
+  table_map allowed_tables= ~(table_map)0;
+  if (join->emb_sjm_nest)
+    allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     table_map real_table_bit= s->table->map;
     if ((remaining_tables & real_table_bit) && 
+        (allowed_tables & real_table_bit) &&
         !(remaining_tables & s->dependent) && 
         (!idx || !check_interleaving_with_nj(s)))
     {
       double current_record_count, current_read_time;
+      POSITION *position= join->positions + idx;
 
       /* Find the best access method from 's' to the current partial plan */
-      best_access_path(join, s, thd, remaining_tables, idx,
-                       record_count, read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, remaining_tables, idx, disable_jbuf,
+                       record_count, join->positions + idx, &loose_scan_pos);
+
       /* Compute the cost of extending the plan with 's' */
-      current_record_count= record_count * join->positions[idx].records_read;
-      current_read_time=    read_time + join->positions[idx].read_time;
+
+      current_record_count= record_count * position->records_read;
+      current_read_time=read_time + position->read_time +
+                        current_record_count / (double) TIME_FOR_COMPARE;
+
+      advance_sj_state(join, remaining_tables, idx, &current_record_count,
+                       &current_read_time, &loose_scan_pos);
 
       /* Expand only partial plans with lower cost than the best QEP so far */
-      if ((current_read_time +
-           current_record_count / (double) TIME_FOR_COMPARE) >= join->best_read)
+      if (current_read_time >= join->best_read)
       {
         DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                        current_record_count,
                                        read_time,
-                                       (current_read_time +
-                                        current_record_count / 
-                                        (double) TIME_FOR_COMPARE),
+                                       current_read_time,
                                        "prune_by_cost"););
         restore_prev_nj_state(s);
+        restore_prev_sj_state(remaining_tables, s, idx);
         continue;
       }
 
@@ -5463,12 +6461,12 @@ best_extension_by_limited_search(JOIN      *join,
         if (best_record_count > current_record_count ||
             best_read_time > current_read_time ||
             (idx == join->const_tables &&  // 's' is the first table in the QEP
-             s->table == join->sort_by_table))
+            s->table == join->sort_by_table))
         {
           if (best_record_count >= current_record_count &&
               best_read_time >= current_read_time &&
               /* TODO: What is the reasoning behind this condition? */
-              (!(s->key_dependent & remaining_tables) ||
+              (!(s->key_dependent & allowed_tables & remaining_tables) ||
                join->positions[idx].records_read < 2.0))
           {
             best_record_count= current_record_count;
@@ -5483,11 +6481,12 @@ best_extension_by_limited_search(JOIN      *join,
                                          current_read_time,
                                          "pruned_by_heuristic"););
           restore_prev_nj_state(s);
+          restore_prev_sj_state(remaining_tables, s, idx);
           continue;
         }
       }
 
-      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) )
+      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
         if (best_extension_by_limited_search(join,
@@ -5505,16 +6504,16 @@ best_extension_by_limited_search(JOIN      *join,
           'join' is either the best partial QEP with 'search_depth' relations,
           or the best complete QEP so far, whichever is smaller.
         */
-        current_read_time+= current_record_count / (double) TIME_FOR_COMPARE;
         if (join->sort_by_table &&
             join->sort_by_table !=
             join->positions[join->const_tables].table->table)
           /* We have to make a temp table */
           current_read_time+= current_record_count;
-        if ((search_depth == 1) || (current_read_time < join->best_read))
+        if (current_read_time < join->best_read)
         {
           memcpy((uchar*) join->best_positions, (uchar*) join->positions,
                  sizeof(POSITION) * (idx + 1));
+          join->record_count= current_record_count;
           join->best_read= current_read_time - 0.001;
         }
         DBUG_EXECUTE("opt", print_plan(join, idx+1,
@@ -5524,6 +6523,7 @@ best_extension_by_limited_search(JOIN      *join,
                                        "full_plan"););
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(remaining_tables, s, idx);
     }
   }
   DBUG_RETURN(FALSE);
@@ -5570,6 +6570,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 
   JOIN_TAB *s;
   double best_record_count=DBL_MAX,best_read_time=DBL_MAX;
+  bool disable_jbuf= join->thd->variables.join_cache_level == 0;
   for (JOIN_TAB **pos=join->best_ref+idx ; (s=*pos) ; pos++)
   {
     table_map real_table_bit=s->table->map;
@@ -5577,8 +6578,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
         (!idx|| !check_interleaving_with_nj(s)))
     {
       double records, best;
-      best_access_path(join, s, thd, rest_tables, idx, record_count, 
-                       read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, rest_tables, idx, disable_jbuf, record_count, 
+                       join->positions + idx, &loose_scan_pos);
       records= join->positions[idx].records_read;
       best= join->positions[idx].read_time;
       /*
@@ -5587,6 +6589,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
       */
       double current_record_count=record_count*records;
       double current_read_time=read_time+best;
+      advance_sj_state(join, rest_tables, idx, &current_record_count, 
+                       &current_read_time, &loose_scan_pos);
+
       if (best_record_count > current_record_count ||
 	  best_read_time > current_read_time ||
 	  (idx == join->const_tables && s->table == join->sort_by_table))
@@ -5605,6 +6610,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 	swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(rest_tables, s, idx);
       if (join->select_options & SELECT_STRAIGHT_JOIN)
 	break;				// Don't test all combinations
     }
@@ -5617,14 +6623,16 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
   Find how much space the prevous read not const tables takes in cache.
 */
 
-static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
+void JOIN_TAB::calc_used_field_length(bool max_fl)
 {
-  uint null_fields,blobs,fields,rec_length;
+  uint null_fields,blobs,fields;
+  ulong rec_length;
   Field **f_ptr,*field;
-  MY_BITMAP *read_set= join_tab->table->read_set;
+  uint uneven_bit_fields;
+  MY_BITMAP *read_set= table->read_set;
 
-  null_fields= blobs= fields= rec_length=0;
-  for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
+  uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
+  for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
   {
     if (bitmap_is_set(read_set, field->field_index))
     {
@@ -5635,21 +6643,118 @@ static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
 	blobs++;
       if (!(flags & NOT_NULL_FLAG))
 	null_fields++;
+      if (field->type() == MYSQL_TYPE_BIT &&
+          ((Field_bit*)field)->bit_len)
+        uneven_bit_fields++;
     }
   }
-  if (null_fields)
-    rec_length+=(join_tab->table->s->null_fields+7)/8;
-  if (join_tab->table->maybe_null)
+  if (null_fields || uneven_bit_fields)
+    rec_length+=(table->s->null_fields+7)/8;
+  if (table->maybe_null)
     rec_length+=sizeof(my_bool);
-  if (blobs)
+
+  /* Take into account that DuplicateElimination may need to store rowid */
+  uint rowid_add_size= 0;
+  if (keep_current_rowid)
+  {
+    rowid_add_size= table->file->ref_length; 
+    rec_length += rowid_add_size;
+    fields++;
+  }
+
+  if (max_fl)
+  {
+    // TODO: to improve this estimate for max expected length 
+    if (blobs)
+    {
+      ulong blob_length= table->file->stats.mean_rec_length;
+      if (ULONG_MAX - rec_length > blob_length)
+        rec_length+=  blob_length;
+      else
+        rec_length= ULONG_MAX;
+    }
+    max_used_fieldlength= rec_length;
+  } 
+  else if (table->file->stats.mean_rec_length)
+    set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);
+      
+  used_fields=fields;
+  used_fieldlength=rec_length;
+  used_blobs=blobs;
+  used_null_fields= null_fields;
+  used_uneven_bit_fields= uneven_bit_fields;
+}
+
+
+/* 
+  @brief
+  Extract pushdown conditions for a table scan
+
+  @details
+  This functions extracts pushdown conditions usable when this table is scanned.
+  The conditions are extracted either from WHERE or from ON expressions.
+  The conditions are attached to the field cache_select of this table.
+
+  @note 
+  Currently the extracted conditions are used only by BNL and BNLH join.
+  algorithms.
+ 
+  @retval  0   on success
+           1   otherwise
+*/ 
+
+int JOIN_TAB::make_scan_filter()
+{
+  COND *tmp;
+  DBUG_ENTER("make_scan_filter");
+
+  Item *cond= is_inner_table_of_outer_join() ?
+                *get_first_inner_table()->on_expr_ref : join->conds;
+  
+  if (cond &&
+      (tmp= make_cond_for_table(join->thd, cond,
+                               join->const_table_map | table->map,
+			       table->map, -1, FALSE, TRUE)))
+  {
+     DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
+     if (!(cache_select=
+          (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT))))
+	DBUG_RETURN(1);
+     cache_select->cond= tmp;
+     cache_select->read_tables=join->const_table_map;
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief
+  Check whether hash join algorithm can be used to join this table   
+
+  @details
+  This function finds out whether the ref items that have been chosen
+  by the planner to access this table can be used for hash join algorithms.
+  The answer depends on a certain property of the the fields of the
+  joined tables on which the hash join key is built.
+  
+  @note
+  At present the function is supposed to be called only after the function
+  get_best_combination has been called.
+
+  @retval TRUE    it's possible to use hash join to join this table
+  @retval FALSE   otherwise
+*/
+
+bool JOIN_TAB::hash_join_is_possible()
+{
+  if (type != JT_REF && type != JT_EQ_REF)
+    return FALSE;
+  if (!is_ref_for_hash_join())
   {
-    uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
-			     (join_tab->table->s->reclength- rec_length));
-    rec_length+=(uint) max(4,blob_length);
+    KEY *keyinfo= table->key_info + ref.key;
+    return keyinfo->key_part[0].field->hash_join_is_possible();
   }
-  join_tab->used_fields=fields;
-  join_tab->used_fieldlength=rec_length;
-  join_tab->used_blobs=blobs;
+  return TRUE;
 }
 
 
@@ -5658,16 +6763,13 @@ cache_record_length(JOIN *join,uint idx)
 {
   uint length=0;
   JOIN_TAB **pos,**end;
-  THD *thd=join->thd;
 
   for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
        pos != end ;
        pos++)
   {
     JOIN_TAB *join_tab= *pos;
-    if (!join_tab->used_fieldlength)		/* Not calced yet */
-      calc_used_field_length(thd, join_tab);
-    length+=join_tab->used_fieldlength;
+    length+= join_tab->get_used_fieldlength();
   }
   return length;
 }
@@ -5724,12 +6826,12 @@ cache_record_length(JOIN *join,uint idx)
     Expected number of row combinations
 */
 
-static double
-prev_record_reads(JOIN *join, uint idx, table_map found_ref)
+double
+prev_record_reads(POSITION *positions, uint idx, table_map found_ref)
 {
   double found=1.0;
-  POSITION *pos_end= join->positions - 1;
-  for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--)
+  POSITION *pos_end= positions - 1;
+  for (POSITION *pos= positions + idx - 1; pos != pos_end; pos--)
   {
     if (pos->table->table->map & found_ref)
     {
@@ -5758,14 +6860,239 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref)
 }
 
 
-/**
-  Set up join struct according to best position.
+/*
+  Enumerate join tabs in breadth-first fashion, including const tables.
 */
 
-static bool
+JOIN_TAB *first_breadth_first_tab(JOIN *join)
+{
+  return join->join_tab; /* There's always one (i.e. first) table */
+}
+
+
+JOIN_TAB *next_breadth_first_tab(JOIN *join, JOIN_TAB *tab)
+{
+  if (!tab->bush_root_tab)
+  {
+    /* We're at top level. Get the next top-level tab */
+    tab++;
+    if (tab < join->join_tab + join->top_join_tab_count)
+      return tab;
+
+    /* No more top-level tabs. Switch to enumerating SJM nest children */
+    tab= join->join_tab;
+  }
+  else
+  {
+    /* We're inside of an SJM nest */
+    if (!tab->last_leaf_in_bush)
+    {
+      /* There's one more table in the nest, return it. */
+      return ++tab;
+    }
+    else
+    {
+      /* 
+        There are no more tables in this nest. Get out of it and then we'll
+        proceed to the next nest.
+      */
+      tab= tab->bush_root_tab + 1;
+    }
+  }
+   
+  /* 
+    Ok, "tab" points to a top-level table, and we need to find the next SJM
+    nest and enter it.
+  */
+  for (; tab < join->join_tab + join->top_join_tab_count; tab++)
+  {
+    if (tab->bush_children)
+      return tab->bush_children->start;
+  }
+  return NULL;
+}
+
+
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const)
+{
+  JOIN_TAB *tab= join->join_tab;
+  if (with_const == WITH_CONST_TABLES)
+  {
+    if (join->const_tables == join->table_count)
+      return NULL;
+    tab += join->const_tables;
+  }
+  return tab;
+}
+
+
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
+{
+  tab= next_breadth_first_tab(join, tab);
+  if (tab && tab->bush_root_tab)
+    tab= NULL;
+  return tab;
+}
+
+
+JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls)
+{
+  JOIN_TAB *first= join->join_tab;
+  if (const_tbls == WITHOUT_CONST_TABLES)
+    first+= join->const_tables;
+  if (first < join->join_tab + join->top_join_tab_count)
+    return first;
+  return NULL; /* All tables were const tables */
+}
+
+
+/*
+  A helper function to loop over all join's join_tab in sequential fashion
+
+  DESCRIPTION
+    Depending on include_bush_roots parameter, JOIN_TABs that represent
+    SJM-scan/lookups are either returned or omitted.
+
+    SJM-Bush children are returned right after (or in place of) their container
+    join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
+    to)
+
+    For example, if we have this structure:
+      
+       ot1--ot2--sjm1----------------ot3-...
+                  |
+                  +--it1--it2--it3
+
+    calls to next_linear_tab( include_bush_roots=TRUE) will return:
+      
+      ot1 ot2 sjm1 it1 it2 it3 ot3 ...
+   
+   while calls to next_linear_tab( include_bush_roots=FALSE) will return:
+
+      ot1 ot2 it1 it2 it3 ot3 ...
+
+   (note that sjm1 won't be returned).
+*/
+
+JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, 
+                          enum enum_with_bush_roots include_bush_roots)
+{
+  if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
+  {
+    /* This JOIN_TAB is a SJM nest; Start from first table in nest */
+    return tab->bush_children->start;
+  }
+
+  DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
+
+  if (tab->bush_root_tab)       /* Are we inside an SJM nest */
+  {
+    /* Inside SJM nest */
+    if (!tab->last_leaf_in_bush)
+      return tab+1;              /* Return next in nest */
+    /* Continue from the sjm on the top level */
+    tab= tab->bush_root_tab;
+  }
+
+  /* If no more JOIN_TAB's on the top level */
+  if (++tab == join->join_tab + join->top_join_tab_count)
+    return NULL;
+
+  if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
+  {
+    /* This JOIN_TAB is a SJM nest; Start from first table in nest */
+    tab= tab->bush_children->start;
+  }
+  return tab;
+}
+
+
+/*
+  Start to iterate over all join tables in bush-children-first order, excluding 
+  the const tables (see next_depth_first_tab() comment for details)
+*/
+
+JOIN_TAB *first_depth_first_tab(JOIN* join)
+{
+  JOIN_TAB* tab;
+  /* This means we're starting the enumeration */
+  if (join->const_tables == join->top_join_tab_count)
+    return NULL;
+
+  tab= join->join_tab + join->const_tables;
+
+  return (tab->bush_children) ? tab->bush_children->start : tab;
+}
+
+
+/*
+  A helper function to iterate over all join tables in bush-children-first order
+
+  DESCRIPTION
+   
+  For example, for this join plan
+
+    ot1--ot2--sjm1------------ot3-...
+               |
+               |
+              it1--it2--it3 
+  
+  call to first_depth_first_tab() will return ot1, and subsequent calls to
+  next_depth_first_tab() will return:
+
+     ot2 it1 it2 it3 sjm ot3 ...
+*/
+
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
+{
+  /* If we're inside SJM nest and have reached its end, get out */
+  if (tab->last_leaf_in_bush)
+    return tab->bush_root_tab;
+  
+  /* Move to next tab in the array we're traversing */
+  tab++;
+  
+  if (tab == join->join_tab +join->top_join_tab_count)
+    return NULL; /* Outside SJM nest and reached EOF */
+
+  if (tab->bush_children)
+    return tab->bush_children->start;
+
+  return tab;
+}
+
+
+static Item * const null_ptr= NULL;
+
+/*
+  Set up join struct according to the picked join order in
+  
+  SYNOPSIS
+    get_best_combination()
+      join  The join to process (the picked join order is mainly in
+            join->best_positions)
+
+  DESCRIPTION
+    Setup join structures according the picked join order
+    - finalize semi-join strategy choices (see
+        fix_semijoin_strategies_for_picked_join_order)
+    - create join->join_tab array and put there the JOIN_TABs in the join order
+    - create data structures describing ref access methods.
+
+  NOTE
+    In this function we switch from pre-join-optimization JOIN_TABs to
+    post-join-optimization JOIN_TABs. This is achieved by copying the entire
+    JOIN_TAB objects.
+ 
+  RETURN 
+    FALSE  OK
+    TRUE   Out of memory
+*/
+
+bool
 get_best_combination(JOIN *join)
 {
-  uint i,tablenr;
+  uint tablenr;
   table_map used_tables;
   JOIN_TAB *join_tab,*j;
   KEYUSE *keyuse;
@@ -5773,18 +7100,76 @@ get_best_combination(JOIN *join)
   THD *thd=join->thd;
   DBUG_ENTER("get_best_combination");
 
-  table_count=join->tables;
+  table_count=join->table_count;
   if (!(join->join_tab=join_tab=
 	(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
     DBUG_RETURN(TRUE);
 
   join->full_join=0;
+  join->hash_join= FALSE;
 
   used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
+
+  fix_semijoin_strategies_for_picked_join_order(join);
+  
+  JOIN_TAB_RANGE *root_range;
+  if (!(root_range= new JOIN_TAB_RANGE))
+    DBUG_RETURN(TRUE);
+  root_range->start= join->join_tab;
+  /* root_range->end will be set later */
+  join->join_tab_ranges.empty();
+
+  if (join->join_tab_ranges.push_back(root_range))
+    DBUG_RETURN(TRUE);
+
+  JOIN_TAB *sjm_nest_end= NULL;
+  JOIN_TAB *sjm_nest_root= NULL;
+
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
   {
     TABLE *form;
+    POSITION *cur_pos= &join->best_positions[tablenr];
+    if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || 
+        cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      /*
+        Ok, we've entered an SJ-Materialization semi-join (note that this can't
+        be done recursively, semi-joins are not allowed to be nested).
+        1. Put into main join order a JOIN_TAB that represents a lookup or scan
+           in the temptable.
+      */
+      bzero(j, sizeof(JOIN_TAB));
+      j->join= join;
+      j->table= NULL; //temporary way to tell SJM tables from others.
+      j->ref.key = -1;
+      j->on_expr_ref= (Item**) &null_ptr;
+      j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */
+
+      /*
+        2. Proceed with processing SJM nest's join tabs, putting them into the
+           sub-order
+      */
+      SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
+      j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? sjm->rows : 1);
+      JOIN_TAB *jt;
+      JOIN_TAB_RANGE *jt_range;
+      if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
+          !(jt_range= new JOIN_TAB_RANGE))
+        DBUG_RETURN(TRUE);
+      jt_range->start= jt;
+      jt_range->end= jt + sjm->tables;
+      join->join_tab_ranges.push_back(jt_range);
+      j->bush_children= jt_range;
+      sjm_nest_end= jt + sjm->tables;
+      sjm_nest_root= j;
+
+      j= jt;
+    }
+    
     *j= *join->best_positions[tablenr].table;
+
+    j->bush_root_tab= sjm_nest_root;
+
     form=join->table[tablenr]=j->table;
     used_tables|= form->map;
     form->reginfo.join_tab=j;
@@ -5792,45 +7177,228 @@ get_best_combination(JOIN *join)
       form->reginfo.not_exists_optimize=0;	// Only with LEFT JOIN
     DBUG_PRINT("info",("type: %d", j->type));
     if (j->type == JT_CONST)
-      continue;					// Handled in make_join_stat..
+      goto loop_end;					// Handled in make_join_stat..
 
+    j->loosescan_match_tab= NULL;  //non-nulls will be set later
+    j->inside_loosescan_range= FALSE;
     j->ref.key = -1;
     j->ref.key_parts=0;
 
     if (j->type == JT_SYSTEM)
-      continue;
-    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key))
+      goto loop_end;
+    if ( !(keyuse= join->best_positions[tablenr].key))
     {
       j->type=JT_ALL;
       if (tablenr != join->const_tables)
 	join->full_join=1;
     }
-    else if (create_ref_for_key(join, j, keyuse, used_tables))
+
+    /*if (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)
+    {
+      DBUG_ASSERT(!keyuse || keyuse->key ==
+                             join->best_positions[tablenr].loosescan_picker.loosescan_key);
+      j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
+    }*/
+    
+    if (keyuse && create_ref_for_key(join, j, keyuse, TRUE, used_tables))
       DBUG_RETURN(TRUE);                        // Something went wrong
+
+    if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
+        is_hash_join_key_no(j->ref.key))
+      join->hash_join= TRUE; 
+
+  loop_end:
+    /* 
+      Save records_read in JOIN_TAB so that select_describe()/etc don't have
+      to access join->best_positions[]. 
+    */
+    j->records_read= (ha_rows)join->best_positions[tablenr].records_read;
+    join->map2table[j->table->tablenr]= j;
+
+    /* If we've reached the end of sjm nest, switch back to main sequence */
+    if (j + 1 == sjm_nest_end)
+    {
+      j->last_leaf_in_bush= TRUE;
+      j= sjm_nest_root;
+      sjm_nest_root= NULL;
+      sjm_nest_end= NULL;
+    }
   }
+  root_range->end= j;
 
-  for (i=0 ; i < table_count ; i++)
-    join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i;
+  join->top_join_tab_count= join->join_tab_ranges.head()->end - 
+                            join->join_tab_ranges.head()->start;
   update_depend_map(join);
   DBUG_RETURN(0);
 }
 
+/**
+  Create a descriptor of hash join key to access a given join table  
 
-static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
-			       table_map used_tables)
+  @param   join         join which the join table belongs to
+  @param   join_tab     the join table to access
+  @param   org_keyuse   beginning of the key uses to join this table
+  @param   used_tables  bitmap of the previous tables
+
+  @details
+  This function first finds key uses that can be utilized by the hash join
+  algorithm to join join_tab to the previous tables marked in the bitmap 
+  used_tables.  The tested key uses are taken from the array of all key uses
+  for 'join' starting from the position org_keyuse. After all interesting key
+  uses have been found the function builds a descriptor of the corresponding
+  key that is used by the hash join algorithm would it be chosen to join
+  the table join_tab.
+
+  @retval  FALSE  the descriptor for a hash join key is successfully created
+  @retval  TRUE   otherwise
+*/
+
+static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
+                                    KEYUSE *org_keyuse, table_map used_tables)
 {
-  KEYUSE *keyuse=org_keyuse;
-  bool ftkey=(keyuse->keypart == FT_KEYPART);
+  KEY *keyinfo;
+  KEY_PART_INFO *key_part_info;
+  KEYUSE *keyuse= org_keyuse;
+  uint key_parts= 0;
   THD  *thd= join->thd;
-  uint keyparts,length,key;
+  TABLE *table= join_tab->table;
+  bool first_keyuse= TRUE;
+  DBUG_ENTER("create_hj_key_for_table");
+
+  do
+  {
+    if (!(~used_tables & keyuse->used_tables) &&
+        are_tables_local(join_tab, keyuse->used_tables))    
+    {
+      if (first_keyuse)
+      {
+        key_parts++;
+        first_keyuse= FALSE;
+      }
+      else
+      {
+        KEYUSE *curr= org_keyuse;
+        for( ; curr < keyuse; curr++)
+        {
+          if (curr->keypart == keyuse->keypart &&
+              !(~used_tables & curr->used_tables) &&
+              are_tables_local(join_tab, curr->used_tables))
+            break;
+        }
+        if (curr == keyuse)
+           key_parts++;
+      }
+    }
+    keyuse++;
+  } while (keyuse->table == table && keyuse->is_for_hash_join());
+  if (!key_parts)
+    DBUG_RETURN(TRUE);
+  /* This memory is allocated only once for the joined table join_tab */
+  if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
+      !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
+                                                     key_parts)))
+    DBUG_RETURN(TRUE);
+  keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+  keyinfo->key_part= key_part_info;
+  keyinfo->key_length=0;
+  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+  keyinfo->flags= HA_GENERATED_KEY;
+  keyinfo->name= (char *) "$hj";
+  keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
+  if (!keyinfo->rec_per_key)
+    DBUG_RETURN(TRUE);
+  keyinfo->key_part= key_part_info;
+
+  first_keyuse= TRUE;
+  keyuse= org_keyuse;
+  do
+  {
+    if (!(~used_tables & keyuse->used_tables) &&
+        are_tables_local(join_tab, keyuse->used_tables))
+    { 
+      bool add_key_part= TRUE;
+      if (!first_keyuse)
+      {
+        for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++)
+        {
+          if (curr->keypart == keyuse->keypart &&
+              !(~used_tables & curr->used_tables) &&
+               are_tables_local(join_tab, curr->used_tables))
+	  {
+            keyuse->keypart= NO_KEYPART;
+            add_key_part= FALSE;
+            break;
+          }
+        }
+      }
+      if (add_key_part)
+      {
+        Field *field= table->field[keyuse->keypart];
+        uint fieldnr= keyuse->keypart+1;
+        table->create_key_part_by_field(keyinfo, key_part_info, field, fieldnr);
+        key_part_info++;
+      }
+    }
+    first_keyuse= FALSE;
+    keyuse++;
+  } while (keyuse->table == table && keyuse->is_for_hash_join());
+
+  join_tab->hj_key= keyinfo;
+
+  DBUG_RETURN(FALSE);
+}
+
+/* 
+  Check if a set of tables specified by used_tables can be accessed when
+  we're doing scan on join_tab jtab.
+*/
+static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
+{
+  if (jtab->bush_root_tab)
+  {
+    /*
+      jtab is inside execution join nest. We may not refer to outside tables,
+      except the const tables.
+    */
+    table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables |
+                            jtab->join->const_table_map |
+                            OUTER_REF_TABLE_BIT;
+    return !test(used_tables & ~local_tables);
+  }
+
+  /* 
+    If we got here then jtab is at top level. 
+     - all other tables at top level are accessible,
+     - tables in join nests are accessible too, because all their columns that 
+       are needed at top level will be unpacked when scanning the
+       materialization table.
+  */
+  return TRUE;
+}
+
+static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
+                               KEYUSE *org_keyuse, bool allow_full_scan, 
+                               table_map used_tables)
+{
+  uint keyparts, length, key;
   TABLE *table;
   KEY *keyinfo;
+  KEYUSE *keyuse= org_keyuse;
+  bool ftkey= (keyuse->keypart == FT_KEYPART);
+  THD *thd= join->thd;
   DBUG_ENTER("create_ref_for_key");
 
   /*  Use best key from find_best */
-  table=j->table;
-  key=keyuse->key;
-  keyinfo=table->key_info+key;
+  table= j->table;
+  key= keyuse->key;
+  if (!is_hash_join_key_no(key))
+    keyinfo= table->key_info+key;
+  else
+  {
+    if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
+      DBUG_RETURN(TRUE);
+    keyinfo= j->hj_key;
+  }
 
   if (ftkey)
   {
@@ -5853,27 +7421,38 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     {
       if (!(~used_tables & keyuse->used_tables))
       {
-	if (keyparts == keyuse->keypart &&
-	    !(found_part_ref_or_null & keyuse->optimize))
-	{
-	  keyparts++;
-	  length+= keyinfo->key_part[keyuse->keypart].store_length;
-	  found_part_ref_or_null|= keyuse->optimize;
-	}
+        if  (are_tables_local(j, keyuse->val->used_tables()))
+        {
+          if ((is_hash_join_key_no(key) && keyuse->keypart != NO_KEYPART) ||
+              (!is_hash_join_key_no(key) && keyparts == keyuse->keypart &&
+               !(found_part_ref_or_null & keyuse->optimize)))
+          {
+             length+= keyinfo->key_part[keyparts].store_length;
+             keyparts++;
+             found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
+          }
+        }
       }
       keyuse++;
     } while (keyuse->table == table && keyuse->key == key);
   } /* not ftkey */
+  
+  if (!keyparts && allow_full_scan)
+  {
+    /* It's a LooseIndexScan strategy scanning whole index */
+    j->type= JT_ALL;
+    j->index= key;
+    DBUG_RETURN(FALSE);
+  }
 
   /* set up fieldref */
-  keyinfo=table->key_info+key;
-  j->ref.key_parts=keyparts;
-  j->ref.key_length=length;
-  j->ref.key=(int) key;
+  j->ref.key_parts= keyparts;
+  j->ref.key_length= length;
+  j->ref.key= (int) key;
   if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
       !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
-						   (keyparts+1)))) ||
-      !(j->ref.items=    (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
+						          (keyparts+1)))) ||
+      !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
       !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
   {
     DBUG_RETURN(TRUE);
@@ -5882,11 +7461,13 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
   j->ref.key_err=1;
   j->ref.has_record= FALSE;
   j->ref.null_rejecting= 0;
-  j->ref.use_count= 0;
+  j->ref.disable_cache= FALSE;
+  j->ref.null_ref_part= NO_REF_PART;
   keyuse=org_keyuse;
 
   store_key **ref_key= j->ref.key_copy;
   uchar *key_buff=j->ref.key_buff, *null_ref_key= 0;
+  uint null_ref_part= NO_REF_PART;
   bool keyuse_uses_no_tables= TRUE;
   if (ftkey)
   {
@@ -5903,9 +7484,13 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     uint i;
     for (i=0 ; i < keyparts ; keyuse++,i++)
     {
-      while (keyuse->keypart != i ||
-	     ((~used_tables) & keyuse->used_tables))
-	keyuse++;				/* Skip other parts */
+      while (((~used_tables) & keyuse->used_tables) || 
+             keyuse->keypart == NO_KEYPART ||
+	     (keyuse->keypart != 
+              (is_hash_join_key_no(key) ?
+                 keyinfo->key_part[i].field->field_index : i)) || 
+             !are_tables_local(j, keyuse->val->used_tables())) 
+	 keyuse++;                              	/* Skip other parts */ 
 
       uint maybe_null= test(keyinfo->key_part[i].null_bit);
       j->ref.items[i]=keyuse->val;		// Save for cond removal
@@ -5913,12 +7498,15 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
       if (keyuse->null_rejecting) 
         j->ref.null_rejecting |= 1 << i;
       keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
-      if (!keyuse->used_tables &&!thd->lex->describe)
+      if (!keyuse->used_tables && !thd->lex->describe)
       {					// Compare against constant
-	store_key_item tmp(thd, keyinfo->key_part[i].field,
+	store_key_item tmp(thd, 
+                           keyinfo->key_part[i].field,
                            key_buff + maybe_null,
                            maybe_null ?  key_buff : 0,
-                           keyinfo->key_part[i].length, keyuse->val);
+                           keyinfo->key_part[i].length,
+                           keyuse->val,
+                           FALSE);
 	if (thd->is_fatal_error)
 	  DBUG_RETURN(TRUE);
 	tmp.copy();
@@ -5934,8 +7522,11 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 	instead of JT_REF_OR_NULL in case if field can't be null
       */
       if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
+      {
 	null_ref_key= key_buff;
-      key_buff+=keyinfo->key_part[i].store_length;
+        null_ref_part= i;
+      }
+      key_buff+= keyinfo->key_part[i].store_length;
     }
   } /* not ftkey */
   *ref_key=0;				// end_marker
@@ -5949,6 +7540,7 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
     /* Must read with repeat */
     j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
     j->ref.null_ref_key= null_ref_key;
+    j->ref.null_ref_part= null_ref_part;
   }
   else if (keyuse_uses_no_tables)
   {
@@ -5983,9 +7575,10 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
   }
   else if (keyuse->val->type() == Item::FIELD_ITEM ||
            (keyuse->val->type() == Item::REF_ITEM &&
-            ((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
-            (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
-             Item_ref::DIRECT_REF && 
+	    ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
+              (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
+              Item_ref::DIRECT_REF) || 
+             ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) &&
             keyuse->val->real_item()->type() == Item::FIELD_ITEM))
     return new store_key_field(thd,
 			       key_part->field,
@@ -5993,13 +7586,13 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
 			       maybe_null ? key_buff : 0,
 			       key_part->length,
 			       ((Item_field*) keyuse->val->real_item())->field,
-			       keyuse->val->full_name());
+			       keyuse->val->real_item()->full_name());
   return new store_key_item(thd,
 			    key_part->field,
 			    key_buff + maybe_null,
 			    maybe_null ? key_buff : 0,
 			    key_part->length,
-			    keyuse->val);
+			    keyuse->val, FALSE);
 }
 
 /**
@@ -6058,7 +7651,8 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
 
   join_tab= parent->join_tab_reexec;
   table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table;
-  tables= 1;
+  table_count= top_join_tab_count= 1;
+
   const_tables= 0;
   const_table_map= 0;
   eliminated_tables= 0;
@@ -6077,10 +7671,13 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   row_limit= unit->select_limit_cnt;
   do_send_rows= row_limit ? 1 : 0;
 
-  join_tab->cache.buff=0;			/* No caching */
+  join_tab->use_join_cache= FALSE;
+  join_tab->cache=0;			        /* No caching */
   join_tab->table=temp_table;
+  join_tab->cache_select= 0;
   join_tab->select=0;
-  join_tab->select_cond=0;
+  join_tab->select_cond= 0;                     // Avoid valgrind warning
+  join_tab->set_select_cond(NULL, __LINE__);
   join_tab->quick=0;
   join_tab->type= JT_ALL;			/* Map through all records */
   join_tab->keys.init();
@@ -6092,8 +7689,18 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   join_tab->ref.key = -1;
   join_tab->not_used_in_distinct=0;
   join_tab->read_first_record= join_init_read_record;
+  join_tab->preread_init_done= FALSE;
   join_tab->join= this;
   join_tab->ref.key_parts= 0;
+  join_tab->keep_current_rowid= FALSE;
+  join_tab->flush_weedout_table= join_tab->check_weed_out_table= NULL;
+  join_tab->do_firstmatch= NULL;
+  join_tab->loosescan_match_tab= NULL;
+  join_tab->emb_sj_nest= NULL;
+  join_tab->pre_idx_push_select_cond= NULL;
+  join_tab->bush_root_tab= NULL;
+  join_tab->bush_children= NULL;
+  join_tab->last_leaf_in_bush= FALSE;
   bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record));
   temp_table->status=0;
   temp_table->null_row=0;
@@ -6101,15 +7708,17 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
 }
 
 
-inline void add_cond_and_fix(Item **e1, Item *e2)
+inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
 {
   if (*e1)
   {
+    if (!e2)
+      return;
     Item *res;
     if ((res= new Item_cond_and(*e1, e2)))
     {
       *e1= res;
-      res->quick_fix_field();
+      res->fix_fields(thd, 0);
       res->update_used_tables();
     }
   }
@@ -6172,12 +7781,13 @@ inline void add_cond_and_fix(Item **e1, Item *e2)
 static void add_not_null_conds(JOIN *join)
 {
   DBUG_ENTER("add_not_null_conds");
-  for (uint i=join->const_tables ; i < join->tables ; i++)
+  
+  for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    if ((tab->type == JT_REF || tab->type == JT_EQ_REF || 
-         tab->type == JT_REF_OR_NULL) &&
-        !tab->table->maybe_null)
+    if (tab->type == JT_REF || tab->type == JT_EQ_REF || 
+        tab->type == JT_REF_OR_NULL)
     {
       for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
       {
@@ -6185,15 +7795,16 @@ static void add_not_null_conds(JOIN *join)
         {
           Item *item= tab->ref.items[keypart];
           Item *notnull;
-          DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
-          Item_field *not_null_item= (Item_field*)item;
+          Item *real= item->real_item();
+          DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
+          Item_field *not_null_item= (Item_field*)real;
           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
           /*
             For UPDATE queries such as:
             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
             not_null_item is the t1.f1, but it's referred_tab is 0.
           */
-          if (!referred_tab || referred_tab->join != join)
+          if (!referred_tab)
             continue;
           if (!(notnull= new Item_func_isnotnull(not_null_item)))
             DBUG_VOID_RETURN;
@@ -6205,11 +7816,22 @@ static void add_not_null_conds(JOIN *join)
           */
           if (notnull->fix_fields(join->thd, &notnull))
             DBUG_VOID_RETURN;
-          DBUG_EXECUTE("where",
-                       print_where(notnull,
-                                   referred_tab->table->alias.c_ptr(),
-                                   QT_ORDINARY););
-          add_cond_and_fix(&referred_tab->select_cond, notnull);
+          DBUG_EXECUTE("where",print_where(notnull,
+                                           referred_tab->table->alias.c_ptr(),
+                                           QT_ORDINARY););
+          if (!tab->first_inner)
+	  {
+            COND *new_cond= referred_tab->join == join ? 
+                              referred_tab->select_cond :
+                              join->outer_ref_cond;
+            add_cond_and_fix(join->thd, &new_cond, notnull);
+            if (referred_tab->join == join)
+              referred_tab->set_select_cond(new_cond, __LINE__);
+            else 
+              join->outer_ref_cond= new_cond;
+          }
+          else
+            add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
         }
       }
     }
@@ -6224,6 +7846,12 @@ static void add_not_null_conds(JOIN *join)
   nested outer join and so on until it reaches root_tab
   (root_tab can be 0).
 
+  In other words:
+  add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should 
+  wrap parts of WHERE.  The idea is that the part of WHERE should be only
+  evaluated after we've finished figuring out whether outer joins.
+  ^^^ is the above correct?
+
   @param tab       the first inner table for most nested outer join
   @param cond      the predicate to be guarded (must be set)
   @param root_tab  the first inner table to stop
@@ -6251,6 +7879,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
 }
 
 
+bool TABLE_LIST::is_active_sjm()
+{ 
+  return sj_mat_info && sj_mat_info->is_used;
+}
+
+
 /**
   Fill in outer join related info for the execution plan structure.
 
@@ -6268,6 +7902,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
     corresponding first inner table through the field t0->on_expr_ref.
     Here ti are structures of the JOIN_TAB type.
 
+    In other words, for each join tab, set
+     - first_inner
+     - last_inner
+     - first_upper
+     - on_expr_ref, cond_equal
+
   EXAMPLE. For the query: 
   @code
         SELECT * FROM t1
@@ -6293,14 +7933,33 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
     has been chosen.
 */
 
-static void
+static bool
 make_outerjoin_info(JOIN *join)
 {
   DBUG_ENTER("make_outerjoin_info");
-  for (uint i=join->const_tables ; i < join->tables ; i++)
+  
+  /*
+    Create temp. tables for merged SJ-Materialization nests. We need to do
+    this now, because further code relies on tab->table and
+    tab->table->pos_in_table_list being set.
+  */
+  JOIN_TAB *tab;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    TABLE *table=tab->table;
+    if (tab->bush_children)
+    {
+      if (setup_sj_materialization_part1(tab))
+        DBUG_RETURN(TRUE);
+      tab->table->reginfo.join_tab= tab;
+    }
+  }
+
+  for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    TABLE *table= tab->table;
     TABLE_LIST *tbl= table->pos_in_table_list;
     TABLE_LIST *embedding= tbl->embedding;
 
@@ -6314,11 +7973,19 @@ make_outerjoin_info(JOIN *join)
       tab->last_inner= tab->first_inner= tab;
       tab->on_expr_ref= &tbl->on_expr;
       tab->cond_equal= tbl->cond_equal;
-      if (embedding)
+      if (embedding && !embedding->is_active_sjm())
         tab->first_upper= embedding->nested_join->first_nested;
     }    
     for ( ; embedding ; embedding= embedding->embedding)
     {
+      if (embedding->is_active_sjm())
+      {
+        /* We're trying to walk out of an SJ-Materialization nest. Don't do this.  */
+        break;
+      }
+      /* Ignore sj-nests: */
+      if (!(embedding->on_expr && embedding->outer_join))
+        continue;
       NESTED_JOIN *nested_join= embedding->nested_join;
       if (!nested_join->counter)
       {
@@ -6334,13 +8001,20 @@ make_outerjoin_info(JOIN *join)
       }
       if (!tab->first_inner)  
         tab->first_inner= nested_join->first_nested;
+      if (tab->table->reginfo.not_exists_optimize)
+        tab->first_inner->table->reginfo.not_exists_optimize= 1;         
       if (++nested_join->counter < nested_join->n_tables)
         break;
       /* Table tab is the last inner table for nested join. */
       nested_join->first_nested->last_inner= tab;
+      if (tab->first_inner->table->reginfo.not_exists_optimize)
+      {
+        for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++)
+          join_tab->table->reginfo.not_exists_optimize= 1;
+      } 
     }
   }
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -6353,75 +8027,133 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
   {
     add_not_null_conds(join);
     table_map used_tables;
+    /*
+      Step #1: Extract constant condition
+       - Extract and check the constant part of the WHERE 
+       - Extract constant parts of ON expressions from outer 
+         joins and attach them appropriately.
+    */
     if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
     {                        /* there may be a select without a cond. */    
-      if (join->tables > 1)
+      if (join->table_count > 1)
         cond->update_used_tables();		// Tablenr may have changed
-      if (join->const_tables == join->tables &&
+      if (join->const_tables == join->table_count &&
 	  thd->lex->current_select->master_unit() ==
 	  &thd->lex->unit)		// not upper level SELECT
         join->const_table_map|=RAND_TABLE_BIT;
+
+      /*
+        Extract expressions that depend on constant tables
+        1. Const part of the join's WHERE clause can be checked immediately
+           and if it is not satisfied then the join has empty result
+        2. Constant parts of outer joins' ON expressions must be attached 
+           there inside the triggers.
+      */
       {						// Check const tables
-        COND *const_cond=
-	  make_cond_for_table(cond,
+        join->exec_const_cond=
+	  make_cond_for_table(thd, cond,
                               join->const_table_map,
-                              (table_map) 0);
-        DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
-        for (JOIN_TAB *tab= join->join_tab+join->const_tables;
-             tab < join->join_tab+join->tables ; tab++)
+                              (table_map) 0, -1, FALSE, FALSE);
+        /* Add conditions added by add_not_null_conds(). */
+        for (uint i= 0 ; i < join->const_tables ; i++)
+          add_cond_and_fix(thd, &join->exec_const_cond,
+                           join->join_tab[i].select_cond);
+
+        DBUG_EXECUTE("where",print_where(join->exec_const_cond,"constants",
+					 QT_ORDINARY););
+        if (join->exec_const_cond && !join->exec_const_cond->is_expensive() &&
+            !join->exec_const_cond->val_int())
+        {
+          DBUG_PRINT("info",("Found impossible WHERE condition"));
+          join->exec_const_cond= NULL;
+          DBUG_RETURN(1);	 // Impossible const condition
+        }
+
+        if (join->table_count != join->const_tables)
         {
-          if (*tab->on_expr_ref)
+          COND *outer_ref_cond= make_cond_for_table(thd, cond,
+                                                    join->const_table_map |
+                                                    OUTER_REF_TABLE_BIT,
+                                                    OUTER_REF_TABLE_BIT,
+                                                    -1, FALSE, FALSE);
+          if (outer_ref_cond)
           {
-            JOIN_TAB *cond_tab= tab->first_inner;
-            COND *tmp= make_cond_for_table(*tab->on_expr_ref,
-                                           join->const_table_map,
-                                         (  table_map) 0);
-            if (!tmp)
-              continue;
-            tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
-            if (!tmp)
-              DBUG_RETURN(1);
-            tmp->quick_fix_field();
-            cond_tab->select_cond= !cond_tab->select_cond ? tmp :
-	                            new Item_cond_and(cond_tab->select_cond,
-                                                      tmp);
-            if (!cond_tab->select_cond)
-	      DBUG_RETURN(1);
-            cond_tab->select_cond->quick_fix_field();
-          }       
+            add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
+            join->outer_ref_cond= outer_ref_cond;
+          }
         }
-        if (const_cond && !const_cond->val_int())
+        else
         {
-	  DBUG_PRINT("info",("Found impossible WHERE condition"));
-	  DBUG_RETURN(1);	 // Impossible const condition
+          COND *pseudo_bits_cond=
+            make_cond_for_table(thd, cond,
+                                join->const_table_map |
+                                PSEUDO_TABLE_BITS,
+                                PSEUDO_TABLE_BITS,
+                                -1, FALSE, FALSE);
+          if (pseudo_bits_cond)
+          {
+            add_cond_and_fix(thd, &pseudo_bits_cond,
+                             join->pseudo_bits_cond);
+            join->pseudo_bits_cond= pseudo_bits_cond;
+          }
         }
       }
     }
+
+    /*
+      Step #2: Extract WHERE/ON parts
+    */
+    table_map save_used_tables= 0;
     used_tables=((select->const_tables=join->const_table_map) |
 		 OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
-    for (uint i=join->const_tables ; i < join->tables ; i++)
+    JOIN_TAB *tab;
+    table_map current_map;
+    uint i= join->const_tables;
+    for (tab= first_depth_first_tab(join); tab;
+         tab= next_depth_first_tab(join, tab), i++)
     {
-      JOIN_TAB *tab=join->join_tab+i;
+      bool is_hj;
       /*
         first_inner is the X in queries like:
         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
       */
-      JOIN_TAB *first_inner_tab= tab->first_inner; 
-      table_map current_map= tab->table->map;
+      JOIN_TAB *first_inner_tab= tab->first_inner;
+
+      if (!tab->bush_children)
+        current_map= tab->table->map;
+      else
+        current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
+
       bool use_quick_range=0;
       COND *tmp;
 
+      /* 
+        Tables that are within SJ-Materialization nests cannot have their
+        conditions referring to preceding non-const tables.
+         - If we're looking at the first SJM table, reset used_tables
+           to refer to only allowed tables
+      */
+      if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info && 
+          tab->emb_sj_nest->sj_mat_info->is_used &&
+          !(used_tables & tab->emb_sj_nest->sj_inner_tables))
+      {
+        save_used_tables= used_tables;
+        used_tables= join->const_table_map | OUTER_REF_TABLE_BIT | 
+                     RAND_TABLE_BIT;
+      }
+
       /*
 	Following force including random expression in last table condition.
 	It solve problem with select like SELECT * FROM t1 WHERE rand() > 0.5
       */
-      if (i == join->tables-1)
+      if (tab == join->join_tab + join->top_join_tab_count - 1)
 	current_map|= OUTER_REF_TABLE_BIT | RAND_TABLE_BIT;
       used_tables|=current_map;
 
       if (tab->type == JT_REF && tab->quick &&
-	  (uint) tab->ref.key == tab->quick->index &&
-	  tab->ref.key_length < tab->quick->max_used_key_length)
+	  (((uint) tab->ref.key == tab->quick->index &&
+	    tab->ref.key_length < tab->quick->max_used_key_length) ||
+	    tab->table->intersect_keys.is_set(tab->ref.key)))
       {
 	/* Range uses longer key;  Use this instead of ref on key */
 	tab->type=JT_ALL;
@@ -6434,16 +8166,44 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           We will use join cache here : prevent sorting of the first
           table only and sort at the end.
         */
-        if (i != join->const_tables && join->tables > join->const_tables + 1)
+        if (i != join->const_tables && join->table_count > join->const_tables + 1)
           join->full_join= 1;
       }
 
       tmp= NULL;
+
       if (cond)
-        tmp= make_cond_for_table(cond,used_tables,current_map);
+      {
+        if (tab->bush_children)
+        {
+          // Reached the materialization tab
+          tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables);
+          used_tables= save_used_tables | used_tables;
+          save_used_tables= 0;
+        }
+        else
+         {
+	  tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
+                                   FALSE, FALSE);
+         }
+        /* Add conditions added by add_not_null_conds(). */
+        if (tab->select_cond)
+          add_cond_and_fix(thd, &tmp, tab->select_cond);
+      }
+
+      is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
+             (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+	     ((join->max_allowed_join_cache_level+1)/2 == 2 ||
+              ((join->max_allowed_join_cache_level+1)/2 > 2 &&
+	       is_hash_join_key_no(tab->ref.key))) &&
+              (!tab->emb_sj_nest ||                     
+               join->allowed_semijoin_with_cache) && 
+              (!(tab->table->map & join->outer_join) ||
+               join->allowed_outer_join_with_cache);
+
       if (cond && !tmp && tab->quick)
       {						// Outer join
-        if (tab->type != JT_ALL)
+        if (tab->type != JT_ALL && !is_hj)
         {
           /*
             Don't use the quick method
@@ -6465,11 +8225,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         }
 
       }
-      if (tmp || !cond || tab->type == JT_REF)
+      if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
+          tab->type == JT_EQ_REF || first_inner_tab)
       {
-        DBUG_EXECUTE("where",
-                     print_where(tmp,tab->table->alias.c_ptr(),
-                                 QT_ORDINARY););
+        DBUG_EXECUTE("where",print_where(tmp, 
+                                         tab->table? tab->table->alias.c_ptr() :"sjm-nest",
+                                         QT_ORDINARY););
 	SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
                                        thd->memdup((uchar*) select,
                                                    sizeof(*select)));
@@ -6489,35 +8250,47 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           */
           if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
             DBUG_RETURN(1);
-          tab->select_cond=sel->cond=tmp;
+          sel->cond= tmp;
+          tab->set_select_cond(tmp, __LINE__);
           /* Push condition to storage engine if this is enabled
              and the condition is not guarded */
-	  if (thd->variables.engine_condition_pushdown)
+          if (tab->table)
           {
-            COND *push_cond= 
-              make_cond_for_table(tmp, current_map, current_map);
-            if (push_cond)
+            tab->table->file->pushed_cond= NULL;
+            if (thd->variables.engine_condition_pushdown && !first_inner_tab)
             {
-              /* Push condition to handler */
-              if (!tab->table->file->cond_push(push_cond))
-                tab->table->file->pushed_cond= push_cond;
+              COND *push_cond= 
+              make_cond_for_table(thd, tmp, current_map, current_map,
+                                  -1, FALSE, FALSE);
+              if (push_cond)
+              {
+                /* Push condition to handler */
+                if (!tab->table->file->cond_push(push_cond))
+                  tab->table->file->pushed_cond= push_cond;
+              }
             }
           }
         }
         else
-          tab->select_cond= sel->cond= NULL;
+        {
+          sel->cond= NULL;
+          tab->set_select_cond(NULL, __LINE__);
+        }
 
 	sel->head=tab->table;
         DBUG_EXECUTE("where",
-                     print_where(tmp,tab->table->alias.c_ptr(),
+                     print_where(tmp, 
+                                 tab->table ? tab->table->alias.c_ptr() :
+                                   "(sjm-nest)",
                                  QT_ORDINARY););
 	if (tab->quick)
 	{
 	  /* Use quick key read if it's a constant and it's not used
 	     with key reading */
-	  if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF
-	      && tab->type != JT_FT && (tab->type != JT_REF ||
-               (uint) tab->ref.key == tab->quick->index))
+	  if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF
+	       && tab->type != JT_FT &&
+               ((tab->type != JT_REF && tab->type != JT_CONST) ||
+                (uint) tab->ref.key == tab->quick->index)) || is_hj)
 	  {
 	    sel->quick=tab->quick;		// Use value from get_quick_...
 	    sel->quick_keys.clear_all();
@@ -6529,7 +8302,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	  }
 	  tab->quick=0;
 	}
-	uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1;
+	uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0;
 	if (i == join->const_tables && ref_key)
 	{
 	  if (!tab->const_keys.is_clear_all() &&
@@ -6548,12 +8321,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	    the index if we are using limit and this is the first table
 	  */
 
-	  if ((cond &&
-               (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
-	      (!tab->const_keys.is_clear_all() && i == join->const_tables &&
-	       join->unit->select_limit_cnt <
-	       join->best_positions[i].records_read &&
-	       !(join->select_options & OPTION_FOUND_ROWS)))
+	  if (!tab->table->is_filled_at_execution() &&
+              ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
+               (!tab->const_keys.is_clear_all() && i == join->const_tables &&
+                join->unit->select_limit_cnt <
+                join->best_positions[i].records_read &&
+                !(join->select_options & OPTION_FOUND_ROWS))))
 	  {
 	    /* Join with outer join condition */
 	    COND *orig_cond=sel->cond;
@@ -6570,11 +8343,13 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	      sel->cond->quick_fix_field();
 
 	    if (sel->test_quick_select(thd, tab->keys,
-				       used_tables & ~ current_map,
+				       ((used_tables & ~ current_map) |
+                                        OUTER_REF_TABLE_BIT),
 				       (join->select_options &
 					OPTION_FOUND_ROWS ?
 					HA_POS_ERROR :
-					join->unit->select_limit_cnt), 0) < 0)
+					join->unit->select_limit_cnt), 0,
+                                        FALSE) < 0)
             {
 	      /*
 		Before reporting "Impossible WHERE" for the whole query
@@ -6587,7 +8362,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
                                          (join->select_options &
                                           OPTION_FOUND_ROWS ?
                                           HA_POS_ERROR :
-                                          join->unit->select_limit_cnt),0) < 0)
+                                          join->unit->select_limit_cnt),0,
+                                          FALSE) < 0)
 		DBUG_RETURN(1);			// Impossible WHERE
             }
             else
@@ -6614,26 +8390,17 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	      2 : 1;
 	    sel->read_tables= used_tables & ~current_map;
 	  }
-	  if (i != join->const_tables && tab->use_quick != 2)
+	  if (i != join->const_tables && tab->use_quick != 2 &&
+              !tab->first_inner)
 	  {					/* Read with cache */
-	    if (cond &&
-                (tmp=make_cond_for_table(cond,
-					 join->const_table_map |
-					 current_map,
-					 current_map)))
-	    {
-              DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
-	      tab->cache.select=(SQL_SELECT*)
-		thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
-	      tab->cache.select->cond=tmp;
-	      tab->cache.select->read_tables=join->const_table_map;
-	    }
-	  }
+            if (tab->make_scan_filter())
+              DBUG_RETURN(1);
+          }
 	}
       }
       
       /* 
-        Push down conditions from all on expressions.
+        Push down conditions from all ON expressions.
         Each of these conditions are guarded by a variable
         that turns if off just before null complemented row for
         outer joins is formed. Thus, the condition from an
@@ -6641,16 +8408,32 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         the null complemented row.
       */ 
 
-      /* First push down constant conditions from on expressions */
-      for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
-           join_tab < join->join_tab+join->tables ; join_tab++)
+      /* 
+        First push down constant conditions from ON expressions. 
+         - Each pushed-down condition is wrapped into trigger which is 
+           enabled only for non-NULL-complemented record
+         - The condition is attached to the first_inner_table.
+        
+        With regards to join nests:
+         - if we start at top level, don't walk into nests
+         - if we start inside a nest, stay within that nest.
+      */
+      JOIN_TAB *start_from= tab->bush_root_tab? 
+                               tab->bush_root_tab->bush_children->start : 
+                               join->join_tab + join->const_tables;
+      JOIN_TAB *end_with= tab->bush_root_tab? 
+                               tab->bush_root_tab->bush_children->end : 
+                               join->join_tab + join->top_join_tab_count;
+      for (JOIN_TAB *join_tab= start_from;
+           join_tab != end_with;
+           join_tab++)
       {
         if (*join_tab->on_expr_ref)
         {
           JOIN_TAB *cond_tab= join_tab->first_inner;
-          COND *tmp= make_cond_for_table(*join_tab->on_expr_ref,
+          COND *tmp= make_cond_for_table(thd, *join_tab->on_expr_ref,
                                          join->const_table_map,
-                                         (table_map) 0);
+                                         (table_map) 0, -1, FALSE, FALSE);
           if (!tmp)
             continue;
           tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -6662,13 +8445,22 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           if (!cond_tab->select_cond)
 	    DBUG_RETURN(1);
           cond_tab->select_cond->quick_fix_field();
+          cond_tab->select_cond->update_used_tables();
+          if (cond_tab->select)
+            cond_tab->select->cond= cond_tab->select_cond; 
         }       
       }
 
-      /* Push down non-constant conditions from on expressions */
+
+      /* Push down non-constant conditions from ON expressions */
       JOIN_TAB *last_tab= tab;
+
+      /*
+        while we're inside of an outer join and last_tab is 
+        the last of its tables ... 
+      */
       while (first_inner_tab && first_inner_tab->last_inner == last_tab)
-      {  
+      { 
         /* 
           Table tab is the last inner table of an outer join.
           An on expression is always attached to it.
@@ -6677,13 +8469,54 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 
         table_map used_tables2= (join->const_table_map |
                                  OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
-	for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++)
+
+        start_from= tab->bush_root_tab? 
+                      tab->bush_root_tab->bush_children->start : 
+                      join->join_tab + join->const_tables;
+        for (JOIN_TAB *tab= start_from; tab <= last_tab; tab++)
         {
+          DBUG_ASSERT(tab->table);
           current_map= tab->table->map;
           used_tables2|= current_map;
-          COND *tmp_cond= make_cond_for_table(on_expr, used_tables2,
-                                             current_map);
-          if (tmp_cond)
+          /*
+            psergey: have put the -1 below. It's bad, will need to fix it.
+          */
+          COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
+                                              current_map, /*(tab - first_tab)*/ -1,
+					      FALSE, FALSE);
+          bool is_sjm_lookup_tab= FALSE;
+          if (tab->bush_children)
+          {
+            /*
+              'tab' is an SJ-Materialization tab, i.e. we have a join order 
+              like this:
+
+                ot1 sjm_tab LEFT JOIN ot2 ot3
+                         ^          ^
+                   'tab'-+          +--- left join we're adding triggers for
+
+              LEFT JOIN's ON expression may not have references to subquery
+              columns.  The subquery was in the WHERE clause, so IN-equality 
+              is in the WHERE clause, also.
+              However, equality propagation code may have propagated the
+              IN-equality into ON expression, and we may get things like
+
+                subquery_inner_table=const
+
+              in the ON expression. We must not check such conditions during
+              SJM-lookup, because 1) subquery_inner_table has no valid current
+              row (materialization temp.table has it instead), and 2) they
+              would be true anyway.
+            */
+            SJ_MATERIALIZATION_INFO *sjm=
+              tab->bush_children->start->emb_sj_nest->sj_mat_info;
+            if (sjm->is_used && !sjm->is_sj_scan)
+              is_sjm_lookup_tab= TRUE;
+          }
+
+          if (tab == first_inner_tab && tab->on_precond && !is_sjm_lookup_tab)
+            add_cond_and_fix(thd, &tmp_cond, tab->on_precond);
+          if (tmp_cond && !is_sjm_lookup_tab)
           {
             JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab;
             Item **sel_cond_ref= tab < first_inner_tab ?
@@ -6719,7 +8552,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
             if (!(*sel_cond_ref))
               DBUG_RETURN(1);
             (*sel_cond_ref)->quick_fix_field();
-          }              
+            (*sel_cond_ref)->update_used_tables();
+            if (cond_tab->select)
+              cond_tab->select->cond= cond_tab->select_cond;
+          }
         }
         first_inner_tab= first_inner_tab->first_upper;       
       }
@@ -6729,6 +8565,290 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 }
 
 
+static
+uint get_next_field_for_derived_key(uchar *arg)
+{
+  KEYUSE *keyuse= *(KEYUSE **) arg;
+  if (!keyuse)
+    return (uint) (-1);
+  TABLE *table= keyuse->table;
+  uint key= keyuse->key;
+  uint fldno= keyuse->keypart; 
+  uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
+                                         0 : (keyuse-1)->keypart+1;
+  for ( ; 
+        keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
+        keyuse++)
+    keyuse->keypart= keypart;
+  if (keyuse->key != key)
+    keyuse= 0;
+  *((KEYUSE **) arg)= keyuse;
+  return fldno;
+}
+
+
+static 
+bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
+{
+  TABLE *table= keyuse->table;
+  if (table->alloc_keys(keys))
+    return TRUE;
+  uint key_count= 0;
+  KEYUSE *first_keyuse= keyuse;
+  uint prev_part= keyuse->keypart;
+  uint parts= 0;
+  uint i= 0;
+
+  for ( ; i < count && key_count < keys; )
+  {
+    do
+    {
+      keyuse->key= table->s->keys;
+      keyuse->keypart_map= (key_part_map) (1 << parts);     
+      keyuse++;
+      i++;
+    } 
+    while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
+           keyuse->keypart == prev_part);
+    parts++;
+    if (i < count && keyuse->used_tables == first_keyuse->used_tables)
+    {
+      prev_part= keyuse->keypart;
+    }
+    else
+    {
+      if (table->add_tmp_key(table->s->keys, parts, 
+                             get_next_field_for_derived_key, 
+                             (uchar *) &first_keyuse,
+                             FALSE))
+        return TRUE;
+      table->reginfo.join_tab->keys.set_bit(table->s->keys);
+      first_keyuse= keyuse;
+      key_count++;
+      parts= 0;
+      prev_part= keyuse->keypart;
+    }
+  }             
+
+  return FALSE;
+}
+   
+
+static
+bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
+{
+  KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
+  uint elements= keyuse_array->elements;
+  TABLE *prev_table= 0;
+  for (uint i= 0; i < elements; i++, keyuse++)
+  {
+    if (!keyuse->table)
+      break;
+    KEYUSE *first_table_keyuse= NULL;
+    table_map last_used_tables= 0;
+    uint count= 0;
+    uint keys= 0;
+    TABLE_LIST *derived= NULL;
+    if (keyuse->table != prev_table)
+      derived= keyuse->table->pos_in_table_list;
+    while (derived && derived->is_materialized_derived())
+    {
+      if (keyuse->table != prev_table)
+      {
+        prev_table= keyuse->table;
+        while (keyuse->table == prev_table && keyuse->key != MAX_KEY)
+	{
+          keyuse++;
+          i++;
+        }
+        if (keyuse->table != prev_table)
+	{
+          keyuse--;
+          i--;
+          derived= NULL;
+          continue;
+        }
+        first_table_keyuse= keyuse;
+        last_used_tables= keyuse->used_tables;
+        count= 0;
+        keys= 0;
+      }
+      else if (keyuse->used_tables != last_used_tables)
+      {
+        keys++;
+        last_used_tables= keyuse->used_tables;
+      }
+      count++;
+      keyuse++;
+      i++;
+      if (keyuse->table != prev_table)
+      {
+        if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
+          return TRUE;
+        keyuse--;
+        i--;
+	derived= NULL;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+/*
+  @brief
+  Drops unused keys for each materialized derived table/view
+
+  @details
+  For materialized derived tables only ref access can be used, it employs
+  only one index, thus we don't need the rest. For each materialized derived
+  table/view call TABLE::use_index to save one index chosen by the optimizer
+  and free others. No key is chosen then all keys will be dropped.
+*/
+
+void JOIN::drop_unused_derived_keys()
+{
+  JOIN_TAB *tab;
+  for (tab= first_linear_tab(this, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS))
+  {
+    
+    TABLE *table=tab->table;
+    if (!table)
+      continue;
+    if (!table->pos_in_table_list->is_materialized_derived())
+      continue;
+    if (table->max_keys > 1)
+      table->use_index(tab->ref.key);
+    if (table->s->keys)
+    {
+      if (tab->ref.key >= 0)
+        tab->ref.key= 0;
+      else
+        table->s->keys= 0;
+    }
+    tab->keys= (key_map) (table->s->keys ? 1 : 0);
+  }
+}
+
+
+/*
+  Evaluate the bitmap of used tables for items from the select list
+*/
+
+inline void JOIN::eval_select_list_used_tables()
+{
+  select_list_used_tables= 0;
+  Item *item;
+  List_iterator_fast<Item> it(fields_list);
+  while ((item= it++))
+  {
+    select_list_used_tables|= item->used_tables();
+  }
+  Item_outer_ref *ref;
+  List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
+  while ((ref= ref_it++))
+  {
+    item= ref->outer_ref;
+    select_list_used_tables|= item->used_tables();
+  }
+}
+
+
+/*
+  Determine {after which table we'll produce ordered set} 
+
+  SYNOPSIS
+    make_join_orderinfo()
+     join
+
+   
+  DESCRIPTION 
+    Determine if the set is already ordered for ORDER BY, so it can 
+    disable join cache because it will change the ordering of the results.
+    Code handles sort table that is at any location (not only first after 
+    the const tables) despite the fact that it's currently prohibited.
+    We must disable join cache if the first non-const table alone is
+    ordered. If there is a temp table the ordering is done as a last
+    operation and doesn't prevent join cache usage.
+
+  RETURN
+    Number of table after which the set will be ordered
+    join->tables if we don't need an ordered set 
+*/
+
+static uint make_join_orderinfo(JOIN *join)
+{
+  /*
+    This function needs to be fixed to take into account that we now have SJM
+    nests.
+  */
+  DBUG_ASSERT(0);
+
+  JOIN_TAB *tab;
+  if (join->need_tmp)
+    return join->table_count;
+  tab= join->get_sort_by_join_tab();
+  return tab ? tab-join->join_tab : join->table_count;
+}
+
+/*
+  Deny usage of join buffer for the specified table
+
+  SYNOPSIS
+    set_join_cache_denial()
+      tab    join table for which join buffer usage is to be denied  
+     
+  DESCRIPTION
+    The function denies usage of join buffer when joining the table 'tab'.
+    The table is marked as not employing any join buffer. If a join cache
+    object has been already allocated for the table this object is destroyed.
+
+  RETURN
+    none    
+*/
+
+static
+void set_join_cache_denial(JOIN_TAB *join_tab)
+{
+  if (join_tab->cache)
+  {
+    /* 
+      If there is a previous cache linked to this cache through the
+      next_cache pointer: remove the link. 
+    */
+    if (join_tab->cache->prev_cache)
+      join_tab->cache->prev_cache->next_cache= 0;
+    /*
+      No need to do the same for next_cache since cache denial is done
+      backwards starting from the latest cache in the linked list (see
+      revise_cache_usage()).
+    */
+    DBUG_ASSERT(!join_tab->cache->next_cache);
+
+    join_tab->cache->free();
+    join_tab->cache= 0;
+  }
+  if (join_tab->use_join_cache)
+  {
+    join_tab->use_join_cache= FALSE;
+    join_tab->used_join_cache_level= 0;
+    /*
+      It could be only sub_select(). It could not be sub_seject_sjm because we
+      don't do join buffering for the first table in sjm nest. 
+    */
+    join_tab[-1].next_select= sub_select;
+    if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
+    {
+      join_tab->type= JT_ALL;
+      join_tab->ref.key_parts= 0;
+    }
+    join_tab->join->return_tab= join_tab;
+  }
+}
+
+
 /**
   The default implementation of unlock-row method of READ_RECORD,
   used in all access methods.
@@ -6737,11 +8857,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 void rr_unlock_row(st_join_table *tab)
 {
   READ_RECORD *info= &tab->read_record;
-  info->file->unlock_row();
+  info->table->file->unlock_row();
 }
 
 
-
 /**
   Pick the appropriate access method functions
 
@@ -6792,47 +8911,692 @@ pick_table_access_method(JOIN_TAB *tab)
 }
 
 
-static void
-make_join_readinfo(JOIN *join, ulonglong options)
+/* 
+  Revise usage of join buffer for the specified table and the whole nest   
+
+  SYNOPSIS
+    revise_cache_usage()
+      tab    join table for which join buffer usage is to be revised  
+
+  DESCRIPTION
+    The function revise the decision to use a join buffer for the table 'tab'.
+    If this table happened to be among the inner tables of a nested outer join/
+    semi-join the functions denies usage of join buffers for all of them
+
+  RETURN
+    none    
+*/
+
+static
+void revise_cache_usage(JOIN_TAB *join_tab)
+{
+  JOIN_TAB *tab;
+  JOIN_TAB *first_inner;
+
+  if (join_tab->first_inner)
+  {
+    JOIN_TAB *end_tab= join_tab;
+    for (first_inner= join_tab->first_inner; 
+         first_inner;
+         first_inner= first_inner->first_upper)           
+    {
+      for (tab= end_tab; tab >= first_inner; tab--)
+        set_join_cache_denial(tab);
+      end_tab= first_inner;
+    }
+  }
+  else if (join_tab->first_sj_inner_tab)
+  {
+    first_inner= join_tab->first_sj_inner_tab;
+    for (tab= join_tab; tab >= first_inner; tab--)
+    {
+      set_join_cache_denial(tab);
+    }
+  }
+  else set_join_cache_denial(join_tab);
+}
+
+
+/*
+  end_select-compatible function that writes the record into a sjm temptable
+  
+  SYNOPSIS
+    end_sj_materialize()
+      join            The join 
+      join_tab        Points to right after the last join_tab in materialization bush
+      end_of_records  FALSE <=> This call is made to pass another record 
+                                combination
+                      TRUE  <=> EOF (no action)
+
+  DESCRIPTION
+    This function is used by semi-join materialization to capture suquery's
+    resultset and write it into the temptable (that is, materialize it).
+
+  NOTE
+    This function is used only for semi-join materialization. Non-semijoin
+    materialization uses different mechanism.
+
+  RETURN 
+    NESTED_LOOP_OK
+    NESTED_LOOP_ERROR
+*/
+
+enum_nested_loop_state 
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+  int error;
+  THD *thd= join->thd;
+  SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
+  DBUG_ENTER("end_sj_materialize");
+  if (!end_of_records)
+  {
+    TABLE *table= sjm->table;
+
+    List_iterator<Item> it(sjm->sjm_table_cols);
+    Item *item;
+    while ((item= it++))
+    {
+      if (item->is_null())
+        DBUG_RETURN(NESTED_LOOP_OK);
+    }
+    fill_record(thd, table->field, sjm->sjm_table_cols, TRUE, FALSE);
+    if (thd->is_error())
+      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    if ((error= table->file->ha_write_tmp_row(table->record[0])))
+    {
+      /* create_myisam_from_heap will generate error if needed */
+      if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+          create_internal_tmp_table_from_heap(thd, table,
+                                              sjm->sjm_table_param.start_recinfo, 
+                                              &sjm->sjm_table_param.recinfo, error, 1))
+        DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    }
+  }
+  DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/* 
+  Check whether a join buffer can be used to join the specified table   
+
+  SYNOPSIS
+    check_join_cache_usage()
+      tab                 joined table to check join buffer usage for
+      options             options of the join
+      no_jbuf_after       don't use join buffering after table with this number
+      prev_tab            previous join table
+
+  DESCRIPTION
+    The function finds out whether the table 'tab' can be joined using a join
+    buffer. This check is performed after the best execution plan for 'join'
+    has been chosen. If the function decides that a join buffer can be employed
+    then it selects the most appropriate join cache object that contains this
+    join buffer.
+    The result of the check and the type of the the join buffer to be used
+    depend on:
+      - the access method to access rows of the joined table
+      - whether the join table is an inner table of an outer join or semi-join
+      - whether the optimizer switches
+          outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
+          join_cache_hashed, join_cache_bka,
+        are set on or off
+      - the join cache level set for the query
+      - the join 'options'.
+
+    In any case join buffer is not used if the number of the joined table is
+    greater than 'no_jbuf_after'. It's also never used if the value of
+    join_cache_level is equal to 0.
+    If the optimizer switch outer_join_with_cache is off no join buffer is
+    used for outer join operations.
+    If the optimizer switch semijoin_with_cache is off no join buffer is used
+    for semi-join operations.
+    If the optimizer switch join_cache_incremental is off no incremental join
+    buffers are used.
+    If the optimizer switch join_cache_hashed is off then the optimizer uses
+    neither BNLH algorithm, nor BKAH algorithm to perform join operations.
+
+    If the optimizer switch join_cache_bka is off then the optimizer uses
+    neither BKA algorithm, nor BKAH algorithm to perform join operation.
+    The valid settings for join_cache_level lay in the interval 0..8.
+    If it set to 0 no join buffers are used to perform join operations.
+    Currently we differentiate between join caches of 8 levels:
+      1 : non-incremental join cache used for BNL join algorithm
+      2 : incremental join cache used for BNL join algorithm
+      3 : non-incremental join cache used for BNLH join algorithm
+      4 : incremental join cache used for BNLH join algorithm
+      5 : non-incremental join cache used for BKA join algorithm
+      6 : incremental join cache used for BKA join algorithm 
+      7 : non-incremental join cache used for BKAH join algorithm 
+      8 : incremental join cache used for BKAH join algorithm
+    If the value of join_cache_level is set to n then no join caches of
+    levels higher than n can be employed.
+
+    If the optimizer switches outer_join_with_cache, semijoin_with_cache,
+    join_cache_incremental, join_cache_hashed, join_cache_bka are all on
+    the following rules are applied.
+    If join_cache_level==1|2 then join buffer is used for inner joins, outer
+    joins and semi-joins with 'JT_ALL' access method. In this case a
+    JOIN_CACHE_BNL object is employed.
+    If join_cache_level==3|4 and then join buffer is used for a join operation
+    (inner join, outer join, semi-join) with 'JT_REF'/'JT_EQREF' access method
+    then a JOIN_CACHE_BNLH object is employed. 
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed. 
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed. 
+    If the value of join_cache_level is odd then creation of a non-linked 
+    join cache is forced.
+
+    Currently for any join operation a join cache of the  level of the
+    highest allowed and applicable level is used.
+    For example, if join_cache_level is set to 6 and the optimizer switch
+    join_cache_bka is off, while the optimizer switch join_cache_hashed is
+    on then for any inner join operation with JT_REF/JT_EQREF access method
+    to the joined table the BNLH join algorithm will be used, while for
+    the table accessed by the JT_ALL methods the BNL algorithm will be used.
+
+    If the function decides that a join buffer can be used to join the table
+    'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
+    the selected join cache object to the field 'cache' of the previous
+    join table. 
+    If the function creates a join cache object it tries to initialize it. The
+    failure to do this results in an invocation of the function that destructs
+    the created object.
+    If the function decides that but some reasons no join buffer can be used
+    for a table it calls the function revise_cache_usage that checks
+    whether join cache should be denied for some previous tables. In this case
+    a pointer to the first table for which join cache usage has been denied
+    is passed in join->return_val (see the function set_join_cache_denial).
+    
+    The functions changes the value the fields tab->icp_other_tables_ok and
+    tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm 
+    requires it.
+ 
+  NOTES
+    An inner table of a nested outer join or a nested semi-join can be currently
+    joined only when a linked cache object is employed. In these cases setting
+    join_cache_incremental to 'off' results in denial of usage of any join
+    buffer when joining the table.
+    For a nested outer join/semi-join, currently, we either use join buffers for
+    all inner tables or for none of them. 
+    Some engines (e.g. Falcon) currently allow to use only a join cache
+    of the type JOIN_CACHE_BKAH when the joined table is accessed through
+    an index. For these engines setting the value of join_cache_level to 5 or 6
+    results in that no join buffer is used to join the table. 
+  
+  RETURN VALUE
+    cache level if cache is used, otherwise returns 0
+
+  TODO
+    Support BKA inside SJ-Materialization nests. When doing this, we'll need
+    to only store sj-inner tables in the join buffer.
+#if 0
+        JOIN_TAB *first_tab= join->join_tab+join->const_tables;
+        uint n_tables= i-join->const_tables;
+        / *
+          We normally put all preceding tables into the join buffer, except
+          for the constant tables.
+          If we're inside a semi-join materialization nest, e.g.
+
+             outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
+                                                       ^-- we're here
+
+          then we need to put into the join buffer only the tables from
+          within the nest.
+        * /
+        if (i >= first_sjm_table && i < last_sjm_table)
+        {
+          n_tables= i - first_sjm_table; // will be >0 if we got here
+          first_tab= join->join_tab + first_sjm_table;
+        }
+#endif
+*/
+
+static
+uint check_join_cache_usage(JOIN_TAB *tab,
+                            ulonglong options,
+                            uint no_jbuf_after,
+                            uint table_index,
+                            JOIN_TAB *prev_tab)
 {
+  COST_VECT cost;
+  uint flags= 0;
+  ha_rows rows= 0;
+  uint bufsz= 4096;
+  JOIN_CACHE *prev_cache=0;
+  JOIN *join= tab->join;
+  uint cache_level= tab->used_join_cache_level;
+  bool force_unlinked_cache=
+         !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
+  bool no_hashed_cache=
+         !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
+  bool no_bka_cache= 
+         !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
+
+  join->return_tab= 0;
+
+  /*
+    Don't use join cache if @@join_cache_level==0 or this table is the first
+    one join suborder (either at top level or inside a bush)
+  */
+  if (cache_level == 0 || !prev_tab)
+    return 0;
+
+  if (force_unlinked_cache && (cache_level%2 == 0))
+    cache_level--;
+
+  if (options & SELECT_NO_JOIN_CACHE)
+    goto no_join_cache;
+
+  if (tab->use_quick == 2)
+    goto no_join_cache;
+
+  if (tab->table->map & join->complex_firstmatch_tables)
+    goto no_join_cache;
+  
+  /*
+    Don't use join cache if we're inside a join tab range covered by LooseScan
+    strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it 
+    should be possible to use join buffering in the same way we're using it for
+    multi-table firstmatch ranges).
+  */
+  if (tab->inside_loosescan_range)
+    goto no_join_cache;
+
+  if (tab->is_inner_table_of_semijoin() &&
+      !join->allowed_semijoin_with_cache)
+    goto no_join_cache;
+  if (tab->is_inner_table_of_outer_join() &&
+      !join->allowed_outer_join_with_cache)
+    goto no_join_cache;
+
+  /*
+    Non-linked join buffers can't guarantee one match
+  */
+  if (tab->is_nested_inner())
+  {
+    if (force_unlinked_cache || cache_level == 1)
+      goto no_join_cache;
+    if (cache_level & 1)
+      cache_level--;
+  }
+    
+  /*
+    Don't use join buffering if we're dictated not to by no_jbuf_after
+    (This is not meaningfully used currently)
+  */
+  if (table_index > no_jbuf_after)
+    goto no_join_cache;
+  
+  /*
+    TODO: BNL join buffer should be perfectly ok with tab->bush_children.
+  */
+  if (tab->loosescan_match_tab || tab->bush_children)
+    goto no_join_cache;
+
+  for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
+       first_inner= first_inner->first_upper)
+  {
+    if (first_inner != tab && 
+        (!first_inner->use_join_cache || !(tab-1)->use_join_cache))
+      goto no_join_cache;
+  }
+  if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
+      (!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache))
+    goto no_join_cache;
+  if (!prev_tab->use_join_cache)
+  {
+    /* 
+      Check whether table tab and the previous one belong to the same nest of
+      inner tables and if so do not use join buffer when joining table tab. 
+    */
+    if (tab->first_inner && tab != tab->first_inner)
+    {
+      for (JOIN_TAB *first_inner= tab[-1].first_inner;
+           first_inner;
+           first_inner= first_inner->first_upper)
+      {
+        if (first_inner == tab->first_inner)
+          goto no_join_cache;
+      }
+    }
+    else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab &&
+             tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
+      goto no_join_cache; 
+  }       
+
+  prev_cache= prev_tab->cache;
+
+  switch (tab->type) {
+  case JT_ALL:
+    if (cache_level == 1)
+      prev_cache= 0;
+    if ((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
+        ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+    {
+      tab->icp_other_tables_ok= FALSE;
+      return (2-test(!prev_cache));
+    }
+    goto no_join_cache;
+  case JT_SYSTEM:
+  case JT_CONST:
+  case JT_REF:
+  case JT_EQ_REF:
+    if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
+      goto no_join_cache;
+    if (tab->ref.is_access_triggered())
+      goto no_join_cache;
+      
+    if (!tab->is_ref_for_hash_join())
+    {
+      flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
+      if (tab->table->covering_keys.is_set(tab->ref.key))
+        flags|= HA_MRR_INDEX_ONLY;
+      rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+                                                    tab->ref.key_parts,
+                                                    &bufsz, &flags, &cost);
+    }
+
+    if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
+        tab->is_ref_for_hash_join() ||
+	((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
+    {
+      if (!tab->hash_join_is_possible() ||
+          tab->make_scan_filter())
+        goto no_join_cache;
+      if (cache_level == 3)
+        prev_cache= 0;
+      if ((tab->cache= new JOIN_CACHE_BNLH(join, tab, prev_cache)) &&
+          ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+      {
+        tab->icp_other_tables_ok= FALSE;        
+        return (4-test(!prev_cache));
+      }
+      goto no_join_cache;
+    }
+    if (cache_level > 4 && no_bka_cache)
+      goto no_join_cache;
+    
+    if ((flags & HA_MRR_NO_ASSOCIATION) &&
+	(cache_level <= 6 || no_hashed_cache))
+      goto no_join_cache;
+
+    if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
+    {
+      if (cache_level <= 6 || no_hashed_cache)
+      {
+        if (cache_level == 5)
+          prev_cache= 0;
+        if ((tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache)) &&
+            ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+          return (6-test(!prev_cache));
+        goto no_join_cache;
+      }
+      else
+      {
+        if (cache_level == 7)
+          prev_cache= 0;
+        if ((tab->cache= new JOIN_CACHE_BKAH(join, tab, flags, prev_cache)) &&
+            ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+	{
+         tab->idx_cond_fact_out= FALSE;
+          return (8-test(!prev_cache));
+        }
+        goto no_join_cache;
+      }
+    }
+    goto no_join_cache;
+  default : ;
+  }
+
+no_join_cache:
+  if (tab->type != JT_ALL && tab->is_ref_for_hash_join())
+  {
+    tab->type= JT_ALL;
+    tab->ref.key_parts= 0;
+  }
+  revise_cache_usage(tab); 
+  return 0;
+}
+
+
+/* 
+  Check whether join buffers can be used to join tables of a join   
+
+  SYNOPSIS
+    check_join_cache_usage()
+      join                join whose tables are to be checked             
+      options             options of the join
+      no_jbuf_after       don't use join buffering after table with this number
+                          (The tables are assumed to be numbered in
+                          first_linear_tab(join, WITHOUT_CONST_TABLES),
+                          next_linear_tab(join, WITH_CONST_TABLES) order).
+
+  DESCRIPTION
+    For each table after the first non-constant table the function checks
+    whether the table can be joined using a join buffer. If the function decides
+    that a join buffer can be employed then it selects the most appropriate join
+    cache object that contains this join buffer whose level is not greater
+    than join_cache_level set for the join. To make this check the function
+    calls the function check_join_cache_usage for every non-constant table.
+
+  NOTES
+    In some situations (e.g. for nested outer joins, for nested semi-joins) only
+    incremental buffers can be used. If it turns out that for some inner table
+    no join buffer can be used then any inner table of an outer/semi-join nest
+    cannot use join buffer. In the case when already chosen buffer must be
+    denied for a table the function recalls check_join_cache_usage()
+    starting from this table. The pointer to the table from which the check
+    has to be restarted is returned in join->return_val (see the description
+    of check_join_cache_usage).
+*/
+
+void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
+                                       uint no_jbuf_after)
+{
+  JOIN_TAB *tab;
+  JOIN_TAB *prev_tab;
+
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    tab->used_join_cache_level= join->max_allowed_join_cache_level;  
+  }
+
+  uint idx= join->const_tables;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+restart:
+    tab->icp_other_tables_ok= TRUE;
+    tab->idx_cond_fact_out= TRUE;
+    
+    /* 
+      Check if we have a preceding join_tab, as something that will feed us
+      records that we could buffer. We don't have it, if 
+       - this is the first non-const table in the join order,
+       - this is the first table inside an SJM nest.
+    */
+    prev_tab= tab - 1;
+    if (tab == join->join_tab + join->const_tables ||
+        (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
+      prev_tab= NULL;
+
+    switch (tab->type) {
+    case JT_SYSTEM:
+    case JT_CONST:
+    case JT_EQ_REF:
+    case JT_REF:
+    case JT_REF_OR_NULL:
+    case JT_ALL:
+      tab->used_join_cache_level= check_join_cache_usage(tab, options,
+                                                         no_jbuf_after,
+                                                         idx,
+                                                         prev_tab);
+      tab->use_join_cache= test(tab->used_join_cache_level);
+      /*
+        psergey-merge: todo: raise the question that this is really stupid that
+        we can first allocate a join buffer, then decide not to use it and free
+        it.
+      */
+      if (join->return_tab)
+      {
+        tab= join->return_tab;
+        goto restart;
+      }
+      break; 
+    default:
+      tab->used_join_cache_level= 0;
+    }
+    if (!tab->bush_children)
+      idx++;
+  }
+}
+
+
+/*
+  Plan refinement stage: do various setup things for the executor
+
+  SYNOPSIS
+    make_join_readinfo()
+      join           Join being processed
+      options        Join's options (checking for SELECT_DESCRIBE, 
+                     SELECT_NO_JOIN_CACHE)
+      no_jbuf_after  Don't use join buffering after table with this number.
+
+  DESCRIPTION
+    Plan refinement stage: do various set ups for the executioner
+      - set up use of join buffering
+      - push index conditions
+      - increment relevant counters
+      - etc
+
+  RETURN 
+    FALSE - OK
+    TRUE  - Out of memory
+*/
+
+static bool
+make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
+{
+  JOIN_TAB *tab;
   uint i;
+  DBUG_ENTER("make_join_readinfo");
+
   bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
-  bool ordered_set= 0;
   bool sorted= 1;
-  DBUG_ENTER("make_join_readinfo");
 
-  for (i=join->const_tables ; i < join->tables ; i++)
+  join->complex_firstmatch_tables= table_map(0);
+
+  if (!join->select_lex->sj_nests.is_empty() &&
+      setup_semijoin_dups_elimination(join, options, no_jbuf_after))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  
+  /* For const tables, set partial_join_cardinality to 1. */
+  for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
+    tab->partial_join_cardinality= 1; 
+
+  JOIN_TAB *prev_tab= NULL;
+  for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES), i= join->const_tables; 
+       tab; 
+       prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
   {
-    JOIN_TAB *tab=join->join_tab+i;
+    /*
+      The approximation below for partial join cardinality is not good because
+        - it does not take into account some pushdown predicates
+        - it does not differentiate between inner joins, outer joins and
+        semi-joins.
+      Later it should be improved.
+    */
+
+    if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
+      prev_tab= NULL;
+    DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table);
+
+    tab->partial_join_cardinality= join->best_positions[i].records_read *
+                                   (prev_tab? prev_tab->partial_join_cardinality : 1);
+    if (!tab->bush_children)
+      i++;
+  }
+ 
+  check_join_cache_usage_for_tables(join, options, no_jbuf_after);
+  
+  JOIN_TAB *first_tab;
+  for (tab= first_tab= first_linear_tab(join, WITHOUT_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
+  {
+    if (tab->bush_children)
+    {
+      if (setup_sj_materialization_part2(tab))
+        return TRUE;
+    }
+
     TABLE *table=tab->table;
+    uint jcl= tab->used_join_cache_level;
     tab->read_record.table= table;
-    tab->read_record.file=table->file;
     tab->read_record.unlock_row= rr_unlock_row;
-    tab->next_select=sub_select;		/* normal select */
+    tab->sorted= sorted;
+    sorted= 0;                                  // only first must be sorted
+    
 
     /*
-      Determine if the set is already ordered for ORDER BY, so it can 
-      disable join cache because it will change the ordering of the results.
-      Code handles sort table that is at any location (not only first after 
-      the const tables) despite the fact that it's currently prohibited.
-      We must disable join cache if the first non-const table alone is
-      ordered. If there is a temp table the ordering is done as a last
-      operation and doesn't prevent join cache usage.
+      We should not set tab->next_select for the last table in the
+      SMJ-nest, as setup_sj_materialization() has already set it to
+      end_sj_materialize.
     */
-    if (!ordered_set && !join->need_tmp && 
-        (table == join->sort_by_table ||
-         (join->sort_by_table == (TABLE *) 1 && i != join->const_tables)))
-      ordered_set= 1;
+    if (!(tab->bush_root_tab && 
+          tab->bush_root_tab->bush_children->end == tab + 1))
+    {
+      tab->next_select=sub_select;		/* normal select */
+    }
 
-    tab->sorted= sorted;
-    sorted= 0;                                  // only first must be sorted
+
+    if (tab->loosescan_match_tab)
+    {
+      if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
+                                                         loosescan_key_len)))
+        return TRUE; /* purecov: inspected */
+      tab->sorted= TRUE;
+    }
     table->status=STATUS_NO_RECORD;
     pick_table_access_method (tab);
 
+    if (jcl)
+       tab[-1].next_select=sub_select_cache;
+
+    if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
+      tab->type= JT_HASH;
+      
     switch (tab->type) {
+    case JT_SYSTEM:				// Only happens with left join 
+    case JT_CONST:				// Only happens with left join
+      /* Only happens with outer joins */
+      tab->read_first_record= tab->type == JT_SYSTEM ?
+                                join_read_system :join_read_const;
+      if (table->covering_keys.is_set(tab->ref.key) &&
+          !table->no_keyread)
+      {
+        table->key_read=1;
+        table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+        push_index_cond(tab, tab->ref.key);
+      break;
     case JT_EQ_REF:
       tab->read_record.unlock_row= join_read_key_unlock_row;
       /* fall through */
+      if (table->covering_keys.is_set(tab->ref.key) &&
+	  !table->no_keyread)
+      {
+	table->key_read=1;
+	table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+        push_index_cond(tab, tab->ref.key);
+      break;
     case JT_REF_OR_NULL:
     case JT_REF:
       if (tab->select)
@@ -6842,27 +9606,20 @@ make_join_readinfo(JOIN *join, ulonglong options)
       }
       delete tab->quick;
       tab->quick=0;
-      /* fall through */
-    case JT_CONST:				// Only happens with left join
       if (table->covering_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
         table->enable_keyread();
+      else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
+        push_index_cond(tab, tab->ref.key);
       break;
     case JT_ALL:
+    case JT_HASH:
       /*
 	If previous table use cache
         If the incoming data set is already sorted don't use cache.
+        Also don't use cache if this is the first table in semi-join
+          materialization nest.
       */
-      if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
-          tab->use_quick != 2 && !tab->first_inner && !ordered_set)
-      {
-	if ((options & SELECT_DESCRIBE) ||
-	    !join_init_cache(join->thd,join->join_tab+join->const_tables,
-			     i-join->const_tables))
-	{
-	  tab[-1].next_select=sub_select_cache; /* Patch previous */
-	}
-      }
       /* These init changes read_record */
       if (tab->use_quick == 2)
       {
@@ -6873,8 +9630,9 @@ make_join_readinfo(JOIN *join, ulonglong options)
       }
       else
       {
-	tab->read_first_record= join_init_read_record;
-	if (i == join->const_tables)
+        if (!tab->bush_children)
+          tab->read_first_record= join_init_read_record;
+	if (tab == first_tab)
 	{
 	  if (tab->select && tab->select->quick)
 	  {
@@ -6896,14 +9654,16 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  if (tab->select && tab->select->quick)
 	  {
 	    if (statistics)
-	      status_var_increment(join->thd->status_var.select_full_range_join_count);
+	      status_var_increment(join->thd->status_var.
+                                   select_full_range_join_count);
 	  }
 	  else
 	  {
 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
 	    if (statistics)
 	    {
-	      status_var_increment(join->thd->status_var.select_full_join_count);
+	      status_var_increment(join->thd->status_var.
+                                   select_full_join_count);
 	      join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
 	    }
 	  }
@@ -6917,37 +9677,80 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  else if (!table->covering_keys.is_clear_all() &&
 		   !(tab->select && tab->select->quick))
 	  {					// Only read index tree
+#ifdef BAD_OPTIMIZATION
 	    /*
-            It has turned out that the below change, while speeding things
-            up for disk-bound loads, slows them down for cases when the data
-            is in disk cache (see BUG#35850):
-	    //  See bug #26447: "Using the clustered index for a table scan
-	    //  is always faster than using a secondary index".
+              It has turned out that the below change, while speeding things
+              up for disk-bound loads, slows them down for cases when the data
+              is in disk cache (see BUG#35850):
+              See bug #26447: "Using the clustered index for a table scan
+              is always faster than using a secondary index".
+            */
             if (table->s->primary_key != MAX_KEY &&
                 table->file->primary_key_is_clustered())
               tab->index= table->s->primary_key;
             else
-	    */
+#endif
               tab->index=find_shortest_key(table, & table->covering_keys);
 	    tab->read_first_record= join_read_first;
-	    tab->type=JT_NEXT;		// Read with index_first / index_next
+            /* Read with index_first / index_next */
+	    tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;		
 	  }
 	}
+        if (tab->select && tab->select->quick &&
+            tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
+          push_index_cond(tab, tab->select->quick->index);
       }
       break;
     case JT_FT:
-    case JT_SYSTEM: 
       break;
+      /* purecov: begin deadcode */
     default:
-      DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
-      break;					/* purecov: deadcode */
+      DBUG_PRINT("error",("Table type %d found",tab->type));
+      break;
     case JT_UNKNOWN:
     case JT_MAYBE_REF:
-      abort();					/* purecov: deadcode */
+      abort();
+      /* purecov: end */
     }
   }
-  join->join_tab[join->tables-1].next_select=0; /* Set by do_select */
-  DBUG_VOID_RETURN;
+  uint n_top_tables= join->join_tab_ranges.head()->end -  
+                     join->join_tab_ranges.head()->start;
+
+  join->join_tab[n_top_tables - 1].next_select=0;  /* Set by do_select */
+  
+  /*
+    If a join buffer is used to join a table the ordering by an index
+    for the first non-constant table cannot be employed anymore.
+  */
+  for (tab= join->join_tab + join->const_tables ; 
+       tab != join->join_tab + n_top_tables ; tab++)
+  {
+    if (tab->use_join_cache)
+    {
+       JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
+                              join->group_list ?
+			       join->join_tab+join->const_tables :
+                               join->get_sort_by_join_tab();
+     if (sort_by_tab)
+      {
+        join->need_tmp= 1;
+        join->simple_order= join->simple_group= 0;
+        if (sort_by_tab->type == JT_NEXT)
+        {
+          sort_by_tab->type= JT_ALL;
+          sort_by_tab->read_first_record= join_init_read_record;
+        }
+        else if (sort_by_tab->type == JT_HASH_NEXT)
+        {
+          sort_by_tab->type= JT_HASH;
+          sort_by_tab->read_first_record= join_init_read_record;
+        }
+      }
+      break;
+    }
+  }
+
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -6967,14 +9770,11 @@ make_join_readinfo(JOIN *join, ulonglong options)
 
 bool error_if_full_join(JOIN *join)
 {
-  for (JOIN_TAB *tab=join->join_tab, *end=join->join_tab+join->tables;
-       tab < end;
-       tab++)
+  for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
+       tab= next_top_level_tab(join, tab))
   {
     if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
     {
-      /* This error should not be ignored. */
-      join->select_lex->no_error= FALSE;
       my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
                  ER(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
       return(1);
@@ -6986,21 +9786,53 @@ bool error_if_full_join(JOIN *join)
 
 /**
   cleanup JOIN_TAB.
+
+  DESCRIPTION 
+    This is invoked when we've finished all join executions.
 */
 
 void JOIN_TAB::cleanup()
 {
+  DBUG_ENTER("JOIN_TAB::cleanup");
+  DBUG_PRINT("enter", ("table %s.%s",
+                       (table ? table->s->db.str : "?"),
+                       (table ? table->s->table_name.str : "?")));
   delete select;
   select= 0;
   delete quick;
   quick= 0;
-  x_free(cache.buff);
-  cache.buff= 0;
+  if (cache)
+  {
+    cache->free();
+    cache= 0;
+  }
   limit= 0;
   if (table)
   {
     table->disable_keyread();
     table->file->ha_index_or_rnd_end();
+    preread_init_done= FALSE;
+    if (table->pos_in_table_list && 
+        table->pos_in_table_list->jtbm_subselect)
+    {
+      if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+      {
+        free_tmp_table(join->thd, table);
+        table= NULL;
+      }
+      else
+      {
+        end_read_record(&read_record);
+        table->pos_in_table_list->jtbm_subselect->cleanup();
+        /* 
+          The above call freed the materializedd temptable. Set it to NULL so
+          that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
+          multiple times (it may be)
+        */
+        table=NULL;
+      }
+      DBUG_VOID_RETURN;
+    }
     /*
       We need to reset this for next select
       (Tested in part_of_refkey)
@@ -7008,6 +9840,157 @@ void JOIN_TAB::cleanup()
     table->reginfo.join_tab= 0;
   }
   end_read_record(&read_record);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Estimate the time to get rows of the joined table
+*/
+
+double JOIN_TAB::scan_time()
+{
+  double res;
+  if (table->created)
+  {
+    if (table->is_filled_at_execution())
+    {
+      get_delayed_table_estimates(table, &records, &read_time,
+                                    &startup_cost);
+      found_records= records;
+      table->quick_condition_rows= records;
+    }
+    else
+    {
+      found_records= records= table->file->stats.records;
+      read_time= table->file->scan_time();
+      /*
+        table->quick_condition_rows has already been set to
+        table->file->stats.records
+      */
+    }
+    res= read_time;
+  }
+  else
+  {
+    found_records= records=table->file->stats.records;
+    read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
+    res= read_time;
+  }
+  return res;
+}
+
+/**
+  Initialize the join_tab before reading.
+  Currently only derived table/view materialization is done here.
+
+  TODO: consider moving this together with join_tab_execution_startup
+*/
+bool JOIN_TAB::preread_init()
+{
+  TABLE_LIST *derived= table->pos_in_table_list;
+  if (!derived || !derived->is_materialized_derived())
+  {
+    preread_init_done= TRUE;
+    return FALSE;
+  }
+
+  /* Materialize derived table/view. */
+  if (!derived->get_unit()->executed &&
+      mysql_handle_single_derived(join->thd->lex,
+                                    derived, DT_CREATE | DT_FILL))
+      return TRUE;
+  preread_init_done= TRUE;
+  if (select && select->quick)
+    select->quick->replace_handler(table->file);
+  return FALSE;
+}
+
+
+
+/**
+  Build a TABLE_REF structure for index lookup in the temporary table
+
+  @param thd             Thread handle
+  @param tmp_key         The temporary table key
+  @param it              The iterator of items for lookup in the key
+  @param skip            Number of fields from the beginning to skip
+
+  @details
+  Build TABLE_REF object for lookup in the key 'tmp_key' using items
+  accessible via item iterator 'it'.
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
+                                            KEY *tmp_key,
+                                            Item_iterator &it,
+                                            bool value,
+                                            uint skip)
+{
+  uint tmp_key_parts= tmp_key->key_parts;
+  uint i;
+  DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
+
+  key= 0; /* The only temp table index. */
+  key_length= tmp_key->key_length;
+  if (!(key_buff=
+        (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+      !(key_copy=
+        (store_key**) thd->alloc((sizeof(store_key*) *
+                                  (tmp_key_parts + 1)))) ||
+      !(items=
+        (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+    DBUG_RETURN(TRUE);
+
+  key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
+
+  KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+  store_key **ref_key= key_copy;
+  uchar *cur_ref_buff= key_buff;
+
+  it.open();
+  for (i= 0; i < skip; i++) it.next();
+  for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+  {
+    Item *item= it.next();
+    DBUG_ASSERT(item);
+    items[i]= item;
+    int null_count= test(cur_key_part->field->real_maybe_null());
+    *ref_key= new store_key_item(thd, cur_key_part->field,
+                                 /* TIMOUR:
+                                    the NULL byte is taken into account in
+                                    cur_key_part->store_length, so instead of
+                                    cur_ref_buff + test(maybe_null), we could
+                                    use that information instead.
+                                 */
+                                 cur_ref_buff + null_count,
+                                 null_count ? cur_ref_buff : 0,
+                                 cur_key_part->length, items[i], value);
+    cur_ref_buff+= cur_key_part->store_length;
+  }
+  *ref_key= NULL; /* End marker. */
+  key_err= 1;
+  key_parts= tmp_key_parts;
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates
+  between ref access and full table scan)
+*/
+
+bool TABLE_REF::is_access_triggered()
+{
+  for (uint i = 0; i < key_parts; i++)
+  {
+    if (cond_guards[i])
+      return TRUE;
+  }
+  return FALSE;
 }
 
 
@@ -7062,7 +10045,7 @@ void JOIN::join_free()
     Optimization: if not EXPLAIN and we are done with the JOIN,
     free all tables.
   */
-  bool full= (!select_lex->uncacheable && !thd->lex->describe);
+  bool full= !(select_lex->uncacheable);
   bool can_unlock= full;
   DBUG_ENTER("JOIN::join_free");
 
@@ -7126,15 +10109,16 @@ void JOIN::join_free()
 void JOIN::cleanup(bool full)
 {
   DBUG_ENTER("JOIN::cleanup");
+  DBUG_PRINT("enter", ("full %u", (uint) full));
 
   if (table)
   {
-    JOIN_TAB *tab,*end;
+    JOIN_TAB *tab;
     /*
       Only a sorted table may be cached.  This sorted table is always the
       first non const table in join->table
     */
-    if (tables > const_tables) // Test for not-const tables
+    if (table_count > const_tables) // Test for not-const tables
     {
       free_io_cache(table[const_tables]);
       filesort_free_buffers(table[const_tables],full);
@@ -7142,16 +10126,24 @@ void JOIN::cleanup(bool full)
 
     if (full)
     {
-      for (tab= join_tab, end= tab+tables; tab != end; tab++)
+      for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+      {
 	tab->cleanup();
+      }
       table= 0;
     }
     else
     {
-      for (tab= join_tab, end= tab+tables; tab != end; tab++)
+      for (tab= first_linear_tab(this, WITH_CONST_TABLES); tab; 
+           tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
       {
 	if (tab->table)
+        {
+          DBUG_PRINT("info", ("close index: %s.%s", tab->table->s->db.str,
+                              tab->table->s->table_name.str));
           tab->table->file->ha_index_or_rnd_end();
+        }
       }
     }
   }
@@ -7213,6 +10205,8 @@ void JOIN::cleanup(bool full)
   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
   @endcode
+
+  TODO: this function checks ORDER::used, which can only have a value of 0.
 */
 
 static bool
@@ -7286,9 +10280,8 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
 
 static void update_depend_map(JOIN *join)
 {
-  JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables;
-
-  for (; join_tab != end ; join_tab++)
+  for (JOIN_TAB *join_tab= first_linear_tab(join, WITH_CONST_TABLES); join_tab;
+       join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
   {
     TABLE_REF *ref= &join_tab->ref;
     table_map depend_map=0;
@@ -7299,11 +10292,11 @@ static void update_depend_map(JOIN *join)
     ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT;
     depend_map&= ~OUTER_REF_TABLE_BIT;
     for (JOIN_TAB **tab=join->map2table;
-	 depend_map ;
-	 tab++,depend_map>>=1 )
+         depend_map ;
+         tab++,depend_map>>=1 )
     {
       if (depend_map & 1)
-	ref->depend_map|=(*tab)->ref.depend_map;
+        ref->depend_map|=(*tab)->ref.depend_map;
     }
   }
 }
@@ -7311,7 +10304,7 @@ static void update_depend_map(JOIN *join)
 
 /** Update the dependency map for the sort order. */
 
-static void update_depend_map(JOIN *join, ORDER *order)
+static void update_depend_map_for_order(JOIN *join, ORDER *order)
 {
   for (; order ; order=order->next)
   {
@@ -7358,21 +10351,30 @@ static ORDER *
 remove_const(JOIN *join,ORDER *first_order, COND *cond,
              bool change_list, bool *simple_order)
 {
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
     return change_list ? 0 : first_order;		// No need to sort
 
   ORDER *order,**prev_ptr;
-  table_map first_table= join->join_tab[join->const_tables].table->map;
+  table_map first_table;
   table_map not_const_tables= ~join->const_table_map;
   table_map ref;
+  bool first_is_base_table= FALSE;
   DBUG_ENTER("remove_const");
+  
+  LINT_INIT(first_table); /* protected by first_is_base_table */
+  if (join->join_tab[join->const_tables].table)
+  {
+    first_table= join->join_tab[join->const_tables].table->map;
+    first_is_base_table= TRUE;
+  }
+  
 
   /*
     Cleanup to avoid interference of calls of this function for
     ORDER BY and GROUP BY
   */
   for (JOIN_TAB *tab= join->join_tab + join->const_tables;
-       tab < join->join_tab + join->tables;
+       tab < join->join_tab + join->table_count;
        tab++)
     tab->cached_eq_ref_table= FALSE;
 
@@ -7381,7 +10383,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
 
   /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
 
-  update_depend_map(join, first_order);
+  update_depend_map_for_order(join, first_order);
   for (order=first_order; order ; order=order->next)
   {
     table_map order_tables=order->item[0]->used_tables();
@@ -7396,16 +10398,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
           table for all queries containing more than one table, ROLLUP, and an
           outer join.
          */
-        (join->tables > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
+        (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
         join->outer_join))
       *simple_order=0;				// Must do a temp table to sort
     else if (!(order_tables & not_const_tables))
     {
-      if (order->item[0]->with_subselect && 
-          !(join->select_lex->options & SELECT_DESCRIBE))
-        order->item[0]->val_str(&order->item[0]->str_value);
+      if (order->item[0]->with_subselect)
+      {
+        /*
+          Delay the evaluation of constant ORDER and/or GROUP expressions that
+          contain subqueries until the execution phase.
+        */
+        join->exec_const_order_group_cond.push_back(order->item[0]);
+      }
       DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
-      continue;					// skip const item
+      continue;
     }
     else
     {
@@ -7419,7 +10426,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
 	  DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
 	  continue;
 	}
-	if ((ref=order_tables & (not_const_tables ^ first_table)))
+	if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table)))
 	{
 	  if (!(order_tables & first_table) &&
               only_eq_ref_tables(join,first_order, ref))
@@ -7445,7 +10452,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
 
 
 static int
-return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
+return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables,
 		 List<Item> &fields, bool send_row, ulonglong select_options,
 		 const char *info, Item *having, List<Item> &all_fields)
 {
@@ -7461,10 +10468,24 @@ return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
 
   if (send_row)
   {
+    /*
+      Set all tables to have NULL row. This is needed as we will be evaluating
+      HAVING condition.
+    */
+    List_iterator<TABLE_LIST> ti(tables);
+    TABLE_LIST *table;
+    while ((table= ti++))
+    {
+      /*
+        Don't touch semi-join materialization tables, as the above join_free()
+        call has freed them (and HAVING clause can't have references to them 
+        anyway).
+      */
+      if (!table->is_jtbm())
+        mark_as_null_row(table->table);		// All fields are NULL
+    }
     List_iterator_fast<Item> it(all_fields);
     Item *item;
-    for (TABLE_LIST *table= tables; table; table= table->next_leaf)
-      mark_as_null_row(table->table);		// All fields are NULL
     /*
       Inform all items (especially aggregating) to calculate HAVING correctly,
       also we will need it for sending results.
@@ -7497,8 +10518,11 @@ static void clear_tables(JOIN *join)
     must clear only the non-const tables, as const tables
     are not re-calculated.
   */
-  for (uint i=join->const_tables ; i < join->tables ; i++)
-    mark_as_null_row(join->table[i]);		// All fields are NULL
+  for (uint i= 0 ; i < join->table_count ; i++)
+  {
+    if (!(join->table[i]->map & join->const_table_map))
+      mark_as_null_row(join->table[i]);		// All fields are NULL
+  }
 }
 
 /*****************************************************************************
@@ -7660,24 +10684,26 @@ finish:
 static bool check_simple_equality(Item *left_item, Item *right_item,
                                   Item *item, COND_EQUAL *cond_equal)
 {
+  Item *orig_left_item= left_item;
+  Item *orig_right_item= right_item;
   if (left_item->type() == Item::REF_ITEM &&
       ((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
   {
-    if (((Item_ref*)left_item)->depended_from)
+    if (((Item_ref*)left_item)->get_depended_from())
       return FALSE;
     left_item= left_item->real_item();
   }
   if (right_item->type() == Item::REF_ITEM &&
       ((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
   {
-    if (((Item_ref*)right_item)->depended_from)
+    if (((Item_ref*)right_item)->get_depended_from())
       return FALSE;
     right_item= right_item->real_item();
   }
   if (left_item->type() == Item::FIELD_ITEM &&
       right_item->type() == Item::FIELD_ITEM &&
-      !((Item_field*)left_item)->depended_from &&
-      !((Item_field*)right_item)->depended_from)
+      !((Item_field*)left_item)->get_depended_from() &&
+      !((Item_field*)right_item)->get_depended_from())
   {
     /* The predicate the form field1=field2 is processed */
 
@@ -7726,7 +10752,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     { 
       /* left item was found in the current or one of the upper levels */
       if (! right_item_equal)
-        left_item_equal->add((Item_field *) right_item);
+        left_item_equal->add(orig_right_item);
       else
       {
         /* Merge two multiple equalities forming a new one */
@@ -7741,12 +10767,13 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     { 
       /* left item was not found neither the current nor in upper levels  */
       if (right_item_equal)
-        right_item_equal->add((Item_field *) left_item);
+        right_item_equal->add(orig_left_item);
       else 
       {
         /* None of the fields was found in multiple equalities */
-        Item_equal *item_equal= new Item_equal((Item_field *) left_item,
-                                               (Item_field *) right_item);
+        Item_equal *item_equal= new Item_equal(orig_left_item,
+                                               orig_right_item,
+                                               FALSE);
         cond_equal->current_level.push_back(item_equal);
       }
     }
@@ -7757,18 +10784,21 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
     /* The predicate of the form field=const/const=field is processed */
     Item *const_item= 0;
     Item_field *field_item= 0;
+    Item *orig_field_item= 0;
     if (left_item->type() == Item::FIELD_ITEM &&
-        !((Item_field*)left_item)->depended_from &&
-        right_item->const_item())
+        !((Item_field*)left_item)->get_depended_from() &&
+        right_item->const_item() && !right_item->is_expensive())
     {
-      field_item= (Item_field*) left_item;
+      orig_field_item= orig_left_item;
+      field_item= (Item_field *) left_item;
       const_item= right_item;
     }
     else if (right_item->type() == Item::FIELD_ITEM &&
-             !((Item_field*)right_item)->depended_from &&
-             left_item->const_item())
+             !((Item_field*)right_item)->get_depended_from() &&
+             left_item->const_item() && !left_item->is_expensive())
     {
-      field_item= (Item_field*) right_item;
+      orig_field_item= orig_right_item;
+      field_item= (Item_field *) right_item;
       const_item= left_item;
     }
 
@@ -7783,7 +10813,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
         if (!item)
         {
           Item_func_eq *eq_item;
-          if ((eq_item= new Item_func_eq(left_item, right_item)))
+          if ((eq_item= new Item_func_eq(orig_left_item, orig_right_item)))
             return FALSE;
           eq_item->set_cmp_func();
           eq_item->quick_fix_field();
@@ -7808,11 +10838,11 @@ static bool check_simple_equality(Item *left_item, Item *right_item,
           already contains a constant and its value is  not equal to
           the value of const_item.
         */
-        item_equal->add(const_item, field_item);
+        item_equal->add_const(const_item, orig_field_item);
       }
       else
       {
-        item_equal= new Item_equal(const_item, field_item);
+        item_equal= new Item_equal(const_item, orig_field_item, TRUE);
         cond_equal->current_level.push_back(item_equal);
       }
       return TRUE;
@@ -8054,10 +11084,10 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       List_iterator_fast<Item_equal> it(cond_equal.current_level);
       while ((item_equal= it++))
       {
-        item_equal->fix_length_and_dec();
+        item_equal->fix_fields(thd, NULL);
         item_equal->update_used_tables();
         set_if_bigger(thd->lex->current_select->max_equal_elems,
-                      item_equal->members());  
+                      item_equal->n_field_items());  
       }
 
       ((Item_cond_and*)cond)->cond_equal= cond_equal;
@@ -8088,7 +11118,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       args->concat((List<Item> *)&cond_equal.current_level);
     }
   }
-  else if (cond->type() == Item::FUNC_ITEM)
+  else if (cond->type() == Item::FUNC_ITEM ||
+           cond->real_item()->type() == Item::FIELD_ITEM)
   {
     List<Item> eq_list;
     /*
@@ -8110,10 +11141,10 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       {
         if ((item_equal= cond_equal.current_level.pop()))
         {
-          item_equal->fix_length_and_dec();
+          item_equal->fix_fields(thd, NULL);
           item_equal->update_used_tables();
           set_if_bigger(thd->lex->current_select->max_equal_elems,
-                        item_equal->members());  
+                        item_equal->n_field_items());  
           return item_equal;
 	}
 
@@ -8134,7 +11165,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
           item_equal->fix_length_and_dec();
           item_equal->update_used_tables();
           set_if_bigger(thd->lex->current_select->max_equal_elems,
-                        item_equal->members());  
+                        item_equal->n_field_items());  
         }
         and_cond->cond_equal= cond_equal;
         args->concat((List<Item> *)&cond_equal.current_level);
@@ -8148,7 +11179,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond,
       as soon the field is not of a string type or the field reference is
       an argument of a comparison predicate. 
     */ 
-    uchar *is_subst_valid= (uchar *) 1;
+    uchar* is_subst_valid= (uchar *) Item::ANY_SUBST;
     cond= cond->compile(&Item::subst_argument_checker,
                         &is_subst_valid, 
                         &Item::equal_fields_propagator,
@@ -8239,6 +11270,7 @@ static COND *build_equal_items(THD *thd, COND *cond,
     if (cond->type() == Item::COND_ITEM &&
         ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
       cond_equal= &((Item_cond_and*) cond)->cond_equal;
+
     else if (cond->type() == Item::FUNC_ITEM &&
              ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
     {
@@ -8282,10 +11314,14 @@ static COND *build_equal_items(THD *thd, COND *cond,
 /**
   Compare field items by table order in the execution plan.
 
+    If field1 and field2 belong to different tables then
     field1 considered as better than field2 if the table containing
     field1 is accessed earlier than the table containing field2.   
     The function finds out what of two fields is better according
     this criteria.
+    If field1 and field2 belong to the same table then the result
+    of comparison depends on whether the fields are parts of
+    the key that are used to access this table.  
 
   @param field1          first field item to compare
   @param field2          second field item to compare
@@ -8299,18 +11335,24 @@ static COND *build_equal_items(THD *thd, COND *cond,
     0  otherwise
 */
 
-static int compare_fields_by_table_order(Item_field *field1,
-                                  Item_field *field2,
-                                  void *table_join_idx)
+static int compare_fields_by_table_order(Item *field1,
+                                         Item *field2,
+                                         void *table_join_idx)
 {
   int cmp= 0;
   bool outer_ref= 0;
-  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  Item_field *f1= (Item_field *) (field1->real_item());
+  Item_field *f2= (Item_field *) (field2->real_item());
+  if (f1->const_item())
+    return 1;
+  if (f2->const_item())
+    return -1;
+  if (f2->used_tables() & OUTER_REF_TABLE_BIT)
   {  
     outer_ref= 1;
     cmp= -1;
   }
-  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  if (f1->used_tables() & OUTER_REF_TABLE_BIT)
   {
     outer_ref= 1;
     cmp++;
@@ -8318,11 +11360,75 @@ static int compare_fields_by_table_order(Item_field *field1,
   if (outer_ref)
     return cmp;
   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
-  cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr];
+  
+  JOIN_TAB *tab1= idx[f1->field->table->tablenr];
+  JOIN_TAB *tab2= idx[f2->field->table->tablenr];
+  
+  /* 
+    if one of the table is inside a merged SJM nest and another one isn't,
+    compare SJM bush roots of the tables.
+  */
+  if (tab1->bush_root_tab != tab2->bush_root_tab)
+  {
+    if (tab1->bush_root_tab)
+      tab1= tab1->bush_root_tab;
+
+    if (tab2->bush_root_tab)
+      tab2= tab2->bush_root_tab;
+  }
+  
+  cmp= tab2 - tab1;
+
+  if (!cmp)
+  {
+    JOIN_TAB *tab= idx[f1->field->table->tablenr];
+    uint keyno= MAX_KEY;
+    if (tab->ref.key_parts)
+      keyno= tab->ref.key;
+    else if (tab->select && tab->select->quick)
+       keyno = tab->select->quick->index;
+    if (keyno != MAX_KEY)
+    {
+      if (f2->field->part_of_key.is_set(keyno))
+        cmp= -1;
+      if (f1->field->part_of_key.is_set(keyno))
+        cmp++;
+      if (!cmp)
+      {
+        KEY *key_info= tab->table->key_info + keyno;
+        for (uint i= 0; i < key_info->key_parts; i++)
+	{
+          Field *fld= key_info->key_part[i].field;
+          if (fld->eq(f2->field))
+	  {
+	    cmp= -1;
+            break;
+          }
+          if (fld->eq(f1->field))
+	  {
+	    cmp= 1;
+            break;
+          }
+        }
+      }              
+    }              
+    else   
+      cmp= f2->field->field_index-f1->field->field_index;
+  }
   return cmp < 0 ? -1 : (cmp ? 1 : 0);
 }
 
 
+static TABLE_LIST* embedding_sjm(Item *item)
+{
+  Item_field *item_field= (Item_field *) (item->real_item());
+  TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+  if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
+    return nest;
+  else
+    return NULL;
+}
+
 /**
   Generate minimal set of simple equalities equivalent to a multiple equality.
 
@@ -8356,6 +11462,23 @@ static int compare_fields_by_table_order(Item_field *field1,
     So only t1.a=t3.c should be left in the lower level.
     If cond is equal to 0, then not more then one equality is generated
     and a pointer to it is returned as the result of the function.
+    
+    Equality substutution and semi-join materialization nests:
+
+       In case join order looks like this:
+
+          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 
+
+        We must not construct equalities like 
+
+           outer_tbl1.col = inner_tbl1.col 
+
+        because they would get attached to inner_tbl1 and will get evaluated
+        during materialization phase, when we don't have current value of
+        outer_tbl1.col.
+
+        Item_equal::get_first() also takes similar measures for dealing with
+        equality substitution in presense of SJM nests.
 
   @return
     - The condition with generated simple equalities or
@@ -8363,54 +11486,113 @@ static int compare_fields_by_table_order(Item_field *field1,
     - 0, otherwise.
 */
 
-static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
-                                  Item_equal *item_equal)
+Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal)
 {
   List<Item> eq_list;
   Item_func_eq *eq_item= 0;
   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
     return new Item_int((longlong) 0,1); 
   Item *item_const= item_equal->get_const();
-  Item_equal_iterator it(*item_equal);
+  Item_equal_fields_iterator it(*item_equal);
   Item *head;
   DBUG_ASSERT(!cond || cond->type() == Item::COND_ITEM);
 
+  TABLE_LIST *current_sjm= NULL;
+  Item *current_sjm_head= NULL;
+
+  /* 
+    Pick the "head" item: the constant one or the first in the join order
+    (if the first in the join order happends to be inside an SJM nest, that's
+    ok, because this is where the value will be unpacked after
+    materialization).
+  */
   if (item_const)
     head= item_const;
   else
   {
-    head= item_equal->get_first();
+    TABLE_LIST *emb_nest;
+    head= item_equal->get_first(NO_PARTICULAR_TAB, NULL);
     it++;
+    if ((emb_nest= embedding_sjm(head)))
+    {
+      current_sjm= emb_nest;
+      current_sjm_head= head;
+    }
   }
-  Item_field *item_field;
-  while ((item_field= it++))
+
+  Item *field_item;
+  /*
+    For each other item, generate "item=head" equality (except the tables that 
+    are within SJ-Materialization nests, for those "head" is defined
+    differently)
+  */
+  while ((field_item= it++))
   {
-    Item_equal *upper= item_field->find_item_equal(upper_levels);
-    Item_field *item= item_field;
+    Item_equal *upper= field_item->find_item_equal(upper_levels);
+    Item *item= field_item;
+    TABLE_LIST *field_sjm= embedding_sjm(field_item);
+    if (!field_sjm)
+    { 
+      current_sjm= NULL;
+      current_sjm_head= NULL;
+    }      
+
+    /* 
+      Check if "field_item=head" equality is already guaranteed to be true 
+      on upper AND-levels.
+    */
     if (upper)
     { 
       if (item_const && upper->get_const())
         item= 0;
       else
       {
-        Item_equal_iterator li(*item_equal);
-        while ((item= li++) != item_field)
+        Item_equal_fields_iterator li(*item_equal);
+        while ((item= li++) != field_item)
         {
-          if (item->find_item_equal(upper_levels) == upper)
+          if (embedding_sjm(item) == field_sjm && 
+              item->find_item_equal(upper_levels) == upper)
             break;
         }
       }
     }
-    if (item == item_field)
+    
+    bool produce_equality= test(item == field_item);
+    if (!item_const && field_sjm && field_sjm != current_sjm)
+    {
+      /* Entering an SJM nest */
+      current_sjm_head= field_item;
+      if (!field_sjm->sj_mat_info->is_sj_scan)
+        produce_equality= FALSE;
+    }
+
+    if (produce_equality)
     {
       if (eq_item)
         eq_list.push_back(eq_item);
-      eq_item= new Item_func_eq(item_field, head);
+      
+      /*
+        If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
+        doesn't include a constant, we should produce equality with the first
+        of the equals in this SJM.
+
+        In other cases, get the "head" item, which is either first of the
+        equals on top level, or the constant.
+      */
+      Item *head_item= (!item_const && current_sjm)? current_sjm_head: head;
+      Item *head_real_item=  head_item->real_item();
+      if (head_real_item->type() == Item::FIELD_ITEM)
+        head_item= head_real_item;
+      
+      eq_item= new Item_func_eq(field_item->real_item(), head_item);
+
       if (!eq_item)
         return 0;
       eq_item->set_cmp_func();
       eq_item->quick_fix_field();
     }
+    current_sjm= field_sjm;
   }
 
   if (!cond)
@@ -8455,6 +11637,9 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
     After this the function retrieves all other conjuncted
     predicates substitute every field reference by the field reference
     to the first equal field or equal constant if there are any.
+
+  @param context_tab     Join tab that 'cond' will be attached to, or 
+                         NO_PARTICULAR_TAB. See notes above.
   @param cond            condition to process
   @param cond_equal      multiple equalities to take into consideration
   @param table_join_idx  index to tables determining field preference
@@ -8465,11 +11650,37 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
     new fields in multiple equality item of lower levels. We want
     the order in them to comply with the order of upper levels.
 
+    context_tab may be used to specify which join tab `cond` will be
+    attached to. There are two possible cases:
+
+    1. context_tab != NO_PARTICULAR_TAB
+       We're doing substitution for an Item which will be evaluated in the 
+       context of a particular item. For example, if the optimizer does a 
+       ref access on "tbl1.key= expr" then
+        = equality substitution will be perfomed on 'expr'
+        = it is known in advance that 'expr' will be evaluated when 
+          table t1 is accessed.
+       Note that in this kind of substution we never have to replace Item_equal
+       objects. For example, for
+
+        t.key= func(col1=col2 AND col2=const)
+       
+       we will not build Item_equal or do equality substution (if we decide to,
+       this function will need to be fixed to handle it)
+
+    2. context_tab == NO_PARTICULAR_TAB
+       We're doing substitution in WHERE/ON condition, which is not yet 
+       attached to any particular join_tab. We will use information about the
+       chosen join order to make "optimal" substitions, i.e. those that allow
+       to apply filtering as soon as possible. See eliminate_item_equal() and 
+       Item_equal::get_first() for details.
+
   @return
     The transformed condition
 */
 
-static COND* substitute_for_best_equal_field(COND *cond,
+static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab,
+                                             COND *cond,
                                              COND_EQUAL *cond_equal,
                                              void *table_join_idx)
 {
@@ -8485,7 +11696,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
     if (and_level)
     {
       cond_equal= &((Item_cond_and *) cond)->cond_equal;
-      cond_list->disjoin((List<Item> *) &cond_equal->current_level);
+      cond_list->disjoin((List<Item> *) &cond_equal->current_level);/* remove Item_equal objects from the AND. */
 
       List_iterator_fast<Item_equal> it(cond_equal->current_level);      
       while ((item_equal= it++))
@@ -8498,7 +11709,8 @@ static COND* substitute_for_best_equal_field(COND *cond,
     Item *item;
     while ((item= li++))
     {
-      Item *new_item= substitute_for_best_equal_field(item, cond_equal,
+      Item *new_item= substitute_for_best_equal_field(context_tab,
+                                                      item, cond_equal,
                                                       table_join_idx);
       /*
         This works OK with PS/SP re-execution as changes are made to
@@ -8538,8 +11750,19 @@ static COND* substitute_for_best_equal_field(COND *cond,
     cond= eliminate_item_equal(0, cond_equal, item_equal);
     return cond ? cond : org_cond;
   }
-  else
-    cond->transform(&Item::replace_equal_field, 0);
+  else 
+  {
+    while (cond_equal)
+    {
+      List_iterator_fast<Item_equal> it(cond_equal->current_level);
+      while((item_equal= it++))
+      {
+        REPLACE_EQUAL_FIELD_ARG arg= {item_equal, context_tab};
+        cond= cond->transform(&Item::replace_equal_field, (uchar *) &arg);
+      }
+      cond_equal= cond_equal->upper_levels;
+    }
+  }
   return cond;
 }
 
@@ -8579,11 +11802,10 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
     if (!contained_const && item_equal->get_const())
     {
       /* Update keys for range analysis */
-      Item_equal_iterator it(*item_equal);
-      Item_field *item_field;
-      while ((item_field= it++))
+      Item_equal_fields_iterator it(*item_equal);
+      while (it++)
       {
-        Field *field= item_field->field;
+        Field *field= it.get_curr_field();
         JOIN_TAB *stat= field->table->reginfo.join_tab;
         key_map possible_keys= field->key_start;
         possible_keys.intersect(field->table->keys_in_use_for_query);
@@ -8599,7 +11821,7 @@ static void update_const_equal_items(COND *cond, JOIN_TAB *tab)
           TABLE *tab= field->table;
           KEYUSE *use;
           for (use= stat->keyuse; use && use->table == tab; use++)
-            if (possible_keys.is_set(use->key) && 
+            if (!use->is_for_hash_join() && possible_keys.is_set(use->key) && 
                 tab->key_info[use->key].key_part[use->keypart].field ==
                 field)
               tab->const_key_parts[use->key]|= use->keypart_map;
@@ -8693,37 +11915,6 @@ change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-/**
-  Remove additional condition inserted by IN/ALL/ANY transformation.
-
-  @param conds   condition for processing
-
-  @return
-    new conditions
-*/
-
-static Item *remove_additional_cond(Item* conds)
-{
-  if (conds->name == in_additional_cond)
-    return 0;
-  if (conds->type() == Item::COND_ITEM)
-  {
-    Item_cond *cnd= (Item_cond*) conds;
-    List_iterator<Item> li(*(cnd->argument_list()));
-    Item *item;
-    while ((item= li++))
-    {
-      if (item->name == in_additional_cond)
-      {
-	li.remove();
-	if (cnd->argument_list()->elements == 1)
-	  return cnd->argument_list()->head();
-	return conds;
-      }
-    }
-  }
-  return conds;
-}
 
 static void
 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
@@ -8761,10 +11952,10 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
     {
       Item_func_eq *func=(Item_func_eq*) cond;
       Item **args= func->arguments();
-      bool left_const= args[0]->const_item();
-      bool right_const= args[1]->const_item();
+      bool left_const= args[0]->const_item() && !args[0]->is_expensive();
+      bool right_const= args[1]->const_item() && !args[1]->is_expensive();
       if (!(left_const && right_const) &&
-          args[0]->result_type() == args[1]->result_type())
+          args[0]->cmp_type() == args[1]->cmp_type())
       {
 	if (right_const)
 	{
@@ -8785,7 +11976,6 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-
 /**
   Simplify joins replacing outer joins by inner joins whenever it's
   possible.
@@ -8880,13 +12070,18 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
     consider any plan where one of the inner tables is before some of outer
     tables.
 
-
+  IMPLEMENTATION
     The function is implemented by a recursive procedure.  On the recursive
     ascent all attributes are calculated, all outer joins that can be
     converted are replaced and then all unnecessary braces are removed.
     As join list contains join tables in the reverse order sequential
     elimination of outer joins does not require extra recursive calls.
 
+  SEMI-JOIN NOTES
+    Remove all semi-joins that have are within another semi-join (i.e. have
+    an "ancestor" semi-join nest)
+
+  EXAMPLES
     Here is an example of a join query with invalid cross references:
     @code
       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b 
@@ -8896,14 +12091,15 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   @param join_list   list representation of the join to be converted
   @param conds       conditions to add on expressions for converted joins
   @param top         true <=> conds is the where condition
-
+  @param in_sj       TRUE <=> processing semi-join nest's children
   @return
     - The new condition, if success
     - 0, otherwise
 */
 
 static COND *
-simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
+simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
+               bool in_sj)
 {
   TABLE_LIST *table;
   NESTED_JOIN *nested_join;
@@ -8939,7 +12135,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
            the corresponding on expression is added to E. 
 	*/ 
         expr= simplify_joins(join, &nested_join->join_list,
-                             expr, FALSE);
+                             expr, FALSE, in_sj || table->sj_on_expr);
 
         if (!table->prep_on_expr || expr != table->on_expr)
         {
@@ -8951,7 +12147,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
       }
       nested_join->used_tables= (table_map) 0;
       nested_join->not_null_tables=(table_map) 0;
-      conds= simplify_joins(join, &nested_join->join_list, conds, top);
+      conds= simplify_joins(join, &nested_join->join_list, conds, top, 
+                            in_sj || table->sj_on_expr);
       used_tables= nested_join->used_tables;
       not_null_tables= nested_join->not_null_tables;  
       /* The following two might become unequal after table elimination: */
@@ -8961,7 +12158,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
     {
       if (!table->prep_on_expr)
         table->prep_on_expr= table->on_expr;
-      used_tables= table->table->map;
+      used_tables= table->get_map();
       if (conds)
         not_null_tables= conds->not_null_tables();
     }
@@ -8983,7 +12180,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
       table->outer_join= 0;
       if (table->on_expr)
       {
-        /* Add on expression to the where condition. */
+        /* Add ON expression to the WHERE or upper-level ON condition. */
         if (conds)
         {
           conds= and_conds(conds, table->on_expr);
@@ -9018,7 +12215,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
         table->embedding->on_expr_dep_tables|= table->on_expr->used_tables();
       }
       else
-        table->dep_tables&= ~table->table->map;
+        table->dep_tables&= ~table->get_map();
     }
 
     if (prev_table)
@@ -9031,7 +12228,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
         prev_table->dep_tables|= table->on_expr_dep_tables;
         table_map prev_used_tables= prev_table->nested_join ?
 	                            prev_table->nested_join->used_tables :
-	                            prev_table->table->map;
+	                            prev_table->get_map();
         /* 
           If on expression contains only references to inner tables
           we still make the inner tables dependent on the outer tables.
@@ -9052,26 +12249,51 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
     prev_table= table;
   }
     
-  /* Flatten nested joins that can be flattened. */
+  /* 
+    Flatten nested joins that can be flattened.
+    no ON expression and not a semi-join => can be flattened.
+  */
   TABLE_LIST *right_neighbor= NULL;
   li.rewind();
   while ((table= li++))
   {
     bool fix_name_res= FALSE;
     nested_join= table->nested_join;
-    if (nested_join && !table->on_expr)
+    if (table->sj_on_expr && !in_sj)
+    {
+       /*
+         If this is a semi-join that is not contained within another semi-join, 
+         leave it intact (otherwise it is flattened)
+       */
+      join->select_lex->sj_nests.push_back(table);
+
+      /* 
+        Also, walk through semi-join children and mark those that are now
+        top-level
+      */
+      TABLE_LIST *tbl;
+      List_iterator<TABLE_LIST> it(nested_join->join_list);
+      while ((tbl= it++))
+      {
+        if (!tbl->on_expr && tbl->table)
+          tbl->table->maybe_null= FALSE;
+      }
+    }
+    else if (nested_join && !table->on_expr)
     {
       TABLE_LIST *tbl;
       List_iterator<TABLE_LIST> it(nested_join->join_list);
+      List<TABLE_LIST> repl_list;  
       while ((tbl= it++))
       {
         tbl->embedding= table->embedding;
         if (!tbl->embedding && !tbl->on_expr && tbl->table)
           tbl->table->maybe_null= FALSE;
         tbl->join_list= table->join_list;
+        repl_list.push_back(tbl);
         tbl->dep_tables|= table->dep_tables;
       }
-      li.replace(nested_join->join_list);
+      li.replace(repl_list);
       /* Need to update the name resolution table chain when flattening joins */
       fix_name_res= TRUE;
       table= *li.ref();
@@ -9089,8 +12311,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 /**
   Assign each nested join structure a bit in nested_join_map.
 
-    Assign each nested join structure (except "confluent" ones - those that
-    embed only one element) a bit in nested_join_map.
+    Assign each nested join structure (except ones that embed only one element
+    and so are redundant) a bit in nested_join_map.
 
   @param join          Join being processed
   @param join_list     List of tables
@@ -9099,7 +12321,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 
   @note
     This function is called after simplify_joins(), when there are no
-    redundant nested joins, #non_confluent_nested_joins <= #tables_in_join so
+    redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
     we will not run out of bits in nested_join_map.
 
   @return
@@ -9128,7 +12350,9 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
       */
       if (nested_join->n_tables != 1)
       {
-        nested_join->nj_map= (nested_join_map) 1 << first_unused++;
+        /* Don't assign bits to sj-nests */
+        if (table->on_expr)
+          nested_join->nj_map= (nested_join_map) 1 << first_unused++;
         first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
                                                     first_unused);
       }
@@ -9157,14 +12381,16 @@ static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
   while ((table= li++))
   {
     NESTED_JOIN *nested_join;
+    bool is_eliminated_nest= FALSE;
     if ((nested_join= table->nested_join))
     {
       nested_join->counter= 0;
-      //nested_join->n_tables= my_count_bits(nested_join->used_tables & 
-      //                                     ~join->eliminated_tables);
       nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
+      if (!nested_join->n_tables)
+        is_eliminated_nest= TRUE;
     }
-    if (!table->table || (table->table->map & ~join->eliminated_tables))
+    if ((table->nested_join && !is_eliminated_nest) || 
+        (!table->nested_join && (table->table->map & ~join->eliminated_tables)))
       n++;
   }
   DBUG_RETURN(n);
@@ -9279,28 +12505,31 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
     Do update counters for "pairs of brackets" that we've left (marked as
     X,Y,Z in the above picture)
   */
-  for (;next_emb; next_emb= next_emb->embedding)
+  for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
   {
-    next_emb->nested_join->counter++;
-    if (next_emb->nested_join->counter == 1)
+    if (!next_emb->sj_on_expr)
     {
-      /* 
-        next_emb is the first table inside a nested join we've "entered". In
-        the picture above, we're looking at the 'X' bracket. Don't exit yet as
-        X bracket might have Y pair bracket.
+      next_emb->nested_join->counter++;
+      if (next_emb->nested_join->counter == 1)
+      {
+        /* 
+          next_emb is the first table inside a nested join we've "entered". In
+          the picture above, we're looking at the 'X' bracket. Don't exit yet as
+          X bracket might have Y pair bracket.
+        */
+        join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      }
+      
+      if (next_emb->nested_join->n_tables !=
+          next_emb->nested_join->counter)
+        break;
+
+      /*
+        We're currently at Y or Z-bracket as depicted in the above picture.
+        Mark that we've left it and continue walking up the brackets hierarchy.
       */
-      join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
     }
-    
-    if (next_emb->nested_join->n_tables !=
-        next_emb->nested_join->counter)
-      break;
-
-    /*
-      We're currently at Y or Z-bracket as depicted in the above picture.
-      Mark that we've left it and continue walking up the brackets hierarchy.
-    */
-    join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
   }
   return FALSE;
 }
@@ -9362,27 +12591,129 @@ static void restore_prev_nj_state(JOIN_TAB *last)
 {
   TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
   JOIN *join= last->join;
-  for (;last_emb != NULL; last_emb= last_emb->embedding)
+  for (;last_emb != NULL && last_emb != join->emb_sjm_nest; 
+       last_emb= last_emb->embedding)
   {
-    NESTED_JOIN *nest= last_emb->nested_join;
-    DBUG_ASSERT(nest->counter > 0);
-    
-    bool was_fully_covered= nest->is_fully_covered();
-    
-    if (--nest->counter == 0)
-      join->cur_embedding_map&= ~nest->nj_map;
-    
-    if (!was_fully_covered)
-      break;
+    if (!last_emb->sj_on_expr)
+    {
+      NESTED_JOIN *nest= last_emb->nested_join;
+      DBUG_ASSERT(nest->counter > 0);
+      
+      bool was_fully_covered= nest->is_fully_covered();
+      
+      if (--nest->counter == 0)
+        join->cur_embedding_map&= ~nest->nj_map;
+      
+      if (!was_fully_covered)
+        break;
+      
+      join->cur_embedding_map|= nest->nj_map;
+    }
+  }
+}
+
+
+
+/*
+  Change access methods not to use join buffering and adjust costs accordingly
+
+  SYNOPSIS
+    optimize_wo_join_buffering()
+      join
+      first_tab               The first tab to do re-optimization for
+      last_tab                The last tab to do re-optimization for
+      last_remaining_tables   Bitmap of tables that are not in the
+                              [0...last_tab] join prefix
+      first_alt               TRUE <=> Use the LooseScan plan for the first_tab
+      no_jbuf_before          Don't allow to use join buffering before this
+                              table
+      reopt_rec_count     OUT New output record count
+      reopt_cost          OUT New join prefix cost
+
+  DESCRIPTION
+    Given a join prefix [0; ... first_tab], change the access to the tables
+    in the [first_tab; last_tab] not to use join buffering. This is needed
+    because some semi-join strategies cannot be used together with the join
+    buffering.
+    In general case the best table order in [first_tab; last_tab] range with
+    join buffering is different from the best order without join buffering but
+    we don't try finding a better join order. (TODO ask Igor why did we
+    chose not to do this in the end. that's actually the difference from the 
+    forking approach)
+*/
+
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *outer_rec_count, double *reopt_cost)
+{
+  double cost, rec_count;
+  table_map reopt_remaining_tables= last_remaining_tables;
+  uint i;
+
+  if (first_tab > join->const_tables)
+  {
+    cost=      join->positions[first_tab - 1].prefix_cost.total_cost();
+    rec_count= join->positions[first_tab - 1].prefix_record_count;
+  }
+  else
+  {
+    cost= 0.0;
+    rec_count= 1;
+  }
+
+  *outer_rec_count= rec_count;
+  for (i= first_tab; i <= last_tab; i++)
+    reopt_remaining_tables |= join->positions[i].table->table->map;
+  
+  /*
+    best_access_path() optimization depends on the value of 
+    join->cur_sj_inner_tables. Our goal in this function is to do a
+    re-optimization with disabled join buffering, but no other changes.
+    In order to achieve this, cur_sj_inner_tables needs have the same 
+    value it had during the original invocations of best_access_path. 
+
+    We know that this function, optimize_wo_join_buffering() is called to
+    re-optimize semi-join join order range, which allows to conclude that 
+    the "original" value of cur_sj_inner_tables was 0.
+  */
+  table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
+  join->cur_sj_inner_tables= 0;
+
+  for (i= first_tab; i <= last_tab; i++)
+  {
+    JOIN_TAB *rs= join->positions[i].table;
+    POSITION pos, loose_scan_pos;
     
-    join->cur_embedding_map|= nest->nj_map;
+    if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
+    {
+      /* Find the best access method that would not use join buffering */
+      best_access_path(join, rs, reopt_remaining_tables, i, 
+                       TRUE, rec_count,
+                       &pos, &loose_scan_pos);
+    }
+    else 
+      pos= join->positions[i];
+
+    if ((i == first_tab && first_alt))
+      pos= loose_scan_pos;
+
+    reopt_remaining_tables &= ~rs->table->map;
+    rec_count *= pos.records_read;
+    cost += pos.read_time;
+
+    if (!rs->emb_sj_nest)
+      *outer_rec_count *= pos.records_read;
   }
+  join->cur_sj_inner_tables= save_cur_sj_inner_tables;
+
+  *reopt_cost= cost;
 }
 
 
 static COND *
 optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
-              Item::cond_result *cond_value)
+              Item::cond_result *cond_value, COND_EQUAL **cond_equal)
 {
   THD *thd= join->thd;
   DBUG_ENTER("optimize_cond");
@@ -9390,7 +12721,7 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
   if (!conds)
   {
     *cond_value= Item::COND_TRUE;
-    build_equal_items(join->thd, NULL, NULL, join_list, &join->cond_equal);
+    build_equal_items(join->thd, NULL, NULL, join_list, cond_equal);
   }  
   else
   {
@@ -9403,9 +12734,8 @@ optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
       multiple equality contains a constant.
     */ 
     DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
-    conds= build_equal_items(join->thd, conds, NULL, join_list,
-                             &join->cond_equal);
-    DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
+    conds= build_equal_items(join->thd, conds, NULL, join_list, cond_equal);
+     DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
 
     /* change field = field to field = const for each found field = const */
     propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
@@ -9558,13 +12888,13 @@ remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
 	}
       }
     }
-    if (cond->const_item())
+    if (cond->const_item() && !cond->is_expensive())
     {
       *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
       return (COND*) 0;
     }
   }
-  else if (cond->const_item())
+  else if (cond->const_item() && !cond->is_expensive())
   {
     *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
     return (COND*) 0;
@@ -9584,40 +12914,36 @@ remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
   return cond;					// Point at next and level
 }
 
-/* 
+/**
   Check if equality can be used in removing components of GROUP BY/DISTINCT
   
-  SYNOPSIS
-    test_if_equality_guarantees_uniqueness()
-      l          the left comparison argument (a field if any)
-      r          the right comparison argument (a const of any)
-  
-  DESCRIPTION    
-    Checks if an equality predicate can be used to take away 
-    DISTINCT/GROUP BY because it is known to be true for exactly one 
-    distinct value (e.g. <expr> == <const>).
-    Arguments must be of the same type because e.g. 
-    <string_field> = <int_const> may match more than 1 distinct value from 
-    the column. 
-    We must take into consideration and the optimization done for various 
-    string constants when compared to dates etc (see Item_int_with_ref) as
-    well as the collation of the arguments.
+  @param    l          the left comparison argument (a field if any)
+  @param    r          the right comparison argument (a const of any)
   
-  RETURN VALUE  
-    TRUE    can be used
-    FALSE   cannot be used
+  @details
+  Checks if an equality predicate can be used to take away 
+  DISTINCT/GROUP BY because it is known to be true for exactly one 
+  distinct value (e.g. <expr> == <const>).
+  Arguments must be compared in the native type of the left argument
+  and (for strings) in the native collation of the left argument.
+  Otherwise, for example,
+  <string_field> = <int_const> may match more than 1 distinct value or
+  the <string_field>.
+
+  @note We don't need to aggregate l and r collations here, because r -
+  the constant item - has already been converted to a proper collation
+  for comparison. We only need to compare this collation with field's collation.
+
+  @retval true    can be used
+  @retval false   cannot be used
 */
 static bool
 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
 {
   return r->const_item() &&
-    /* elements must be compared as dates */
-     (Arg_comparator::can_compare_as_dates(l, r, 0) ||
-      /* or of the same result type */
-      (r->result_type() == l->result_type() &&
-       /* and must have the same collation if compared as strings */
-       (l->result_type() != STRING_RESULT ||
-        l->collation.collation == r->collation.collation)));
+    item_cmp_type(l->cmp_type(), r->cmp_type()) == l->cmp_type() &&
+    (l->cmp_type() != STRING_RESULT ||
+     l->collation.collation == r->collation.collation);
 }
 
 /**
@@ -9799,15 +13125,12 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table,
   case STRING_RESULT:
     DBUG_ASSERT(item->collation.collation);
   
-    enum enum_field_types type;
     /*
       DATE/TIME and GEOMETRY fields have STRING_RESULT result type. 
       To preserve type they needed to be handled separately.
     */
-    if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
-        type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
-        type == MYSQL_TYPE_NEWDATE ||
-        type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+    if (item->cmp_type() == TIME_RESULT ||
+        item->field_type() == MYSQL_TYPE_GEOMETRY)
       new_field= item->tmp_table_field_from_field_type(table, 1);
     /* 
       Make sure that the blob fits into a Field_varstring which has 
@@ -9948,13 +13271,30 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
       If item have to be able to store NULLs but underlaid field can't do it,
       create_tmp_field_from_field() can't be used for tmp field creation.
     */
-    if (field->maybe_null && field->in_rollup && !field->field->maybe_null())
+    if (((field->maybe_null && field->in_rollup) ||      
+	(thd->create_tmp_table_for_derived  &&    /* for mat. view/dt */
+	 orig_item && orig_item->maybe_null)) &&         
+        !field->field->maybe_null())
     {
+      bool save_maybe_null= FALSE;
+      /*
+        The item the ref points to may have maybe_null flag set while
+        the ref doesn't have it. This may happen for outer fields
+        when the outer query decided at some point after name resolution phase
+        that this field might be null. Take this into account here.
+      */
+      if (orig_item)
+      {
+        save_maybe_null= item->maybe_null;
+        item->maybe_null= orig_item->maybe_null;
+      }
       result= create_tmp_field_from_item(thd, item, table, NULL,
                                          modify_item, convert_blob_length);
       *from_field= field->field;
       if (result && modify_item)
         field->result_field= result;
+      if (orig_item)
+        item->maybe_null= save_maybe_null;
     } 
     else if (table_cant_handle_bit_fields && field->field->type() ==
              MYSQL_TYPE_BIT)
@@ -10029,6 +13369,8 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
   case Item::REF_ITEM:
   case Item::NULL_ITEM:
   case Item::VARBIN_ITEM:
+  case Item::CACHE_ITEM:
+  case Item::EXPR_CACHE_ITEM:
     if (make_copy_field)
     {
       DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -10065,7 +13407,9 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
   bitmap_init(&table->tmp_set,
               (my_bitmap_map*) (bitmaps+ 2*bitmap_buffer_size(field_count)),
               field_count, FALSE);
-
+  bitmap_init(&table->eq_join_set,
+              (my_bitmap_map*) (bitmaps+ 3*bitmap_buffer_size(field_count)),
+              field_count, FALSE);
   /* write_set and all_set are copies of read_set */
   table->def_write_set= table->def_read_set;
   table->s->all_set= table->def_read_set;
@@ -10100,16 +13444,11 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
                               be used for name resolving; can be "".
 */
 
-#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
-#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
-#define RATIO_TO_PACK_ROWS	       2
-#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
-
 TABLE *
-create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
 		 ORDER *group, bool distinct, bool save_sum_fields,
 		 ulonglong select_options, ha_rows rows_limit,
-		 char *table_alias)
+                 char *table_alias, bool do_not_open)
 {
   MEM_ROOT *mem_root_save, own_root;
   TABLE *table;
@@ -10177,6 +13516,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       group=0;					// Can't use group key
     else for (ORDER *tmp=group ; tmp ; tmp=tmp->next)
     {
+      /*
+        marker == 4 means two things:
+        - store NULLs in the key, and
+        - convert BIT fields to 64-bit long, needed because MEMORY tables
+          can't index BIT fields.
+      */
       (*tmp->item)->marker=4;			// Store null in key
       if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB)
 	using_unique_constraint=1;
@@ -10218,7 +13563,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                         &tmpname, (uint) strlen(path)+1,
                         &group_buff, (group && ! using_unique_constraint ?
                                       param->group_length : 0),
-                        &bitmaps, bitmap_buffer_size(field_count)*3,
+                        &bitmaps, bitmap_buffer_size(field_count)*4,
                         NullS))
   {
     if (temp_pool_slot != MY_BIT_NONE)
@@ -10250,7 +13595,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   table->alias.set(table_alias, strlen(table_alias), table_alias_charset);
 
   table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
-  table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
   table->map=1;
   table->temp_pool_slot = temp_pool_slot;
   table->copy_blobs= 1;
@@ -10258,13 +13602,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   table->quick_keys.init();
   table->covering_keys.init();
   table->merge_keys.init();
+  table->intersect_keys.init();
   table->keys_in_use_for_query.init();
+  table->no_rows_with_nulls= param->force_not_null_cols;
 
   table->s= share;
   init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
   share->blob_field= blob_field;
   share->blob_ptr_size= portable_sizeof_char_ptr;
-  share->db_low_byte_first=1;                // True for HEAP, MyISAM and Maria
   share->table_charset= param->table_charset;
   share->primary_key= MAX_KEY;               // Indicate no primary key
   share->keys_for_keyread.init();
@@ -10288,7 +13633,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       {
         if (item->used_tables() & OUTER_REF_TABLE_BIT)
           item->update_used_tables();
-        if (type == Item::SUBSELECT_ITEM ||
+        if ((item->real_type() == Item::SUBSELECT_ITEM) ||
             (item->used_tables() & ~OUTER_REF_TABLE_BIT))
         {
 	  /*
@@ -10339,6 +13684,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
           thd->mem_root= mem_root_save;
           arg= sum_item->set_arg(i, thd, new Item_field(new_field));
           thd->mem_root= &table->mem_root;
+          if (param->force_not_null_cols)
+	  {
+            new_field->flags|= NOT_NULL_FLAG;
+            new_field->null_ptr= NULL;
+          }
 	  if (!(new_field->flags & NOT_NULL_FLAG))
           {
 	    null_count++;
@@ -10374,18 +13724,51 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                          group != 0,
                          !force_copy_fields &&
                            (not_all_columns || group !=0),
-                         item->marker == 4, force_copy_fields,
+                         /*
+                           If item->marker == 4 then we force create_tmp_field
+                           to create a 64-bit longs for BIT fields because HEAP
+                           tables can't index BIT fields directly. We do the same
+                           for distinct, as we want the distinct index to be
+                           usable in this case too.
+                         */
+                         item->marker == 4  || param->bit_fields_as_long,
+                         force_copy_fields,
                          param->convert_blob_length);
 
       if (!new_field)
       {
 	if (thd->is_fatal_error)
 	  goto err;				// Got OOM
-	continue;				// Some kindf of const item
+	continue;				// Some kind of const item
       }
       if (type == Item::SUM_FUNC_ITEM)
-	((Item_sum *) item)->result_field= new_field;
+      {
+        Item_sum *agg_item= (Item_sum *) item;
+        /*
+          Update the result field only if it has never been set, or if the
+          created temporary table is not to be used for subquery
+          materialization.
+
+          The reason is that for subqueries that require materialization as part
+          of their plan, we create the 'external' temporary table needed for IN
+          execution, after the 'internal' temporary table needed for grouping.
+          Since both the external and the internal temporary tables are created
+          for the same list of SELECT fields of the subquery, setting
+          'result_field' for each invocation of create_tmp_table overrides the
+           previous value of 'result_field'.
+
+          The condition below prevents the creation of the external temp table
+          to override the 'result_field' that was set for the internal temp table.
+        */
+        if (!agg_item->result_field || !param->materialized_subquery)
+          agg_item->result_field= new_field;
+      }
       tmp_from_field++;
+      if (param->force_not_null_cols)
+      {
+        new_field->flags|= NOT_NULL_FLAG;
+        new_field->null_ptr= NULL;
+      }
       reclength+=new_field->pack_length();
       if (!(new_field->flags & NOT_NULL_FLAG))
 	null_count++;
@@ -10439,6 +13822,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   *reg_field= 0;
   *blob_field= 0;				// End marker
   share->fields= field_count;
+  share->column_bitmap_size= bitmap_buffer_size(share->fields);
 
   /* If result table is small; use a heap */
   /* future: storage engine selection can be made dynamic? */
@@ -10618,7 +14002,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   }
 
   param->copy_field_end=copy;
-  param->recinfo=recinfo;
+  param->recinfo= recinfo;              	// Pointer to after last field
   store_record(table,s->default_values);        // Make empty default record
 
   if (thd->variables.tmp_table_size == ~ (ulonglong) 0)		// No limit
@@ -10646,9 +14030,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     param->group_buff=group_buff;
     share->keys=1;
     share->uniques= test(using_unique_constraint);
-    table->key_info=keyinfo;
+    table->key_info= table->s->key_info= keyinfo;
+    table->keys_in_use_for_query.set_bit(0);
+    share->keys_in_use.set_bit(0);
     keyinfo->key_part=key_part_info;
-    keyinfo->flags=HA_NOSAME;
+    keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
     keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
     keyinfo->key_length=0;
     keyinfo->rec_per_key=0;
@@ -10662,6 +14048,9 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       bool maybe_null=(*cur_group->item)->maybe_null;
       key_part_info->null_bit=0;
       key_part_info->field=  field;
+      key_part_info->fieldnr= field->field_index + 1;
+      if (cur_group == group)
+        field->key_start.set_bit(0);
       key_part_info->offset= field->offset(table->record[0]);
       key_part_info->length= (uint16) field->key_length();
       key_part_info->type=   (uint8) field->key_type();
@@ -10733,29 +14122,41 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     */
     DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));
 
-    null_pack_length-=hidden_null_pack_length;
-    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
-			 test(null_pack_length));
-    table->distinct= 1;
-    share->keys= 1;
     if (blob_count)
     {
-      using_unique_constraint=1;
+      /*
+        Special mode for index creation in MyISAM used to support unique
+        indexes on blobs with arbitrary length. Such indexes cannot be
+        used for lookups.
+      */
       share->uniques= 1;
     }
+    null_pack_length-=hidden_null_pack_length;
+    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
+			 (share->uniques ? test(null_pack_length) : 0));
+    table->distinct= 1;
+    share->keys= 1;
     if (!(key_part_info= (KEY_PART_INFO*)
           alloc_root(&table->mem_root,
                      keyinfo->key_parts * sizeof(KEY_PART_INFO))))
       goto err;
     bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO));
-    table->key_info=keyinfo;
+    table->keys_in_use_for_query.set_bit(0);
+    share->keys_in_use.set_bit(0);
+    table->key_info= table->s->key_info= keyinfo;
     keyinfo->key_part=key_part_info;
-    keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
-    keyinfo->key_length=(uint16) reclength;
+    keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+    keyinfo->key_length= 0;  // Will compute the sum of the parts below.
     keyinfo->name= (char*) "distinct_key";
     keyinfo->algorithm= HA_KEY_ALG_UNDEF;
     keyinfo->rec_per_key=0;
-    if (null_pack_length)
+
+    /*
+      Create an extra field to hold NULL bits so that unique indexes on
+      blobs can distinguish NULL from 0. This extra field is not needed
+      when we do not use UNIQUE indexes for blobs.
+    */
+    if (null_pack_length && share->uniques)
     {
       key_part_info->null_bit=0;
       key_part_info->offset=hidden_null_pack_length;
@@ -10771,6 +14172,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       key_part_info->field->init(table);
       key_part_info->key_type=FIELDFLAG_BINARY;
       key_part_info->type=    HA_KEYTYPE_BINARY;
+      key_part_info->fieldnr= key_part_info->field->field_index + 1;
       key_part_info++;
     }
     /* Create a distinct key over the columns we are going to return */
@@ -10778,10 +14180,47 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
 	 i < field_count;
 	 i++, reg_field++, key_part_info++)
     {
-      key_part_info->null_bit=0;
       key_part_info->field=    *reg_field;
+      (*reg_field)->flags |= PART_KEY_FLAG;
+      if (key_part_info == keyinfo->key_part)
+        (*reg_field)->key_start.set_bit(0);
+      key_part_info->null_bit= (*reg_field)->null_bit;
+      key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+                                          (uchar*) table->record[0]);
+
       key_part_info->offset=   (*reg_field)->offset(table->record[0]);
       key_part_info->length=   (uint16) (*reg_field)->pack_length();
+      key_part_info->fieldnr= (*reg_field)->field_index + 1;
+      /* TODO:
+        The below method of computing the key format length of the
+        key part is a copy/paste from opt_range.cc, and table.cc.
+        This should be factored out, e.g. as a method of Field.
+        In addition it is not clear if any of the Field::*_length
+        methods is supposed to compute the same length. If so, it
+        might be reused.
+      */
+      key_part_info->store_length= key_part_info->length;
+
+      if ((*reg_field)->real_maybe_null())
+      {
+        key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+        key_part_info->key_part_flag |= HA_NULL_PART;
+      }
+      if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+          (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR ||
+          (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
+      {
+        if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+            (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
+          key_part_info->key_part_flag|= HA_BLOB_PART;
+        else
+          key_part_info->key_part_flag|= HA_VAR_LENGTH_PART;
+
+        key_part_info->store_length+=HA_KEY_BLOB_LENGTH;
+      }
+
+      keyinfo->key_length+= key_part_info->store_length;
+
       key_part_info->type=     (uint8) (*reg_field)->key_type();
       key_part_info->key_type =
 	((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
@@ -10794,13 +14233,20 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   if (thd->is_fatal_error)				// If end of memory
     goto err;					 /* purecov: inspected */
   share->db_record_offset= 1;
-  if (share->db_type() == TMP_ENGINE_HTON)
+  table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
+                                          (table->group || table->distinct));
+
+  if (!do_not_open)
   {
-    if (create_internal_tmp_table(table,param,select_options))
+    if (share->db_type() == TMP_ENGINE_HTON)
+    {
+      if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
+                                    &param->recinfo, select_options))
+        goto err;
+    }
+    if (open_tmp_table(table))
       goto err;
   }
-  if (open_tmp_table(table))
-    goto err;
 
   // Make empty record so random data is not written to disk
   empty_record(table);
@@ -10818,6 +14264,7 @@ err:
 }
 
 
+
 /****************************************************************************/
 
 /**
@@ -10858,7 +14305,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
                         &share, sizeof(*share),
                         &field, (field_count + 1) * sizeof(Field*),
                         &blob_field, (field_count+1) *sizeof(uint),
-                        &bitmaps, bitmap_buffer_size(field_count)*3,
+                        &bitmaps, bitmap_buffer_size(field_count)*4,
                         NullS))
     return 0;
 
@@ -10946,7 +14393,7 @@ error:
 }
 
 
-static bool open_tmp_table(TABLE *table)
+bool open_tmp_table(TABLE *table)
 {
   int error;
   if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR,
@@ -10957,22 +14404,54 @@ static bool open_tmp_table(TABLE *table)
     table->db_stat=0;
     return(1);
   }
+  table->db_stat= HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
   (void) table->file->extra(HA_EXTRA_QUICK);		/* Faster */
+  table->created= TRUE;
   return(0);
 }
 
 
 #if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
 
-/* Create internal Aria temporary table */
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that descrimes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal emporary table according to passed description. The is
+    assumed to have one unique index or constraint.
+
+    The passed array or ENGINE_COLUMNDEF structures must have this form:
 
-static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
-                                      ulonglong options)
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options)
 {
   int error;
   MARIA_KEYDEF keydef;
   MARIA_UNIQUEDEF uniquedef;
-  KEY *keyinfo=param->keyinfo;
   TABLE_SHARE *share= table->s;
   MARIA_CREATE_INFO create_info;
   DBUG_ENTER("create_internal_tmp_table");
@@ -10990,6 +14469,13 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
 	keyinfo->key_parts > table->file->max_key_parts() ||
 	share->uniques)
     {
+      if (!share->uniques && !(keyinfo->flags & HA_NOSAME))
+      {
+        my_error(ER_INTERNAL_ERROR, MYF(0),
+                 "Using too big key for internal temp tables");
+        DBUG_RETURN(1);
+      }
+
       /* Can't create a key; Make a unique constraint instead of a key */
       share->keys=    0;
       share->uniques= 1;
@@ -11000,17 +14486,17 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
       uniquedef.null_are_equal=1;
 
       /* Create extra column for hash value */
-      bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
-      param->recinfo->type=   FIELD_CHECK;
-      param->recinfo->length= MARIA_UNIQUE_HASH_LENGTH;
-      param->recinfo++;
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type=   FIELD_CHECK;
+      (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
       share->reclength+=      MARIA_UNIQUE_HASH_LENGTH;
     }
     else
     {
-      /* Create an unique key */
+      /* Create a key */
       bzero((char*) &keydef,sizeof(keydef));
-      keydef.flag=HA_NOSAME;
+      keydef.flag= keyinfo->flags & HA_NOSAME;
       keydef.keysegs=  keyinfo->key_parts;
       keydef.seg= seg;
     }
@@ -11070,13 +14556,14 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
     a group by and no sum functions or if distinct is used.
   */
   if ((error= maria_create(share->table_name.str,
+                           table->no_rows ? NO_RECORD :
                            (share->reclength < 64 &&
                             !share->blob_fields ? STATIC_RECORD :
-                            ((!param->sum_func_count && table->group) ||
-                             table->distinct) ? DYNAMIC_RECORD : BLOCK_RECORD),
+                            table->used_for_duplicate_elimination ?
+                            DYNAMIC_RECORD : BLOCK_RECORD),
                            share->keys, &keydef,
-                           (uint) (param->recinfo-param->start_recinfo),
-                           param->start_recinfo,
+                           (uint) (*recinfo-start_recinfo),
+                           start_recinfo,
                            share->uniques, &uniquedef,
                            &create_info,
                            HA_CREATE_TMP_TABLE)))
@@ -11095,11 +14582,13 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
 
 
 bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
-                                         TMP_TABLE_PARAM *param,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
                                          int error,
                                          bool ignore_last_dupp_key_error)
 {
-  return create_internal_tmp_table_from_heap2(thd, table, param, error,
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
                                               ignore_last_dupp_key_error,
                                               maria_hton,
                                               "converting HEAP to Aria");
@@ -11107,15 +14596,46 @@ bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
 
 #else
 
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that descrimes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal emporary table according to passed description. The is
+    assumed to have one unique index or constraint.
+
+    The passed array or ENGINE_COLUMNDEF structures must have this form:
+
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
 /* Create internal MyISAM temporary table */
 
-static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
-                                      ulonglong options)
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo,
+                               ulonglong options)
 {
   int error;
   MI_KEYDEF keydef;
   MI_UNIQUEDEF uniquedef;
-  KEY *keyinfo=param->keyinfo;
   TABLE_SHARE *share= table->s;
   DBUG_ENTER("create_internal_tmp_table");
 
@@ -11142,17 +14662,18 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
       uniquedef.null_are_equal=1;
 
       /* Create extra column for hash value */
-      bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
-      param->recinfo->type= FIELD_CHECK;
-      param->recinfo->length=MI_UNIQUE_HASH_LENGTH;
-      param->recinfo++;
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type= FIELD_CHECK;
+      (*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
       share->reclength+=MI_UNIQUE_HASH_LENGTH;
     }
     else
     {
       /* Create an unique key */
       bzero((char*) &keydef,sizeof(keydef));
-      keydef.flag=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
+      keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY |
+                    HA_PACK_KEY);
       keydef.keysegs=  keyinfo->key_parts;
       keydef.seg= seg;
     }
@@ -11202,8 +14723,8 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
     create_info.data_file_length= ~(ulonglong) 0;
 
   if ((error=mi_create(share->table_name.str, share->keys, &keydef,
-		       (uint) (param->recinfo-param->start_recinfo),
-		       param->start_recinfo,
+		       (uint) (*recinfo-start_recinfo),
+		       start_recinfo,
 		       share->uniques, &uniquedef,
 		       &create_info,
 		       HA_CREATE_TMP_TABLE)))
@@ -11213,7 +14734,9 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
     goto err;
   }
   status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+  table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
   share->db_record_offset= 1;
+  table->created= TRUE;
   DBUG_RETURN(0);
  err:
   DBUG_RETURN(1);
@@ -11225,11 +14748,13 @@ static bool create_internal_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
 */
 
 bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
-                                         TMP_TABLE_PARAM *param,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
                                          int error,
                                          bool ignore_last_dupp_key_error)
 {
-  return create_internal_tmp_table_from_heap2(thd, table, param, error,
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
                                               ignore_last_dupp_key_error,
                                               myisam_hton,
                                               "converting HEAP to MyISAM");
@@ -11246,7 +14771,8 @@ bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
 
 static bool
 create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
-                                     TMP_TABLE_PARAM *param,
+                                     ENGINE_COLUMNDEF *start_recinfo,
+                                     ENGINE_COLUMNDEF **recinfo, 
                                      int error,
                                      bool ignore_last_dupp_key_error,
                                      handlerton *hton,
@@ -11255,9 +14781,8 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   TABLE new_table;
   TABLE_SHARE share;
   const char *save_proc_info;
-  int write_err;
+  int write_err= 0;
   DBUG_ENTER("create_internal_tmp_table_from_heap2");
-  LINT_INIT(write_err);
 
   if (table->s->db_type() != heap_hton || 
       error != HA_ERR_RECORD_FILE_FULL)
@@ -11281,8 +14806,11 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   save_proc_info=thd->proc_info;
   thd_proc_info(thd, proc_info);
 
-  if (create_internal_tmp_table(&new_table, param,
-			      thd->lex->select_lex.options | thd->options))
+  new_table.no_rows= table->no_rows;
+  if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
+                                recinfo,
+                                thd->lex->select_lex.options | 
+                                thd->options))
     goto err2;
   if (open_tmp_table(&new_table))
     goto err1;
@@ -11291,24 +14819,15 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   table->file->ha_index_or_rnd_end();
   if (table->file->ha_rnd_init_with_error(1))
     DBUG_RETURN(1);
-  if (table->no_rows)
-  {
+  if (new_table.no_rows)
     new_table.file->extra(HA_EXTRA_NO_ROWS);
-    new_table.no_rows=1;
+  else
+  {
+    /* update table->file->stats.records */
+    table->file->info(HA_STATUS_VARIABLE);
+    new_table.file->ha_start_bulk_insert(table->file->stats.records);
   }
 
-#ifdef TO_BE_DONE_LATER_IN_4_1
-  /*
-    To use start_bulk_insert() (which is new in 4.1) we need to find
-    all places where a corresponding end_bulk_insert() should be put.
-  */
-  table->file->info(HA_STATUS_VARIABLE); /* update table->file->stats.records */
-  new_table.file->ha_start_bulk_insert(table->file->stats.records);
-#else
-  /* HA_EXTRA_WRITE_CACHE can stay until close, no need to disable it */
-  new_table.file->extra(HA_EXTRA_WRITE_CACHE);
-#endif
-
   /*
     copy all old rows from heap table to MyISAM table
     This is the only code that uses record[1] to read/write but this
@@ -11317,7 +14836,7 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   */
   while (!table->file->ha_rnd_next(new_table.record[1]))
   {
-    write_err= new_table.file->ha_write_row(new_table.record[1]);
+    write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
     DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
     if (write_err)
       goto err;
@@ -11327,8 +14846,10 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
       goto err_killed;
     }
   }
+  if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
+    goto err;
   /* copy row that filled HEAP table */
-  if ((write_err=new_table.file->ha_write_row(table->record[0])))
+  if ((write_err=new_table.file->ha_write_tmp_row(table->record[0])))
   {
     if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
 	!ignore_last_dupp_key_error)
@@ -11337,7 +14858,7 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
 
   /* remove heap table and change to use myisam table */
   (void) table->file->ha_rnd_end();
-  (void) table->file->close();                  // This deletes the table !
+  (void) table->file->ha_close();          // This deletes the table !
   delete table->file;
   table->file=0;
   plugin_unlock(0, table->s->db_plugin);
@@ -11349,8 +14870,8 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   table->file->change_table_ptr(table, table->s);
   table->use_all_columns();
   if (save_proc_info)
-    thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
-                     "Copying to tmp table on disk" : save_proc_info));
+    thd_proc_info(thd, save_proc_info == copy_to_tmp_table ?
+                  "Copying to tmp table on disk" : save_proc_info);
   DBUG_RETURN(0);
 
  err:
@@ -11358,7 +14879,7 @@ create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
   table->file->print_error(write_err, MYF(0));
 err_killed:
   (void) table->file->ha_rnd_end();
-  (void) new_table.file->close();
+  (void) new_table.file->ha_close();
  err1:
   new_table.file->ha_delete_table(new_table.s->table_name.str);
  err2:
@@ -11380,7 +14901,7 @@ free_tmp_table(THD *thd, TABLE *entry)
   save_proc_info=thd->proc_info;
   thd_proc_info(thd, "removing tmp table");
 
-  if (entry->file)
+  if (entry->file && entry->created)
   {
     if (entry->db_stat)
       entry->file->ha_drop_table(entry->s->table_name.str);
@@ -11495,7 +15016,6 @@ Next_select_func setup_end_select_func(JOIN *join)
   @retval
     -1  if error should be sent
 */
-
 static int
 do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 {
@@ -11526,20 +15046,24 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   }
   /* Set up select_end */
   Next_select_func end_select= setup_end_select_func(join);
-  if (join->tables)
+  if (join->table_count)
   {
-    join->join_tab[join->tables-1].next_select= end_select;
-
+    join->join_tab[join->top_join_tab_count - 1].next_select= end_select;
     join_tab=join->join_tab+join->const_tables;
   }
   join->send_records=0;
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
   {
     /*
       HAVING will be checked after processing aggregate functions,
-      But WHERE should checkd here (we alredy have read tables)
+      But WHERE should checked here (we alredy have read tables).
+      Notice that make_join_select() splits all conditions in this case
+      into two groups exec_const_cond and outer_ref_cond.
+      If join->table_count == join->const_tables then it is
+      sufficient to check only the condition pseudo_bits_cond.
     */
-    if (!join->conds || join->conds->val_int())
+    DBUG_ASSERT(join->outer_ref_cond == NULL);
+    if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
     {
       error= (*end_select)(join, 0, 0);
       if (error == NESTED_LOOP_OK || error == NESTED_LOOP_QUERY_LIMIT)
@@ -11556,15 +15080,21 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     }
     else if (join->send_row_on_empty_set())
     {
-      List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
-                                 fields);
-      rc= join->result->send_data(*columns_list) > 0;
+      if (!join->having || join->having->val_int())
+      {
+        List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
+                                   fields);
+        rc= join->result->send_data(*columns_list) > 0;
+      }
     }
   }
   else
   {
-    DBUG_ASSERT(join->tables);
-    error= sub_select(join,join_tab,0);
+    DBUG_ASSERT(join->table_count);
+    if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
+      error= NESTED_LOOP_NO_MORE_ROWS;
+    else
+      error= sub_select(join,join_tab,0);
     if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
       error= sub_select(join,join_tab,1);
     if (error == NESTED_LOOP_QUERY_LIMIT)
@@ -11619,33 +15149,100 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 }
 
 
+int rr_sequential_and_unpack(READ_RECORD *info)
+{
+  int error;
+  if ((error= rr_sequential(info)))
+    return error;
+  
+  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+    (*cp->do_copy)(cp);
+
+  return error;
+}
+
+
+/*
+  Fill the join buffer with partial records, retrieve all full  matches for them   
+
+  SYNOPSIS
+    sub_select_cache()
+      join     pointer to the structure providing all context info for the query
+      join_tab the first next table of the execution plan to be retrieved
+      end_records  true when we need to perform final steps of the retrieval
+
+  DESCRIPTION
+    For a given table Ti= join_tab from the sequence of tables of the chosen 
+    execution plan T1,...,Ti,...,Tn the function just put the partial record
+    t1,...,t[i-1] into the join buffer associated with table Ti unless this
+    is the last record added into the buffer. In this case,  the function 
+    additionally finds all matching full records for all partial
+    records accumulated in the buffer, after which it cleans the buffer up.
+    If a partial join record t1,...,ti is extended utilizing a dynamic
+    range scan then it is not put into the join buffer. Rather all matching
+    records are found for it at once by the function sub_select.
+
+  NOTES
+    The function implements the algorithmic schema for both Blocked Nested
+    Loop Join and Batched Key Access Join. The difference can be seen only at
+    the level of of the implementation of the put_record and join_records
+    virtual methods for the cache object associated with the join_tab.
+    The put_record method accumulates records in the cache, while the 
+    join_records method builds all matching join records and send them into
+    the output stream.  
+      
+  RETURN
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
 enum_nested_loop_state
-sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
 {
   enum_nested_loop_state rc;
+  JOIN_CACHE *cache= join_tab->cache;
+  DBUG_ENTER("sub_select_cache");
+
+  /*
+    This function cannot be called if join_tab has no associated join
+    buffer
+  */
+  DBUG_ASSERT(cache != NULL);
+
+  join_tab->cache->reset_join(join);
 
   if (end_of_records)
   {
-    rc= flush_cached_records(join,join_tab,FALSE);
+    rc= cache->join_records(FALSE);
     if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
-      rc= sub_select(join,join_tab,end_of_records);
-    return rc;
+      rc= sub_select(join, join_tab, end_of_records);
+    DBUG_RETURN(rc);
   }
-  if (join->thd->killed)		// If aborted by user
+  if (join->thd->killed)
   {
+    /* The user has aborted the execution of the query */
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;                   /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);
   }
-  if (join_tab->use_quick != 2 || test_if_quick_select(join_tab) <= 0)
+  if (!test_if_use_dynamic_range_scan(join_tab))
   {
-    if (!store_record_in_cache(&join_tab->cache))
-      return NESTED_LOOP_OK;                     // There is more room in cache
-    return flush_cached_records(join,join_tab,FALSE);
+    if (!cache->put_record())
+      DBUG_RETURN(NESTED_LOOP_OK); 
+    /* 
+      We has decided that after the record we've just put into the buffer
+      won't add any more records. Now try to find all the matching 
+      extensions for all records in the buffer.
+    */ 
+    rc= cache->join_records(FALSE);
+    DBUG_RETURN(rc);
   }
-  rc= flush_cached_records(join, join_tab, TRUE);
+  /*
+     TODO: Check whether we really need the call below and we can't do
+           without it. If it's not the case remove it.
+  */ 
+  rc= cache->join_records(TRUE);
   if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= sub_select(join, join_tab, end_of_records);
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 /**
@@ -11771,18 +15368,33 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 enum_nested_loop_state
 sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 {
+  DBUG_ENTER("sub_select");
+
   join_tab->table->null_row=0;
   if (end_of_records)
-    return (*join_tab->next_select)(join,join_tab+1,end_of_records);
-
+  {
+    enum_nested_loop_state nls=
+      (*join_tab->next_select)(join,join_tab+1,end_of_records);
+    DBUG_RETURN(nls);
+  }
   int error;
   enum_nested_loop_state rc= NESTED_LOOP_OK;
   READ_RECORD *info= &join_tab->read_record;
+   
+  for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table;
+       flush_dups_table;
+       flush_dups_table= flush_dups_table->next_flush_table)
+  {
+    flush_dups_table->sj_weedout_delete_rows();
+  }
+
+  if (!join_tab->preread_init_done && join_tab->preread_init())
+    DBUG_RETURN(NESTED_LOOP_ERROR);
 
   if (join->resume_nested_loop)
   {
     /* If not the last table, plunge down the nested loop */
-    if (join_tab < join->join_tab + join->tables - 1)
+    if (join_tab < join->join_tab + join->top_join_tab_count - 1)
       rc= (*join_tab->next_select)(join, join_tab + 1, 0);
     else
     {
@@ -11806,19 +15418,61 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
       join_tab->last_inner->first_unmatched= join_tab;
       if (join_tab->on_precond && !join_tab->on_precond->val_int())
         rc= NESTED_LOOP_NO_MORE_ROWS;
-    }
+     }
     join->thd->row_count= 0;
+    
+    if (rc != NESTED_LOOP_NO_MORE_ROWS && 
+        (rc= join_tab_execution_startup(join_tab)) < 0)
+      DBUG_RETURN(rc);
+
+    if (join_tab->loosescan_match_tab)
+      join_tab->loosescan_match_tab->found_match= FALSE;
 
     if (rc != NESTED_LOOP_NO_MORE_ROWS)
     {
       error= (*join_tab->read_first_record)(join_tab);
+      if (join_tab->keep_current_rowid)
+        join_tab->table->file->position(join_tab->table->record[0]);    
       rc= evaluate_join_record(join, join_tab, error);
     }
   }
-
-  while (rc == NESTED_LOOP_OK)
+  
+  /* 
+    Note: psergey has added the 2nd part of the following condition; the 
+    change should probably be made in 5.1, too.
+  */
+  bool skip_over= FALSE;
+  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
   {
+    if (join_tab->loosescan_match_tab && 
+        join_tab->loosescan_match_tab->found_match)
+    {
+      KEY *key= join_tab->table->key_info + join_tab->loosescan_key;
+      key_copy(join_tab->loosescan_buf, join_tab->table->record[0], key, 
+               join_tab->loosescan_key_len);
+      skip_over= TRUE;
+    }
+
     error= info->read_record(info);
+
+    if (skip_over && !error) 
+    {
+      if(!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part,
+                  join_tab->loosescan_buf, join_tab->loosescan_key_len))
+      {
+        /* 
+          This is the LooseScan action: skip over records with the same key
+          value if we already had a match for them.
+        */
+        continue;
+      }
+      join_tab->loosescan_match_tab->found_match= FALSE;
+      skip_over= FALSE;
+    }
+
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    
     rc= evaluate_join_record(join, join_tab, error);
   }
 
@@ -11828,16 +15482,23 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 
   if (rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= NESTED_LOOP_OK;
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 
 /**
-  Process one record of the nested loop join.
-
-    This function will evaluate parts of WHERE/ON clauses that are
-    applicable to the partial record on hand and in case of success
-    submit this record to the next level of the nested loop.
+  @brief Process one row of the nested loop join.
+
+  This function will evaluate parts of WHERE/ON clauses that are
+  applicable to the partial row on hand and in case of success
+  submit this row to the next level of the nested loop.
+
+  @param  join     - The join object
+  @param  join_tab - The most inner join_tab being processed
+  @param  error > 0: Error, terminate processing
+                = 0: (Partial) row is available
+                < 0: No more rows available at this level
+  @return Nested loop state (Ok, No_more_rows, Error, Killed)
 */
 
 static enum_nested_loop_state
@@ -11849,16 +15510,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
   COND *select_cond= join_tab->select_cond;
   bool select_cond_result= TRUE;
 
+  DBUG_ENTER("evaluate_join_record");
+  DBUG_PRINT("enter",
+             ("evaluate_join_record join: %p join_tab: %p"
+              " cond: %p error: %d", join, join_tab, select_cond, error));
   if (error > 0 || (join->thd->is_error()))     // Fatal error
-    return NESTED_LOOP_ERROR;
+    DBUG_RETURN(NESTED_LOOP_ERROR);
   if (error < 0)
-    return NESTED_LOOP_NO_MORE_ROWS;
+    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
   if (join->thd->killed)			// Aborted by user
   {
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;               /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
   }
-  DBUG_PRINT("info", ("select cond 0x%lx", (ulong)select_cond));
 
   update_virtual_fields(join->thd, join_tab->table);
 
@@ -11868,7 +15532,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
 
     /* check for errors evaluating the condition */
     if (join->thd->is_error())
-      return NESTED_LOOP_ERROR;
+      DBUG_RETURN(NESTED_LOOP_ERROR);
   }
 
   if (!select_cond || select_cond_result)
@@ -11878,7 +15542,6 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       condition is true => a match is found.
     */
     bool found= 1;
-    bool use_not_exists_opt= 0;
     while (join_tab->first_unmatched && found)
     {
       /*
@@ -11894,8 +15557,6 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       first_unmatched->found= 1;
       for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
       {
-        if (tab->table->reginfo.not_exists_optimize)
-          use_not_exists_opt= 1;
         /* Check all predicates that has just been activated. */
         /*
           Actually all predicates non-guarded by first_unmatched->found
@@ -11906,7 +15567,11 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
         {
           /* The condition attached to table tab is false */
           if (tab == join_tab)
+          {
             found= 0;
+            if (tab->table->reginfo.not_exists_optimize)
+              DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
+          }            
           else
           {
             /*
@@ -11914,7 +15579,10 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
               not to the last table of the current nest level.
             */
             join->return_tab= tab;
-            return NESTED_LOOP_OK;
+            if (tab->table->reginfo.not_exists_optimize)
+              DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
+            else
+              DBUG_RETURN(NESTED_LOOP_OK);
           }
         }
       }
@@ -11928,8 +15596,25 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       join_tab->first_unmatched= first_unmatched;
     }
 
-    if (use_not_exists_opt)
-      return NESTED_LOOP_NO_MORE_ROWS;
+    JOIN_TAB *return_tab= join->return_tab;
+    join_tab->found_match= TRUE;
+
+    if (join_tab->check_weed_out_table && found)
+    {
+      int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
+      if (res == -1)
+        DBUG_RETURN(NESTED_LOOP_ERROR);
+      else if (res == 1)
+        found= FALSE;
+    }
+    else if (join_tab->do_firstmatch)
+    {
+      /* 
+        We should return to the join_tab->do_firstmatch after we have 
+        enumerated all the suffixes for current prefix row combination
+      */
+      return_tab= join_tab->do_firstmatch;
+    }
 
     /*
       It was not just a return to lower loop level when one
@@ -11947,16 +15632,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       /* A match from join_tab is found for the current partial join. */
       rc= (*join_tab->next_select)(join, join_tab+1, 0);
       if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-        return rc;
+        DBUG_RETURN(rc);
+      if (return_tab < join->return_tab)
+        join->return_tab= return_tab;
+
       if (join->return_tab < join_tab)
-        return NESTED_LOOP_OK;
+        DBUG_RETURN(NESTED_LOOP_OK);
       /*
         Test if this was a SELECT DISTINCT query on a table that
         was not in the field list;  In this case we can abort if
         we found a row, as no new rows can be added to the result.
       */
       if (not_used_in_distinct && found_records != join->found_records)
-        return NESTED_LOOP_NO_MORE_ROWS;
+        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
     }
     else
       join_tab->read_record.unlock_row(join_tab);
@@ -11971,10 +15659,9 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
     join->thd->row_count++;
     join_tab->read_record.unlock_row(join_tab);
   }
-  return NESTED_LOOP_OK;
+  DBUG_RETURN(NESTED_LOOP_OK);
 }
 
-
 /**
 
   @details
@@ -12035,109 +15722,32 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
   /*
     The row complemented by nulls satisfies all conditions
     attached to inner tables.
-    Send the row complemented by nulls to be joined with the
-    remaining tables.
   */
-  return (*join_tab->next_select)(join, join_tab+1, 0);
-}
-
-
-static enum_nested_loop_state
-flush_cached_records(JOIN *join,JOIN_TAB *join_tab,bool skip_last)
-{
-  enum_nested_loop_state rc= NESTED_LOOP_OK;
-  int error;
-  READ_RECORD *info;
-  SQL_SELECT *select;
-
-  join_tab->table->null_row= 0;
-  if (!join_tab->cache.records)
-    return NESTED_LOOP_OK;                      /* Nothing to do */
-  if (skip_last)
-    (void) store_record_in_cache(&join_tab->cache); // Must save this for later
-  if (join_tab->use_quick == 2)
+  if (join_tab->check_weed_out_table)
   {
-    if (join_tab->select->quick)
-    {					/* Used quick select last. reset it */
-      delete join_tab->select->quick;
-      join_tab->select->quick=0;
-    }
-  }
-  /* read through all records */
-  if ((error=join_init_read_record(join_tab)))
-  {
-    reset_cache_write(&join_tab->cache);
-    return error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+    int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
+    if (res == -1)
+      return NESTED_LOOP_ERROR;
+    else if (res == 1)
+      return NESTED_LOOP_OK;
   }
-
-  for (JOIN_TAB *tmp= join_tab-1;
-       tmp >= join->join_tab && !tmp->cache.buff; tmp--)
+  else if (join_tab->do_firstmatch)
   {
-    tmp->status=tmp->table->status;
-    tmp->table->status=0;
+    /* 
+      We should return to the join_tab->do_firstmatch after we have 
+      enumerated all the suffixes for current prefix row combination
+    */
+    if (join_tab->do_firstmatch < join->return_tab)
+      join->return_tab= join_tab->do_firstmatch;
   }
 
-  info= &join_tab->read_record;
-  select= join_tab->select;
-
-  do
-  {
-    int err= 0;
-    if (join->thd->killed)
-    {
-      join->thd->send_kill_message();
-      return NESTED_LOOP_KILLED; // Aborted by user /* purecov: inspected */
-    }
-    if (rc == NESTED_LOOP_OK)
-      update_virtual_fields(join->thd, join_tab->table);
-    if (rc == NESTED_LOOP_OK &&
-        (!join_tab->cache.select ||
-         (err= join_tab->cache.select->skip_record(join->thd)) != 0 ))
-    {
-      if (err < 0)
-      {
-        reset_cache_write(&join_tab->cache);
-        return NESTED_LOOP_ERROR;
-      }
-
-      reset_cache_read(&join_tab->cache);
-      for (uint i= (join_tab->cache.records- (skip_last ? 1 : 0)) ; i-- > 0 ;)
-      {
-	read_cached_record(join_tab);
-        err= 0;
-	if (!select || (err= select->skip_record(join->thd)) != 0)
-        {
-          if (err < 0)
-          {
-            reset_cache_write(&join_tab->cache);
-            return NESTED_LOOP_ERROR;
-          }
-          rc= (join_tab->next_select)(join,join_tab+1,0);
-	  if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-          {
-            reset_cache_write(&join_tab->cache);
-            return rc;
-          }
-        }
-      }
-    }
-      
-    rc= NESTED_LOOP_OK;
-
-  } while (!(error=info->read_record(info)));
-
-  if (skip_last)
-    read_cached_record(join_tab);		// Restore current record
-  reset_cache_write(&join_tab->cache);
-  if (error > 0)				// Fatal error
-    return NESTED_LOOP_ERROR;                   /* purecov: inspected */
-  for (JOIN_TAB *tmp2= join_tab-1;
-       tmp2 >= join->join_tab && !tmp2->cache.buff; tmp2--)
-    tmp2->table->status=tmp2->status;
-  return NESTED_LOOP_OK;
+  /*
+    Send the row complemented by nulls to be joined with the
+    remaining tables.
+  */
+  return (*join_tab->next_select)(join, join_tab+1, 0);
 }
 
-
 /*****************************************************************************
   The different ways to read a record
   Returns -1 if row was not found, 0 if row was found and 1 on errors
@@ -12157,8 +15767,13 @@ int report_error(TABLE *table, int error)
     print them to the .err log
   */
   if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT)
+  {
+    push_warning_printf(table->in_use, MYSQL_ERROR::WARN_LEVEL_ERROR, error,
+                        "Got error %d when reading table `%s`.`%s`",
+                        error, table->s->db.str, table->s->table_name.str);
     sql_print_error("Got error %d when reading table '%s'",
 		    error, table->s->path.str);
+  }
   table->file->print_error(error,MYF(0));
   return 1;
 }
@@ -12181,13 +15796,32 @@ static int
 join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 {
   int error;
+  TABLE_LIST *tbl;
   DBUG_ENTER("join_read_const_table");
   TABLE *table=tab->table;
   table->const_table=1;
   table->null_row=0;
   table->status=STATUS_NO_RECORD;
   
-  if (tab->type == JT_SYSTEM)
+  if (tab->table->pos_in_table_list->is_materialized_derived() &&
+      !tab->table->pos_in_table_list->fill_me)
+  {
+    //TODO: don't get here at all
+    /* Skip materialized derived tables/views. */
+    DBUG_RETURN(0);
+  }
+  else if (tab->table->pos_in_table_list->jtbm_subselect && 
+          tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
+  {
+    /* Row will not be found */
+    int res;
+    if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found)
+      res= 0;
+    else
+      res= -1;
+    DBUG_RETURN(res);
+  }
+  else if (tab->type == JT_SYSTEM)
   {
     if ((error=join_read_system(tab)))
     {						// Info for DESCRIBE
@@ -12225,7 +15859,14 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 	DBUG_RETURN(error);
     }
   }
-  if (*tab->on_expr_ref && !table->null_row)
+  /* 
+     Evaluate an on-expression only if it is not considered expensive.
+     This mainly prevents executing subqueries in optimization phase.
+     This is necessary since proper setup for such execution has not been
+     done at this stage.
+  */
+  if (*tab->on_expr_ref && !table->null_row && 
+      !(*tab->on_expr_ref)->is_expensive())
   {
 #if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
     /*
@@ -12244,26 +15885,27 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
   if (!table->null_row)
     table->maybe_null=0;
 
-  /* Check appearance of new constant items in Item_equal objects */
-  JOIN *join= tab->join;
-  if (join->conds)
-    update_const_equal_items(join->conds, tab);
-  TABLE_LIST *tbl;
-  for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
   {
-    TABLE_LIST *embedded;
-    TABLE_LIST *embedding= tbl;
-    do
-    {
-      embedded= embedding;
-      if (embedded->on_expr)
-         update_const_equal_items(embedded->on_expr, tab);
-      embedding= embedded->embedding;
+    JOIN *join= tab->join;
+    List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
+    /* Check appearance of new constant items in Item_equal objects */
+    if (join->conds)
+      update_const_equal_items(join->conds, tab);
+    while ((tbl= ti++))
+    {
+      TABLE_LIST *embedded;
+      TABLE_LIST *embedding= tbl;
+      do
+      {
+        embedded= embedding;
+        if (embedded->on_expr)
+           update_const_equal_items(embedded->on_expr, tab);
+        embedding= embedded->embedding;
+      }
+      while (embedding &&
+             embedding->nested_join->join_list.head() == embedded);
     }
-    while (embedding &&
-           embedding->nested_join->join_list.head() == embedded);
   }
-
   DBUG_RETURN(0);
 }
 
@@ -12345,25 +15987,49 @@ join_read_const(JOIN_TAB *tab)
   return table->status ? -1 : 0;
 }
 
+/*
+  eq_ref access method implementation: "read_first" function
+
+  SYNOPSIS
+    join_read_key()
+      tab  JOIN_TAB of the accessed table
+
+  DESCRIPTION
+    This is "read_fist" function for the eq_ref access method. The difference
+    from ref access function is that is that it has a one-element lookup 
+    cache (see cmp_buffer_with_ref)
+
+  RETURN
+    0  - Ok
+   -1  - Row not found 
+    1  - Error
+*/
+
 
 static int
 join_read_key(JOIN_TAB *tab)
 {
-  int error;
-  TABLE *table= tab->table;
+  return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
+}
 
+
+/*
+  eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
+  not from the join_tab. See join_read_key for detailed synopsis.
+*/
+int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+{
+  int error;
   if (!table->file->inited)
   {
-    if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
-    {
-      table->file->print_error(error, MYF(0));/* purecov: inspected */
-      return 1;                               /* purecov: inspected */
-    }
+    table->file->ha_index_init(table_ref->key, (tab ? tab->sorted : TRUE));
   }
-  if (cmp_buffer_with_ref(tab) ||
+
+  /* TODO: Why don't we do "Late NULLs Filtering" here? */
+  if (cmp_buffer_with_ref(thd, table, table_ref) ||
       (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
   {
-    if (tab->ref.key_err)
+    if (table_ref->key_err)
     {
       table->status=STATUS_NOT_FOUND;
       return -1;
@@ -12372,28 +16038,28 @@ join_read_key(JOIN_TAB *tab)
       Moving away from the current record. Unlock the row
       in the handler if it did not match the partial WHERE.
     */
-    if (tab->ref.has_record && tab->ref.use_count == 0)
+    if (tab && tab->ref.has_record && tab->ref.use_count == 0)
     {
-      tab->read_record.file->unlock_row();
-      tab->ref.has_record= FALSE;
+      tab->read_record.table->file->unlock_row();
+      table_ref->has_record= FALSE;
     }
-    error= table->file->ha_index_read_map(table->record[0],
-                                          tab->ref.key_buff,
-                                          make_prev_keypart_map(tab->ref.key_parts),
-                                          HA_READ_KEY_EXACT);
+    error=table->file->ha_index_read_map(table->record[0],
+                                  table_ref->key_buff,
+                                  make_prev_keypart_map(table_ref->key_parts),
+                                  HA_READ_KEY_EXACT);
     if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
 
     if (! error)
     {
-      tab->ref.has_record= TRUE;
-      tab->ref.use_count= 1;
+      table_ref->has_record= TRUE;
+      table_ref->use_count= 1;
     }
   }
   else if (table->status == 0)
   {
-    DBUG_ASSERT(tab->ref.has_record);
-    tab->ref.use_count++;
+    DBUG_ASSERT(table_ref->has_record);
+    table_ref->use_count++;
   }
   table->null_row=0;
   return table->status ? -1 : 0;
@@ -12452,13 +16118,6 @@ join_read_always_key(JOIN_TAB *tab)
     }
   }
 
-  /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
-  for (uint i= 0 ; i < tab->ref.key_parts ; i++)
-  {
-    if ((tab->ref.null_rejecting & 1 << i) && tab->ref.items[i]->is_null())
-        return -1;
-  }
-
   if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
     return -1;
   if ((error= table->file->ha_index_read_map(table->record[0],
@@ -12564,11 +16223,10 @@ join_init_quick_read_record(JOIN_TAB *tab)
 }
 
 
-int rr_sequential(READ_RECORD *info);
 int init_read_record_seq(JOIN_TAB *tab)
 {
   tab->read_record.read_record= rr_sequential;
-  if (tab->read_record.file->ha_rnd_init_with_error(1))
+  if (tab->read_record.table->file->ha_rnd_init_with_error(1))
     return 1;
   return (*tab->read_record.read_record)(&tab->read_record);
 }
@@ -12579,21 +16237,50 @@ test_if_quick_select(JOIN_TAB *tab)
   delete tab->select->quick;
   tab->select->quick=0;
   return tab->select->test_quick_select(tab->join->thd, tab->keys,
-					(table_map) 0, HA_POS_ERROR, 0);
+					(table_map) 0, HA_POS_ERROR, 0,
+                                        FALSE);
 }
 
 
-static int
-join_init_read_record(JOIN_TAB *tab)
+static 
+bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
+{
+    return (join_tab->use_quick == 2 && test_if_quick_select(join_tab) > 0);
+}
+
+int join_init_read_record(JOIN_TAB *tab)
 {
   if (tab->select && tab->select->quick && tab->select->quick->reset())
     return 1;
+  if (!tab->preread_init_done && tab->preread_init())
+    return 1;
   if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
                        tab->select,1,1, FALSE))
     return 1;
   return (*tab->read_record.read_record)(&tab->read_record);
 }
 
+int
+join_read_record_no_init(JOIN_TAB *tab)
+{
+  Copy_field *save_copy, *save_copy_end;
+  
+  /*
+    init_read_record resets all elements of tab->read_record().
+    Remember things that we don't want to have reset.
+  */
+  save_copy=     tab->read_record.copy_field;
+  save_copy_end= tab->read_record.copy_field_end;
+  
+  init_read_record(&tab->read_record, tab->join->thd, tab->table,
+		   tab->select,1,1, FALSE);
+
+  tab->read_record.copy_field=     save_copy;
+  tab->read_record.copy_field_end= save_copy_end;
+  tab->read_record.read_record= rr_sequential_and_unpack;
+
+  return (*tab->read_record.read_record)(&tab->read_record);
+}
 
 static int
 join_read_first(JOIN_TAB *tab)
@@ -12606,7 +16293,6 @@ join_read_first(JOIN_TAB *tab)
   tab->table->status=0;
   tab->read_record.read_record=join_read_next;
   tab->read_record.table=table;
-  tab->read_record.file=table->file;
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
@@ -12627,7 +16313,7 @@ static int
 join_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->ha_index_next(info->record)))
+  if ((error= info->table->file->ha_index_next(info->record)))
     return report_error(info->table, error);
 
   return 0;
@@ -12645,7 +16331,6 @@ join_read_last(JOIN_TAB *tab)
   tab->table->status=0;
   tab->read_record.read_record=join_read_prev;
   tab->read_record.table=table;
-  tab->read_record.file=table->file;
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
@@ -12663,7 +16348,7 @@ static int
 join_read_prev(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->ha_index_prev(info->record)))
+  if ((error= info->table->file->ha_index_prev(info->record)))
     return report_error(info->table, error);
   return 0;
 }
@@ -12697,7 +16382,7 @@ static int
 join_ft_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->ha_ft_read(info->table->record[0])))
+  if ((error= info->table->file->ha_ft_read(info->table->record[0])))
     return report_error(info->table, error);
   return 0;
 }
@@ -12793,7 +16478,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       if (join->select_options & OPTION_FOUND_ROWS)
       {
 	JOIN_TAB *jt=join->join_tab;
-	if ((join->tables == 1) && !join->tmp_table && !join->sort_and_group
+	if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group
 	    && !join->send_group_parts && !join->having && !jt->select_cond &&
 	    !(jt->select && jt->select->quick) &&
 	    (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
@@ -12844,7 +16529,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	       bool end_of_records)
 {
@@ -13007,12 +16692,14 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     {
       int error;
       join->found_records++;
-      if ((error= table->file->ha_write_row(table->record[0])))
+      if ((error= table->file->ha_write_tmp_row(table->record[0])))
       {
         if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
 	  goto end;
-	if (create_internal_tmp_table_from_heap(join->thd, table, &join->tmp_table_param,
-				    error,1))
+	if (create_internal_tmp_table_from_heap(join->thd, table, 
+                                                join->tmp_table_param.start_recinfo,
+                                                &join->tmp_table_param.recinfo,
+                                                error,1))
 	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
 	table->s->uniques=0;			// To ensure rows are the same
       }
@@ -13069,8 +16756,8 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   {						/* Update old record */
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error= table->file->ha_update_row(table->record[1],
-                                           table->record[0])))
+    if ((error= table->file->ha_update_tmp_row(table->record[1],
+                                               table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -13094,10 +16781,11 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   init_tmptable_sum_functions(join->sum_funcs);
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
-  if ((error= table->file->ha_write_row(table->record[0])))
+  if ((error= table->file->ha_write_tmp_row(table->record[0])))
   {
     if (create_internal_tmp_table_from_heap(join->thd, table,
-                                            &join->tmp_table_param,
+                                            join->tmp_table_param.start_recinfo,
+                                            &join->tmp_table_param.recinfo,
                                             error, 0))
       DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
     /* Change method to update rows */
@@ -13106,7 +16794,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       table->file->print_error(error, MYF(0));/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);         /* purecov: inspected */
     }
-    join->join_tab[join->tables-1].next_select=end_unique_update;
+    join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update;
   }
   join->send_records++;
   DBUG_RETURN(NESTED_LOOP_OK);
@@ -13136,7 +16824,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
 
-  if (!(error= table->file->ha_write_row(table->record[0])))
+  if (!(error= table->file->ha_write_tmp_row(table->record[0])))
     join->send_records++;			// New group
   else
   {
@@ -13152,8 +16840,8 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     }
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error= table->file->ha_update_row(table->record[1],
-                                           table->record[0])))
+    if ((error= table->file->ha_update_tmp_row(table->record[1],
+                                               table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -13164,7 +16852,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		bool end_of_records)
 {
@@ -13196,10 +16884,12 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
                        join->sum_funcs_end[send_group_parts]);
 	if (!join->having || join->having->val_int())
 	{
-          int error= table->file->ha_write_row(table->record[0]);
-          if (error && create_internal_tmp_table_from_heap(join->thd, table,
-                                               &join->tmp_table_param,
-                                               error, 0))
+          int error= table->file->ha_write_tmp_row(table->record[0]);
+          if (error && 
+              create_internal_tmp_table_from_heap(join->thd, table,
+                                                  join->tmp_table_param.start_recinfo,
+                                                  &join->tmp_table_param.recinfo,
+                                                  error, 0))
 	    DBUG_RETURN(NESTED_LOOP_ERROR);
         }
         if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -13247,18 +16937,49 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 *****************************************************************************/
 
 /**
-  @return
-    1 if right_item is used removable reference key on left_item
+  Check if "left_item=right_item" equality is guaranteed to be true by use of
+  [eq]ref access on left_item->field->table.
+
+  SYNOPSIS
+    test_if_ref()
+      root_cond
+      left_item
+      right_item
+
+  DESCRIPTION
+    Check if the given "left_item = right_item" equality is guaranteed to be
+    true by use of [eq_]ref access method.
+
+    We need root_cond as we can't remove ON expressions even if employed ref 
+    access guarantees that they are true. This is because  TODO
+
+  RETURN
+    TRUE   if right_item is used removable reference key on left_item
+    FALSE  Otherwise
+    
 */
 
-static bool test_if_ref(Item_field *left_item,Item *right_item)
+bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
 {
   Field *field=left_item->field;
-  // No need to change const test. We also have to keep tests on LEFT JOIN
-  if (!field->table->const_table && !field->table->maybe_null)
+  JOIN_TAB *join_tab= field->table->reginfo.join_tab;
+  // No need to change const test
+  if (!field->table->const_table && join_tab &&
+      !join_tab->is_ref_for_hash_join() &&
+      (!join_tab->first_inner ||
+       *join_tab->first_inner->on_expr_ref == root_cond))
   {
+    /*
+      If ref access uses "Full scan on NULL key" (i.e. it actually alternates
+      between ref access and full table scan), then no equality can be
+      guaranteed to be true.
+    */
+    if (join_tab->ref.is_access_triggered())
+      return FALSE;
+
     Item *ref_item=part_of_refkey(field->table,field);
-    if (ref_item && ref_item->eq(right_item,1))
+    if (ref_item && (ref_item->eq(right_item,1) || 
+		     ref_item->real_item()->eq(right_item,1)))
     {
       right_item= right_item->real_item();
       if (right_item->type() == Item::FIELD_ITEM)
@@ -13270,7 +16991,7 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
       {
 	/*
 	  We can remove binary fields and numerical fields except float,
-	  as float comparison isn't 100 % secure
+	  as float comparison isn't 100 % safe
 	  We have to keep normal strings to be able to check for end spaces
 	*/
 	if (field->binary() &&
@@ -13286,14 +17007,24 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
   return 0;					// keep test
 }
 
+
 /**
    Extract a condition that can be checked after reading given table
+   @fn make_cond_for_table()
 
    @param cond       Condition to analyze
    @param tables     Tables for which "current field values" are available
-   @param used_table Table that we're extracting the condition for (may
-                     also include PSEUDO_TABLE_BITS, and may be zero)
-   @param exclude_expensive_cond  Do not push expensive conditions
+   @param used_table Table that we're extracting the condition for
+      tables       Tables for which "current field values" are available (this
+                   includes used_table)
+                   (may  also include PSEUDO_TABLE_BITS, and may be zero)
+   @param join_tab_idx_arg
+		     The index of the JOIN_TAB this Item is being extracted
+                     for. MAX_TABLES if there is no corresponding JOIN_TAB.
+   @param exclude_expensive_cond
+		     Do not push expensive conditions
+   @param retain_ref_cond
+                     Retain ref conditions
 
    @retval <>NULL Generated condition
    @retval =NULL  Already checked, OR error
@@ -13323,11 +17054,161 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
      make_cond_for_info_schema() uses similar algorithm as well.
 */ 
 
-static COND *
-make_cond_for_table(COND *cond, table_map tables, table_map used_table)
+static Item *
+make_cond_for_table(THD *thd, Item *cond, table_map tables,
+                    table_map used_table,
+                    int join_tab_idx_arg,
+                    bool exclude_expensive_cond __attribute__((unused)),
+		    bool retain_ref_cond)
+{
+  return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
+                                       join_tab_idx_arg,
+                                       exclude_expensive_cond,
+                                       retain_ref_cond);
+}
+
+
+static Item *
+make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
+                              table_map tables, table_map used_table,
+                              int join_tab_idx_arg,
+                              bool exclude_expensive_cond __attribute__
+                              ((unused)),
+                              bool retain_ref_cond)
+
 {
   if (used_table && !(cond->used_tables() & used_table))
     return (COND*) 0;				// Already checked
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item, 
+                                                tables, used_table,
+						join_tab_idx_arg,
+                                                exclude_expensive_cond,
+                                                retain_ref_cond);
+	if (fix)
+	  new_cond->argument_list()->push_back(fix);
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;			// Always true
+      case 1:
+	return new_cond->argument_list()->head();
+      default:
+	/*
+          Call fix_fields to propagate all properties of the children to
+          the new parent Item. This should not be expensive because all
+	  children of Item_cond_and should be fixed by now.
+	*/
+	new_cond->fix_fields(thd, 0);
+	new_cond->used_tables_cache=
+	  ((Item_cond_and*) cond)->used_tables_cache &
+	  tables;
+	return new_cond;
+      }
+    }
+    else
+    {						// Or list
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
+                                                tables, 0L,
+                                                join_tab_idx_arg,
+                                                exclude_expensive_cond,
+                                                retain_ref_cond);
+	if (!fix)
+	  return (COND*) 0;			// Always true
+	new_cond->argument_list()->push_back(fix);
+      }
+      /*
+        Call fix_fields to propagate all properties of the children to
+        the new parent Item. This should not be expensive because all
+        children of Item_cond_and should be fixed by now.
+      */
+      new_cond->fix_fields(thd, 0);
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  /*
+    Because the following test takes a while and it can be done
+    table_count times, we mark each item that we have examined with the result
+    of the test
+  */
+  if ((cond->marker == 3 && !retain_ref_cond) ||
+      (cond->used_tables() & ~tables))
+    return (COND*) 0;				// Can't check this yet
+
+  if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
+  {
+    cond->set_join_tab_idx(join_tab_idx_arg);
+    return cond;				// Not boolean op
+  }
+
+  if (cond->type() == Item::FUNC_ITEM && 
+      ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
+  {
+    Item *left_item=	((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
+    if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+    if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+  }
+  cond->marker=2;
+  cond->set_join_tab_idx(join_tab_idx_arg);
+  return cond;
+}
+
+
+/*
+  The difference of this from make_cond_for_table() is that we're in the
+  following state:
+    1. conditions referring to 'tables' have been checked
+    2. conditions referring to sjm_tables have been checked, too
+    3. We need condition that couldn't be checked in #1 or #2 but 
+       can be checked when we get both (tables | sjm_tables).
+
+*/
+static COND *
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, 
+                    table_map sjm_tables)
+{
+  /*
+    We assume that conditions that refer to only join prefix tables or 
+    sjm_tables have already been checked.
+  */
+  if ((!(cond->used_tables() & ~tables) || 
+       !(cond->used_tables() & ~sjm_tables)))
+    return (COND*) 0;				// Already checked
+
+  /* AND/OR recursive descent */
   if (cond->type() == Item::COND_ITEM)
   {
     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
@@ -13340,7 +17221,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,used_table);
+	Item *fix=make_cond_after_sjm(root_cond, item, tables, sjm_tables);
 	if (fix)
 	  new_cond->argument_list()->push_back(fix);
       }
@@ -13370,7 +17251,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,0L);
+	Item *fix= make_cond_after_sjm(root_cond, item, tables, 0L);
 	if (!fix)
 	  return (COND*) 0;			// Always true
 	new_cond->argument_list()->push_back(fix);
@@ -13392,23 +17273,27 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
     of the test
   */
 
-  if (cond->marker == 3 || (cond->used_tables() & ~tables))
+  if (cond->marker == 3 || (cond->used_tables() & ~(tables | sjm_tables)))
     return (COND*) 0;				// Can't check this yet
   if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
     return cond;				// Not boolean op
 
+  /* 
+    Remove equalities that are guaranteed to be true by use of 'ref' access
+    method
+  */
   if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
   {
-    Item *left_item=	((Item_func*) cond)->arguments()[0];
-    Item *right_item= ((Item_func*) cond)->arguments()[1];
+    Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
     if (left_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) left_item,right_item))
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
     }
     if (right_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) right_item,left_item))
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
@@ -13418,22 +17303,62 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
   return cond;
 }
 
+
+/*
+  @brief
+
+  Check if
+   - @table uses "ref"-like access 
+   - it is based on "@field=certain_item" equality
+   - the equality will be true for any record returned by the access method
+  and return the certain_item if yes.
+  
+  @detail
+  
+  Equality won't necessarily hold if:
+   - the used index covers only part of the @field. 
+     Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup
+     for 'abc', you will get both record with 'abc' and with 'abcde'.
+   - The type of access is actually ref_or_null, and so @field can be either 
+     a value or NULL.
+
+  @return 
+    Item that the field will be equal to
+    NULL if no such item 
+*/
+
 static Item *
 part_of_refkey(TABLE *table,Field *field)
 {
-  if (!table->reginfo.join_tab)
+  JOIN_TAB *join_tab= table->reginfo.join_tab;
+  if (!join_tab)
     return (Item*) 0;             // field from outer non-select (UPDATE,...)
 
-  uint ref_parts=table->reginfo.join_tab->ref.key_parts;
-  if (ref_parts)
+  uint ref_parts= join_tab->ref.key_parts;
+  if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
   {
-    KEY_PART_INFO *key_part=
-      table->key_info[table->reginfo.join_tab->ref.key].key_part;
+    uint key= join_tab->ref.key;
+    KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
+    KEY_PART_INFO *key_part= key_info->key_part;
 
     for (uint part=0 ; part < ref_parts ; part++,key_part++)
-      if (field->eq(key_part->field) &&
-	  !(key_part->key_part_flag & (HA_PART_KEY_SEG | HA_NULL_PART)))
-	return table->reginfo.join_tab->ref.items[part];
+    {
+      if (field->eq(key_part->field))
+      {
+        /*
+          Found the field in the key. Check that 
+           1. ref_or_null doesn't alternate this component between a value and
+              a NULL
+           2. index fully covers the key
+        */
+        if (part != join_tab->ref.null_ref_part &&            // (1)
+            !(key_part->key_part_flag & HA_PART_KEY_SEG))     // (2)
+        {
+          return join_tab->ref.items[part];
+        }
+        break;
+      }
+    }
   }
   return (Item*) 0;
 }
@@ -13804,8 +17729,8 @@ find_field_in_item_list (Field *field, void *data)
 */
 
 static bool
-test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
-			bool no_changes, key_map *map)
+test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit_arg,
+			bool no_changes, const key_map *map)
 {
   int ref_key;
   uint ref_key_parts;
@@ -13814,10 +17739,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
   TABLE *table=tab->table;
   SQL_SELECT *select=tab->select;
   key_map usable_keys;
-  QUICK_SELECT_I *save_quick= 0;
+  QUICK_SELECT_I *save_quick= select ? select->quick : 0;
+  Item *orig_cond= 0;
+  bool orig_cond_saved= false;
   int best_key= -1;
+  bool changed_key= false;
+  ha_rows best_select_limit;
   DBUG_ENTER("test_if_skip_sort_order");
+
   LINT_INIT(ref_key_parts);
+  LINT_INIT(best_select_limit);
 
   /*
     Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
@@ -13835,7 +17766,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     }
     usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
     if (usable_keys.is_clear_all())
-      DBUG_RETURN(0);					// No usable keys
+      goto use_filesort;                        // No usable keys
   }
 
   ref_key= -1;
@@ -13845,22 +17776,22 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     ref_key=	   tab->ref.key;
     ref_key_parts= tab->ref.key_parts;
     if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
-      DBUG_RETURN(0);
+      goto use_filesort;
   }
   else if (select && select->quick)		// Range found by opt_range
   {
     int quick_type= select->quick->get_type();
-    save_quick= select->quick;
     /* 
       assume results are not ordered when index merge is used 
       TODO: sergeyp: Results of all index merge selects actually are ordered 
       by clustered PK values.
     */
   
-    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || 
+    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
-      DBUG_RETURN(0);
+      goto use_filesort;
     ref_key=	   select->quick->index;
     ref_key_parts= select->quick->used_key_parts;
   }
@@ -13882,10 +17813,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       */
       if (table->covering_keys.is_set(ref_key))
 	usable_keys.intersect(table->covering_keys);
+      if (tab->pre_idx_push_select_cond)
+      {
+        orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
+        orig_cond_saved= true;
+      }
+
       if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
 				       &usable_keys)) < MAX_KEY)
       {
-	/* Found key that can be used to retrieve data in sorted order */
 	if (tab->ref.key >= 0)
 	{
           /*
@@ -13899,9 +17835,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
           KEYUSE *keyuse= tab->keyuse;
           while (keyuse->key != new_ref_key && keyuse->table == tab->table)
             keyuse++;
-          if (create_ref_for_key(tab->join, tab, keyuse, 
+
+          if (create_ref_for_key(tab->join, tab, keyuse, FALSE,
                                  tab->join->const_table_map))
-            DBUG_RETURN(0);
+            goto use_filesort;
 
           pick_table_access_method(tab);
 	}
@@ -13911,26 +17848,41 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
             The range optimizer constructed QUICK_RANGE for ref_key, and
             we want to use instead new_ref_key as the index. We can't
             just change the index of the quick select, because this may
-            result in an incosistent QUICK_SELECT object. Below we
+            result in an inconsistent QUICK_SELECT object. Below we
             create a new QUICK_SELECT from scratch so that all its
-            parameres are set correctly by the range optimizer.
+            parameters are set correctly by the range optimizer.
            */
           key_map new_ref_key_map;
+          COND *save_cond;
+          bool res;
           new_ref_key_map.clear_all();  // Force the creation of quick select
           new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
 
           /* Reset quick;  This will be restored in 'use_filesort' if needed */
           select->quick= 0;
-          if (select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
-                                        (tab->join->select_options &
-                                         OPTION_FOUND_ROWS) ?
-                                        HA_POS_ERROR :
-                                        tab->join->unit->select_limit_cnt,0) <=
-              0)
+          save_cond= select->cond;
+          if (select->pre_idx_push_select_cond)
+            select->cond= select->pre_idx_push_select_cond;
+          res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
+                                         (tab->join->select_options &
+                                          OPTION_FOUND_ROWS) ?
+                                         HA_POS_ERROR :
+                                         tab->join->unit->select_limit_cnt,0,
+                                         TRUE) <= 0;
+          if (res)
+          {
+            select->cond= save_cond;
             goto use_filesort;
+          }
+          /*
+            We don't restore select->cond as we want to use the
+            original condition as index condition pushdown is not
+            active for the new index.
+          */
 	}
         ref_key= new_ref_key;
-      }
+        changed_key= true;
+     }
     }
     /* Check if we get the rows in requested sorted order by using the key */
     if (usable_keys.is_set(ref_key) &&
@@ -13968,7 +17920,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       resolved with a key;  This is because filesort() is usually faster than
       retrieving all rows through an index.
     */
-    if (select_limit >= table_records)
+    if (select_limit_arg >= table_records)
     {
       keys= *table->file->keys_to_use_for_scanning();
       keys.merge(table->covering_keys);
@@ -13990,12 +17942,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       ref_key_quick_rows= table->quick_rows[ref_key];
 
     read_time= join->best_positions[tablenr].read_time;
-    for (uint i= tablenr+1; i < join->tables; i++)
+    for (uint i= tablenr+1; i < join->table_count; i++)
       fanout*= join->best_positions[i].records_read; // fanout is always >= 1
 
     for (nr=0; nr < table->s->keys ; nr++)
     {
       int direction;
+      ha_rows select_limit= select_limit_arg;
 
       if (keys.is_set(nr) &&
           (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
@@ -14007,9 +17960,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
         */
         DBUG_ASSERT (ref_key != (int) nr);
 
-        bool is_covering= table->covering_keys.is_set(nr) ||
-                          (nr == table->s->primary_key &&
-                          table->file->primary_key_is_clustered());
+        bool is_covering= (table->covering_keys.is_set(nr) ||
+                           (table->file->index_flags(nr, 0, 1) &
+                            HA_CLUSTERED_INDEX));
 	
         /* 
           Don't use an index scan with ORDER BY without limit.
@@ -14089,7 +18042,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
                 select_limit= table_records;
             else
               select_limit= (ha_rows) (select_limit*rec_per_key);
-          }
+          } /* group */
+
           /* 
             If tab=tk is not the last joined table tn then to get first
             L records from the result set we can expect to retrieve
@@ -14133,8 +18087,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
 	  */
           index_scan_time= select_limit/rec_per_key *
 	                   min(rec_per_key, table->file->scan_time());
-          if ((ref_key < 0 && is_covering) || 
-              (ref_key < 0 && (group || table->force_index)) ||
+          if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
               index_scan_time < read_time)
           {
             ha_rows quick_records= table_records;
@@ -14146,7 +18099,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
             if (best_key < 0 ||
                 (select_limit <= min(quick_records,best_records) ?
                  keyinfo->key_parts < best_key_parts :
-                 quick_records < best_records))
+                 quick_records < best_records) ||
+                (!is_best_covering && is_covering))
             {
               best_key= nr;
               best_key_parts= keyinfo->key_parts;
@@ -14154,6 +18108,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
               best_records= quick_records;
               is_best_covering= is_covering;
               best_key_direction= direction; 
+              best_select_limit= select_limit;
             }
           }   
 	}      
@@ -14163,42 +18118,38 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     /*
       filesort() and join cache are usually faster than reading in 
       index order and not using join cache, except in case that chosen
-      index is clustered primary key.
+      index is clustered key.
     */
-    if ((select_limit >= table_records) &&
-        (tab->type == JT_ALL &&
-         tab->join->tables > tab->join->const_tables + 1) &&
-         ((unsigned) best_key != table->s->primary_key ||
-          !table->file->primary_key_is_clustered()))
+    if (best_key < 0 ||
+        ((select_limit_arg >= table_records) &&
+         (tab->type == JT_ALL &&
+         tab->join->table_count > tab->join->const_tables + 1) &&
+         !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
       goto use_filesort;
 
-    if (best_key >= 0)
+    if (table->quick_keys.is_set(best_key) && best_key != ref_key)
     {
-      if (table->quick_keys.is_set(best_key) && best_key != ref_key)
-      {
-        key_map map;
-        map.clear_all();       // Force the creation of quick select
-        map.set_bit(best_key); // only best_key.
-        select->quick= 0;
-        select->test_quick_select(join->thd, map, 0,
-                                  join->select_options & OPTION_FOUND_ROWS ?
-                                  HA_POS_ERROR :
-                                  join->unit->select_limit_cnt,
-                                  0);
-      }
-      order_direction= best_key_direction;
-      /*
-        saved_best_key_parts is actual number of used keyparts found by the
-        test_if_order_by_key function. It could differ from keyinfo->key_parts,
-        thus we have to restore it in case of desc order as it affects
-        QUICK_SELECT_DESC behaviour.
-      */
-      used_key_parts= (order_direction == -1) ?
-        saved_best_key_parts :  best_key_parts;
+      key_map map;
+      map.clear_all();       // Force the creation of quick select
+      map.set_bit(best_key); // only best_key.
+      select->quick= 0;
+      select->test_quick_select(join->thd, map, 0,
+                                join->select_options & OPTION_FOUND_ROWS ?
+                                HA_POS_ERROR :
+                                join->unit->select_limit_cnt,
+                                TRUE, FALSE);
     }
-    else
-      goto use_filesort;
-  } 
+    order_direction= best_key_direction;
+    /*
+      saved_best_key_parts is actual number of used keyparts found by the
+      test_if_order_by_key function. It could differ from keyinfo->key_parts,
+      thus we have to restore it in case of desc order as it affects
+      QUICK_SELECT_DESC behaviour.
+    */
+    used_key_parts= (order_direction == -1) ?
+      saved_best_key_parts :  best_key_parts;
+    changed_key= true;
+  }
 
 check_reverse_order:                  
   DBUG_ASSERT(order_direction != 0);
@@ -14217,6 +18168,7 @@ check_reverse_order:
 
       quick_type= select->quick->get_type();
       if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
+          quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
           quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
           quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
           quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
@@ -14256,13 +18208,23 @@ check_reverse_order:
 
         if (table->covering_keys.is_set(best_key) && ! table->key_read)
           table->enable_keyread();
+        if (tab->pre_idx_push_select_cond)
+        {
+          tab->set_cond(tab->pre_idx_push_select_cond);
+          /*
+            orig_cond is a part of pre_idx_push_cond,
+            no need to restore it.
+          */
+          orig_cond= 0;
+          orig_cond_saved= false;
+        }
         table->file->ha_index_or_rnd_end();
         if (tab->join->select_options & SELECT_DESCRIBE)
         {
           tab->ref.key= -1;
           tab->ref.key_parts= 0;
-          if (select_limit < table->file->stats.records) 
-            tab->limit= select_limit;
+          if (best_select_limit < table->file->stats.records)
+            tab->limit= best_select_limit;
         }
       }
       else if (tab->type != JT_ALL)
@@ -14280,6 +18242,16 @@ check_reverse_order:
         tab->read_first_record= join_init_read_record;
         if (tab->is_using_loose_index_scan())
           tab->join->tmp_table_param.precomputed_group_by= TRUE;
+
+        /*
+          Restore the original condition as changes done by pushdown
+          condition are not relevant anymore
+        */
+        if (tab->select && tab->select->pre_idx_push_select_cond)
+	{
+          tab->set_cond(tab->select->pre_idx_push_select_cond);
+           tab->table->file->cancel_pushed_idx_cond();
+        }
         /*
           TODO: update the number of records in join->best_positions[tablenr]
         */
@@ -14291,13 +18263,15 @@ check_reverse_order:
       if (select && select->quick)
       {
         QUICK_SELECT_DESC *tmp;
+        bool error= FALSE;
+
         /* ORDER BY range_key DESC */
         tmp= new QUICK_SELECT_DESC((QUICK_RANGE_SELECT*)(select->quick),
-                                    used_key_parts);
+                                   used_key_parts, &error);
         if (tmp && select->quick == save_quick)
           save_quick= 0;    // ::QUICK_SELECT_DESC consumed it
 
-        if (!tmp || tmp->error)
+        if (!tmp || error)
         {
           delete tmp;
           tab->limit= 0;
@@ -14319,7 +18293,7 @@ check_reverse_order:
       }
     }
     else if (select && select->quick)
-      select->quick->sorted= 1;
+      select->quick->need_sorted_output();
 
   } // QEP has been modified
 
@@ -14336,6 +18310,11 @@ skipped_filesort:
     delete save_quick;
     save_quick= NULL;
   }
+  if (orig_cond_saved && !changed_key)
+    tab->set_cond(orig_cond);
+  if (!no_changes && changed_key && table->file->pushed_idx_cond)
+    table->file->cancel_pushed_idx_cond();
+
   DBUG_RETURN(1);
 
 use_filesort:
@@ -14345,6 +18324,9 @@ use_filesort:
     delete select->quick;
     select->quick= save_quick;
   }
+  if (orig_cond_saved)
+    tab->set_cond(orig_cond);
+
   DBUG_RETURN(0);
 }
 
@@ -14390,12 +18372,15 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
   JOIN_TAB *tab;
   DBUG_ENTER("create_sort_index");
 
-  if (join->tables == join->const_tables)
+  if (join->table_count == join->const_tables)
     DBUG_RETURN(0);				// One row, no need to sort
   tab=    join->join_tab + join->const_tables;
   table=  tab->table;
   select= tab->select;
 
+  /* Currently ORDER BY ... LIMIT is not supported in subqueries. */
+  DBUG_ASSERT(join->group_list || !join->is_in_subquery());
+
   /*
     When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
     and thus force sorting on disk unless a group min-max optimization
@@ -14443,7 +18428,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 	field, quick will contain an empty record set.
       */
       if (!(select->quick= (tab->type == JT_FT ?
-			    new FT_SELECT(thd, table, tab->ref.key) :
+			    get_ft_select(thd, table, tab->ref.key) :
 			    get_quick_select_for_ref(thd, table, &tab->ref, 
                                                      tab->found_records))))
 	goto err;
@@ -14455,6 +18440,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
       get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))
     goto err;
 
+  if (!tab->preread_init_done && tab->preread_init())
+    goto err;
   if (table->s->tmp_table)
     table->file->info(HA_STATUS_VARIABLE);	// Get record count
   table->sort.found_records=filesort(thd, table,join->sortorder, length,
@@ -14479,8 +18466,10 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 
     select->cleanup();				// filesort did select
     table->quick_keys.clear_all();  // as far as we cleanup select->quick
+    table->intersect_keys.clear_all();
     table->sort.io_cache= tablesort_result_cache;
   }
+  tab->set_select_cond(NULL, __LINE__);
   tab->last_inner= 0;
   tab->first_unmatched= 0;
   tab->type=JT_ALL;				// Read with normal read_record
@@ -14504,7 +18493,9 @@ static bool fix_having(JOIN *join, Item **having)
   table_map used_tables= join->const_table_map | table->table->map;
 
   DBUG_EXECUTE("where",print_where(*having,"having", QT_ORDINARY););
-  Item* sort_table_cond=make_cond_for_table(*having,used_tables,used_tables);
+  Item* sort_table_cond= make_cond_for_table(join->thd, *having, used_tables,
+                                            used_tables, MAX_TABLES,
+                                            FALSE, FALSE);
   if (sort_table_cond)
   {
     if (!table->select)
@@ -14517,12 +18508,14 @@ static bool fix_having(JOIN *join, Item **having)
 						   sort_table_cond)) ||
 	  table->select->cond->fix_fields(join->thd, &table->select->cond))
 	return 1;
-    table->select_cond=table->select->cond;
+    table->set_select_cond(table->select->cond, __LINE__);
     table->select_cond->top_level_item();
     DBUG_EXECUTE("where",print_where(table->select_cond,
 				     "select and having",
                                      QT_ORDINARY););
-    *having=make_cond_for_table(*having,~ (table_map) 0,~used_tables);
+    *having= make_cond_for_table(join->thd, *having,
+                                 ~ (table_map) 0,~used_tables,
+                                 MAX_TABLES, FALSE, FALSE);
     DBUG_EXECUTE("where",
                  print_where(*having,"having after make_cond", QT_ORDINARY););
   }
@@ -14870,277 +18863,53 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
 }
 
 
-/*****************************************************************************
-  Fill join cache with packed records
-  Records are stored in tab->cache.buffer and last record in
-  last record is stored with pointers to blobs to support very big
-  records
-******************************************************************************/
-
-static int
-join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count)
-{
-  reg1 uint i;
-  uint length, blobs;
-  size_t size;
-  CACHE_FIELD *copy,**blob_ptr;
-  JOIN_CACHE  *cache;
-  JOIN_TAB *join_tab;
-  DBUG_ENTER("join_init_cache");
-
-  cache= &tables[table_count].cache;
-  cache->fields=blobs=0;
-
-  join_tab=tables;
-  for (i=0 ; i < table_count ; i++,join_tab++)
-  {
-    if (!join_tab->used_fieldlength)		/* Not calced yet */
-      calc_used_field_length(thd, join_tab);
-    cache->fields+=join_tab->used_fields;
-    blobs+=join_tab->used_blobs;
-  }
-  if (!(cache->field=(CACHE_FIELD*)
-	sql_alloc(sizeof(CACHE_FIELD)*(cache->fields+table_count*2)+(blobs+1)*
-
-		  sizeof(CACHE_FIELD*))))
-  {
-    my_free((uchar*) cache->buff,MYF(0));		/* purecov: inspected */
-    cache->buff=0;				/* purecov: inspected */
-    DBUG_RETURN(1);				/* purecov: inspected */
-  }
-  copy=cache->field;
-  blob_ptr=cache->blob_ptr=(CACHE_FIELD**)
-    (cache->field+cache->fields+table_count*2);
-
-  length=0;
-  for (i=0 ; i < table_count ; i++)
-  {
-    bool have_bit_fields= FALSE;
-    uint null_fields=0,used_fields;
-    Field **f_ptr,*field;
-    MY_BITMAP *read_set= tables[i].table->read_set;
-    for (f_ptr=tables[i].table->field,used_fields=tables[i].used_fields ;
-	 used_fields ;
-	 f_ptr++)
-    {
-      field= *f_ptr;
-      if (bitmap_is_set(read_set, field->field_index))
-      {
-	used_fields--;
-	length+=field->fill_cache_field(copy);
-	if (copy->type == CACHE_BLOB)
-	  (*blob_ptr++)=copy;
-	if (field->real_maybe_null())
-	  null_fields++;
-        if (field->type() == MYSQL_TYPE_BIT &&
-            ((Field_bit*)field)->bit_len)
-          have_bit_fields= TRUE;    
-	copy++;
-      }
-    }
-    /* Copy null bits from table */
-    if (null_fields || have_bit_fields)
-    {						/* must copy null bits */
-      copy->str= tables[i].table->null_flags;
-      copy->length= tables[i].table->s->null_bytes;
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-    /* If outer join table, copy null_row flag */
-    if (tables[i].table->maybe_null)
-    {
-      copy->str= (uchar*) &tables[i].table->null_row;
-      copy->length=sizeof(tables[i].table->null_row);
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-  }
-
-  cache->length=length+blobs*sizeof(char*);
-  cache->blobs=blobs;
-  *blob_ptr=0;					/* End sequentel */
-  size=max(thd->variables.join_buff_size, cache->length);
-  if (!(cache->buff=(uchar*) my_malloc(size,MYF(0))))
-    DBUG_RETURN(1);				/* Don't use cache */ /* purecov: inspected */
-  cache->end=cache->buff+size;
-  reset_cache_write(cache);
-  DBUG_RETURN(0);
-}
-
-
-static ulong
-used_blob_length(CACHE_FIELD **ptr)
-{
-  uint length,blob_length;
-  for (length=0 ; *ptr ; ptr++)
-  {
-    Field_blob *field_blob= (Field_blob *) (*ptr)->field;
-    (*ptr)->blob_length=blob_length= field_blob->get_length();
-    length+=blob_length;
-    field_blob->get_ptr(&(*ptr)->str);
-  }
-  return length;
-}
-
-
-static bool
-store_record_in_cache(JOIN_CACHE *cache)
-{
-  uint length;
-  uchar *pos;
-  CACHE_FIELD *copy,*end_field;
-  bool last_record;
-
-  pos=cache->pos;
-  end_field=cache->field+cache->fields;
-
-  length=cache->length;
-  if (cache->blobs)
-    length+=used_blob_length(cache->blob_ptr);
-  if ((last_record= (length + cache->length > (size_t) (cache->end - pos))))
-    cache->ptr_record=cache->records;
 
-  /*
-    There is room in cache. Put record there
-  */
-  cache->records++;
-  for (copy=cache->field ; copy < end_field; copy++)
-  {
-    if (copy->type == CACHE_BLOB)
-    {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->get_image(pos, copy->length+sizeof(char*), 
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->get_image(pos, copy->length, // blob length
-                              blob_field->charset());
-	memcpy(pos+copy->length,copy->str,copy->blob_length);  // Blob data
-	pos+=copy->length+copy->blob_length;
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-	uchar *str,*end;
-        Field *field= copy->field;
-        if (field && field->is_null())
-          end= str= copy->str;
-        else
-        {
-          for (str=copy->str,end= str+copy->length;
-               end > str && end[-1] == ' ' ;
-               end--)
-            ;
-        }
-	length=(uint) (end-str);
-	memcpy(pos+2, str, length);
-        int2store(pos, length);
-	pos+= length+2;
-      }
-      else
-      {
-	memcpy(pos,copy->str,copy->length);
-	pos+=copy->length;
-      }
-    }
-  }
-  cache->pos=pos;
-  return last_record || (size_t) (cache->end - pos) < cache->length;
-}
+/*
+  eq_ref: Create the lookup key and check if it is the same as saved key
 
 
-static void
-reset_cache_read(JOIN_CACHE *cache)
-{
-  cache->record_nr=0;
-  cache->pos=cache->buff;
-}
 
 
-static void reset_cache_write(JOIN_CACHE *cache)
-{
-  reset_cache_read(cache);
-  cache->records= 0;
-  cache->ptr_record= (uint) ~0;
-}
+  SYNOPSIS
+    cmp_buffer_with_ref()
+      tab      Join tab of the accessed table
+      table    The table to read.  This is usually tab->table, except for 
+               semi-join when we might need to make a lookup in a temptable
+               instead.
+      tab_ref  The structure with methods to collect index lookup tuple. 
+               This is usually table->ref, except for the case of when we're 
+               doing lookup into semi-join materialization table.
+
+  DESCRIPTION 
+    Used by eq_ref access method: create the index lookup key and check if 
+    we've used this key at previous lookup (If yes, we don't need to repeat
+    the lookup - the record has been already fetched)
 
+  RETURN 
+    TRUE   No cached record for the key, or failed to create the key (due to
+           out-of-domain error)
+    FALSE  The created key is the same as the previous one (and the record 
+           is already in table->record)
+*/
 
-static void
-read_cached_record(JOIN_TAB *tab)
+static bool
+cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
 {
-  uchar *pos;
-  uint length;
-  bool last_record;
-  CACHE_FIELD *copy,*end_field;
-
-  last_record=tab->cache.record_nr++ == tab->cache.ptr_record;
-  pos=tab->cache.pos;
-
-  for (copy=tab->cache.field,end_field=copy+tab->cache.fields ;
-       copy < end_field;
-       copy++)
+  bool no_prev_key;
+  if (!tab_ref->disable_cache)
   {
-    if (copy->type == CACHE_BLOB)
+    if (!(no_prev_key= tab_ref->key_err))
     {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->set_image(pos, copy->length+sizeof(char*),
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->set_ptr(pos, pos+copy->length);
-	pos+=copy->length + blob_field->get_length();
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-        length= uint2korr(pos);
-	memcpy(copy->str, pos+2, length);
-	memset(copy->str+length, ' ', copy->length-length);
-	pos+= 2 + length;
-      }
-      else
-      {
-	memcpy(copy->str,pos,copy->length);
-	pos+=copy->length;
-      }
+      /* Previous access found a row. Copy its key */
+      memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
     }
   }
-  tab->cache.pos=pos;
-  return;
-}
-
-
-static bool
-cmp_buffer_with_ref(JOIN_TAB *tab)
-{
-  bool diff;
-  if (!(diff=tab->ref.key_err))
-  {
-    memcpy(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length);
-  }
-  if ((tab->ref.key_err= cp_buffer_from_ref(tab->join->thd, tab->table,
-                                            &tab->ref)) ||
-      diff)
+  else 
+    no_prev_key= TRUE;
+  if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
+      no_prev_key)
     return 1;
-  return memcmp(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length)
+  return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
     != 0;
 }
 
@@ -15233,7 +19002,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     order->in_field_list= 1;
     order->counter= count;
     order->counter_used= 1;
-    return FALSE;
+   return FALSE;
   }
   /* Lookup the current GROUP/ORDER field in the SELECT clause. */
   select_item= find_item_in_list(order_item, fields, &counter,
@@ -15678,8 +19447,10 @@ test_if_subpart(ORDER *a,ORDER *b)
 */
 
 static TABLE *
-get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
+get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables)
 {
+  TABLE_LIST *table;
+  List_iterator<TABLE_LIST> ti(tables);
   table_map map= (table_map) 0;
   DBUG_ENTER("get_sort_by_table");
 
@@ -15697,11 +19468,11 @@ get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
     DBUG_RETURN(0);
 
-  for (; !(map & tables->table->map); tables= tables->next_leaf) ;
-  if (map != tables->table->map)
+  while ((table= ti++) && !(map & table->table->map)) ;
+  if (map != table->table->map)
     DBUG_RETURN(0);				// More than one table
-  DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
-  DBUG_RETURN(tables->table);
+  DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
+  DBUG_RETURN(table->table);
 }
 
 
@@ -15838,7 +19609,7 @@ alloc_group_fields(JOIN *join,ORDER *group)
   {
     for (; group ; group=group->next)
     {
-      Cached_item *tmp=new_Cached_item(join->thd, *group->item);
+      Cached_item *tmp=new_Cached_item(join->thd, *group->item, TRUE);
       if (!tmp || join->group_fields.push_front(tmp))
 	return TRUE;
     }
@@ -15848,6 +19619,38 @@ alloc_group_fields(JOIN *join,ORDER *group)
 }
 
 
+
+/*
+  Test if a single-row cache of items changed, and update the cache.
+
+  @details Test if a list of items that typically represents a result
+  row has changed. If the value of some item changed, update the cached
+  value for this item.
+  
+  @param list list of <item, cached_value> pairs stored as Cached_item.
+
+  @return -1 if no item changed
+  @return index of the first item that changed
+*/
+
+int test_if_item_cache_changed(List<Cached_item> &list)
+{
+  DBUG_ENTER("test_if_item_cache_changed");
+  List_iterator<Cached_item> li(list);
+  int idx= -1,i;
+  Cached_item *buff;
+
+  for (i=(int) list.elements-1 ; (buff=li++) ; i--)
+  {
+    if (buff->cmp())
+      idx=i;
+  }
+  DBUG_PRINT("info", ("idx: %d", idx));
+  DBUG_RETURN(idx);
+}
+
+
+
 static int
 test_if_group_changed(List<Cached_item> &list)
 {
@@ -15987,7 +19790,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
       }
     }
     else if ((real_pos->type() == Item::FUNC_ITEM ||
-	      real_pos->type() == Item::SUBSELECT_ITEM ||
+	      real_pos->real_type() == Item::SUBSELECT_ITEM ||
 	      real_pos->type() == Item::CACHE_ITEM ||
 	      real_pos->type() == Item::COND_ITEM) &&
 	     !real_pos->with_sum_func)
@@ -16450,7 +20253,6 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
   }
   if (thd->is_fatal_error)
     DBUG_RETURN(TRUE);
-
   if (!cond->fixed)
   {
     Item *tmp_item= (Item*) cond;
@@ -16459,13 +20261,26 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
   }
   if (join_tab->select)
   {
+    Item *cond_copy;
+    UNINIT_VAR(cond_copy); // used when pre_idx_push_select_cond!=NULL
+    if (join_tab->select->pre_idx_push_select_cond)
+      cond_copy= cond->copy_andor_structure(thd);
     if (join_tab->select->cond)
       error=(int) cond->add(join_tab->select->cond);
-    join_tab->select_cond=join_tab->select->cond=cond;
+    join_tab->select->cond= cond;
+    if (join_tab->select->pre_idx_push_select_cond)
+    {
+      Item *new_cond= and_conds(cond_copy, join_tab->select->pre_idx_push_select_cond);
+      if (!new_cond->fixed && new_cond->fix_fields(thd, &new_cond))
+        error= 1;
+      join_tab->pre_idx_push_select_cond=
+        join_tab->select->pre_idx_push_select_cond= new_cond;
+    }
+    join_tab->set_select_cond(cond, __LINE__);
   }
   else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
                                           &error)))
-    join_tab->select_cond=cond;
+    join_tab->set_select_cond(cond, __LINE__);
 
   DBUG_RETURN(error ? TRUE : FALSE);
 }
@@ -16922,10 +20737,12 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
           item->save_in_result_field(1);
       }
       copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
-      if ((write_error= table_arg->file->ha_write_row(table_arg->record[0])))
+      if ((write_error= table_arg->file->ha_write_tmp_row(table_arg->record[0])))
       {
-	if (create_internal_tmp_table_from_heap(thd, table_arg, &tmp_table_param,
-                                    write_error, 0))
+	if (create_internal_tmp_table_from_heap(thd, table_arg, 
+                                                tmp_table_param.start_recinfo,
+                                                &tmp_table_param.recinfo,
+                                                write_error, 0))
 	  return 1;		     
       }
     }
@@ -17068,27 +20885,46 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
     if (result->send_data(item_list))
       join->error= 1;
   }
-  else
+  else if (!join->select_lex->master_unit()->derived ||
+           join->select_lex->master_unit()->derived->is_materialized_derived())
   {
     table_map used_tables=0;
-    for (uint i=0 ; i < join->tables ; i++)
+
+    bool printing_materialize_nest= FALSE;
+    uint select_id= join->select_lex->select_number;
+
+    for (JOIN_TAB *tab= first_breadth_first_tab(join); tab;
+         tab= next_breadth_first_tab(join, tab))
     {
-      JOIN_TAB *tab=join->join_tab+i;
+      if (tab->bush_root_tab)
+      {
+        JOIN_TAB *first_sibling= tab->bush_root_tab->bush_children->start;
+        select_id= first_sibling->emb_sj_nest->sj_subq_pred->get_identifier();
+        printing_materialize_nest= TRUE;
+      }
+
       TABLE *table=tab->table;
       TABLE_LIST *table_list= tab->table->pos_in_table_list;
       char buff[512]; 
-      char buff1[512], buff2[512], buff3[512];
+      char buff1[512], buff2[512], buff3[512], buff4[512];
       char keylen_str_buf[64];
+      my_bool key_read;
       String extra(buff, sizeof(buff),cs);
       char table_name_buffer[SAFE_NAME_LEN];
       String tmp1(buff1,sizeof(buff1),cs);
       String tmp2(buff2,sizeof(buff2),cs);
       String tmp3(buff3,sizeof(buff3),cs);
+      String tmp4(buff4,sizeof(buff4),cs);
+      char hash_key_prefix[]= "#hash#";
+      KEY *key_info= 0;
+      uint key_len= 0;
+      bool is_hj= tab->type == JT_HASH || tab->type ==JT_HASH_NEXT;
+
       extra.length(0);
       tmp1.length(0);
       tmp2.length(0);
       tmp3.length(0);
-
+      tmp4.length(0);
       quick_type= -1;
 
       /* Don't show eliminated tables */
@@ -17100,22 +20936,25 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 
       item_list.empty();
       /* id */
-      item_list.push_back(new Item_uint((uint32)
-				       join->select_lex->select_number));
+      item_list.push_back(new Item_uint((uint32)select_id));
       /* select_type */
-      item_list.push_back(new Item_string(join->select_lex->type,
-					  strlen(join->select_lex->type),
-					  cs));
-      if (tab->type == JT_ALL && tab->select && tab->select->quick)
+      const char* stype= printing_materialize_nest? "MATERIALIZED" : 
+                                                    join->select_lex->type;
+      item_list.push_back(new Item_string(stype, strlen(stype), cs));
+      
+      if ((tab->type == JT_ALL || tab->type == JT_HASH) &&
+           tab->select && tab->select->quick)
       {
         quick_type= tab->select->quick->get_type();
         if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
+            (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
             (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
             (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
-          tab->type = JT_INDEX_MERGE;
+          tab->type= tab->type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE;
         else
-	  tab->type = JT_RANGE;
+	  tab->type= tab->type == JT_ALL ? JT_RANGE : JT_HASH_RANGE;
       }
+
       /* table */
       if (table->derived_select_number)
       {
@@ -17125,6 +20964,16 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 			     table->derived_select_number);
 	item_list.push_back(new Item_string(table_name_buffer, len, cs));
       }
+      else if (tab->bush_children)
+      {
+        JOIN_TAB *ctab= tab->bush_children->start;
+        /* table */
+        int len= my_snprintf(table_name_buffer, 
+                             sizeof(table_name_buffer)-1,
+                             "<subquery%d>", 
+                             ctab->emb_sj_nest->sj_subq_pred->get_identifier());
+	item_list.push_back(new Item_string(table_name_buffer, len, cs));
+      }
       else
       {
         TABLE_LIST *real_table= table->pos_in_table_list; 
@@ -17177,49 +21026,71 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(item_null);
 
       /* Build "key", "key_len", and "ref" values and add them to item_list */
-      if (tab->ref.key_parts)
+      if (tab->type == JT_NEXT)
+      {
+	key_info= table->key_info+tab->index;
+        key_len= key_info->key_length;
+      }
+      else if (tab->ref.key_parts)
+      {
+	key_info= tab->get_keyinfo_by_key_no(tab->ref.key);
+        key_len= tab->ref.key_length;
+      }
+      if (key_info)
       {
-	KEY *key_info=table->key_info+ tab->ref.key;
         register uint length;
-	item_list.push_back(new Item_string(key_info->name,
-					    strlen(key_info->name),
-					    system_charset_info));
-        length= (longlong10_to_str(tab->ref.key_length, keylen_str_buf, 10) - 
+        if (is_hj)
+          tmp2.append(hash_key_prefix, strlen(hash_key_prefix), cs);
+        tmp2.append(key_info->name,  strlen(key_info->name), cs);
+        length= (longlong10_to_str(key_len, keylen_str_buf, 10) - 
                  keylen_str_buf);
-        item_list.push_back(new Item_string(keylen_str_buf, length,
-                                            system_charset_info));
-	for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+        tmp3.append(keylen_str_buf, length, cs);
+        if (tab->ref.key_parts)
 	{
-	  if (tmp2.length())
-	    tmp2.append(',');
-	  tmp2.append((*ref)->name(), strlen((*ref)->name()),
-		      system_charset_info);
-	}
-	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+	  for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
+	  {
+	    if (tmp4.length())
+	      tmp4.append(',');
+	    tmp4.append((*ref)->name(), strlen((*ref)->name()), cs);
+          }
+        }
       }
-      else if (tab->type == JT_NEXT)
+      if (is_hj && tab->type != JT_HASH)
       {
-	KEY *key_info=table->key_info+ tab->index;
-        register uint length;
-	item_list.push_back(new Item_string(key_info->name,
-					    strlen(key_info->name),cs));
-        length= (longlong10_to_str(key_info->key_length, keylen_str_buf, 10) - 
-                 keylen_str_buf);
-        item_list.push_back(new Item_string(keylen_str_buf, 
-                                            length,
-                                            system_charset_info));
-	item_list.push_back(item_null);
+        tmp2.append(':');
+        tmp3.append(':');
       }
-      else if (tab->select && tab->select->quick)
+      if (tab->type == JT_HASH_NEXT)
       {
+        register uint length;
+	key_info= table->key_info+tab->index;
+        key_len= key_info->key_length;
+        tmp2.append(key_info->name,  strlen(key_info->name), cs);
+        length= (longlong10_to_str(key_len, keylen_str_buf, 10) - 
+                 keylen_str_buf);
+        tmp3.append(keylen_str_buf, length, cs);
+      }         
+      if (tab->type != JT_CONST && tab->select && tab->select->quick)
         tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3);
-	item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
-	item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
-	item_list.push_back(item_null);
+      if (key_info || (tab->select && tab->select->quick))
+      {
+        if (tmp2.length())
+          item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs));
+        else
+          item_list.push_back(item_null);
+        if (tmp3.length())
+          item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs));
+        else
+          item_list.push_back(item_null);
+        if (key_info && tab->type != JT_NEXT)
+          item_list.push_back(new Item_string(tmp4.ptr(),tmp4.length(),cs));
+        else
+          item_list.push_back(item_null);
       }
       else
       {
-        if (table_list->schema_table &&
+        if (table_list && /* SJM bushes don't have table_list */
+            table_list->schema_table &&
             table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
         {
           const char *tmp_buff;
@@ -17250,7 +21121,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       
       /* Add "rows" field to item_list. */
-      if (table_list->schema_table)
+      if (table_list /* SJM bushes don't have table_list */ &&
+          table_list->schema_table)
       {
         /* in_rows */
         if (join->thd->lex->describe & DESCRIBE_EXTENDED)
@@ -17263,18 +21135,28 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         ha_rows examined_rows;
         if (tab->select && tab->select->quick)
           examined_rows= tab->select->quick->records;
-        else if (tab->type == JT_NEXT || tab->type == JT_ALL)
+        else if (tab->type == JT_NEXT || tab->type == JT_ALL || is_hj)
         {
           if (tab->limit)
             examined_rows= tab->limit;
           else
           {
-            tab->table->file->info(HA_STATUS_VARIABLE);
-            examined_rows= tab->table->file->stats.records;
+            if (tab->table->is_filled_at_execution())
+            {
+              examined_rows= tab->records;
+            }
+            else
+            {
+              /*
+                handler->info(HA_STATUS_VARIABLE) has been called in
+                make_join_statistics()
+              */
+              examined_rows= tab->table->file->stats.records;
+            }
           }
         }
         else
-          examined_rows=(ha_rows)join->best_positions[i].records_read; 
+          examined_rows=(ha_rows)tab->records_read; 
  
         item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows, 
                                          MY_INT64_NUM_DECIMAL_DIGITS));
@@ -17284,14 +21166,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         {
           float f= 0.0; 
           if (examined_rows)
-            f= (float) (100.0 * join->best_positions[i].records_read /
-                        examined_rows);
+            f= (float) (100.0 * tab->records_read / examined_rows);
+ 	  set_if_smaller(f, 100.0);
           item_list.push_back(new Item_float(f, 2));
         }
       }
 
       /* Build "Extra" field and add it to item_list. */
-      my_bool key_read=table->key_read;
+      key_read=table->key_read;
       if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
           table->covering_keys.is_set(tab->index))
 	key_read=1;
@@ -17321,8 +21203,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       else
       {
+        uint keyno= MAX_KEY;
+        if (tab->ref.key_parts)
+          keyno= tab->ref.key;
+        else if (tab->select && tab->select->quick)
+          keyno = tab->select->quick->index;
+
+        if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
+            table->file->pushed_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition"));
+        else if (tab->cache_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition(BKA)"));
+
         if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
             quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
+            quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
             quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
         {
           extra.append(STRING_WITH_LEN("; Using "));
@@ -17357,7 +21252,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
               extra.append(STRING_WITH_LEN("; Using where"));
           }
 	}
-        if (table_list->schema_table &&
+        if (table_list /* SJM bushes don't have table_list */ &&
+            table_list->schema_table &&
             table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
         {
           if (!table_list->table_open_method)
@@ -17384,6 +21280,32 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	if (table->reginfo.not_exists_optimize)
 	  extra.append(STRING_WITH_LEN("; Not exists"));
+
+        /*
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
+            !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
+             HA_MRR_USE_DEFAULT_IMPL))
+        {
+	  extra.append(STRING_WITH_LEN("; Using MRR"));
+        }
+        */
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
+        {
+          char mrr_str_buf[128];
+          mrr_str_buf[0]=0;
+          int len;
+          uint mrr_flags= 
+            ((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags;
+          len= table->file->multi_range_read_explain_info(mrr_flags,
+                                                          mrr_str_buf,
+                                                          sizeof(mrr_str_buf));
+          if (len > 0)
+          {
+            extra.append(STRING_WITH_LEN("; "));
+            extra.append(mrr_str_buf, len);
+          }
+        }
+
 	if (need_tmp_table)
 	{
 	  need_tmp_table=0;
@@ -17394,8 +21316,40 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	  need_order=0;
 	  extra.append(STRING_WITH_LEN("; Using filesort"));
 	}
-	if (distinct & test_all_bits(used_tables, thd->lex->used_tables))
+	if (distinct & test_all_bits(used_tables,
+                                     join->select_list_used_tables))
 	  extra.append(STRING_WITH_LEN("; Distinct"));
+        if (tab->loosescan_match_tab)
+        {
+          extra.append(STRING_WITH_LEN("; LooseScan"));
+        }
+
+        if (tab->first_weedout_table)
+          extra.append(STRING_WITH_LEN("; Start temporary"));
+        if (tab->check_weed_out_table)
+          extra.append(STRING_WITH_LEN("; End temporary"));
+        else if (tab->do_firstmatch)
+        {
+          if (tab->do_firstmatch == join->join_tab - 1)
+            extra.append(STRING_WITH_LEN("; FirstMatch"));
+          else
+          {
+            extra.append(STRING_WITH_LEN("; FirstMatch("));
+            TABLE *prev_table=tab->do_firstmatch->table;
+            if (prev_table->derived_select_number)
+            {
+              char namebuf[NAME_LEN];
+              /* Derived table name generation */
+              int len= my_snprintf(namebuf, sizeof(namebuf)-1,
+                                   "<derived%u>",
+                                   prev_table->derived_select_number);
+              extra.append(namebuf, len);
+            }
+            else
+              extra.append(prev_table->pos_in_table_list->alias);
+            extra.append(STRING_WITH_LEN(")"));
+          }
+        }
 
         for (uint part= 0; part < tab->ref.key_parts; part++)
         {
@@ -17405,8 +21359,12 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
             break;
           }
         }
-        if (i > 0 && tab[-1].next_select == sub_select_cache)
+
+        if (tab->cache)
+	{
           extra.append(STRING_WITH_LEN("; Using join buffer"));
+          tab->cache->print_explain_comment(&extra);
+        }
         
         /* Skip initial "; "*/
         const char *str= extra.ptr();
@@ -17418,6 +21376,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	item_list.push_back(new Item_string(str, len, cs));
       }
+      
       // For next iteration
       used_tables|=table->map;
       if (result->send_data(item_list))
@@ -17428,6 +21387,26 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
        unit;
        unit= unit->next_unit())
   {
+    /*
+      This fix_fields() call is to handle an edge case like this:
+       
+        SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
+      
+      for such queries, we'll get here before having called
+      subquery_expr->fix_fields(), which will cause failure to
+    */
+    if (unit->item && !unit->item->fixed)
+    {
+      Item *ref= unit->item;
+      if (unit->item->fix_fields(thd, &ref))
+        DBUG_VOID_RETURN;
+      DBUG_ASSERT(ref == unit->item);
+    }
+
+    /* 
+      Display subqueries only if they are not parts of eliminated WHERE/ON
+      clauses.
+    */
     if (!(unit->item && unit->item->eliminated))
     {
       if (mysql_explain_union(thd, unit, result))
@@ -17444,28 +21423,12 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
   bool res= 0;
   SELECT_LEX *first= unit->first_select();
 
-  for (SELECT_LEX *sl= first;
-       sl;
-       sl= sl->next_select())
-  {
-    // drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
-    uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
-    sl->type= (((&thd->lex->select_lex)==sl)?
-	       (sl->first_inner_unit() || sl->next_select() ? 
-		"PRIMARY" : "SIMPLE"):
-	       ((sl == first)?
-		((sl->linkage == DERIVED_TABLE_TYPE) ?
-		 "DERIVED":
-		 ((uncacheable & UNCACHEABLE_DEPENDENT) ?
-		  "DEPENDENT SUBQUERY":
-		  (uncacheable?"UNCACHEABLE SUBQUERY":
-		   "SUBQUERY"))):
-		((uncacheable & UNCACHEABLE_DEPENDENT) ?
-		 "DEPENDENT UNION":
-		 uncacheable?"UNCACHEABLE UNION":
-		 "UNION")));
+  for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
+  {
+    sl->set_explain_type();
     sl->options|= SELECT_DESCRIBE;
   }
+
   if (unit->is_union())
   {
     unit->fake_select_lex->select_number= UINT_MAX; // jost for initialization
@@ -17495,6 +21458,53 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
 }
 
 
+static void print_table_array(THD *thd, 
+                              table_map eliminated_tables,
+                              String *str, TABLE_LIST **table, 
+                              TABLE_LIST **end,
+                              enum_query_type query_type)
+{
+  (*table)->print(thd, eliminated_tables, str, query_type);
+
+  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+  {
+    TABLE_LIST *curr= *tbl;
+    
+    /*
+      The "eliminated_tables &&" check guards againist the case of 
+      printing the query for CREATE VIEW. We do that without having run 
+      JOIN::optimize() and so will have nested_join->used_tables==0.
+    */
+    if (eliminated_tables &&
+        ((curr->table && (curr->table->map & eliminated_tables)) ||
+         (curr->nested_join && !(curr->nested_join->used_tables &
+                                ~eliminated_tables))))
+    {
+      continue;
+    }
+
+    if (curr->outer_join)
+    {
+      /* MySQL converts right to left joins */
+      str->append(STRING_WITH_LEN(" left join "));
+    }
+    else if (curr->straight)
+      str->append(STRING_WITH_LEN(" straight_join "));
+    else if (curr->sj_inner_tables)
+      str->append(STRING_WITH_LEN(" semi join "));
+    else
+      str->append(STRING_WITH_LEN(" join "));
+    curr->print(thd, eliminated_tables, str, query_type);
+    if (curr->on_expr)
+    {
+      str->append(STRING_WITH_LEN(" on("));
+      curr->on_expr->print(str, query_type);
+      str->append(')');
+    }
+  }
+}
+
+
 /**
   Print joins from the FROM clause.
 
@@ -17529,45 +21539,28 @@ static void print_join(THD *thd,
               !(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
                 ((*table)->nested_join && !((*table)->nested_join->used_tables &
                                            ~eliminated_tables))));
-  (*table)->print(thd, eliminated_tables, str, query_type);
-
-  TABLE_LIST **end= table + tables->elements;
-  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+  /* 
+    If the first table is a semi-join nest, swap it with something that is
+    not a semi-join nest.
+  */
+  if ((*table)->sj_inner_tables)
   {
-    TABLE_LIST *curr= *tbl;
-    /*
-      The "eliminated_tables &&" check guards againist the case of 
-      printing the query for CREATE VIEW. We do that without having run 
-      JOIN::optimize() and so will have nested_join->used_tables==0.
-    */
-    if (eliminated_tables &&
-        ((curr->table && (curr->table->map & eliminated_tables)) ||
-         (curr->nested_join && !(curr->nested_join->used_tables &
-                                ~eliminated_tables))))
-    {
-      continue;
-    }
-
-    if (curr->outer_join)
-    {
-      /* MySQL converts right to left joins */
-      str->append(STRING_WITH_LEN(" left join "));
-    }
-    else if (curr->straight)
-      str->append(STRING_WITH_LEN(" straight_join "));
-    else
-      str->append(STRING_WITH_LEN(" join "));
-    curr->print(thd, eliminated_tables, str, query_type);
-    if (curr->on_expr)
+    TABLE_LIST **end= table + tables->elements;
+    for (TABLE_LIST **t2= table; t2!=end; t2++)
     {
-      str->append(STRING_WITH_LEN(" on("));
-      curr->on_expr->print(str, query_type);
-      str->append(')');
+      if (!(*t2)->sj_inner_tables)
+      {
+        TABLE_LIST *tmp= *t2;
+        *t2= *table;
+        *table= tmp;
+        break;
+      }
     }
   }
+  print_table_array(thd, eliminated_tables, str, table, 
+                    table + tables->elements, query_type);
 }
 
-
 /**
   @brief Print an index hint
 
@@ -17620,6 +21613,32 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
     print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
     str->append(')');
   }
+  else if (jtbm_subselect)
+  {
+    if (jtbm_subselect->engine->engine_type() ==
+          subselect_engine::SINGLE_SELECT_ENGINE)
+    {
+      /* 
+        We get here when conversion into materialization didn't finish (this
+        happens when
+        - The subquery is a degenerate case which produces 0 or 1 record
+        - subquery's optimization didn't finish because of @@max_join_size
+          limits
+        - ... maybe some other cases like this 
+      */
+      str->append(STRING_WITH_LEN(" <materialize> ("));
+      jtbm_subselect->engine->print(str, query_type);
+      str->append(')');
+    }
+    else
+    {
+      str->append(STRING_WITH_LEN(" <materialize> ("));
+      subselect_hash_sj_engine *hash_engine;
+      hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine;
+      hash_engine->materialize_engine->print(str, query_type);
+      str->append(')');
+    }
+  }
   else
   {
     const char *cmp_name;                         // Name to compare with alias
@@ -17702,9 +21721,7 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
 
 void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
 {
-  /* QQ: thd may not be set for sub queries, but this should be fixed */
-  if (!thd)
-    thd= current_thd;
+  DBUG_ASSERT(thd);
 
   str->append(STRING_WITH_LEN("select "));
 
@@ -17853,6 +21870,8 @@ bool JOIN::change_result(select_result *res)
 {
   DBUG_ENTER("JOIN::change_result");
   result= res;
+  if (tmp_join)
+    tmp_join->result= res;
   if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
                      result->prepare2()))
   {
@@ -17861,6 +21880,225 @@ bool JOIN::change_result(select_result *res)
   DBUG_RETURN(FALSE);
 }
 
+
+/**
+  @brief
+  Set allowed types of join caches that can be used for join operations
+
+  @details
+  The function sets a bitmap of allowed join buffers types in the field
+  allowed_join_cache_types of this JOIN structure:
+    bit 1 is set if tjoin buffers are allowed to be incremental
+    bit 2 is set if the join buffers are allowed to be hashed
+    but 3 is set if the join buffers are allowed to be used for BKA
+  join algorithms.
+  The allowed types are read from system variables.
+  Besides the function sets maximum allowed join cache level that is
+  also read from a system variable.
+*/
+
+void JOIN::set_allowed_join_cache_types()
+{
+  allowed_join_cache_types= 0;
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
+    allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
+    allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
+  if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
+    allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
+  allowed_semijoin_with_cache=
+    optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
+  allowed_outer_join_with_cache=
+    optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
+  max_allowed_join_cache_level= thd->variables.join_cache_level;
+}
+
+
+/**
+  Save a query execution plan so that the caller can revert to it if needed,
+  and reset the current query plan so that it can be reoptimized.
+
+  @param save_to  The object into which the current query plan state is saved
+*/
+
+void JOIN::save_query_plan(Join_plan_state *save_to)
+{
+  if (keyuse.elements)
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    /* Swap the current and the backup keyuse internal arrays. */
+    tmp_keyuse= keyuse;
+    keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */
+    save_to->keyuse= tmp_keyuse;
+
+    for (uint i= 0; i < table_count; i++)
+    {
+      save_to->join_tab_keyuse[i]= join_tab[i].keyuse;
+      join_tab[i].keyuse= NULL;
+      save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys;
+      join_tab[i].checked_keys.clear_all();
+    }
+  }
+  memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
+         sizeof(POSITION) * (table_count + 1));
+  memset(best_positions, 0, sizeof(POSITION) * (table_count + 1));
+  
+  /* Save SJM nests */
+  List_iterator<TABLE_LIST> it(select_lex->sj_nests);
+  TABLE_LIST *tlist;
+  SJ_MATERIALIZATION_INFO **p_info= save_to->sj_mat_info;
+  while ((tlist= it++))
+  {
+    *(p_info++)= tlist->sj_mat_info;
+  }
+}
+
+
+/**
+  Reset a query execution plan so that it can be reoptimized in-place.
+*/
+void JOIN::reset_query_plan()
+{
+  for (uint i= 0; i < table_count; i++)
+  {
+    join_tab[i].keyuse= NULL;
+    join_tab[i].checked_keys.clear_all();
+  }
+}
+
+
+/**
+  Restore a query execution plan previously saved by the caller.
+
+  @param The object from which the current query plan state is restored.
+*/
+
+void JOIN::restore_query_plan(Join_plan_state *restore_from)
+{
+  if (restore_from->keyuse.elements)
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    tmp_keyuse= keyuse;
+    keyuse= restore_from->keyuse;
+    restore_from->keyuse= tmp_keyuse;
+
+    for (uint i= 0; i < table_count; i++)
+    {
+      join_tab[i].keyuse= restore_from->join_tab_keyuse[i];
+      join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i];
+    }
+
+  }
+  memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
+         sizeof(POSITION) * (table_count + 1));
+  /* Restore SJM nests */
+  List_iterator<TABLE_LIST> it(select_lex->sj_nests);
+  TABLE_LIST *tlist;
+  SJ_MATERIALIZATION_INFO **p_info= restore_from->sj_mat_info;
+  while ((tlist= it++))
+  {
+    tlist->sj_mat_info= *(p_info++);
+  }
+}
+
+
+/**
+  Reoptimize a query plan taking into account an additional conjunct to the
+  WHERE clause.
+
+  @param added_where  An extra conjunct to the WHERE clause to reoptimize with
+  @param join_tables  The set of tables to reoptimize
+  @param save_to      If != NULL, save here the state of the current query plan,
+                      otherwise reuse the existing query plan structures.
+
+  @notes
+  Given a query plan that was already optimized taking into account some WHERE
+  clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
+  The reoptimization works as follows:
+
+  1. Call update_ref_and_keys *only* for the new conditions 'added_where'
+     that are about to be injected into the query.
+  2. Expand if necessary the original KEYUSE array JOIN::keyuse to
+     accommodate the new REF accesses computed for the 'added_where' condition.
+  3. Add the new KEYUSEs into JOIN::keyuse.
+  4. Re-sort and re-filter the JOIN::keyuse array with the newly added
+     KEYUSE elements. 
+ 
+  @retval REOPT_NEW_PLAN  there is a new plan.
+  @retval REOPT_OLD_PLAN  no new improved plan was produced, use the old one.
+  @retval REOPT_ERROR     an irrecovarable error occured during reoptimization.
+*/
+
+JOIN::enum_reopt_result
+JOIN::reoptimize(Item *added_where, table_map join_tables,
+                 Join_plan_state *save_to)
+{
+  DYNAMIC_ARRAY added_keyuse;
+  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
+  uint org_keyuse_elements;
+
+  /* Re-run the REF optimizer to take into account the new conditions. */
+  if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where,
+                          ~outer_join, select_lex, &sargables))
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_ERROR;
+  }
+
+  if (!added_keyuse.elements)
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_OLD_PLAN;
+  }
+
+  if (save_to)
+    save_query_plan(save_to);
+  else
+    reset_query_plan();
+
+  if (!keyuse.buffer &&
+      my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
+  {
+    delete_dynamic(&added_keyuse);
+    return REOPT_ERROR;
+  }
+
+  org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements;
+  allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements);
+
+  /* If needed, add the access methods from the original query plan. */
+  if (save_to)
+  {
+    DBUG_ASSERT(!keyuse.elements);
+    memcpy(keyuse.buffer,
+           save_to->keyuse.buffer,
+           (size_t) save_to->keyuse.elements * keyuse.size_of_element);
+    keyuse.elements= save_to->keyuse.elements;
+  }
+
+  /* Add the new access methods to the keyuse array. */
+  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
+         added_keyuse.buffer,
+         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
+  keyuse.elements+= added_keyuse.elements;
+  /* added_keyuse contents is copied, and it is no longer needed. */
+  delete_dynamic(&added_keyuse);
+
+  if (sort_and_filter_keyuse(thd, &keyuse, true))
+    return REOPT_ERROR;
+  optimize_keyuse(this, &keyuse);
+
+  if (optimize_semijoin_nests(this, join_tables))
+    return REOPT_ERROR;
+
+  /* Re-run the join optimizer to compute a new query plan. */
+  if (choose_plan(this, join_tables))
+    return REOPT_ERROR;
+
+  return REOPT_NEW_PLAN;
+}
+
+
 /**
   @} (end of group Query_Optimizer)
 */
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 79c07e80b25..a5fa59a070a 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -15,6 +15,8 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 
+#ifndef SQL_SELECT_INCLUDED
+#define SQL_SELECT_INCLUDED
 /**
   @file
 
@@ -29,6 +31,21 @@
 #include "procedure.h"
 #include <myisam.h>
 
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+#include "../storage/maria/ha_maria.h"
+#define TMP_ENGINE_HTON maria_hton
+#else
+#define TMP_ENGINE_HTON myisam_hton
+#endif
+/* Values in optimize */
+#define KEY_OPTIMIZE_EXISTS		1
+#define KEY_OPTIMIZE_REF_OR_NULL	2
+#define KEY_OPTIMIZE_EQ	                4
+
+inline uint get_hash_join_key_no() { return MAX_KEY; }
+
+inline bool is_hash_join_key_no(uint key) { return key == MAX_KEY; }
+
 typedef struct keyuse_t {
   TABLE *table;
   Item	*val;				/**< or value if no field */
@@ -52,10 +69,21 @@ typedef struct keyuse_t {
     NULL  - Otherwise (the source equality can't be turned off)
   */
   bool *cond_guard;
+  /*
+     0..64    <=> This was created from semi-join IN-equality # sj_pred_no.
+     MAX_UINT  Otherwise
+  */
+  uint         sj_pred_no;
+
+  bool is_for_hash_join() { return is_hash_join_key_no(key); }
 } KEYUSE;
 
+#define NO_KEYPART ((uint)(-1))
+
 class store_key;
 
+const int NO_REF_PART= uint(-1);
+
 typedef struct st_table_ref
 {
   bool		key_err;
@@ -86,39 +114,33 @@ typedef struct st_table_ref
   */
   key_part_map  null_rejecting;
   table_map	depend_map;		  ///< Table depends on these tables.
+
   /* null byte position in the key_buf. Used for REF_OR_NULL optimization */
   uchar          *null_ref_key;
+  /* 
+    ref_or_null optimization: number of key part that alternates between
+    the lookup value or NULL (there's only one such part). 
+    If we're not using ref_or_null, the value is NO_REF_PART
+  */
+  uint           null_ref_part;
+
   /*
     The number of times the record associated with this key was used
     in the join.
   */
   ha_rows       use_count;
-} TABLE_REF;
-
 
+  /*
+    TRUE <=> disable the "cache" as doing lookup with the same key value may
+    produce different results (because of Index Condition Pushdown)
 
-#define CACHE_BLOB      1        /* blob field  */
-#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
-
-/**
-  CACHE_FIELD and JOIN_CACHE is used on full join to cache records in outer
-  table
-*/
-
-typedef struct st_cache_field {
-  uchar *str;
-  uint length, blob_length;
-  Field *field;
-  uint type;    /**< category of the of the copied field (CACHE_BLOB et al.) */
-} CACHE_FIELD;
-
+  */
+  bool          disable_cache;
 
-typedef struct st_join_cache {
-  uchar *buff,*pos,*end;
-  uint records,record_nr,ptr_record,fields,length,blobs;
-  CACHE_FIELD *field,**blob_ptr;
-  SQL_SELECT *select;
-} JOIN_CACHE;
+  bool tmp_table_index_lookup_init(THD *thd, KEY *tmp_key, Item_iterator &it,
+                                   bool value, uint skip= 0);
+  bool is_access_triggered();
+} TABLE_REF;
 
 
 /*
@@ -126,7 +148,8 @@ typedef struct st_join_cache {
 */
 enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF,
 		 JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL,
-		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE};
+		 JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE,
+                 JT_HASH, JT_HASH_RANGE, JT_HASH_NEXT, JT_HASH_INDEX_MERGE};
 
 class JOIN;
 
@@ -138,6 +161,17 @@ enum enum_nested_loop_state
 };
 
 
+/* Possible sj_strategy values */
+enum sj_strategy_enum
+{
+  SJ_OPT_NONE=0,
+  SJ_OPT_DUPS_WEEDOUT=1,
+  SJ_OPT_LOOSE_SCAN  =2,
+  SJ_OPT_FIRST_MATCH =3,
+  SJ_OPT_MATERIALIZE =4,
+  SJ_OPT_MATERIALIZE_SCAN=5
+};
+
 /* Values for JOIN_TAB::packed_info */
 #define TAB_INFO_HAVE_VALUE 1
 #define TAB_INFO_USING_INDEX 2
@@ -146,20 +180,52 @@ enum enum_nested_loop_state
 
 typedef enum_nested_loop_state
 (*Next_select_func)(JOIN *, struct st_join_table *, bool);
+
+/*
+  Function prototype for reading first record for a join tab
+
+  RETURN
+     0     - OK
+    -1     - Record not found
+    Other  - A fatal error
+*/
 typedef int (*Read_record_func)(struct st_join_table *tab);
+
 Next_select_func setup_end_select_func(JOIN *join);
+int rr_sequential(READ_RECORD *info);
+int rr_sequential_and_unpack(READ_RECORD *info);
 
 
+class JOIN_CACHE;
+class SJ_TMP_TABLE;
+class JOIN_TAB_RANGE;
+
 typedef struct st_join_table {
   st_join_table() {}                          /* Remove gcc warning */
   TABLE		*table;
   KEYUSE	*keyuse;			/**< pointer to first used key */
+  KEY           *hj_key;       /**< descriptor of the used best hash join key
+				    not supported by any index                 */
   SQL_SELECT	*select;
-  COND          *select_cond;
+  COND		*select_cond;
   COND          *on_precond;    /**< part of on condition to check before
 				     accessing the first inner table           */  
   QUICK_SELECT_I *quick;
-  Item	       **on_expr_ref;   /**< pointer to the associated on expression   */
+  /* 
+    The value of select_cond before we've attempted to do Index Condition
+    Pushdown. We may need to restore everything back if we first choose one
+    index but then reconsider (see test_if_skip_sort_order() for such
+    scenarios).
+    NULL means no index condition pushdown was performed.
+  */
+  Item          *pre_idx_push_select_cond;
+  /*
+    Pointer to the associated ON expression. on_expr_ref=!NULL except for
+    degenerate joins. 
+    *on_expr_ref!=NULL for tables that are first inner tables within an outer
+    join.
+  */
+  Item	       **on_expr_ref;
   COND_EQUAL    *cond_equal;    /**< multiple equalities for the on expression */
   st_join_table *first_inner;   /**< first inner table for including outerjoin */
   bool           found;         /**< true after all matches or null complement */
@@ -167,6 +233,21 @@ typedef struct st_join_table {
   st_join_table *last_inner;    /**< last table table for embedding outer join */
   st_join_table *first_upper;  /**< first inner table for embedding outer join */
   st_join_table *first_unmatched; /**< used for optimization purposes only     */
+
+  /*
+    For join tabs that are inside an SJM bush: root of the bush
+  */
+  st_join_table *bush_root_tab;
+
+  /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */
+  bool          last_leaf_in_bush;
+  
+  /*
+    ptr  - this is a bush, and ptr points to description of child join_tab
+           range
+    NULL - this join tab has no bush children
+  */
+  JOIN_TAB_RANGE *bush_children;
   
   /* Special content for EXPLAIN 'Extra' column or NULL if none */
   const char	*info;
@@ -204,12 +285,34 @@ typedef struct st_join_table {
     method (but not 'index' for some reason), i.e. this matches method which
     E(#records) is in found_records.
   */
-  ha_rows       read_time;
+  double        read_time;
+  
+  /* psergey-todo: make the below have type double, like POSITION::records_read? */
+  ha_rows       records_read;
   
+  /* Startup cost for execution */
+  double        startup_cost;
+    
+  double        partial_join_cardinality;
+
   table_map	dependent,key_dependent;
-  uint		use_quick,index;
+  /*
+     1 - use quick select
+     2 - use "Range checked for each record"
+  */
+  uint		use_quick;
+  /*
+    Index to use. Note: this is valid only for 'index' access, but not range or
+    ref access.
+  */
+  uint          index;
   uint		status;				///< Save status for cache
-  uint		used_fields,used_fieldlength,used_blobs;
+  uint		used_fields;
+  ulong         used_fieldlength;
+  ulong         max_used_fieldlength;
+  uint          used_blobs;
+  uint          used_null_fields;
+  uint          used_uneven_bit_fields;
   enum join_type type;
   bool		cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
   bool		sorted;
@@ -220,11 +323,94 @@ typedef struct st_join_table {
   */ 
   ha_rows       limit; 
   TABLE_REF	ref;
-  JOIN_CACHE	cache;
+  /* TRUE <=> condition pushdown supports other tables presence */
+  bool          icp_other_tables_ok;
+  /* 
+    TRUE <=> condition pushed to the index has to be factored out of
+    the condition pushed to the table
+  */
+  bool          idx_cond_fact_out;
+  bool          use_join_cache;
+  uint          used_join_cache_level;
+  ulong         join_buffer_size_limit;
+  JOIN_CACHE	*cache;
+  /*
+    Index condition for BKA access join
+  */
+  Item          *cache_idx_cond;
+  SQL_SELECT    *cache_select;
   JOIN		*join;
-  /** Bitmap of nested joins this table is part of */
+  /*
+    Embedding SJ-nest (may be not the direct parent), or NULL if none.
+    This variable holds the result of table pullout.
+  */
+  TABLE_LIST    *emb_sj_nest;
+
+  /* FirstMatch variables (final QEP) */
+  struct st_join_table *first_sj_inner_tab;
+  struct st_join_table *last_sj_inner_tab;
+
+  /* Variables for semi-join duplicate elimination */
+  SJ_TMP_TABLE  *flush_weedout_table;
+  SJ_TMP_TABLE  *check_weed_out_table;
+  /* for EXPLAIN only: */
+  SJ_TMP_TABLE  *first_weedout_table;
+  
+  /*
+    If set, means we should stop join enumeration after we've got the first
+    match and return to the specified join tab. May point to
+    join->join_tab[-1] which means stop join execution after the first
+    match.
+  */
+  struct st_join_table  *do_firstmatch;
+ 
+  /* 
+     ptr  - We're doing a LooseScan, this join tab is the first (i.e. 
+            "driving") join tab), and ptr points to the last join tab
+            handled by the strategy. loosescan_match_tab->found_match
+            should be checked to see if the current value group had a match.
+     NULL - Not doing a loose scan on this join tab.
+  */
+  struct st_join_table *loosescan_match_tab;
+  
+  /* TRUE <=> we are inside LooseScan range */
+  bool inside_loosescan_range;
+
+  /* Buffer to save index tuple to be able to skip duplicates */
+  uchar *loosescan_buf;
+  
+  /* 
+    Index used by LooseScan (we store it here separately because ref access
+    stores it in tab->ref.key, while range scan stores it in tab->index, etc)
+  */
+  uint loosescan_key;
+
+  /* Length of key tuple (depends on #keyparts used) to store in the above */
+  uint loosescan_key_len;
+
+  /* Used by LooseScan. TRUE<=> there has been a matching record combination */
+  bool found_match;
+  
+  /*
+    Used by DuplicateElimination. tab->table->ref must have the rowid
+    whenever we have a current record.
+  */
+  int  keep_current_rowid;
+
+  /* NestedOuterJoins: Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
 
+  /*
+    Semi-join strategy to be used for this join table. This is a copy of
+    POSITION::sj_strategy field. This field is set up by the
+    fix_semijoin_strategies_for_picked_join_order.
+  */
+  enum sj_strategy_enum sj_strategy;
+
+  uint n_sj_tables;
+
+  bool preread_init_done;
+
   void cleanup();
   inline bool is_using_loose_index_scan()
   {
@@ -232,12 +418,325 @@ typedef struct st_join_table {
             (select->quick->get_type() ==
              QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
   }
+  bool is_inner_table_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab != NULL;
+  }
+  bool is_inner_table_of_semijoin()
+  {
+    return emb_sj_nest != NULL;
+  }
+  bool is_inner_table_of_outer_join()
+  {
+    return first_inner != NULL;
+  }
+  bool is_single_inner_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab == this && last_sj_inner_tab == this;            
+  }
+  bool is_single_inner_of_outer_join()
+  {
+    return first_inner == this && first_inner->last_inner == this;
+  }
+  bool is_first_inner_for_outer_join()
+  {
+    return first_inner && first_inner == this;
+  }
+  bool use_match_flag()
+  {
+    return is_first_inner_for_outer_join() || first_sj_inner_tab == this ; 
+  }
+  bool check_only_first_match()
+  {
+    return is_inner_table_of_semi_join_with_first_match() ||
+           (is_inner_table_of_outer_join() &&
+            table->reginfo.not_exists_optimize);
+  }
+  bool is_last_inner_table()
+  {
+    return (first_inner && first_inner->last_inner == this) ||
+           last_sj_inner_tab == this;
+  }
+  /*
+    Check whether the table belongs to a nest of inner tables of an
+    outer join or to a nest of inner tables of a semi-join
+  */
+  bool is_nested_inner()
+  {
+    if (first_inner && 
+        (first_inner != first_inner->last_inner || first_inner->first_upper))
+      return TRUE;
+    if (first_sj_inner_tab && first_sj_inner_tab != last_sj_inner_tab)
+      return TRUE;
+    return FALSE;
+  }
+  struct st_join_table *get_first_inner_table()
+  {
+    if (first_inner)
+      return first_inner;
+    return first_sj_inner_tab; 
+  }
+  void set_select_cond(COND *to, uint line)
+  {
+    DBUG_PRINT("info", ("select_cond changes %p -> %p at line %u tab %p",
+                        select_cond, to, line, this));
+    select_cond= to;
+  }
+  COND *set_cond(COND *new_cond)
+  {
+    COND *tmp_select_cond= select_cond;
+    set_select_cond(new_cond, __LINE__);
+    if (select)
+      select->cond= new_cond;
+    return tmp_select_cond;
+  }
+  void calc_used_field_length(bool max_fl);
+  ulong get_used_fieldlength()
+  {
+    if (!used_fieldlength)
+      calc_used_field_length(FALSE);
+    return used_fieldlength;
+  }
+  ulong get_max_used_fieldlength()
+  {
+    if (!max_used_fieldlength)
+      calc_used_field_length(TRUE);
+    return max_used_fieldlength;
+  }
+  double get_partial_join_cardinality() { return partial_join_cardinality; }
+  bool hash_join_is_possible();
+  int make_scan_filter();
+  bool is_ref_for_hash_join() { return is_hash_join_key_no(ref.key); }
+  KEY *get_keyinfo_by_key_no(uint key) 
+  {
+    return (is_hash_join_key_no(key) ? hj_key : table->key_info+key);
+  }
+  double scan_time();
+  bool preread_init();
+
+  bool is_sjm_nest() { return test(bush_children); }
 } JOIN_TAB;
 
+
+#include "sql_join_cache.h"
+
 enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
                                         end_of_records);
 enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
                                   end_of_records);
+enum_nested_loop_state
+end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+	       bool end_of_records);
+enum_nested_loop_state
+end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+		bool end_of_records);
+
+
+struct st_position;
+
+class Semi_join_strategy_picker
+{
+public:
+  /* Called when starting to build a new join prefix */
+  virtual void set_empty() = 0;
+
+  /* 
+    Update internal state after another table has been added to the join
+    prefix
+  */
+  virtual void set_from_prev(struct st_position *prev) = 0;
+  
+  virtual bool check_qep(JOIN *join,
+                         uint idx,
+                         table_map remaining_tables, 
+                         const JOIN_TAB *new_join_tab,
+                         double *record_count,
+                         double *read_time,
+                         table_map *handled_fanout,
+                         sj_strategy_enum *strategy,
+                         struct st_position *loose_scan_pos) = 0;
+
+  virtual void mark_used() = 0;
+
+  virtual ~Semi_join_strategy_picker() {} 
+};
+
+
+/*
+  Duplicate Weedout strategy optimization state
+*/
+
+class Duplicate_weedout_picker : public Semi_join_strategy_picker
+{
+  /* The first table that the strategy will need to handle */
+  uint  first_dupsweedout_table;
+
+  /*
+    Tables that we will need to have in the prefix to do the weedout step
+    (all inner and all outer that the involved semi-joins are correlated with)
+  */
+  table_map dupsweedout_tables;
+  
+  bool is_used;
+public:
+  void set_empty()
+  {
+    dupsweedout_tables= 0;
+    first_dupsweedout_table= MAX_TABLES;
+    is_used= FALSE;
+  }
+  void set_from_prev(struct st_position *prev);
+  
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *stratey,
+                 struct st_position *loose_scan_pos);
+
+  void mark_used() { is_used= TRUE; }
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Firstmatch_picker : public Semi_join_strategy_picker
+{
+  /*
+    Index of the first inner table that we intend to handle with this
+    strategy
+  */
+  uint first_firstmatch_table;
+  /*
+    Tables that were not in the join prefix when we've started considering 
+    FirstMatch strategy.
+  */
+  table_map first_firstmatch_rtbl;
+  /* 
+    Tables that need to be in the prefix before we can calculate the cost
+    of using FirstMatch strategy.
+   */
+  table_map firstmatch_need_tables;
+
+  bool is_used;
+
+  bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
+  void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
+public:
+  void set_empty()
+  {
+    invalidate_firstmatch_prefix();
+    is_used= FALSE;
+  }
+
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+
+  void mark_used() { is_used= TRUE; }
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class LooseScan_picker : public Semi_join_strategy_picker
+{
+  /* The first (i.e. driving) table we're doing loose scan for */
+  uint        first_loosescan_table;
+  /* 
+     Tables that need to be in the prefix before we can calculate the cost
+     of using LooseScan strategy.
+  */
+  table_map   loosescan_need_tables;
+
+  /*
+    keyno  -  Planning to do LooseScan on this key. If keyuse is NULL then 
+              this is a full index scan, otherwise this is a ref+loosescan
+              scan (and keyno matches the KEUSE's)
+    MAX_KEY - Not doing a LooseScan
+  */
+  uint loosescan_key;  // final (one for strategy instance )
+  uint loosescan_parts; /* Number of keyparts to be kept distinct */
+  
+  bool is_used;
+public:
+  void set_empty()
+  {
+    first_loosescan_table= MAX_TABLES; 
+    is_used= FALSE;
+  }
+
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+  void mark_used() { is_used= TRUE; }
+
+  friend class Loose_scan_opt;
+  friend void best_access_path(JOIN      *join,
+                               JOIN_TAB  *s,
+                               table_map remaining_tables,
+                               uint      idx,
+                               bool      disable_jbuf,
+                               double    record_count,
+                               struct st_position *pos,
+                               struct st_position *loose_scan_pos);
+  friend bool get_best_combination(JOIN *join);
+  friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
+                                             uint no_jbuf_after);
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
+
+class Sj_materialization_picker : public Semi_join_strategy_picker
+{
+  bool is_used;
+
+  /* The last inner table (valid once we're after it) */
+  uint      sjm_scan_last_inner;
+  /*
+    Tables that we need to have in the prefix to calculate the correct cost.
+    Basically, we need all inner tables and outer tables mentioned in the
+    semi-join's ON expression so we can correctly account for fanout.
+  */
+  table_map sjm_scan_need_tables;
+
+public:
+  void set_empty()
+  {
+    sjm_scan_need_tables= 0;
+    LINT_INIT(sjm_scan_last_inner);
+    is_used= FALSE;
+  }
+  void set_from_prev(struct st_position *prev);
+  bool check_qep(JOIN *join,
+                 uint idx,
+                 table_map remaining_tables, 
+                 const JOIN_TAB *new_join_tab,
+                 double *record_count,
+                 double *read_time,
+                 table_map *handled_fanout,
+                 sj_strategy_enum *strategy,
+                 struct st_position *loose_scan_pos);
+  void mark_used() { is_used= TRUE; }
+
+  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+};
+
 
 /**
   Information about a position of table within a join order. Used in join
@@ -245,6 +744,9 @@ enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
 */
 typedef struct st_position
 {
+  /* The table that's put into join order */
+  JOIN_TAB *table;
+
   /*
     The "fanout": number of output rows that will be produced (after
     pushed down selection condition is applied) per each row combination of
@@ -258,7 +760,10 @@ typedef struct st_position
     number the access method will be invoked.
   */
   double read_time;
-  JOIN_TAB *table;
+
+  /* Cumulative cost and record count for the join prefix */
+  COST_VECT prefix_cost;
+  double    prefix_record_count;
 
   /*
     NULL  -  'index' or 'range' or 'index_merge' or 'ALL' access is used.
@@ -268,8 +773,43 @@ typedef struct st_position
 
   /* If ref-based access is used: bitmap of tables this table depends on  */
   table_map ref_depend_map;
-} POSITION;
+ 
+  /*
+    TRUE <=> join buffering will be used. At the moment this is based on 
+    *very* imprecise guesses made in best_access_path(). 
+  */
+  bool use_join_buffer;
+ 
+  /*
+    Current optimization state: Semi-join strategy to be used for this
+    and preceding join tables.
+    
+    Join optimizer sets this for the *last* join_tab in the
+    duplicate-generating range. That is, in order to interpret this field, 
+    one needs to traverse join->[best_]positions array from right to left.
+    When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
+    field (depending on the strategy) tells how many preceding positions 
+    this applies to. The values of covered_preceding_positions->sj_strategy
+    must be ignored.
+  */
+  enum sj_strategy_enum sj_strategy;
+  
+  /*
+    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
+    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
+    are covered by the specified semi-join strategy
+  */
+  uint n_sj_tables;
+
+  table_map prefix_dups_producing_tables;
 
+  table_map inner_tables_handled_with_other_sjs;
+   
+  Duplicate_weedout_picker  dups_weedout_picker;
+  Firstmatch_picker         firstmatch_picker;
+  LooseScan_picker          loosescan_picker;
+  Sj_materialization_picker sjmat_picker;
+} POSITION;
 
 typedef struct st_rollup
 {
@@ -281,17 +821,103 @@ typedef struct st_rollup
 } ROLLUP;
 
 
+class JOIN_TAB_RANGE: public Sql_alloc
+{
+public:
+  JOIN_TAB *start;
+  JOIN_TAB *end;
+};
+
+
 class JOIN :public Sql_alloc
 {
+private:
   JOIN(const JOIN &rhs);                        /**< not implemented */
   JOIN& operator=(const JOIN &rhs);             /**< not implemented */
+
+protected:
+
+  /**
+    The subset of the state of a JOIN that represents an optimized query
+    execution plan. Allows saving/restoring different JOIN plans for the same
+    query.
+  */
+  class Join_plan_state {
+  public:
+    DYNAMIC_ARRAY keyuse; /* Copy of the JOIN::keyuse array. */
+    POSITION best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
+    /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
+    KEYUSE *join_tab_keyuse[MAX_TABLES];
+    /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
+    key_map join_tab_checked_keys[MAX_TABLES];
+    SJ_MATERIALIZATION_INFO *sj_mat_info[MAX_TABLES];
+  public:
+    Join_plan_state()
+    {   
+      keyuse.elements= 0;
+      keyuse.buffer= NULL;
+    }
+    Join_plan_state(JOIN *join);
+    ~Join_plan_state()
+    {
+      delete_dynamic(&keyuse);
+    }
+  };
+
+  /* Results of reoptimizing a JOIN via JOIN::reoptimize(). */
+  enum enum_reopt_result {
+    REOPT_NEW_PLAN, /* there is a new reoptimized plan */
+    REOPT_OLD_PLAN, /* no new improved plan can be found, use the old one */
+    REOPT_ERROR,    /* an irrecovarable error occured during reoptimization */
+    REOPT_NONE      /* not yet reoptimized */
+  };
+
+  /* Support for plan reoptimization with rewritten conditions. */
+  enum_reopt_result reoptimize(Item *added_where, table_map join_tables,
+                               Join_plan_state *save_to);
+  void save_query_plan(Join_plan_state *save_to);
+  void reset_query_plan();
+  void restore_query_plan(Join_plan_state *restore_from);
+  /* Choose a subquery plan for a table-less subquery. */
+  bool choose_tableless_subquery_plan();
+
 public:
-  JOIN_TAB *join_tab,**best_ref;
+  JOIN_TAB *join_tab, **best_ref;
   JOIN_TAB **map2table;    ///< mapping between table indexes and JOIN_TABs
   JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution
-  TABLE    **table,**all_tables,*sort_by_table;
-  uint	   tables,const_tables;
+
+  List<JOIN_TAB_RANGE> join_tab_ranges;
+  
+  /*
+    Base tables participating in the join. After join optimization is done, the
+    tables are stored in the join order (but the only really important part is 
+    that const tables are first).
+  */
+  TABLE    **table;
+  /**
+    The table which has an index that allows to produce the requried ordering.
+    A special value of 0x1 means that the ordering will be produced by
+    passing 1st non-const table to filesort(). NULL means no such table exists.
+  */
+  TABLE    *sort_by_table;
+  /* 
+    Number of tables in the join. 
+    (In MySQL, it is named 'tables' and is also the number of elements in 
+     join->join_tab array. In MariaDB, the latter is not true, so we've renamed
+     the variable)
+  */
+  uint	   table_count;
+  uint     outer_tables;  /**< Number of tables that are not inside semijoin */
+  uint     const_tables;
+  /* 
+    Number of tables in the top join_tab array. Normally this matches
+    (join_tab_ranges.head()->end - join_tab_ranges.head()->start). 
+    
+    We keep it here so that it is saved/restored with JOIN::restore_tmp.
+  */
+  uint     top_join_tab_count;
   uint	   send_group_parts;
+  bool	   group;          /**< If query contains GROUP BY clause */
   /**
     Indicates that grouping will be performed on the result set during
     query execution. This field belongs to query execution.
@@ -299,7 +925,8 @@ public:
     @see make_group_fields, alloc_group_fields, JOIN::exec
   */
   bool     sort_and_group; 
-  bool     first_record,full_join,group, no_field_update;
+  bool     first_record,full_join, no_field_update;
+  bool     hash_join;
   bool	   do_send_rows;
   /**
     TRUE when we want to resume nested loop iterations when
@@ -316,9 +943,12 @@ public:
   /* Tables removed by table elimination. Set to 0 before the elimination. */
   table_map eliminated_tables;
   /*
-     Bitmap of all inner tables from outer joins
+     Bitmap of all inner tables from outer joins (set at start of
+     make_join_statistics)
   */
   table_map outer_join;
+  /* Bitmap of tables used in the select list items */
+  table_map select_list_used_tables;
   ha_rows  send_records,found_records,examined_rows,row_limit, select_limit;
   /**
     Used to fetch no more than given amount of rows per one
@@ -330,15 +960,61 @@ public:
       - on each fetch iteration we add num_rows to fetch to fetch_limit
   */
   ha_rows  fetch_limit;
-  POSITION positions[MAX_TABLES+1],best_positions[MAX_TABLES+1];
+  /* Finally picked QEP. This is result of join optimization */
+  POSITION best_positions[MAX_TABLES+1];
+
+/******* Join optimization state members start *******/
+  /*
+    pointer - we're doing optimization for a semi-join materialization nest.
+    NULL    - otherwise
+  */
+  TABLE_LIST *emb_sjm_nest;
+  
+  /* Current join optimization state */
+  POSITION positions[MAX_TABLES+1];
   
-  /* *
+  /*
     Bitmap of nested joins embedding the position at the end of the current 
     partial join (valid only during join optimizer run).
   */
   nested_join_map cur_embedding_map;
+  
+  /*
+    Bitmap of inner tables of semi-join nests that have a proper subset of
+    their tables in the current join prefix. That is, of those semi-join
+    nests that have their tables both in and outside of the join prefix.
+  */
+  table_map cur_sj_inner_tables;
+  
+  /*
+    Bitmap of semi-join inner tables that are in the join prefix and for
+    which there's no provision for how to eliminate semi-join duplicates
+    they produce.
+  */
+  table_map cur_dups_producing_tables;
+  
+  /* We also maintain a stack of join optimization states in * join->positions[] */
+/******* Join optimization state members end *******/
 
+  /*
+    Tables within complex firstmatch ranges (i.e. those where inner tables are
+    interleaved with outer tables). Join buffering cannot be used for these.
+  */
+  table_map complex_firstmatch_tables;
+
+  /*
+    The cost of best complete join plan found so far during optimization,
+    after optimization phase - cost of picked join order (not taking into
+    account the changes made by test_if_skip_sort_order()).
+  */
   double   best_read;
+  /*
+    Estimated result rows (fanout) of the join operation. If this is a subquery
+    that is reexecuted multiple times, this value includes the estiamted # of
+    reexecutions. This value is equal to the multiplication of all
+    join->positions[i].records_read of a JOIN.
+  */
+  double   record_count;
   List<Item> *fields;
   List<Cached_item> group_fields, group_fields_cache;
   TABLE    *tmp_table;
@@ -353,6 +1029,15 @@ public:
   Item      *tmp_having; ///< To store having when processed temporary table
   Item      *having_history; ///< Store having for explain
   ulonglong  select_options;
+  /* 
+    Bitmap of allowed types of the join caches that
+    can be used for join operations
+  */
+  uint allowed_join_cache_types;
+  bool allowed_semijoin_with_cache;
+  bool allowed_outer_join_with_cache;
+  /* Maximum level of the join caches that can be used for join operations */ 
+  uint max_allowed_join_cache_level;
   select_result *result;
   TMP_TABLE_PARAM tmp_table_param;
   MYSQL_LOCK *lock;
@@ -438,9 +1123,25 @@ public:
   ORDER *order, *group_list, *proc_param; //hold parameters of mysql_select
   COND *conds;                            // ---"---
   Item *conds_history;                    // store WHERE for explain
+  COND *outer_ref_cond;       ///<part of conds containing only outer references
+  COND *pseudo_bits_cond;     // part of conds containing special bita
   TABLE_LIST *tables_list;           ///<hold 'tables' parameter of mysql_select
   List<TABLE_LIST> *join_list;       ///< list of joined tables in reverse order
   COND_EQUAL *cond_equal;
+  COND_EQUAL *having_equal;
+  /*
+    Constant codition computed during optimization, but evaluated during
+    join execution. Typically expensive conditions that should not be
+    evaluated at optimization time.
+  */
+  Item *exec_const_cond;
+  /*
+    Constant ORDER and/or GROUP expressions that contain subqueries. Such
+    expressions need to evaluated to verify that the subquery indeed
+    returns a single row. The evaluation of such expressions is delayed
+    until query execution.
+  */
+  List<Item> exec_const_order_group_cond;
   SQL_SELECT *select;                ///<created in optimisation phase
   JOIN_TAB *return_tab;              ///<used only for outer joins
   Item **ref_pointer_array; ///<used pointer reference for this select
@@ -451,6 +1152,19 @@ public:
   
   bool union_part; ///< this subselect is part of union 
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
+  bool initialized; ///< flag to avoid double init_execution calls
+
+  /*
+    Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
+    subquery transformation of a JOIN object.
+  */
+  Item *in_to_exists_where;
+  Item *in_to_exists_having;
+  
+  /* Temporary tables used to weed-out semi-join duplicates */
+  List<TABLE> sj_tmp_tables;
+  /* SJM nests that are executed with SJ-Materialization strategy */
+  List<SJ_MATERIALIZATION_INFO> sjm_info_list;
 
   /* 
     storage for caching buffers allocated during query execution. 
@@ -479,7 +1193,8 @@ public:
   {
     join_tab= join_tab_save= 0;
     table= 0;
-    tables= 0;
+    table_count= 0;
+    top_join_tab_count= 0;
     const_tables= 0;
     eliminated_tables= 0;
     join_list= 0;
@@ -520,7 +1235,10 @@ public:
     ref_pointer_array_size= 0;
     zero_result_cause= 0;
     optimized= 0;
+    initialized= 0;
     cond_equal= 0;
+    having_equal= 0;
+    exec_const_cond= 0;
     group_optimized_away= 0;
     no_rows_in_result_called= 0;
 
@@ -533,18 +1251,25 @@ public:
     rollup.state= ROLLUP::STATE_NONE;
 
     no_const_tables= FALSE;
+    outer_ref_cond= pseudo_bits_cond= NULL;
+    in_to_exists_where= NULL;
+    in_to_exists_having= NULL;
   }
 
   int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
 	      COND *conds, uint og_num, ORDER *order, ORDER *group,
 	      Item *having, ORDER *proc_param, SELECT_LEX *select,
 	      SELECT_LEX_UNIT *unit);
+  bool prepare_stage2();
   int optimize();
   int reinit();
+  int init_execution();
   void exec();
   int destroy();
   void restore_tmp();
   bool alloc_func_list();
+  bool flatten_subqueries();
+  bool optimize_unflattened_subqueries();
   bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
 			  bool before_group_by, bool recompute= FALSE);
 
@@ -566,7 +1291,6 @@ public:
 			  Item_sum ***func);
   int rollup_send_data(uint idx);
   int rollup_write_data(uint idx, TABLE *table);
-  void remove_subq_pushed_predicates(Item **where);
   /**
     Release memory and, if possible, the open tables held by this execution
     plan (and nested plans). It's used to release some tables before
@@ -581,8 +1305,8 @@ public:
   bool init_save_join_tab();
   bool send_row_on_empty_set()
   {
-    return (do_send_rows && tmp_table_param.sum_func_count != 0 &&
-	    !group_list && having_value != Item::COND_FALSE);
+    return (do_send_rows && implicit_grouping && !group_optimized_away &&
+            having_value != Item::COND_FALSE);
   }
   bool change_result(select_result *result);
   bool is_top_level_join() const
@@ -592,7 +1316,63 @@ public:
   }
   inline table_map all_tables_map()
   {
-    return (table_map(1) << tables) - 1;
+    return (table_map(1) << table_count) - 1;
+  }
+  void drop_unused_derived_keys();
+  inline void eval_select_list_used_tables();
+  /* 
+    Return the table for which an index scan can be used to satisfy 
+    the sort order needed by the ORDER BY/(implicit) GROUP BY clause 
+  */
+  JOIN_TAB *get_sort_by_join_tab()
+  {
+    return (need_tmp || !sort_by_table || skip_sort_order ||
+            ((group || tmp_table_param.sum_func_count) && !group_list)) ?
+              NULL : join_tab+const_tables;
+  }
+  bool setup_subquery_caches();
+  bool shrink_join_buffers(JOIN_TAB *jt, 
+                           ulonglong curr_space,
+                           ulonglong needed_space);
+  void set_allowed_join_cache_types();
+  bool is_allowed_hash_join_access()
+  { 
+    return test(allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
+           max_allowed_join_cache_level > JOIN_CACHE_HASHED_BIT;
+  }
+  /*
+    Check if we need to create a temporary table.
+    This has to be done if all tables are not already read (const tables)
+    and one of the following conditions holds:
+    - We are using DISTINCT (simple distinct's are already optimized away)
+    - We are using an ORDER BY or GROUP BY on fields not in the first table
+    - We are using different ORDER BY and GROUP BY orders
+    - The user wants us to buffer the result.
+    When the WITH ROLLUP modifier is present, we cannot skip temporary table
+    creation for the DISTINCT clause just because there are only const tables.
+  */
+  bool test_if_need_tmp_table()
+  {
+    return ((const_tables != table_count &&
+	    ((select_distinct || !simple_order || !simple_group) ||
+	     (group_list && order) ||
+	     test(select_options & OPTION_BUFFER_RESULT))) ||
+            (rollup.state != ROLLUP::STATE_NONE && select_distinct));
+  }
+  bool choose_subquery_plan(table_map join_tables);
+  void get_partial_cost_and_fanout(int end_tab_idx,
+                                   table_map filter_map,
+                                   double *read_time_arg, 
+                                   double *record_count_arg);
+  void get_prefix_cost_and_fanout(uint n_tables, 
+                                  double *read_time_arg,
+                                  double *record_count_arg);
+  /* defined in opt_subselect.cc */
+  bool transform_max_min_subquery();
+  /* True if this JOIN is a subquery under an IN predicate. */
+  bool is_in_subquery()
+  {
+    return (unit->item && unit->item->is_in_predicate());
   }
 private:
   /**
@@ -604,6 +1384,15 @@ private:
   void cleanup_item_list(List<Item> &items) const;
 };
 
+enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
+enum enum_with_const_tables { WITH_CONST_TABLES, WITHOUT_CONST_TABLES};
+
+JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls);
+JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, 
+                          enum enum_with_bush_roots include_bush_roots);
+
+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
 
 typedef struct st_select_check {
   uint const_ref,reg_ref;
@@ -614,11 +1403,6 @@ void TEST_join(JOIN *join);
 
 /* Extern functions in sql_select.cc */
 bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag);
-TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
-			ORDER *group, bool distinct, bool save_sum_fields,
-			ulonglong select_options, ha_rows rows_limit,
-			char* alias);
-void free_tmp_table(THD *thd, TABLE *entry);
 void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, 
                        List<Item> &fields, bool reset_with_sum_func);
 bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
@@ -637,7 +1421,7 @@ Field* create_tmp_field_from_field(THD *thd, Field* org_field,
 /* functions from opt_sum.cc */
 bool simple_pred(Item_func *func_item, Item **args, bool *inv_order);
 int opt_sum_query(THD* thd,
-                  TABLE_LIST *tables, List<Item> &all_fields, COND *conds);
+                  List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds);
 
 /* from sql_delete.cc, used by opt_range.cc */
 extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b);
@@ -649,6 +1433,7 @@ class store_key :public Sql_alloc
 public:
   bool null_key; /* TRUE <=> the value of the key has a null part */
   enum store_key_result { STORE_KEY_OK, STORE_KEY_FATAL, STORE_KEY_CONV };
+  enum Type { FIELD_STORE_KEY, ITEM_STORE_KEY, CONST_ITEM_STORE_KEY };
   store_key(THD *thd, Field *field_arg, uchar *ptr, uchar *null, uint length)
     :null_key(0), null_ptr(null), err(0)
   {
@@ -669,6 +1454,7 @@ public:
                                         ptr, null, 1);
   }
   virtual ~store_key() {}			/** Not actually needed */
+  virtual enum Type type() const=0;
   virtual const char *name() const=0;
   virtual bool store_key_is_const() { return false; }
 
@@ -721,15 +1507,32 @@ class store_key_field: public store_key
     {
       copy_field.set(to_field,from_field,0);
     }
-  }
+  }  
+
+  enum Type type() const { return FIELD_STORE_KEY; }
   const char *name() const { return field_name; }
 
+  void change_source_field(Item_field *fld_item)
+  {
+    copy_field.set(to_field, fld_item->field, 0);
+    field_name= fld_item->full_name();
+  }
+
  protected: 
   enum store_key_result copy_inner()
   {
     TABLE *table= copy_field.to_field->table;
     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
                                                      table->write_set);
+
+    /* 
+      It looks like the next statement is needed only for a simplified
+      hash function over key values used now in BNLH join.
+      When the implementation of this function will be replaced for a proper
+      full version this statement probably should be removed.
+    */  
+    bzero(copy_field.to_ptr,copy_field.to_length);
+
     copy_field.do_copy(&copy_field);
     dbug_tmp_restore_column_map(table->write_set, old_map);
     null_key= to_field->is_null();
@@ -742,13 +1545,20 @@ class store_key_item :public store_key
 {
  protected:
   Item *item;
+  /*
+    Flag that forces usage of save_val() method which save value of the
+    item instead of save_in_field() method which saves result.
+  */
+  bool use_value;
 public:
   store_key_item(THD *thd, Field *to_field_arg, uchar *ptr,
-                 uchar *null_ptr_arg, uint length, Item *item_arg)
+                 uchar *null_ptr_arg, uint length, Item *item_arg, bool val)
     :store_key(thd, to_field_arg, ptr,
 	       null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-	       &err : (uchar*) 0, length), item(item_arg)
+	       &err : (uchar*) 0, length), item(item_arg), use_value(val)
   {}
+
+  enum Type type() const { return ITEM_STORE_KEY; }
   const char *name() const { return "func"; }
 
  protected:  
@@ -757,7 +1567,20 @@ public:
     TABLE *table= to_field->table;
     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
                                                      table->write_set);
-    int res= item->save_in_field(to_field, 1);
+    int res= FALSE;
+
+    /* 
+      It looks like the next statement is needed only for a simplified
+      hash function over key values used now in BNLH join.
+      When the implementation of this function will be replaced for a proper
+      full version this statement probably should be removed.
+    */  
+    to_field->reset();
+
+    if (use_value)
+      item->save_val(to_field);
+    else
+      res= item->save_in_field(to_field, 1);
     /*
      Item::save_in_field() may call Item::val_xxx(). And if this is a subquery
      we need to check for errors executing it and react accordingly
@@ -781,9 +1604,11 @@ public:
 		       Item *item_arg)
     :store_key_item(thd, to_field_arg,ptr,
 		    null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-		    &err : (uchar*) 0, length, item_arg), inited(0)
+		    &err : (uchar*) 0, length, item_arg, FALSE), inited(0)
   {
   }
+
+  enum Type type() const { return CONST_ITEM_STORE_KEY; }
   const char *name() const { return "const"; }
   bool store_key_is_const() { return true; }
 
@@ -794,6 +1619,9 @@ protected:
     if (!inited)
     {
       inited=1;
+      TABLE *table= to_field->table;
+      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
+                                                       table->write_set);
       if ((res= item->save_in_field(to_field, 1)))
       {       
         if (!err)
@@ -805,6 +1633,7 @@ protected:
         */
       if (!err && to_field->table->in_use->is_error())
         err= 1; /* STORE_KEY_FATAL */
+      dbug_tmp_restore_column_map(table->write_set, old_map);
     }
     null_key= to_field->is_null() || item->null_value;
     return (err > 2 ? STORE_KEY_FATAL : (store_key_result) err);
@@ -816,12 +1645,60 @@ bool error_if_full_join(JOIN *join);
 int report_error(TABLE *table, int error);
 int safe_index_read(JOIN_TAB *tab);
 COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value);
+int test_if_item_cache_changed(List<Cached_item> &list);
+int join_init_read_record(JOIN_TAB *tab);
+int join_read_record_no_init(JOIN_TAB *tab);
 void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
+inline Item * and_items(Item* cond, Item *item)
+{
+  return (cond? (new Item_cond_and(cond, item)) : item);
+}
+bool choose_plan(JOIN *join, table_map join_tables);
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *outer_rec_count, double *reopt_cost);
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+bool test_if_ref(COND *root_cond, 
+                 Item_field *left_item,Item *right_item);
 
 inline bool optimizer_flag(THD *thd, uint flag)
 { 
   return (thd->variables.optimizer_switch & flag);
 }
 
+/* Table elimination entry point function */
 void eliminate_tables(JOIN *join);
 
+/* Index Condition Pushdown entry point function */
+void push_index_cond(JOIN_TAB *tab, uint keyno);
+
+/****************************************************************************
+  Temporary table support for SQL Runtime
+ ***************************************************************************/
+
+#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
+#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
+#define RATIO_TO_PACK_ROWS	       2
+#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
+
+TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+			ORDER *group, bool distinct, bool save_sum_fields,
+			ulonglong select_options, ha_rows rows_limit,
+                        char* alias, bool do_not_open=FALSE);
+void free_tmp_table(THD *thd, TABLE *entry);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error, bool ignore_last_dupp_key_error);
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options);
+bool open_tmp_table(TABLE *table);
+void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
+double prev_record_reads(POSITION *positions, uint idx, table_map found_ref);
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
+
+#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 5bbc337c761..0e8b919db53 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -772,7 +772,8 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
   {
     Show_create_error_handler view_error_suppressor(thd, table_list);
     thd->push_internal_handler(&view_error_suppressor);
-    bool error= open_normal_and_derived_tables(thd, table_list, 0);
+    bool error= open_normal_and_derived_tables(thd, table_list, 0,
+                                               DT_PREPARE | DT_CREATE);
     thd->pop_internal_handler();
     if (error && (thd->killed || thd->main_da.is_error()))
       goto error;
@@ -949,7 +950,8 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
   DBUG_ENTER("mysqld_list_fields");
   DBUG_PRINT("enter",("table: %s",table_list->table_name));
 
-  if (open_normal_and_derived_tables(thd, table_list, 0))
+  if (open_normal_and_derived_tables(thd, table_list, 0,
+                                     DT_PREPARE | DT_CREATE))
     DBUG_VOID_RETURN;
   table= table_list->table;
 
@@ -1765,7 +1767,7 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
 static void append_algorithm(TABLE_LIST *table, String *buff)
 {
   buff->append(STRING_WITH_LEN("ALGORITHM="));
-  switch ((int8)table->algorithm) {
+  switch ((int16)table->algorithm) {
   case VIEW_ALGORITHM_UNDEFINED:
     buff->append(STRING_WITH_LEN("UNDEFINED "));
     break;
@@ -1889,6 +1891,7 @@ public:
   uint   command;
   const char *user,*host,*db,*proc_info,*state_info;
   char *query;
+  double progress;
 };
 
 #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
@@ -1917,6 +1920,11 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
   field->maybe_null=1;
   field_list.push_back(field=new Item_empty_string("Info",max_query_length));
   field->maybe_null=1;
+  if (!thd->variables.old_mode)
+  {
+    field_list.push_back(field= new Item_float("Progress", 0.0, 3, 7));
+    field->maybe_null= 0;
+  }
   if (protocol->send_fields(&field_list,
                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
     DBUG_VOID_RETURN;
@@ -1956,12 +1964,10 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
         pthread_mutex_lock(&tmp->LOCK_thd_data);
         if ((mysys_var= tmp->mysys_var))
           pthread_mutex_lock(&mysys_var->mutex);
-        thd_info->proc_info= (char*) (tmp->killed != THD::NOT_KILLED &&
-                                      tmp->killed != THD::KILL_BAD_DATA ?
+        thd_info->proc_info= (char*) (tmp->killed >= KILL_QUERY ?
                                       "Killed" : 0);
 #ifndef EMBEDDED_LIBRARY
-        thd_info->state_info= (char*) (tmp->locked ? "Locked" :
-                                       tmp->net.reading_or_writing ?
+        thd_info->state_info= (char*) (tmp->net.reading_or_writing ?
                                        (tmp->net.reading_or_writing == 2 ?
                                         "Writing to net" :
                                         thd_info->command == COM_SLEEP ? "" :
@@ -1979,6 +1985,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
 
         thd_info->start_time= tmp->start_time;
         thd_info->query=0;
+        thd_info->progress= 0.0;
+
         /* Lock THD mutex that protects its data when looking at it. */
         pthread_mutex_lock(&tmp->LOCK_thd_data);
         if (tmp->query())
@@ -1986,6 +1994,20 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
           uint length= min(max_query_length, tmp->query_length());
           thd_info->query= (char*) thd->strmake(tmp->query(),length);
         }
+
+        /*
+          Progress report. We need to do this under a lock to ensure that all
+          is from the same stage.
+        */
+        if (tmp->progress.max_counter)
+        {
+          uint max_stage= max(tmp->progress.max_stage, 1);
+          thd_info->progress= (((tmp->progress.stage / (double) max_stage) +
+                                ((tmp->progress.counter /
+                                  (double) tmp->progress.max_counter) /
+                                 (double) max_stage)) *
+                               100.0);
+        }
         pthread_mutex_unlock(&tmp->LOCK_thd_data);
         thread_infos.append(thd_info);
       }
@@ -1995,6 +2017,9 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
 
   thread_info *thd_info;
   time_t now= my_time(0);
+  char buff[20];                                // For progress
+  String store_buffer(buff, sizeof(buff), system_charset_info);
+
   while ((thd_info=thread_infos.get()))
   {
     protocol->prepare_for_resend();
@@ -2012,6 +2037,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
       protocol->store_null();
     protocol->store(thd_info->state_info, system_charset_info);
     protocol->store(thd_info->query, system_charset_info);
+    if (!thd->variables.old_mode)
+      protocol->store(thd_info->progress, 3, &store_buffer);
     if (protocol->write())
       break; /* purecov: inspected */
   }
@@ -2024,7 +2051,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
   TABLE *table= tables->table;
   CHARSET_INFO *cs= system_charset_info;
   char *user;
-  ulonglong unow= my_micro_time();
+  my_hrtime_t unow= my_hrtime();
   DBUG_ENTER("fill_process_list");
 
   user= thd->security_ctx->master_access & PROCESS_ACL ?
@@ -2042,6 +2069,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
       Security_context *tmp_sctx= tmp->security_ctx;
       struct st_my_thread_var *mysys_var;
       const char *val;
+      ulonglong max_counter;
 
       if ((!tmp->vio_ok() && !tmp->system_thread) ||
           (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user))))
@@ -2076,20 +2104,20 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
       if ((mysys_var= tmp->mysys_var))
         pthread_mutex_lock(&mysys_var->mutex);
       /* COMMAND */
-      if ((val= (char *) ((tmp->killed != THD::NOT_KILLED &&
-                           tmp->killed != THD::KILL_BAD_DATA ?
+      if ((val= (char *) ((tmp->killed >= KILL_QUERY ?
                            "Killed" : 0))))
         table->field[4]->store(val, strlen(val), cs);
       else
         table->field[4]->store(command_name[tmp->command].str,
                                command_name[tmp->command].length, cs);
       /* MYSQL_TIME */
-      const ulonglong utime= tmp->start_utime ? unow - tmp->start_utime : 0;
-      table->field[5]->store(utime / 1000000, TRUE);
+      const ulonglong utime= (tmp->start_time ?
+                              (unow.val - tmp->start_time * HRTIME_RESOLUTION -
+                               tmp->start_time_sec_part) : 0);
+      table->field[5]->store(utime / HRTIME_RESOLUTION, TRUE);
       /* STATE */
 #ifndef EMBEDDED_LIBRARY
-      val= (char*) (tmp->locked ? "Locked" :
-                    tmp->net.reading_or_writing ?
+      val= (char*) (tmp->net.reading_or_writing ?
                     (tmp->net.reading_or_writing == 2 ?
                      "Writing to net" :
                      tmp->command == COM_SLEEP ? "" :
@@ -2110,6 +2138,9 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
       if (mysys_var)
         pthread_mutex_unlock(&mysys_var->mutex);
 
+      /* TIME_MS */
+      table->field[8]->store((double)(utime / (HRTIME_RESOLUTION / 1000.0)));
+
       /* INFO */
       /* Lock THD mutex that protects its data when looking at it. */
       pthread_mutex_lock(&tmp->LOCK_thd_data);
@@ -2120,10 +2151,19 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
                                    tmp->query_length()), cs);
         table->field[7]->set_notnull();
       }
-      pthread_mutex_unlock(&tmp->LOCK_thd_data);
 
-      /* TIME_MS */
-      table->field[8]->store((double)(utime / 1000.0));
+      /*
+        Progress report. We need to do this under a lock to ensure that all
+        is from the same stage.
+      */
+      if ((max_counter= tmp->progress.max_counter))
+      {
+        table->field[9]->store((longlong) tmp->progress.stage + 1, 1);
+        table->field[10]->store((longlong) tmp->progress.max_stage, 1);
+        table->field[11]->store((double) tmp->progress.counter /
+                                (double) max_counter*100.0);
+      }
+      pthread_mutex_unlock(&tmp->LOCK_thd_data);
 
       if (schema_table_store_record(thd, table))
       {
@@ -2420,7 +2460,7 @@ static bool show_status_array(THD *thd, const char *wild,
           end= strmov(buff, *(my_bool*) value ? "ON" : "OFF");
           break;
         case SHOW_INT:
-          end= int10_to_str((long) *(uint32*) value, buff, 10);
+          end= int10_to_str((long) *(int*) value, buff, -10);
           break;
         case SHOW_HAVE:
         {
@@ -2837,11 +2877,12 @@ typedef struct st_lookup_field_values
 bool schema_table_store_record(THD *thd, TABLE *table)
 {
   int error;
-  if ((error= table->file->ha_write_row(table->record[0])))
+  if ((error= table->file->ha_write_tmp_row(table->record[0])))
   {
-    if (create_internal_tmp_table_from_heap(thd, table,
-                                table->pos_in_table_list->schema_table_param,
-                                error, 0))
+    TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param;
+    if (create_internal_tmp_table_from_heap(thd, table, param->start_recinfo, 
+                                            &param->recinfo, error, 0))
+
       return 1;
   }
   return 0;
@@ -3028,7 +3069,7 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table)
   else if (item->type() == Item::REF_ITEM)
     return uses_only_table_name_fields(item->real_item(), table);
 
-  if (item->type() == Item::SUBSELECT_ITEM && !item->const_item())
+  if (item->real_type() == Item::SUBSELECT_ITEM && !item->const_item())
     return 0;
 
   return 1;
@@ -3535,8 +3576,8 @@ fill_schema_table_by_open(THD *thd, bool is_show_fields_or_keys,
   */
   lex->sql_command= SQLCOM_SHOW_FIELDS;
   result= open_normal_and_derived_tables(thd, table_list,
-                                         MYSQL_LOCK_IGNORE_FLUSH);
-
+                                         MYSQL_LOCK_IGNORE_FLUSH,
+                                         DT_PREPARE | DT_CREATE);
   /*
     Restore old value of sql_command back as it is being looked at in
     process_table() function.
@@ -3974,8 +4015,14 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
             if (!(table_open_method & ~OPEN_FRM_ONLY) &&
                 !with_i_schema)
             {
-              if (!fill_schema_table_from_frm(thd, table, schema_table, db_name,
-                                              table_name, schema_table_idx))
+              my_bool res;
+              uint8 save_context_analysis_only= lex->context_analysis_only;
+              lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VIEW;
+              res= fill_schema_table_from_frm(thd, table, schema_table,
+                                              db_name,
+                                              table_name, schema_table_idx);
+              lex->context_analysis_only= save_context_analysis_only;
+              if (!res)
                 continue;
             }
 
@@ -4279,21 +4326,21 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.create_time);
-        table->field[14]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[14]->store_time(&time);
         table->field[14]->set_notnull();
       }
       if (file->stats.update_time)
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.update_time);
-        table->field[15]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[15]->store_time(&time);
         table->field[15]->set_notnull();
       }
       if (file->stats.check_time)
       {
         thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                                   (my_time_t) file->stats.check_time);
-        table->field[16]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+        table->field[16]->store_time(&time);
         table->field[16]->set_notnull();
       }
       if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
@@ -4399,7 +4446,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
         end=strmov(end,grant_types.type_names[bitnr]);
       }
     }
-    table->field[17]->store(tmp+1,end == tmp ? 0 : (uint) (end-tmp-1), cs);
+    table->field[18]->store(tmp+1,end == tmp ? 0 : (uint) (end-tmp-1), cs);
 
 #endif
     table->field[1]->store(db_name->str, db_name->length, cs);
@@ -4408,7 +4455,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
                            cs);
     table->field[4]->store((longlong) count, TRUE);
     field->sql_type(type);
-    table->field[14]->store(type.ptr(), type.length(), cs);
+    table->field[15]->store(type.ptr(), type.length(), cs);
     /*
       MySQL column type has the following format:
       base_type [(dimension)] [unsigned] [zerofill].
@@ -4455,6 +4502,7 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
       They are set to -1 if they should not be set (we should return NULL)
     */
 
+    field_length= -1;
     decimals= field->decimals();
     switch (field->type()) {
     case MYSQL_TYPE_NEWDECIMAL:
@@ -4483,8 +4531,13 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
       if (decimals == NOT_FIXED_DEC)
         decimals= -1;                           // return NULL
     break;
+    case MYSQL_TYPE_TIME:
+    case MYSQL_TYPE_TIMESTAMP:
+    case MYSQL_TYPE_DATETIME:
+      table->field[12]->store((longlong) field->decimals(), TRUE);
+      table->field[12]->set_notnull();
+      break;
     default:
-      field_length= decimals= -1;
       break;
     }
 
@@ -4492,40 +4545,44 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
     {
       table->field[10]->store((longlong) field_length, TRUE);
       table->field[10]->set_notnull();
-    }
-    if (decimals >= 0)
-    {
-      table->field[11]->store((longlong) decimals, TRUE);
-      table->field[11]->set_notnull();
+      if (decimals >= 0)
+      {
+        table->field[11]->store((longlong) decimals, TRUE);
+        table->field[11]->set_notnull();
+      }
     }
 
     if (field->has_charset())
     {
       pos=(uchar*) field->charset()->csname;
-      table->field[12]->store((const char*) pos,
-                              strlen((const char*) pos), cs);
-      table->field[12]->set_notnull();
-      pos=(uchar*) field->charset()->name;
       table->field[13]->store((const char*) pos,
                               strlen((const char*) pos), cs);
       table->field[13]->set_notnull();
+      pos=(uchar*) field->charset()->name;
+      table->field[14]->store((const char*) pos,
+                              strlen((const char*) pos), cs);
+      table->field[14]->set_notnull();
     }
     pos=(uchar*) ((field->flags & PRI_KEY_FLAG) ? "PRI" :
                  (field->flags & UNIQUE_KEY_FLAG) ? "UNI" :
                  (field->flags & MULTIPLE_KEY_FLAG) ? "MUL":"");
-    table->field[15]->store((const char*) pos,
+    table->field[16]->store((const char*) pos,
                             strlen((const char*) pos), cs);
 
     if (field->unireg_check == Field::NEXT_NUMBER)
-      table->field[16]->store(STRING_WITH_LEN("auto_increment"), cs);
+      table->field[17]->store(STRING_WITH_LEN("auto_increment"), cs);
     if (show_table->timestamp_field == field &&
         field->unireg_check != Field::TIMESTAMP_DN_FIELD)
-      table->field[16]->store(STRING_WITH_LEN("on update CURRENT_TIMESTAMP"),
+      table->field[17]->store(STRING_WITH_LEN("on update CURRENT_TIMESTAMP"),
                               cs);
     if (field->vcol_info)
-      table->field[16]->store(STRING_WITH_LEN("VIRTUAL"), cs);
-
-    table->field[18]->store(field->comment.str, field->comment.length, cs);
+    {
+      if (field->stored_in_db)
+        table->field[17]->store(STRING_WITH_LEN("PERSISTENT"), cs);
+      else
+        table->field[17]->store(STRING_WITH_LEN("VIRTUAL"), cs);
+    }
+    table->field[19]->store(field->comment.str, field->comment.length, cs);
     if (schema_table_store_record(thd, table))
       DBUG_RETURN(1);
   }
@@ -4777,10 +4834,10 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
 
       bzero((char *)&time, sizeof(time));
       ((Field_timestamp *) proc_table->field[12])->get_time(&time);
-      table->field[15]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      table->field[15]->store_time(&time);
       bzero((char *)&time, sizeof(time));
       ((Field_timestamp *) proc_table->field[13])->get_time(&time);
-      table->field[16]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      table->field[16]->store_time(&time);
       copy_field_as_string(table->field[17], proc_table->field[14]);
       copy_field_as_string(table->field[18], proc_table->field[15]);
       table->field[19]->store(definer.ptr(), definer.length(), cs);
@@ -5409,21 +5466,21 @@ static void store_schema_partitions_record(THD *thd, TABLE *schema_table,
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.create_time);
-    table->field[18]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[18]->store_time(&time);
     table->field[18]->set_notnull();
   }
   if (stat_info.update_time)
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.update_time);
-    table->field[19]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[19]->store_time(&time);
     table->field[19]->set_notnull();
   }
   if (stat_info.check_time)
   {
     thd->variables.time_zone->gmt_sec_to_TIME(&time,
                                               (my_time_t)stat_info.check_time);
-    table->field[20]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    table->field[20]->store_time(&time);
     table->field[20]->set_notnull();
   }
   if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
@@ -5804,15 +5861,13 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
     /* starts & ends . STARTS is always set - see sql_yacc.yy */
     et.time_zone->gmt_sec_to_TIME(&time, et.starts);
     sch_table->field[ISE_STARTS]->set_notnull();
-    sch_table->field[ISE_STARTS]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_STARTS]->store_time(&time);
 
     if (!et.ends_null)
     {
       et.time_zone->gmt_sec_to_TIME(&time, et.ends);
       sch_table->field[ISE_ENDS]->set_notnull();
-      sch_table->field[ISE_ENDS]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+      sch_table->field[ISE_ENDS]->store_time(&time);
     }
   }
   else
@@ -5822,8 +5877,7 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
 
     et.time_zone->gmt_sec_to_TIME(&time, et.execute_at);
     sch_table->field[ISE_EXECUTE_AT]->set_notnull();
-    sch_table->field[ISE_EXECUTE_AT]->
-                          store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_EXECUTE_AT]->store_time(&time);
   }
 
   /* status */
@@ -5853,21 +5907,19 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
     sch_table->field[ISE_ON_COMPLETION]->
                                 store(STRING_WITH_LEN("PRESERVE"), scs);
 
-  number_to_datetime(et.created, &time, 0, &not_used);
+  number_to_datetime(et.created, 0, &time, 0, &not_used);
   DBUG_ASSERT(not_used==0);
-  sch_table->field[ISE_CREATED]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+  sch_table->field[ISE_CREATED]->store_time(&time);
 
-  number_to_datetime(et.modified, &time, 0, &not_used);
+  number_to_datetime(et.modified, 0, &time, 0, &not_used);
   DBUG_ASSERT(not_used==0);
-  sch_table->field[ISE_LAST_ALTERED]->
-                                store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+  sch_table->field[ISE_LAST_ALTERED]->store_time(&time);
 
   if (et.last_executed)
   {
     et.time_zone->gmt_sec_to_TIME(&time, et.last_executed);
     sch_table->field[ISE_LAST_EXECUTED]->set_notnull();
-    sch_table->field[ISE_LAST_EXECUTED]->
-                       store_time(&time, MYSQL_TIMESTAMP_DATETIME);
+    sch_table->field[ISE_LAST_EXECUTED]->store_time(&time);
   }
 
   sch_table->field[ISE_EVENT_COMMENT]->
@@ -6269,14 +6321,23 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
       item->unsigned_flag= (fields_info->field_flags & MY_I_S_UNSIGNED);
       break;
     case MYSQL_TYPE_DATE:
+      if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_DATE_WIDTH,
+                                           fields_info->field_type)))
+        DBUG_RETURN(0);
+      break;
     case MYSQL_TYPE_TIME:
+      if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_TIME_FULL_WIDTH,
+                                           fields_info->field_type)))
+        DBUG_RETURN(0);
+      break;
     case MYSQL_TYPE_TIMESTAMP:
     case MYSQL_TYPE_DATETIME:
       if (!(item=new Item_return_date_time(fields_info->field_name,
+                                           MAX_DATETIME_WIDTH,
                                            fields_info->field_type)))
-      {
         DBUG_RETURN(0);
-      }
       break;
     case MYSQL_TYPE_FLOAT:
     case MYSQL_TYPE_DOUBLE:
@@ -6462,7 +6523,7 @@ int make_table_names_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 
 int make_columns_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
 {
-  int fields_arr[]= {3, 14, 13, 6, 15, 5, 16, 17, 18, -1};
+  int fields_arr[]= {3, 15, 14, 6, 16, 5, 17, 18, 19, -1};
   int *field_num= fields_arr;
   ST_FIELD_INFO *field_info;
   Name_resolution_context *context= &thd->lex->select_lex.context;
@@ -6470,9 +6531,9 @@ int make_columns_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
   for (; *field_num >= 0; field_num++)
   {
     field_info= &schema_table->fields_info[*field_num];
-    if (!thd->lex->verbose && (*field_num == 13 ||
-                               *field_num == 17 ||
-                               *field_num == 18))
+    if (!thd->lex->verbose && (*field_num == 14 ||
+                               *field_num == 18 ||
+                               *field_num == 19))
       continue;
     Item_field *field= new Item_field(context,
                                       NullS, NullS, field_info->field_name);
@@ -6677,14 +6738,15 @@ int make_schema_select(THD *thd, SELECT_LEX *sel,
 bool get_schema_tables_result(JOIN *join,
                               enum enum_schema_table_state executed_place)
 {
-  JOIN_TAB *tmp_join_tab= join->join_tab+join->tables;
   THD *thd= join->thd;
   LEX *lex= thd->lex;
   bool result= 0;
   DBUG_ENTER("get_schema_tables_result");
 
   thd->no_warnings_for_error= 1;
-  for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
+  for (JOIN_TAB *tab= first_linear_tab(join, WITH_CONST_TABLES); 
+       tab; 
+       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
   {
     if (!tab->table || !tab->table->pos_in_table_list)
       break;
@@ -6739,11 +6801,11 @@ bool get_schema_tables_result(JOIN *join,
       {
         result= 1;
         join->error= 1;
-        tab->read_record.file= table_list->table->file;
+        tab->read_record.table->file= table_list->table->file;
         table_list->schema_table_state= executed_place;
         break;
       }
-      tab->read_record.file= table_list->table->file;
+      tab->read_record.table->file= table_list->table->file;
       table_list->schema_table_state= executed_place;
     }
   }
@@ -6941,6 +7003,8 @@ ST_FIELD_INFO columns_fields_info[]=
    0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"NUMERIC_SCALE", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG,
    0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
+  {"DATETIME_PRECISION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
+   0, (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, OPEN_FRM_ONLY},
   {"CHARACTER_SET_NAME", MY_CS_NAME_SIZE, MYSQL_TYPE_STRING, 0, 1, 0,
    OPEN_FRM_ONLY},
   {"COLLATION_NAME", MY_CS_NAME_SIZE, MYSQL_TYPE_STRING, 0, 1, "Collation",
@@ -7334,6 +7398,10 @@ ST_FIELD_INFO processlist_fields_info[]=
    SKIP_OPEN_TABLE},
   {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL,
    0, 0, "Time_ms", SKIP_OPEN_TABLE},
+  {"STAGE", 2, MYSQL_TYPE_TINY,  0, 0, "Stage", SKIP_OPEN_TABLE},
+  {"MAX_STAGE", 2, MYSQL_TYPE_TINY,  0, 0, "Max_stage", SKIP_OPEN_TABLE},
+  {"PROGRESS", 703, MYSQL_TYPE_DECIMAL,  0, 0, "Progress",
+   SKIP_OPEN_TABLE},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
 };
 
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index f54b085eeda..fc4117e2767 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -57,6 +57,7 @@ typedef struct st_sort_param {
   uint addon_length;        /* Length of added packed fields */
   uint res_length;          /* Length of records in final sorted file/buffer */
   uint keys;				/* Max keys / buffer */
+  element_count min_dupl_count;
   ha_rows max_rows,examined_rows;
   TABLE *sort_form;			/* For quicker make_sortkey */
   SORT_FIELD *local_sortorder;
@@ -80,4 +81,9 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
 		  IO_CACHE *to_file, uchar *sort_buffer,
 		  BUFFPEK *lastbuff,BUFFPEK *Fb,
 		  BUFFPEK *Tb,int flag);
+int merge_index(SORTPARAM *param, uchar *sort_buffer,
+		BUFFPEK *buffpek, uint maxbuffer,
+		IO_CACHE *tempfile, IO_CACHE *outfile);
+
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
+
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index c14dee91c73..57a308f581d 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -582,11 +582,11 @@ uint32 String::numchars()
   return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
 }
 
-int String::charpos(int i,uint32 offset)
+int String::charpos(longlong i,uint32 offset)
 {
   if (i <= 0)
-    return i;
-  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
+    return (int)i;
+  return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i);
 }
 
 int String::strstr(const String &s,uint32 offset)
diff --git a/sql/sql_string.h b/sql/sql_string.h
index a4bc2f53f3a..ba90c8140df 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -193,6 +193,18 @@ public:
   { return set_int((longlong)num, true, cs); }
   bool set_real(double num,uint decimals, CHARSET_INFO *cs);
 
+  /* Move handling of buffer from some other object to String */
+  void reassociate(char *ptr, uint32 length, uint32 alloced_length,
+                   CHARSET_INFO *cs)
+  { 
+    free();
+    Ptr= ptr;
+    str_length= length;
+    Alloced_length= alloced_length;
+    str_charset= cs;
+    alloced= ptr != 0;
+  }
+
   /*
     PMG 2004.11.12
     This is a method that works the same as perl's "chop". It simply
@@ -335,7 +347,7 @@ public:
   friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
   friend class Field;
   uint32 numchars();
-  int charpos(int i,uint32 offset=0);
+  int charpos(longlong i,uint32 offset=0);
 
   int reserve(uint32 space_needed)
   {
@@ -449,8 +461,9 @@ public:
   }
 };
 
-static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str, 
-                                           char *end)
+static inline bool check_if_only_end_space(CHARSET_INFO *cs,
+                                           const char *str, 
+                                           const char *end)
 {
   return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
 }
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 2563f923730..222f6e07bc2 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -26,6 +26,7 @@
 #include "sql_trigger.h"
 #include "sql_show.h"
 #include "debug_sync.h"
+#include "sql_handler.h"
 
 #ifdef __WIN__
 #include <io.h>
@@ -37,17 +38,16 @@ const char *primary_key_name="PRIMARY";
 
 static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end);
 static char *make_unique_key_name(const char *field_name,KEY *start,KEY *end);
-static int copy_data_between_tables(TABLE *from,TABLE *to,
-                                    List<Create_field> &create, bool ignore,
-				    uint order_num, ORDER *order,
-				    ha_rows *copied,ha_rows *deleted,
-                                    enum enum_enable_or_disable keys_onoff,
-                                    bool error_if_not_empty);
+static int copy_data_between_tables(THD *thd, TABLE *,TABLE *,
+                                    List<Create_field> &, bool,
+				    uint, ORDER *, ha_rows *,ha_rows *,
+                                    enum enum_enable_or_disable, bool);
 
 static bool prepare_blob_field(THD *thd, Create_field *sql_field);
 static bool check_engine(THD *, const char *, HA_CREATE_INFO *);
 static int mysql_prepare_create_table(THD *, HA_CREATE_INFO *, Alter_info *,
-                              bool, uint *, handler *, KEY **, uint *, int);
+                                      bool, uint *, handler *, KEY **, uint *,
+                                      int);
 static bool mysql_prepare_alter_table(THD *, TABLE *, HA_CREATE_INFO *,
                                       Alter_info *);
 static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list);
@@ -61,7 +61,7 @@ static void wait_for_kill_signal(THD *thd)
   while (thd->killed == 0)
     sleep(1);
   // Reset signal and continue as if nothing happend
-  thd->killed= THD::NOT_KILLED;
+  thd->killed= NOT_KILLED;
 }
 #endif
 
@@ -3226,7 +3226,6 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
             my_error(ER_WRONG_SUB_KEY, MYF(0));
             DBUG_RETURN(TRUE);
           }
-
           if (!f_is_geom(sql_field->pack_flag))
           {
             my_error(ER_WRONG_ARGUMENTS, MYF(0), "SPATIAL INDEX");
@@ -4695,6 +4694,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
   int result_code;
   bool need_repair_or_alter= 0;
   DBUG_ENTER("mysql_admin_table");
+  DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options));
 
   if (end_active_trans(thd))
     DBUG_RETURN(1);
@@ -4719,9 +4719,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
     bool fatal_error=0;
 
     DBUG_PRINT("admin", ("table: '%s'.'%s'", table->db, table->table_name));
-    DBUG_PRINT("admin", ("extra_open_options: %u", extra_open_options));
     strxmov(table_name, db, ".", table->table_name, NullS);
-    thd->open_options|= extra_open_options;
     table->lock_type= lock_type;
     /* open only one table from local list of command */
     {
@@ -4743,12 +4741,18 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
       thd->no_warnings_for_error= no_warnings_for_error;
       if (view_operator_func == NULL)
         table->required_type=FRMTYPE_TABLE;
-
+      if (lex->sql_command == SQLCOM_CHECK ||
+          lex->sql_command == SQLCOM_REPAIR ||
+          lex->sql_command == SQLCOM_ANALYZE ||
+          lex->sql_command == SQLCOM_OPTIMIZE)
+	thd->prepare_derived_at_open= TRUE;
+      thd->open_options|= extra_open_options;
       open_and_lock_tables(thd, table);
+      thd->open_options&= ~extra_open_options;
+      thd->prepare_derived_at_open= FALSE;
       thd->no_warnings_for_error= 0;
       table->next_global= save_next_global;
       table->next_local= save_next_local;
-      thd->open_options&= ~extra_open_options;
 #ifdef WITH_PARTITION_STORAGE_ENGINE
       if (table->table)
       {
@@ -4842,7 +4846,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
       else
         /* Default failure code is corrupt table */
         result_code= HA_ADMIN_CORRUPT;
-      goto send_result;
+     goto send_result;
     }
 
     if (table->view)
@@ -4932,7 +4936,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
         /* We use extra_open_options to be able to open crashed tables */
         thd->open_options|= extra_open_options;
         result_code= admin_recreate_table(thd, table);
-        thd->open_options= ~extra_open_options;
+        thd->open_options&= ~extra_open_options;
         goto send_result;
       }
       if (check_old_types || check_for_upgrade)
@@ -5821,8 +5825,8 @@ is_index_maintenance_unique (TABLE *table, Alter_info *alter_info)
     that need to be dropped and/or (re-)created.
 
   RETURN VALUES
-    TRUE   error
-    FALSE  success
+    TRUE   The tables are not compatible; We have to do a full alter table
+    FALSE  The tables are compatible; We only have to modify the .frm
 */
 
 static
@@ -5962,6 +5966,9 @@ compare_tables(TABLE *table,
     DBUG_ASSERT(i < table->s->fields);
     create_info->fields_option_struct[i]= tmp_new_field->option_struct;
 
+    /* reset common markers of how field changed */
+    field->flags&= ~(FIELD_IS_RENAMED | FIELD_IN_ADD_INDEX);
+
     /* Make sure we have at least the default charset in use. */
     if (!new_field->charset)
       new_field->charset= create_info->default_table_charset;
@@ -5996,7 +6003,6 @@ compare_tables(TABLE *table,
         create_info->table_options|= HA_OPTION_PACK_RECORD;
 
     /* Check if field was renamed */
-    field->flags&= ~FIELD_IS_RENAMED;
     if (my_strcasecmp(system_charset_info,
 		      field->field_name,
 		      tmp_new_field->field_name))
@@ -6009,8 +6015,6 @@ compare_tables(TABLE *table,
                           new_field->field_name));
       DBUG_RETURN(0);
     }
-    // Clear indexed marker
-    field->flags&= ~FIELD_IN_ADD_INDEX;
     changes|= tmp;
   }
 
@@ -6324,7 +6328,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
   {
     Alter_drop *drop;
-    if (field->type() == MYSQL_TYPE_STRING)
+    if (field->type() == MYSQL_TYPE_VARCHAR)
       create_info->varchar= TRUE;
     /* Check if field should be dropped */
     drop_it.rewind();
@@ -6667,6 +6671,7 @@ err:
       order_num        How many ORDER BY fields has been specified.
       order            List of fields to ORDER BY.
       ignore           Whether we have ALTER IGNORE TABLE
+      require_online   Give an error if we can't do operation online
 
   DESCRIPTION
     This is a veery long function and is everything but the kitchen sink :)
@@ -6697,7 +6702,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
                        HA_CREATE_INFO *create_info,
                        TABLE_LIST *table_list,
                        Alter_info *alter_info,
-                       uint order_num, ORDER *order, bool ignore)
+                       uint order_num, ORDER *order, bool ignore,
+                       bool require_online)
 {
   TABLE *table, *new_table= 0, *name_lock= 0;
   int error= 0;
@@ -7500,10 +7506,23 @@ view_err:
     */
   }
 
+  /* Check if we can do the ALTER TABLE as online */
+  if (require_online)
+  {
+    if (index_add_count || index_drop_count ||
+        (new_table &&
+         !(new_table->file->ha_table_flags() & HA_NO_COPY_ON_ALTER)))
+    {
+      my_error(ER_CANT_DO_ONLINE, MYF(0), "ALTER");
+      goto close_table_and_return_error;
+    }
+  }
+
   /* Copy the data if necessary. */
   thd->count_cuted_fields= CHECK_FIELD_WARN;	// calc cuted fields
   thd->cuted_fields=0L;
   copied=deleted=0;
+
   /*
     We do not copy data for MERGE tables. Only the children have data.
     MERGE tables have HA_NO_COPY_ON_ALTER set.
@@ -7513,8 +7532,7 @@ view_err:
     /* We don't want update TIMESTAMP fields during ALTER TABLE. */
     new_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
     new_table->next_number_field=new_table->found_next_number_field;
-    thd_proc_info(thd, "copy to tmp table");
-    error= copy_data_between_tables(table, new_table,
+    error= copy_data_between_tables(thd, table, new_table,
                                     alter_info->create_list, ignore,
                                     order_num, order, &copied, &deleted,
                                     alter_info->keys_onoff,
@@ -7535,6 +7553,8 @@ view_err:
       error= 1;
   }
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;
+  if (error)
+    goto close_table_and_return_error;
 
   /* If we did not need to copy, we might still need to add/drop indexes. */
   if (! new_table)
@@ -7628,11 +7648,11 @@ view_err:
   }
   /*end of if (! new_table) for add/drop index*/
 
+  DBUG_ASSERT(error == 0);
+
   if (table->s->tmp_table != NO_TMP_TABLE)
   {
     /* We changed a temporary table */
-    if (error)
-      goto err1;
     /* Close lock if this is a transactional table */
     if (thd->lock)
     {
@@ -7667,12 +7687,6 @@ view_err:
   }
   DEBUG_SYNC(thd, "alter_table_before_rename_result_table");
   VOID(pthread_mutex_lock(&LOCK_open));
-  if (error)
-  {
-    VOID(quick_rm_table(new_db_type, new_db, tmp_name, FN_IS_TMP));
-    VOID(pthread_mutex_unlock(&LOCK_open));
-    goto err;
-  }
 
   /*
     Data is copied. Now we:
@@ -7845,6 +7859,16 @@ end_temporary:
   thd->some_tables_deleted=0;
   DBUG_RETURN(FALSE);
 
+close_table_and_return_error:
+  if (new_table && table->s->tmp_table == NO_TMP_TABLE)
+  {
+    /* This is not a temporary table, so close it the normal way */
+    new_table->s->deleting= TRUE;
+    intern_close_table(new_table);
+    my_free(new_table,MYF(0));
+    new_table= 0;               // This forces call to quick_rm_table() below
+  }
+
 err1:
   if (new_table)
   {
@@ -7915,7 +7939,7 @@ err_with_placeholders:
 /* Copy all rows from one table to another */
 
 static int
-copy_data_between_tables(TABLE *from,TABLE *to,
+copy_data_between_tables(THD *thd, TABLE *from,TABLE *to,
 			 List<Create_field> &create,
                          bool ignore,
 			 uint order_num, ORDER *order,
@@ -7927,7 +7951,6 @@ copy_data_between_tables(TABLE *from,TABLE *to,
   int error= 1, errpos= 0;
   Copy_field *copy= NULL, *copy_end;
   ha_rows found_count= 0, delete_count= 0;
-  THD *thd= current_thd;
   uint length= 0;
   SORT_FIELD *sortorder;
   READ_RECORD info;
@@ -7937,11 +7960,14 @@ copy_data_between_tables(TABLE *from,TABLE *to,
   ha_rows examined_rows;
   bool auto_increment_field_copied= 0;
   ulong save_sql_mode= thd->variables.sql_mode;
-  ulonglong prev_insert_id;
+  ulonglong prev_insert_id, time_to_report_progress;
   List_iterator<Create_field> it(create);
   Create_field *def;
   DBUG_ENTER("copy_data_between_tables");
 
+  /* Two or 3 stages; Sorting, copying data and update indexes */
+  thd_progress_init(thd, 2 + test(order));
+
   /*
     Turn off recovery logging since rollback of an alter table is to
     delete the new table so there is no need to log the changes to it.
@@ -7996,7 +8022,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   if (order)
   {
-    if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
+    if (to->s->primary_key != MAX_KEY &&
+        to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
     {
       char warn_buff[MYSQL_ERRMSG_SIZE];
       my_snprintf(warn_buff, sizeof(warn_buff), 
@@ -8014,6 +8041,7 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       tables.alias= tables.table_name= from->s->table_name.str;
       tables.db= from->s->db.str;
 
+      thd_proc_info(thd, "Sorting");
       if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
           setup_order(thd, thd->lex->select_lex.ref_pointer_array,
                       &tables, fields, all_fields, order) ||
@@ -8024,8 +8052,10 @@ copy_data_between_tables(TABLE *from,TABLE *to,
           HA_POS_ERROR)
         goto err;
     }
-  };
+    thd_progress_next_stage(thd);
+  }
 
+  thd_proc_info(thd, "copy to tmp table");
   /* Tell handler that we have values for all columns in the to table */
   to->use_all_columns();
   to->mark_virtual_columns_for_write(TRUE);
@@ -8036,6 +8066,10 @@ copy_data_between_tables(TABLE *from,TABLE *to,
     to->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
   thd->row_count= 0;
   restore_record(to, s->default_values);        // Create empty record
+
+  thd->progress.max_counter= from->file->records();
+  time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10;
+
   while (!(error=info.read_record(&info)))
   {
     if (thd->killed)
@@ -8046,6 +8080,13 @@ copy_data_between_tables(TABLE *from,TABLE *to,
     }
     update_virtual_fields(thd, from);
     thd->row_count++;
+    if (++thd->progress.counter >= time_to_report_progress)
+    {
+      time_to_report_progress+= MY_HOW_OFTEN_TO_WRITE/10;
+      thd_progress_report(thd, thd->progress.counter,
+                          thd->progress.max_counter);
+    }
+
     /* Return error if source table isn't empty. */
     if (error_if_not_empty)
     {
@@ -8109,6 +8150,9 @@ err:
   free_io_cache(from);
   delete [] copy;
 
+  thd_proc_info(thd, "Enabling keys");
+  thd_progress_next_stage(thd);
+
   if (error > 0)
     to->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
   if (errpos >= 3 && to->file->ha_end_bulk_insert() && error <= 0)
@@ -8198,7 +8242,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list)
   alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE);
   DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info,
                                 table_list, &alter_info, 0,
-                                (ORDER *) 0, 0));
+                                (ORDER *) 0, 0, 0));
 }
 
 
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 04adf6dcadc..010cc2f52c6 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -169,64 +169,115 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length)
 void
 TEST_join(JOIN *join)
 {
-  uint i,ref;
+  uint ref;
+  int i;
+  List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges);
+  JOIN_TAB_RANGE *jt_range;
   DBUG_ENTER("TEST_join");
 
-  /*
-    Assemble results of all the calls to full_name() first,
-    in order not to garble the tabular output below.
-  */
-  String ref_key_parts[MAX_TABLES];
-  for (i= 0; i < join->tables; i++)
-  {
-    JOIN_TAB *tab= join->join_tab + i;
-    for (ref= 0; ref < tab->ref.key_parts; ref++)
-    {
-      ref_key_parts[i].append(tab->ref.items[ref]->full_name());
-      ref_key_parts[i].append("  ");
-    }
-  }
-
   DBUG_LOCK_FILE;
   VOID(fputs("\nInfo about JOIN\n",DBUG_FILE));
-  for (i=0 ; i < join->tables ; i++)
+
+  while ((jt_range= it++))
   {
-    JOIN_TAB *tab=join->join_tab+i;
-    TABLE *form=tab->table;
-    char key_map_buff[128];
-    fprintf(DBUG_FILE,"%-16.16s  type: %-7s  q_keys: %s  refs: %d  key: %d  len: %d\n",
-	    form->alias.c_ptr(),
-	    join_type_str[tab->type],
-	    tab->keys.print(key_map_buff),
-	    tab->ref.key_parts,
-	    tab->ref.key,
-	    tab->ref.key_length);
-    if (tab->select)
+    /*
+      Assemble results of all the calls to full_name() first,
+      in order not to garble the tabular output below.
+    */
+    String ref_key_parts[MAX_TABLES];
+    int tables_in_range= jt_range->end - jt_range->start;
+    for (i= 0; i < tables_in_range; i++)
     {
-      char buf[MAX_KEY/8+1];
-      if (tab->use_quick == 2)
-	fprintf(DBUG_FILE,
-		"                  quick select checked for each record (keys: %s)\n",
-		tab->select->quick_keys.print(buf));
-      else if (tab->select->quick)
+      JOIN_TAB *tab= jt_range->start + i;
+      for (ref= 0; ref < tab->ref.key_parts; ref++)
       {
-	fprintf(DBUG_FILE, "                  quick select used:\n");
-        tab->select->quick->dbug_dump(18, FALSE);
+        ref_key_parts[i].append(tab->ref.items[ref]->full_name());
+        ref_key_parts[i].append("  ");
       }
-      else
-	VOID(fputs("                  select used\n",DBUG_FILE));
     }
-    if (tab->ref.key_parts)
+
+    for (i= 0; i < tables_in_range; i++)
     {
-      fprintf(DBUG_FILE,
+      JOIN_TAB *tab= jt_range->start + i;
+      TABLE *form=tab->table;
+      char key_map_buff[128];
+      fprintf(DBUG_FILE,"%-16.16s  type: %-7s  q_keys: %s  refs: %d  key: %d  len: %d\n",
+	    form->alias.c_ptr(),
+              join_type_str[tab->type],
+              tab->keys.print(key_map_buff),
+              tab->ref.key_parts,
+              tab->ref.key,
+              tab->ref.key_length);
+      if (tab->select)
+      {
+        char buf[MAX_KEY/8+1];
+        if (tab->use_quick == 2)
+          fprintf(DBUG_FILE,
+                  "                  quick select checked for each record (keys: %s)\n",
+                  tab->select->quick_keys.print(buf));
+        else if (tab->select->quick)
+        {
+          fprintf(DBUG_FILE, "                  quick select used:\n");
+          tab->select->quick->dbug_dump(18, FALSE);
+        }
+        else
+          VOID(fputs("                  select used\n",DBUG_FILE));
+      }
+      if (tab->ref.key_parts)
+      {
+        fprintf(DBUG_FILE,
               "                  refs:  %s\n", ref_key_parts[i].c_ptr_safe());
+      }
     }
+    VOID(fputs("\n",DBUG_FILE));
   }
   DBUG_UNLOCK_FILE;
   DBUG_VOID_RETURN;
 }
 
 
+#define FT_KEYPART   (MAX_REF_PARTS+10)
+
+void print_keyuse(KEYUSE *keyuse)
+{
+  char buff[256];
+  char buf2[64]; 
+  const char *fieldname;
+  JOIN_TAB *join_tab= keyuse->table->reginfo.join_tab;
+  KEY *key_info= join_tab->get_keyinfo_by_key_no(keyuse->key);
+  String str(buff,(uint32) sizeof(buff), system_charset_info);
+  str.length(0);
+  keyuse->val->print(&str, QT_ORDINARY);
+  str.append('\0');
+  if (keyuse->is_for_hash_join())
+    fieldname= keyuse->table->field[keyuse->keypart]->field_name;
+  else if (keyuse->keypart == FT_KEYPART)
+    fieldname= "FT_KEYPART";
+  else
+    fieldname= key_info->key_part[keyuse->keypart].field->field_name;
+  longlong2str(keyuse->used_tables, buf2, 16, 0); 
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE: %s.%s=%s  optimize: %u  used_tables: %s "
+          "ref_table_rows: %lu  keypart_map: %0lx\n",
+          keyuse->table->alias.c_ptr(), fieldname, str.ptr(),
+          (uint) keyuse->optimize, buf2, (ulong) keyuse->ref_table_rows, 
+          (ulong) keyuse->keypart_map);
+  DBUG_UNLOCK_FILE;
+  //key_part_map keypart_map; --?? there can be several? 
+}
+
+
+/* purecov: begin inspected */
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE array (%d elements)\n", keyuse_array->elements);
+  DBUG_UNLOCK_FILE;
+  for(uint i=0; i < keyuse_array->elements; i++)
+    print_keyuse((KEYUSE*)dynamic_array_ptr(keyuse_array, i));
+}
+
+
 /* 
   Print the current state during query optimization.
 
@@ -327,6 +378,27 @@ print_plan(JOIN* join, uint idx, double record_count, double read_time,
   DBUG_UNLOCK_FILE;
 }
 
+
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "\nsemi-join nest{\n");
+  fprintf(DBUG_FILE, "  tables { \n");
+  for (uint i= 0;i < sjm->tables; i++)
+  {
+    fprintf(DBUG_FILE, "    %s%s\n", 
+            sjm->positions[i].table->table->alias.c_ptr(),
+            (i == sjm->tables -1)? "": ",");
+  }
+  fprintf(DBUG_FILE, "  }\n");
+  fprintf(DBUG_FILE, "  materialize_cost= %g\n",
+          sjm->materialization_cost.total_cost());
+  fprintf(DBUG_FILE, "  rows= %g\n", sjm->rows);
+  fprintf(DBUG_FILE, "}\n");
+  DBUG_UNLOCK_FILE;
+}
+/* purecov: end */
+
 #endif
 
 typedef struct st_debug_lock
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index fb9abc06f6b..e82b15f9ebb 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -21,6 +21,7 @@
 #include "sp_head.h"
 #include "sql_trigger.h"
 #include "parse_file.h"
+#include "sql_handler.h"
 #include <mysys_err.h>
 
 /*************************************************************************/
diff --git a/sql/sql_trigger.h b/sql/sql_trigger.h
index a7224bd74bd..d3808610ea8 100644
--- a/sql/sql_trigger.h
+++ b/sql/sql_trigger.h
@@ -19,7 +19,7 @@
 /**
   This class holds all information about triggers of table.
 
-  QQ: Will it be merged into TABLE in the future ?
+  TODO: Will it be merged into TABLE in the future ?
 */
 
 class Table_triggers_list: public Sql_alloc
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index bcdbf81836a..5c235fd1778 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -52,19 +52,26 @@ int select_union::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
 
 int select_union::send_data(List<Item> &values)
 {
-  int error= 0;
   if (unit->offset_limit_cnt)
   {						// using limit offset,count
     unit->offset_limit_cnt--;
     return 0;
   }
-  fill_record(thd, table->field, values, 1);
+  if (table->no_rows_with_nulls)
+    table->null_catch_flags= CHECK_ROW_FOR_NULLS_TO_REJECT;
+  fill_record(thd, table->field, values, TRUE, FALSE);
   if (thd->is_error())
     return 1;
+  if (table->no_rows_with_nulls)
+  {
+    table->null_catch_flags&= ~CHECK_ROW_FOR_NULLS_TO_REJECT;
+    if (table->null_catch_flags)
+      return 0;
+  }
 
-  if ((error= table->file->ha_write_row(table->record[0])))
+  if ((write_err= table->file->ha_write_tmp_row(table->record[0])))
   {
-    if (error == HA_ERR_FOUND_DUPP_KEY)
+    if (write_err == HA_ERR_FOUND_DUPP_KEY)
     {
       /*
         Inform upper level that we found a duplicate key, that should not
@@ -73,8 +80,11 @@ int select_union::send_data(List<Item> &values)
       return -1;
     }
     /* create_internal_tmp_table_from_heap will generate error if needed */
-    if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
-        create_internal_tmp_table_from_heap(thd, table, &tmp_table_param, error, 1))
+    if (table->file->is_fatal_error(write_err, HA_CHECK_DUP) &&
+        create_internal_tmp_table_from_heap(thd, table,
+                                            tmp_table_param.start_recinfo, 
+                                            &tmp_table_param.recinfo,
+                                            write_err, 1))
       return 1;
   }
   return 0;
@@ -109,6 +119,9 @@ bool select_union::flush()
       is_union_distinct  if set, the temporary table will eliminate
                          duplicates on insert
       options            create options
+      table_alias        name of the temporary table
+      bit_fields_as_long convert bit fields to ulonglong
+      create_table       whether to physically create result table
 
   DESCRIPTION
     Create a temporary table that is used to store the result of a UNION,
@@ -122,22 +135,49 @@ bool select_union::flush()
 bool
 select_union::create_result_table(THD *thd_arg, List<Item> *column_types,
                                   bool is_union_distinct, ulonglong options,
-                                  const char *alias)
+                                  const char *alias,
+                                  bool bit_fields_as_long, bool create_table)
 {
   DBUG_ASSERT(table == 0);
   tmp_table_param.init();
   tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
 
   if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
                                  (ORDER*) 0, is_union_distinct, 1,
-                                 options, HA_POS_ERROR, (char*) alias)))
+                                 options, HA_POS_ERROR, (char*) alias,
+                                 !create_table)))
     return TRUE;
-  table->file->extra(HA_EXTRA_WRITE_CACHE);
-  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+
+  table->keys_in_use_for_query.clear_all();
+  for (uint i=0; i < table->s->fields; i++)
+    table->field[i]->flags &= ~PART_KEY_FLAG;
+
+  if (create_table)
+  {
+    table->file->extra(HA_EXTRA_WRITE_CACHE);
+    table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  }
   return FALSE;
 }
 
 
+/**
+  Reset and empty the temporary table that stores the materialized query result.
+
+  @note The cleanup performed here is exactly the same as for the two temp
+  tables of JOIN - exec_tmp_table_[1 | 2].
+*/
+
+void select_union::cleanup()
+{
+  table->file->extra(HA_EXTRA_RESET_STATE);
+  table->file->ha_delete_all_rows();
+  free_io_cache(table);
+  filesort_free_buffers(table,0);
+}
+
+
 /*
   initialization procedures before fake_select_lex preparation()
 
@@ -171,6 +211,8 @@ st_select_lex_unit::init_prepare_fake_select_lex(THD *thd_arg)
   {
     (*order->item)->walk(&Item::change_context_processor, 0,
                          (uchar*) &fake_select_lex->context);
+    (*order->item)->walk(&Item::set_fake_select_as_master_processor, 0,
+                         (uchar*) fake_select_lex);
   }
 }
 
@@ -255,6 +297,18 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
 
     can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
 
+    /*
+      Remove all references from the select_lex_units to the subqueries that
+      are inside the ORDER BY clause.
+    */
+    if (can_skip_order_by)
+    {
+      for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next)
+      {
+        (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
+      }
+    }
+
     saved_error= join->prepare(&sl->ref_pointer_array,
                                sl->table_list.first,
                                sl->with_wild,
@@ -269,6 +323,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
                                (is_union_select ? NULL :
                                 thd_arg->lex->proc_list.first),
                                sl, this);
+
     /* There are no * in the statement anymore (for PS) */
     sl->with_wild= 0;
     last_procedure= join->procedure;
@@ -323,6 +378,9 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
     List_iterator_fast<Item> tp(types);
     Item *type;
     ulonglong create_options;
+    uint save_tablenr= 0;
+    table_map save_map= 0;
+    uint save_maybe_null= 0;
 
     while ((type= tp++))
     {
@@ -375,12 +433,24 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
       create_options= create_options | TMP_TABLE_FORCE_MYISAM;
 
     if (union_result->create_result_table(thd, &types, test(union_distinct),
-                                          create_options, ""))
+                                          create_options, "", FALSE, TRUE))
       goto err;
+    if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+    {
+      save_tablenr= result_table_list.tablenr_exec;
+      save_map= result_table_list.map_exec;
+      save_maybe_null= result_table_list.maybe_null_exec;
+    }
     bzero((char*) &result_table_list, sizeof(result_table_list));
     result_table_list.db= (char*) "";
     result_table_list.table_name= result_table_list.alias= (char*) "union";
     result_table_list.table= table= union_result->table;
+    if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+    {
+      result_table_list.tablenr_exec= save_tablenr;
+      result_table_list.map_exec= save_map;
+      result_table_list.maybe_null_exec= save_maybe_null;
+    }
 
     thd_arg->lex->current_select= lex_select_save;
     if (!item_list.elements)
@@ -456,18 +526,21 @@ err:
 }
 
 
-bool st_select_lex_unit::exec()
+/**
+  Run optimization phase.
+
+  @return FALSE unit successfully passed optimization phase.
+  @return TRUE an error occur.
+*/
+bool st_select_lex_unit::optimize()
 {
   SELECT_LEX *lex_select_save= thd->lex->current_select;
   SELECT_LEX *select_cursor=first_select();
-  ulonglong add_rows=0;
-  ha_rows examined_rows= 0;
-  DBUG_ENTER("st_select_lex_unit::exec");
+  DBUG_ENTER("st_select_lex_unit::optimize");
 
-  if (executed && !uncacheable && !describe)
+  if (optimized && !uncacheable && !describe)
     DBUG_RETURN(FALSE);
-  executed= 1;
-  
+
   if (uncacheable || !item || !item->assigned() || describe)
   {
     if (item)
@@ -488,7 +561,6 @@ bool st_select_lex_unit::exec()
     }
     for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
     {
-      ha_rows records_at_start= 0;
       thd->lex->current_select= sl;
 
       if (optimized)
@@ -515,6 +587,66 @@ bool st_select_lex_unit::exec()
         sl->join->select_options= 
           (select_limit_cnt == HA_POS_ERROR || sl->braces) ?
           sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+
+	saved_error= sl->join->optimize();
+      }
+
+      if (saved_error)
+      {
+	thd->lex->current_select= lex_select_save;
+	DBUG_RETURN(saved_error);
+      }
+    }
+  }
+  optimized= 1;
+
+  thd->lex->current_select= lex_select_save;
+  DBUG_RETURN(saved_error);
+}
+
+
+bool st_select_lex_unit::exec()
+{
+  SELECT_LEX *lex_select_save= thd->lex->current_select;
+  SELECT_LEX *select_cursor=first_select();
+  ulonglong add_rows=0;
+  ha_rows examined_rows= 0;
+  DBUG_ENTER("st_select_lex_unit::exec");
+
+  if (executed && !uncacheable && !describe)
+    DBUG_RETURN(FALSE);
+  executed= 1;
+  
+  saved_error= optimize();
+
+  if (uncacheable || !item || !item->assigned() || describe)
+  {
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+    {
+      ha_rows records_at_start= 0;
+      thd->lex->current_select= sl;
+
+      {
+        set_limit(sl);
+	if (sl == global_parameters || describe)
+	{
+	  offset_limit_cnt= 0;
+	  /*
+	    We can't use LIMIT at this stage if we are using ORDER BY for the
+	    whole query
+	  */
+	  if (sl->order_list.first || describe)
+	    select_limit_cnt= HA_POS_ERROR;
+        }
+
+        /*
+          When using braces, SQL_CALC_FOUND_ROWS affects the whole query:
+          we don't calculate found_rows() per union part.
+          Otherwise, SQL_CALC_FOUND_ROWS should be done on all sub parts.
+        */
+        sl->join->select_options= 
+          (select_limit_cnt == HA_POS_ERROR || sl->braces) ?
+          sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
 	saved_error= sl->join->optimize();
       }
       if (!saved_error)
@@ -534,6 +666,7 @@ bool st_select_lex_unit::exec()
 	if (!saved_error)
 	{
 	  examined_rows+= thd->examined_row_count;
+          thd->examined_row_count= 0;
 	  if (union_result->flush())
 	  {
 	    thd->lex->current_select= lex_select_save;
@@ -567,7 +700,6 @@ bool st_select_lex_unit::exec()
       }
     }
   }
-  optimized= 1;
 
   /* Send result to 'result' */
   saved_error= TRUE;
@@ -699,7 +831,8 @@ bool st_select_lex_unit::cleanup()
     if ((join= fake_select_lex->join))
     {
       join->tables_list= 0;
-      join->tables= 0;
+      join->table_count= 0;
+      join->top_join_tab_count= 0;
     }
     error|= fake_select_lex->cleanup();
     /*
@@ -761,8 +894,8 @@ void st_select_lex_unit::reinit_exec_mechanism()
     TRUE  - error
 */
 
-bool st_select_lex_unit::change_result(select_subselect *new_result,
-                                       select_subselect *old_result)
+bool st_select_lex_unit::change_result(select_result_interceptor *new_result,
+                                       select_result_interceptor *old_result)
 {
   bool res= FALSE;
   for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
@@ -836,6 +969,7 @@ bool st_select_lex::cleanup()
   }
   non_agg_fields.empty();
   inner_refs_list.empty();
+  exclude_from_table_unique_test= FALSE;
   DBUG_RETURN(error);
 }
 
@@ -852,3 +986,27 @@ void st_select_lex::cleanup_all_joins(bool full)
     for (sl= unit->first_select(); sl; sl= sl->next_select())
       sl->cleanup_all_joins(full);
 }
+
+
+/**
+  Set exclude_from_table_unique_test for selects of this unit and all
+  underlying selects.
+
+  @note used to exclude materialized derived tables (views) from unique
+  table check.
+*/
+
+void st_select_lex_unit::set_unique_exclude()
+{
+  for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
+  {
+    sl->exclude_from_table_unique_test= TRUE;
+    for (SELECT_LEX_UNIT *unit= sl->first_inner_unit();
+         unit;
+         unit= unit->next_unit())
+    {
+      unit->set_unique_exclude();
+    }
+  }
+}
+
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index b3c001849b5..a921a87884e 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -221,7 +221,7 @@ int mysql_update(THD *thd,
   bool          need_reopen;
   ulonglong     id;
   List<Item> all_fields;
-  THD::killed_state killed_status= THD::NOT_KILLED;
+  killed_state killed_status= NOT_KILLED;
   DBUG_ENTER("mysql_update");
 
   for ( ; ; )
@@ -229,7 +229,11 @@ int mysql_update(THD *thd,
     if (open_tables(thd, &table_list, &table_count, 0))
       DBUG_RETURN(1);
 
-    if (table_list->multitable_view)
+     //Prepare views so they are handled correctly.
+    if (mysql_handle_derived(thd->lex, DT_INIT))
+      DBUG_RETURN(1);
+
+    if (table_list->is_multitable())
     {
       DBUG_ASSERT(table_list->view != 0);
       DBUG_PRINT("info", ("Switch to multi-update"));
@@ -245,14 +249,19 @@ int mysql_update(THD *thd,
     close_tables_for_reopen(thd, &table_list);
   }
 
-  if (mysql_handle_derived(thd->lex, &mysql_derived_prepare) ||
-      (thd->fill_derived_tables() &&
-       mysql_handle_derived(thd->lex, &mysql_derived_filling)))
+  if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(1);
+  if (table_list->handle_derived(thd->lex, DT_PREPARE))
     DBUG_RETURN(1);
 
   thd_proc_info(thd, "init");
   table= table_list->table;
 
+  if (!table_list->single_table_updatable())
+  {
+    my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
+    DBUG_RETURN(1);
+  }
   /* Calculate "table->covering_keys" based on the WHERE */
   table->covering_keys= table->s->keys_in_use;
   table->quick_keys.clear_all();
@@ -271,14 +280,17 @@ int mysql_update(THD *thd,
   table_list->grant.want_privilege= table->grant.want_privilege= want_privilege;
   table_list->register_want_access(want_privilege);
 #endif
+  /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+  if (table_list->is_view())
+    unfix_fields(fields);
+
   if (setup_fields_with_no_wrap(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0))
     DBUG_RETURN(1);                     /* purecov: inspected */
   if (table_list->view && check_fields(thd, fields))
   {
     DBUG_RETURN(1);
   }
-  if (!table_list->single_table_updatable() ||
-      check_key_in_view(thd, table_list))
+  if (check_key_in_view(thd, table_list))
   {
     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
     DBUG_RETURN(1);
@@ -308,6 +320,10 @@ int mysql_update(THD *thd,
     DBUG_RETURN(1);				/* purecov: inspected */
   }
 
+  /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+  if (select_lex->optimize_unflattened_subqueries())
+    DBUG_RETURN(TRUE);
+
   if (select_lex->inner_refs_list.elements &&
     fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
     DBUG_RETURN(1);
@@ -772,9 +788,9 @@ int mysql_update(THD *thd,
   // simulated killing after the loop must be ineffective for binlogging
   DBUG_EXECUTE_IF("simulate_kill_bug27571",
                   {
-                    thd->killed= THD::KILL_QUERY;
+                    thd->killed= KILL_QUERY;
                   };);
-  error= (killed_status == THD::NOT_KILLED)?  error : 1;
+  error= (killed_status == NOT_KILLED)?  error : 1;
   
   if (error &&
       will_batch &&
@@ -835,7 +851,7 @@ int mysql_update(THD *thd,
       if (error < 0)
         thd->clear_error();
       else
-        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+        errcode= query_error_code(thd, killed_status == NOT_KILLED);
 
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query(), thd->query_length(),
@@ -866,6 +882,11 @@ int mysql_update(THD *thd,
   }
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;		/* calc cuted fields */
   thd->abort_on_warning= 0;
+  if (thd->lex->current_select->first_cond_optimization)
+  {
+    thd->lex->current_select->save_leaf_tables(thd);
+    thd->lex->current_select->first_cond_optimization= 0;
+  }
   DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0);
 
 err:
@@ -926,8 +947,8 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list,
   if (setup_tables_and_check_access(thd, &select_lex->context, 
                                     &select_lex->top_join_list,
                                     table_list,
-                                    &select_lex->leaf_tables,
-                                    FALSE, UPDATE_ACL, SELECT_ACL) ||
+                                    select_lex->leaf_tables,
+                                    FALSE, UPDATE_ACL, SELECT_ACL, TRUE) ||
       setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
       select_lex->setup_ref_array(thd, order_num) ||
       setup_order(thd, select_lex->ref_pointer_array,
@@ -964,8 +985,8 @@ static table_map get_table_map(List<Item> *items)
   Item_field *item;
   table_map map= 0;
 
-  while ((item= (Item_field *) item_it++)) 
-    map|= item->used_tables();
+  while ((item= (Item_field *) item_it++))
+    map|= item->all_used_tables();
   DBUG_PRINT("info", ("table_map: 0x%08lx", (long) map));
   return map;
 }
@@ -987,7 +1008,7 @@ int mysql_multi_update_prepare(THD *thd)
 {
   LEX *lex= thd->lex;
   TABLE_LIST *table_list= lex->query_tables;
-  TABLE_LIST *tl, *leaves;
+  TABLE_LIST *tl;
   List<Item> *fields= &lex->select_lex.item_list;
   table_map tables_for_update;
   bool update_view= 0;
@@ -1010,7 +1031,7 @@ reopen_tables:
   /* open tables and create derived ones, but do not lock and fill them */
   if (((original_multiupdate || need_reopen) &&
        open_tables(thd, &table_list, &table_count, 0)) ||
-      mysql_handle_derived(lex, &mysql_derived_prepare))
+      mysql_handle_derived(lex, DT_INIT))
     DBUG_RETURN(TRUE);
   /*
     setup_tables() need for VIEWs. JOIN::prepare() will call setup_tables()
@@ -1018,11 +1039,20 @@ reopen_tables:
     call in setup_tables()).
   */
 
+  //We need to merge for insert prior to prepare.
+  if (mysql_handle_derived(lex, DT_MERGE_FOR_INSERT))
+    DBUG_RETURN(TRUE);
+  if (mysql_handle_derived(lex, DT_PREPARE))
+    DBUG_RETURN(TRUE);
+
   if (setup_tables_and_check_access(thd, &lex->select_lex.context,
                                     &lex->select_lex.top_join_list,
                                     table_list,
-                                    &lex->select_lex.leaf_tables, FALSE,
-                                    UPDATE_ACL, SELECT_ACL))
+                                    lex->select_lex.leaf_tables, FALSE,
+                                    UPDATE_ACL, SELECT_ACL, FALSE))
+    DBUG_RETURN(TRUE);
+
+  if (lex->select_lex.handle_derived(thd->lex, DT_MERGE))  
     DBUG_RETURN(TRUE);
 
   if (setup_fields_with_no_wrap(thd, 0, *fields, MARK_COLUMNS_WRITE, 0, 0))
@@ -1047,8 +1077,8 @@ reopen_tables:
   /*
     Setup timestamp handling and locking mode
   */
-  leaves= lex->select_lex.leaf_tables;
-  for (tl= leaves; tl; tl= tl->next_leaf)
+  List_iterator<TABLE_LIST> ti(lex->select_lex.leaf_tables);
+  while ((tl= ti++))
   {
     TABLE *table= tl->table;
     /* Only set timestamp column if this is not modified */
@@ -1060,7 +1090,7 @@ reopen_tables:
     /* if table will be updated then check that it is unique */
     if (table->map & tables_for_update)
     {
-      if (!tl->updatable || check_key_in_view(thd, tl))
+      if (!tl->single_table_updatable() || check_key_in_view(thd, tl))
       {
         my_error(ER_NON_UPDATABLE_TABLE, MYF(0), tl->alias, "UPDATE");
         DBUG_RETURN(TRUE);
@@ -1090,7 +1120,7 @@ reopen_tables:
   for (tl= table_list; tl; tl= tl->next_local)
   {
     /* Check access privileges for table */
-    if (!tl->derived)
+    if (!tl->is_derived())
     {
       uint want_privilege= tl->updating ? UPDATE_ACL : SELECT_ACL;
       if (check_access(thd, want_privilege,
@@ -1104,7 +1134,7 @@ reopen_tables:
   /* check single table update for view compound from several tables */
   for (tl= table_list; tl; tl= tl->next_local)
   {
-    if (tl->effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (tl->is_merged_derived())
     {
       TABLE_LIST *for_update= 0;
       if (tl->check_single_table(&for_update, tables_for_update, tl))
@@ -1159,6 +1189,8 @@ reopen_tables:
       */
       unit->unclean();
     }
+    // Reset 'prepared' flags for all derived tables/views
+    mysql_handle_list_of_derived(thd->lex, table_list, DT_REINIT);
 
     /*
       Also we need to cleanup Natural_join_column::table_field items.
@@ -1181,7 +1213,8 @@ reopen_tables:
   */
   lex->select_lex.exclude_from_table_unique_test= TRUE;
   /* We only need SELECT privilege for columns in the values list */
-  for (tl= leaves; tl; tl= tl->next_leaf)
+  ti.rewind();
+  while ((tl= ti++))
   {
     TABLE *table= tl->table;
     TABLE_LIST *tlist;
@@ -1209,11 +1242,10 @@ reopen_tables:
     further check in multi_update::prepare whether to use record cache.
   */
   lex->select_lex.exclude_from_table_unique_test= FALSE;
- 
-  if (thd->fill_derived_tables() &&
-      mysql_handle_derived(lex, &mysql_derived_filling))
-    DBUG_RETURN(TRUE);
 
+  if (lex->select_lex.save_prep_leaf_tables(thd))
+    DBUG_RETURN(TRUE);
+ 
   DBUG_RETURN (FALSE);
 }
 
@@ -1236,7 +1268,7 @@ bool mysql_multi_update(THD *thd,
   DBUG_ENTER("mysql_multi_update");
 
   if (!(result= new multi_update(table_list,
-				 thd->lex->select_lex.leaf_tables,
+				 &thd->lex->select_lex.leaf_tables,
 				 fields, values,
 				 handle_duplicates, ignore)))
     DBUG_RETURN(TRUE);
@@ -1271,7 +1303,7 @@ bool mysql_multi_update(THD *thd,
 
 
 multi_update::multi_update(TABLE_LIST *table_list,
-			   TABLE_LIST *leaves_list,
+                           List<TABLE_LIST> *leaves_list,
 			   List<Item> *field_list, List<Item> *value_list,
 			   enum enum_duplicates handle_duplicates_arg,
                            bool ignore_arg)
@@ -1289,6 +1321,7 @@ multi_update::multi_update(TABLE_LIST *table_list,
 
 int multi_update::prepare(List<Item> &not_used_values,
 			  SELECT_LEX_UNIT *lex_unit)
+
 {
   TABLE_LIST *table_ref;
   SQL_I_List<TABLE_LIST> update;
@@ -1298,6 +1331,7 @@ int multi_update::prepare(List<Item> &not_used_values,
   List_iterator_fast<Item> value_it(*values);
   uint i, max_fields;
   uint leaf_table_count= 0;
+  List_iterator<TABLE_LIST> ti(*leaves);
   DBUG_ENTER("multi_update::prepare");
 
   thd->count_cuted_fields= CHECK_FIELD_WARN;
@@ -1317,7 +1351,7 @@ int multi_update::prepare(List<Item> &not_used_values,
     TABLE::tmp_set by pointing TABLE::read_set to it and then restore it after
     setup_fields().
   */
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  while ((table_ref= ti++))
   {
     TABLE *table= table_ref->table;
     if (tables_to_update & table->map)
@@ -1335,7 +1369,8 @@ int multi_update::prepare(List<Item> &not_used_values,
 
   int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0);
 
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  ti.rewind();
+  while ((table_ref= ti++))
   {
     TABLE *table= table_ref->table;
     if (tables_to_update & table->map)
@@ -1364,7 +1399,8 @@ int multi_update::prepare(List<Item> &not_used_values,
   */
 
   update.empty();
-  for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+  ti.rewind();
+  while ((table_ref= ti++))
   {
     /* TODO: add support of view of join support */
     TABLE *table=table_ref->table;
@@ -1590,9 +1626,9 @@ loop_end:
     {
       table_map unupdated_tables= table_ref->check_option->used_tables() &
                                   ~first_table_for_update->map;
-      for (TABLE_LIST *tbl_ref =leaves;
-           unupdated_tables && tbl_ref;
-           tbl_ref= tbl_ref->next_leaf)
+      List_iterator<TABLE_LIST> ti(*leaves);
+      TABLE_LIST *tbl_ref;
+      while ((tbl_ref= ti++) && unupdated_tables)
       {
         if (unupdated_tables & tbl_ref->table->map)
           unupdated_tables&= ~tbl_ref->table->map;
@@ -1830,15 +1866,17 @@ int multi_update::send_data(List<Item> &not_used_values)
       /* Store regular updated fields in the row. */
       fill_record(thd,
                   tmp_table->field + 1 + unupdated_check_opt_tables.elements,
-                  *values_for_table[offset], 1);
+                  *values_for_table[offset], TRUE, FALSE);
 
       /* Write row, ignoring duplicated updates to a row */
-      error= tmp_table->file->ha_write_row(tmp_table->record[0]);
+      error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]);
       if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)
       {
         if (error &&
             create_internal_tmp_table_from_heap(thd, tmp_table,
-                                    tmp_table_param + offset, error, 1))
+                                         tmp_table_param[offset].start_recinfo,
+                                         &tmp_table_param[offset].recinfo,
+                                         error, 1))
         {
           do_update= 0;
           DBUG_RETURN(1);			// Not a table_is_full error
@@ -1899,7 +1937,7 @@ void multi_update::abort()
         got caught and if happens later the killed error is written
         into repl event.
       */
-      int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
+      int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
       /* the error of binary logging is ignored */
       (void)thd->binlog_query(THD::ROW_QUERY_TYPE,
                               thd->query(), thd->query_length(),
@@ -2113,7 +2151,7 @@ bool multi_update::send_eof()
 {
   char buff[STRING_BUFFER_USUAL_SIZE];
   ulonglong id;
-  THD::killed_state killed_status= THD::NOT_KILLED;
+  killed_state killed_status= NOT_KILLED;
   DBUG_ENTER("multi_update::send_eof");
   thd_proc_info(thd, "updating reference tables");
 
@@ -2126,7 +2164,7 @@ bool multi_update::send_eof()
     if local_error is not set ON until after do_updates() then
     later carried out killing should not affect binlogging.
   */
-  killed_status= (local_error == 0)? THD::NOT_KILLED : thd->killed;
+  killed_status= (local_error == 0) ? NOT_KILLED : thd->killed;
   thd_proc_info(thd, "end");
 
   /* We must invalidate the query cache before binlog writing and
@@ -2155,7 +2193,7 @@ bool multi_update::send_eof()
       if (local_error == 0)
         thd->clear_error();
       else
-        errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
+        errcode= query_error_code(thd, killed_status == NOT_KILLED);
       if (thd->binlog_query(THD::ROW_QUERY_TYPE,
                             thd->query(), thd->query_length(),
                             transactional_tables, FALSE, errcode))
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index 0fe1b597d82..8760e936d36 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -248,7 +248,7 @@ fill_defined_view_parts (THD *thd, TABLE_LIST *view)
     view->definer.user= decoy.definer.user;
     lex->definer= &view->definer;
   }
-  if (lex->create_view_algorithm == VIEW_ALGORITHM_UNDEFINED)
+  if (lex->create_view_algorithm == DTYPE_ALGORITHM_UNDEFINED)
     lex->create_view_algorithm= (uint8) decoy.algorithm;
   if (lex->create_view_suid == VIEW_SUID_DEFAULT)
     lex->create_view_suid= decoy.view_suid ? 
@@ -843,7 +843,7 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
     ulong sql_mode= thd->variables.sql_mode & MODE_ANSI_QUOTES;
     thd->variables.sql_mode&= ~MODE_ANSI_QUOTES;
 
-    lex->unit.print(&view_query, QT_ORDINARY);
+    lex->unit.print(&view_query, QT_VIEW_INTERNAL);
     lex->unit.print(&is_query, QT_IS);
 
     thd->variables.sql_mode|= sql_mode;
@@ -876,7 +876,7 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
   {
     push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_VIEW_MERGE,
                  ER(ER_WARN_VIEW_MERGE));
-    lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+    lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
   }
   view->algorithm= lex->create_view_algorithm;
   view->definer.user= lex->definer->user;
@@ -1491,7 +1491,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
 
       List_iterator_fast<TABLE_LIST> ti(view_select->top_join_list);
 
-      table->effective_algorithm= VIEW_ALGORITHM_MERGE;
+      table->derived_type= VIEW_ALGORITHM_MERGE;
       DBUG_PRINT("info", ("algorithm: MERGE"));
       table->updatable= (table->updatable_view != 0);
       table->effective_with_check=
@@ -1505,74 +1505,31 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       /* prepare view context */
       lex->select_lex.context.resolve_in_table_list_only(view_main_select_tables);
       lex->select_lex.context.outer_context= 0;
-      lex->select_lex.context.select_lex= table->select_lex;
       lex->select_lex.select_n_having_items+=
         table->select_lex->select_n_having_items;
 
-      /*
-        Tables of the main select of the view should be marked as belonging
-        to the same select as original view (again we can use LEX::select_lex
-        for this purprose because we don't support MERGE algorithm for views
-        with unions).
-      */
-      for (tbl= lex->select_lex.get_table_list(); tbl; tbl= tbl->next_local)
-        tbl->select_lex= table->select_lex;
-
-      {
-        if (view_main_select_tables->next_local)
-        {
-          table->multitable_view= TRUE;
-          if (table->belong_to_view)
-           table->belong_to_view->multitable_view= TRUE;
-        }
-        /* make nested join structure for view tables */
-        NESTED_JOIN *nested_join;
-        if (!(nested_join= table->nested_join=
-              (NESTED_JOIN *) thd->calloc(sizeof(NESTED_JOIN))))
-          goto err;
-        nested_join->join_list= view_select->top_join_list;
-
-        /* re-nest tables of VIEW */
-        ti.rewind();
-        while ((tbl= ti++))
-        {
-          tbl->join_list= &nested_join->join_list;
-          tbl->embedding= table;
-        }
-      }
-
-      /* Store WHERE clause for post-processing in setup_underlying */
       table->where= view_select->where;
-      /*
-        Add subqueries units to SELECT into which we merging current view.
-        unit(->next)* chain starts with subqueries that are used by this
-        view and continues with subqueries that are used by other views.
-        We must not add any subquery twice (otherwise we'll form a loop),
-        to do this we remember in end_unit the first subquery that has
-        been already added.
-
-        NOTE: we do not support UNION here, so we take only one select
-      */
-      SELECT_LEX_NODE *end_unit= table->select_lex->slave;
-      SELECT_LEX_UNIT *next_unit;
-      for (SELECT_LEX_UNIT *unit= lex->select_lex.first_inner_unit();
-           unit;
-           unit= next_unit)
-      {
-        if (unit == end_unit)
-          break;
-        SELECT_LEX_NODE *save_slave= unit->slave;
-        next_unit= unit->next_unit();
-        unit->include_down(table->select_lex);
-        unit->slave= save_slave; // fix include_down initialisation
-      }
 
       /* 
         We can safely ignore the VIEW's ORDER BY if we merge into union 
         branch, as order is not important there.
       */
-      if (!table->select_lex->master_unit()->is_union())
+      if (!table->select_lex->master_unit()->is_union() &&
+          table->select_lex->order_list.elements == 0)
         table->select_lex->order_list.push_back(&lex->select_lex.order_list);
+      else
+      {
+        if (old_lex->sql_command == SQLCOM_SELECT &&
+            (old_lex->describe & DESCRIBE_EXTENDED) &&
+            lex->select_lex.order_list.elements &&
+            !table->select_lex->master_unit()->is_union())
+        {
+          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                              ER_VIEW_ORDERBY_IGNORED,
+                              ER(ER_VIEW_ORDERBY_IGNORED),
+                              table->db, table->table_name);
+        }
+      }
       /*
 	This SELECT_LEX will be linked in global SELECT_LEX list
 	to make it processed by mysql_handle_derived(),
@@ -1582,23 +1539,22 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       goto ok;
     }
 
-    table->effective_algorithm= VIEW_ALGORITHM_TMPTABLE;
+    table->derived_type= VIEW_ALGORITHM_TMPTABLE;
     DBUG_PRINT("info", ("algorithm: TEMPORARY TABLE"));
     view_select->linkage= DERIVED_TABLE_TYPE;
     table->updatable= 0;
     table->effective_with_check= VIEW_CHECK_NONE;
     old_lex->subqueries= TRUE;
 
-    /* SELECT tree link */
-    lex->unit.include_down(table->select_lex);
-    lex->unit.slave= view_select; // fix include_down initialisation
-
     table->derived= &lex->unit;
   }
   else
     goto err;
 
 ok:
+  /* SELECT tree link */
+  lex->unit.include_down(table->select_lex);
+  lex->unit.slave= view_select; // fix include_down initialisation
   /* global SELECT list linking */
   end= view_select;	// primary SELECT_LEX is always last
   end->link_next= old_lex->all_selects_list;
@@ -1766,6 +1722,8 @@ frm_type_enum mysql_frm_type(THD *thd, char *path, enum legacy_db_type *dbt)
 
   *dbt= DB_TYPE_UNKNOWN;
 
+
+
   if ((file= my_open(path, O_RDONLY | O_SHARE, MYF(0))) < 0)
     DBUG_RETURN(FRMTYPE_ERROR);
   error= my_read(file, (uchar*) header, sizeof(header), MYF(MY_NABP));
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 5f94bcd8bf3..084314ff23f 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -36,6 +36,7 @@
 #define YYINITDEPTH 100
 #define YYMAXDEPTH 3200                        /* Because of 64K stack */
 #define Lex (YYTHD->lex)
+
 #define Select Lex->current_select
 #include "mysql_priv.h"
 #include "slave.h"
@@ -673,6 +674,8 @@ static bool add_create_index (LEX *lex, Key::Keytype type, const char *name,
   sp_head *sphead;
   struct p_elem_val *p_elem_value;
   enum index_hint_type index_hint;
+  DYNCALL_CREATE_DEF *dyncol_def;
+  List<DYNCALL_CREATE_DEF> *dyncol_def_list;
 }
 
 %{
@@ -681,10 +684,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 
 %pure_parser                                    /* We have threads */
 /*
-  Currently there are 169 shift/reduce conflicts.
+  Currently there are 174 shift/reduce conflicts.
   We should not introduce new conflicts any more.
 */
-%expect 169
+%expect 174
 
 /*
    Comments for TOKENS.
@@ -760,6 +763,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  CHANGED
 %token  CHARSET
 %token  CHAR_SYM                      /* SQL-2003-R */
+%token  CHECKPOINT_SYM
 %token  CHECKSUM_SYM
 %token  CHECK_SYM                     /* SQL-2003-R */
 %token  CIPHER_SYM
@@ -771,6 +775,12 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  COLLATE_SYM                   /* SQL-2003-R */
 %token  COLLATION_SYM                 /* SQL-2003-N */
 %token  COLUMNS
+%token  COLUMN_ADD_SYM
+%token  COLUMN_CREATE_SYM
+%token  COLUMN_DELETE_SYM
+%token  COLUMN_EXISTS_SYM
+%token  COLUMN_GET_SYM
+%token  COLUMN_LIST_SYM
 %token  COLUMN_SYM                    /* SQL-2003-R */
 %token  COMMENT_SYM
 %token  COMMITTED_SYM                 /* SQL-2003-N */
@@ -893,6 +903,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  GROUP_CONCAT_SYM
 %token  GT_SYM                        /* OPERATOR */
 %token  HANDLER_SYM
+%token  HARD_SYM
 %token  HASH_SYM
 %token  HAVING                        /* SQL-2003-R */
 %token  HELP_SYM
@@ -1043,6 +1054,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ON                            /* SQL-2003-R */
 %token  ONE_SHOT_SYM
 %token  ONE_SYM
+%token  ONLINE_SYM
 %token  OPEN_SYM                      /* SQL-2003-R */
 %token  OPTIMIZE
 %token  OPTIONS_SYM
@@ -1160,6 +1172,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  SMALLINT                      /* SQL-2003-R */
 %token  SNAPSHOT_SYM
 %token  SOCKET_SYM
+%token  SOFT_SYM
 %token  SONAME_SYM
 %token  SOUNDS_SYM
 %token  SOURCE_SYM
@@ -1338,6 +1351,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         opt_natural_language_mode opt_query_expansion
         opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment
         ev_alter_on_schedule_completion opt_ev_rename_to opt_ev_sql_stmt
+        optional_flush_tables_arguments opt_dyncol_type dyncol_type
+        opt_time_precision kill_type kill_option int_num
 
 %type <ulong_num>
         ulong_num real_ulong_num merge_insert_types
@@ -1369,7 +1384,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         function_call_keyword
         function_call_nonkeyword
         function_call_generic
-        function_call_conflict
+        function_call_conflict kill_expr
 
 %type <item_num>
         NUM_literal
@@ -1437,6 +1452,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 
 %type <boolfunc2creator> comp_op
 
+%type <dyncol_def> dyncall_create_element
+
+%type <dyncol_def_list> dyncall_create_list
+
 %type <NONE>
         query verb_clause create change select do drop insert replace insert2
         insert_values update delete truncate rename
@@ -1930,7 +1949,7 @@ create:
         | CREATE
           {
             Lex->create_view_mode= VIEW_CREATE_NEW;
-            Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+            Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
             Lex->create_view_suid= TRUE;
           }
           view_or_trigger_or_sp_or_event
@@ -2072,7 +2091,7 @@ opt_ev_status:
 ev_starts:
           /* empty */
           {
-            Item *item= new (YYTHD->mem_root) Item_func_now_local();
+            Item *item= new (YYTHD->mem_root) Item_func_now_local(0);
             if (item == NULL)
               MYSQL_YYABORT;
             Lex->event_parse_data->item_starts= item;
@@ -5204,7 +5223,7 @@ type:
           { $$=MYSQL_TYPE_YEAR; }
         | DATE_SYM
           { $$=MYSQL_TYPE_DATE; }
-        | TIME_SYM
+        | TIME_SYM opt_field_length
           { $$=MYSQL_TYPE_TIME; }
         | TIMESTAMP opt_field_length
           {
@@ -5219,7 +5238,7 @@ type:
               $$=MYSQL_TYPE_TIMESTAMP;
             }
           }
-        | DATETIME
+        | DATETIME opt_field_length
           { $$=MYSQL_TYPE_DATETIME; }
         | TINYBLOB
           {
@@ -5412,9 +5431,9 @@ attribute:
           NULL_SYM { Lex->type&= ~ NOT_NULL_FLAG; }
         | not NULL_SYM { Lex->type|= NOT_NULL_FLAG; }
         | DEFAULT now_or_signed_literal { Lex->default_value=$2; }
-        | ON UPDATE_SYM NOW_SYM optional_braces
+        | ON UPDATE_SYM NOW_SYM opt_time_precision
           {
-            Item *item= new (YYTHD->mem_root) Item_func_now_local();
+            Item *item= new (YYTHD->mem_root) Item_func_now_local($4);
             if (item == NULL)
               MYSQL_YYABORT;
             Lex->on_update_value= item;
@@ -5484,9 +5503,9 @@ attribute:
         ;
 
 now_or_signed_literal:
-          NOW_SYM optional_braces
+          NOW_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_local();
+            $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -5868,7 +5887,7 @@ string_list:
 */
 
 alter:
-          ALTER opt_ignore TABLE_SYM table_ident
+          ALTER alter_options TABLE_SYM table_ident
           {
             THD *thd= YYTHD;
             LEX *lex= thd->lex;
@@ -5981,7 +6000,7 @@ alter:
               my_error(ER_SP_BADSTATEMENT, MYF(0), "ALTER VIEW");
               MYSQL_YYABORT;
             }
-            lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+            lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
             lex->create_view_mode= VIEW_ALTER;
           }
           view_tail
@@ -6435,6 +6454,25 @@ opt_ignore:
         | IGNORE_SYM { Lex->ignore= 1;}
         ;
 
+alter_options:
+        { Lex->ignore= Lex->online= 0;} alter_options_part2
+	;
+	
+alter_options_part2:
+          /* empty */ 
+        | alter_option_list
+        ;
+
+alter_option_list:
+        alter_option_list alter_option
+        | alter_option
+        ;
+
+alter_option:
+	  IGNORE_SYM { Lex->ignore= 1;}
+        | ONLINE_SYM { Lex->online= 1;}
+
+
 opt_restrict:
           /* empty */ { Lex->drop_mode= DROP_DEFAULT; }
         | RESTRICT    { Lex->drop_mode= DROP_RESTRICT; }
@@ -7074,6 +7112,12 @@ select_alias:
         | TEXT_STRING_sys { $$=$1; }
         ;
 
+opt_time_precision:
+          /* empty */             { $$= 0;  }
+        | '(' ')'                 { $$= 0;  }
+        | '(' real_ulong_num ')'  { $$= $2; };
+        ;
+
 optional_braces:
           /* empty */ {}
         | '(' ')' {}
@@ -7135,7 +7179,7 @@ expr:
         | expr XOR expr %prec XOR
           {
             /* XOR is a proprietary extension */
-            $$ = new (YYTHD->mem_root) Item_cond_xor($1, $3);
+            $$ = new (YYTHD->mem_root) Item_func_xor($1, $3);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -7489,6 +7533,133 @@ all_or_any:
         | ANY_SYM { $$ = 0; }
         ;
 
+opt_dyncol_type:
+          /* empty */ 
+          {
+            LEX *lex= Lex;
+	    $$= DYN_COL_NULL; /* automatic type */
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+	  }
+        | AS dyncol_type { $$= $2; }
+        ;
+
+dyncol_type:
+          INT_SYM
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_INT;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | UNSIGNED INT_SYM 
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_UINT;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | DOUBLE_SYM
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_DOUBLE;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | REAL
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_DOUBLE;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | FLOAT_SYM
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_DOUBLE;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | DECIMAL_SYM float_options
+          {
+            $$= DYN_COL_DECIMAL;
+            Lex->charset= NULL;
+          }
+        | char opt_binary
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_STRING;
+            lex->length= lex->dec= 0;
+          }
+        | nchar
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_STRING;
+            lex->charset= national_charset_info;
+            lex->length= lex->dec= 0;
+          }
+        | DATE_SYM
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_DATE;
+            lex->charset= NULL;
+            lex->length= lex->dec= 0;
+          }
+        | TIME_SYM opt_field_length
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_TIME;
+            lex->charset= NULL;
+            lex->dec= lex->length;
+            lex->length= 0;
+          }
+        | DATETIME opt_field_length
+          {
+            LEX *lex= Lex;
+            $$= DYN_COL_DATETIME;
+            lex->charset= NULL;
+            lex->dec= lex->length;
+            lex->length= 0;
+          }
+        ;
+
+dyncall_create_element:
+   expr ',' expr opt_dyncol_type
+   {
+     LEX *lex= Lex;
+     $$= (DYNCALL_CREATE_DEF *)
+       alloc_root(YYTHD->mem_root, sizeof(DYNCALL_CREATE_DEF));
+     if ($$ == NULL)
+       MYSQL_YYABORT;
+     $$->num= $1;
+     $$->value= $3;
+     $$->type= (DYNAMIC_COLUMN_TYPE)$4;
+     $$->cs= lex->charset;
+     if (lex->length)
+       $$->len= strtoul(lex->length, NULL, 10);
+     else
+       $$->len= 0;
+     if (lex->dec)
+       $$->frac= strtoul(lex->dec, NULL, 10);
+     else
+       $$->len= 0;
+   }
+
+dyncall_create_list:
+     dyncall_create_element
+       {
+         $$= new (YYTHD->mem_root) List<DYNCALL_CREATE_DEF>;
+         if ($$ == NULL)
+           MYSQL_YYABORT;
+         $$->push_back($1);
+       }
+   | dyncall_create_list ',' dyncall_create_element
+       {
+         $1->push_back($3);
+         $$= $1;
+       }
+   ;
+
 simple_expr:
           simple_ident
         | function_call_keyword
@@ -7752,13 +7923,13 @@ function_call_keyword:
           }
         | TIME_SYM '(' expr ')'
           {
-            $$= new (YYTHD->mem_root) Item_time_typecast($3);
+            $$= new (YYTHD->mem_root) Item_time_typecast($3, AUTO_SEC_PART_DIGITS);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
         | TIMESTAMP '(' expr ')'
           {
-            $$= new (YYTHD->mem_root) Item_datetime_typecast($3);
+            $$= new (YYTHD->mem_root) Item_datetime_typecast($3, AUTO_SEC_PART_DIGITS);
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
@@ -7865,16 +8036,9 @@ function_call_nonkeyword:
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | CURTIME optional_braces
+        | CURTIME opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_curtime_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | CURTIME '(' expr ')'
-          {
-            $$= new (YYTHD->mem_root) Item_func_curtime_local($3);
+            $$= new (YYTHD->mem_root) Item_func_curtime_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -7905,16 +8069,9 @@ function_call_nonkeyword:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        | NOW_SYM optional_braces
+        | NOW_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | NOW_SYM '(' expr ')'
-          {
-            $$= new (YYTHD->mem_root) Item_func_now_local($3);
+            $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -7962,7 +8119,7 @@ function_call_nonkeyword:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        | SYSDATE optional_braces
+        | SYSDATE opt_time_precision
           {
             /*
               Unlike other time-related functions, SYSDATE() is
@@ -7973,19 +8130,9 @@ function_call_nonkeyword:
             */
             Lex->set_stmt_unsafe();
             if (global_system_variables.sysdate_is_now == 0)
-              $$= new (YYTHD->mem_root) Item_func_sysdate_local();
+              $$= new (YYTHD->mem_root) Item_func_sysdate_local($2);
             else
-              $$= new (YYTHD->mem_root) Item_func_now_local();
-            if ($$ == NULL)
-              MYSQL_YYABORT;
-            Lex->safe_to_cache_query=0;
-          }
-        | SYSDATE '(' expr ')'
-          {
-            if (global_system_variables.sysdate_is_now == 0)
-              $$= new (YYTHD->mem_root) Item_func_sysdate_local($3);
-            else
-              $$= new (YYTHD->mem_root) Item_func_now_local($3);
+              $$= new (YYTHD->mem_root) Item_func_now_local($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
@@ -8009,20 +8156,65 @@ function_call_nonkeyword:
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | UTC_TIME_SYM optional_braces
+        | UTC_TIME_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_curtime_utc();
+            $$= new (YYTHD->mem_root) Item_func_curtime_utc($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
-        | UTC_TIMESTAMP_SYM optional_braces
+        | UTC_TIMESTAMP_SYM opt_time_precision
           {
-            $$= new (YYTHD->mem_root) Item_func_now_utc();
+            $$= new (YYTHD->mem_root) Item_func_now_utc($2);
             if ($$ == NULL)
               MYSQL_YYABORT;
             Lex->safe_to_cache_query=0;
           }
+        |
+          COLUMN_ADD_SYM '(' expr ',' dyncall_create_list ')'
+          {
+            $$= create_func_dyncol_add(YYTHD, $3, *$5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_DELETE_SYM '(' expr ',' expr_list ')'
+          {
+            $$= create_func_dyncol_delete(YYTHD, $3, *$5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_EXISTS_SYM '(' expr ',' expr ')'
+          {
+            $$= new (YYTHD->mem_root) Item_func_dyncol_exists($3, $5);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_LIST_SYM '(' expr ')'
+          {
+            $$= new (YYTHD->mem_root) Item_func_dyncol_list($3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_CREATE_SYM '(' dyncall_create_list ')'
+          {
+            $$= create_func_dyncol_create(YYTHD, *$3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          COLUMN_GET_SYM '(' expr ',' expr AS cast_type ')'
+          {
+            LEX *lex= Lex;
+            $$= create_func_dyncol_get(YYTHD, $3, $5, $7,
+                                        lex->length, lex->dec,
+                                        lex->charset);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
         ;
 
 /*
@@ -8639,6 +8831,8 @@ cast_type:
           { $$=ITEM_CAST_CHAR; Lex->dec= 0; }
         | NCHAR_SYM opt_field_length
           { $$=ITEM_CAST_CHAR; Lex->charset= national_charset_info; Lex->dec=0; }
+        | INT_SYM
+          { $$=ITEM_CAST_SIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | SIGNED_SYM
           { $$=ITEM_CAST_SIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | SIGNED_SYM INT_SYM
@@ -8649,13 +8843,24 @@ cast_type:
           { $$=ITEM_CAST_UNSIGNED_INT; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
         | DATE_SYM
           { $$=ITEM_CAST_DATE; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
-        | TIME_SYM
-          { $$=ITEM_CAST_TIME; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
-        | DATETIME
-          { $$=ITEM_CAST_DATETIME; Lex->charset= NULL; Lex->dec=Lex->length= (char*)0; }
+        | TIME_SYM opt_field_length
+          {
+            $$=ITEM_CAST_TIME;
+            LEX *lex= Lex;
+            lex->charset= NULL; lex->dec= lex->length; lex->length= (char*)0;
+           }
+        | DATETIME opt_field_length
+          {
+            $$=ITEM_CAST_DATETIME;
+            LEX *lex= Lex;
+            lex->charset= NULL; lex->dec= lex->length; lex->length= (char*)0;
+           }
         | DECIMAL_SYM float_options
           { $$=ITEM_CAST_DECIMAL; Lex->charset= NULL; }
-        ;
+        | DOUBLE_SYM
+          { Lex->charset= NULL; Lex->length= Lex->dec= 0;}
+          opt_precision
+          { $$=ITEM_CAST_DOUBLE; }
 
 opt_expr_list:
           /* empty */ { $$= NULL; }
@@ -9491,6 +9696,12 @@ delete_limit_clause:
           }
         ;
 
+int_num:
+          NUM           { int error; $$= (int) my_strtoll10($1.str, (char**) 0, &error); }
+        | '-' NUM       { int error; $$= -(int) my_strtoll10($2.str, (char**) 0, &error); }
+        | '-' LONG_NUM  { int error; $$= -(int) my_strtoll10($2.str, (char**) 0, &error); }
+        ;
+
 ulong_num:
           NUM           { int error; $$= (ulong) my_strtoll10($1.str, (char**) 0, &error); }
         | HEX_NUM       { $$= (ulong) strtol($1.str, (char**) 0, 16); }
@@ -10804,8 +11015,8 @@ flush_option:
           table_or_tables
           { Lex->type|= REFRESH_TABLES; }
           opt_table_list {}
-        | TABLES WITH READ_SYM LOCK_SYM
-          { Lex->type|= REFRESH_TABLES | REFRESH_READ_LOCK; }
+        | TABLES WITH READ_SYM LOCK_SYM optional_flush_tables_arguments
+          { Lex->type|= REFRESH_TABLES | REFRESH_READ_LOCK | $5; }
         | QUERY_SYM CACHE_SYM
           { Lex->type|= REFRESH_QUERY_CACHE_FREE; }
         | HOSTS_SYM
@@ -10841,6 +11052,10 @@ opt_table_list:
         | table_list {}
         ;
 
+optional_flush_tables_arguments:
+          /* empty */        {$$= 0;}
+        | AND_SYM DISABLE_SYM CHECKPOINT_SYM {$$= REFRESH_CHECKPOINT; } 
+
 reset:
           RESET_SYM
           {
@@ -10894,19 +11109,41 @@ purge_option:
 /* kill threads */
 
 kill:
-          KILL_SYM kill_option expr
+          KILL_SYM
           {
             LEX *lex=Lex;
             lex->value_list.empty();
-            lex->value_list.push_front($3);
+            lex->users_list.empty();
             lex->sql_command= SQLCOM_KILL;
           }
+          kill_type kill_option kill_expr
+          {
+            Lex->kill_signal= (killed_state) ($3 | $4);
+          }
         ;
 
+kill_type:
+        /* Empty */    { $$= (int) KILL_HARD_BIT; }
+        | HARD_SYM     { $$= (int) KILL_HARD_BIT; }
+        | SOFT_SYM     { $$= 0; }
+
 kill_option:
-          /* empty */ { Lex->type= 0; }
-        | CONNECTION_SYM { Lex->type= 0; }
-        | QUERY_SYM      { Lex->type= ONLY_KILL_QUERY; }
+          /* empty */    { $$= (int) KILL_CONNECTION; }
+        | CONNECTION_SYM { $$= (int) KILL_CONNECTION; }
+        | QUERY_SYM      { $$= (int) KILL_QUERY; }
+        ;
+
+kill_expr:
+        expr
+        {
+          Lex->value_list.push_front($$);
+          Lex->kill_type= KILL_TYPE_ID;
+         }
+        | USER user
+          {
+            Lex->users_list.push_back($2);
+            Lex->kill_type= KILL_TYPE_USER;
+          }
         ;
 
 /* change database */
@@ -11879,7 +12116,14 @@ keyword:
         | CACHE_SYM             {}
         | CHARSET               {}
         | CHECKSUM_SYM          {}
+        | CHECKPOINT_SYM        {}
         | CLOSE_SYM             {}
+        | COLUMN_ADD_SYM        {}
+        | COLUMN_CREATE_SYM     {}
+        | COLUMN_DELETE_SYM     {}
+        | COLUMN_EXISTS_SYM     {}
+        | COLUMN_GET_SYM        {}
+        | COLUMN_LIST_SYM       {}
         | COMMENT_SYM           {}
         | COMMIT_SYM            {}
         | CONTAINS_SYM          {}
@@ -12014,6 +12258,7 @@ keyword_sp:
         | GRANTS                   {}
         | GLOBAL_SYM               {}
         | HASH_SYM                 {}
+        | HARD_SYM                 {}
         | HOSTS_SYM                {}
         | HOUR_SYM                 {}
         | IDENTIFIED_SYM           {}
@@ -12090,6 +12335,7 @@ keyword_sp:
         | OLD_PASSWORD             {}
         | ONE_SHOT_SYM             {}
         | ONE_SYM                  {}
+        | ONLINE_SYM               {}
         | PACK_KEYS_SYM            {}
         | PAGE_SYM                 {}
         | PARTIAL                  {}
@@ -12144,6 +12390,7 @@ keyword_sp:
         | SHUTDOWN                 {}
         | SLOW_SYM                 {}
         | SNAPSHOT_SYM             {}
+        | SOFT_SYM                 {}
         | SOUNDS_SYM               {}
         | SOURCE_SYM               {}
         | SQL_CACHE_SYM            {}
@@ -13213,7 +13460,7 @@ grant_option:
             lex->mqh.conn_per_hour= $2;
             lex->mqh.specified_limits|= USER_RESOURCES::CONNECTIONS_PER_HOUR;
           }
-        | MAX_USER_CONNECTIONS_SYM ulong_num
+        | MAX_USER_CONNECTIONS_SYM int_num
           {
             LEX *lex=Lex;
             lex->mqh.user_conn= $2;
@@ -13545,7 +13792,7 @@ view_replace:
 
 view_algorithm:
           ALGORITHM_SYM EQ UNDEFINED_SYM
-          { Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED; }
+          { Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED; }
         | ALGORITHM_SYM EQ MERGE_SYM
           { Lex->create_view_algorithm= VIEW_ALGORITHM_MERGE; }
         | ALGORITHM_SYM EQ TEMPTABLE_SYM
diff --git a/sql/structs.h b/sql/structs.h
index ee6a2ffdcfd..c2bdf6db747 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -56,7 +56,8 @@ typedef struct st_key_part_info {	/* Info about a key part */
   Field *field;
   uint	offset;				/* offset in record (from 0) */
   uint	null_offset;			/* Offset to null_bit in record */
-  uint16 length;                        /* Length of keypart value in bytes */
+  /* Length of key part in bytes, excluding NULL flag and length bytes */
+  uint16 length;
   /* 
     Number of bytes required to store the keypart value. This may be
     different from the "length" field as it also counts
@@ -78,7 +79,6 @@ typedef struct st_key {
   uint	key_length;			/* Tot length of key */
   ulong flags;                          /* dupp key and pack flags */
   uint	key_parts;			/* How many key_parts */
-  uint  extra_length;
   uint	usable_key_parts;		/* Should normally be = key_parts */
   uint  block_size;
   uint  name_length;
@@ -130,6 +130,21 @@ class SQL_SELECT;
 class THD;
 class handler;
 struct st_join_table;
+class Copy_field;
+
+/**
+  A context for reading through a single table using a chosen access method:
+  index read, scan, etc, use of cache, etc.
+
+  Use by:
+  READ_RECORD read_record;
+  init_read_record(&read_record, ...);
+  while (read_record.read_record())
+  {
+    ...
+  }
+  end_read_record();
+*/
 
 void rr_unlock_row(st_join_table *tab);
 
@@ -137,7 +152,7 @@ struct READ_RECORD {			/* Parameter to read_record */
   typedef int (*Read_func)(READ_RECORD*);
   typedef void (*Unlock_row_func)(st_join_table *);
   struct st_table *table;			/* Head-form */
-  handler *file;
+ // handler *file_;
   struct st_table **forms;			/* head and ref forms */
 
   Read_func read_record;
@@ -153,6 +168,12 @@ struct READ_RECORD {			/* Parameter to read_record */
   uchar	*cache,*cache_pos,*cache_end,*read_positions;
   IO_CACHE *io_cache;
   bool print_error, ignore_not_found_rows;
+  /* 
+    SJ-Materialization runtime may need to read fields from the materialized
+    table and unpack them into original table fields:
+  */
+  Copy_field *copy_field;
+  Copy_field *copy_field_end;
 };
 
 
@@ -204,8 +225,11 @@ typedef struct user_resources {
   uint updates;
   /* Maximum number of connections established per hour. */
   uint conn_per_hour;
-  /* Maximum number of concurrent connections. */
-  uint user_conn;
+  /*
+    Maximum number of concurrent connections. If -1 then no new
+    connections allowed
+  */
+  int user_conn;
   /*
      Values of this enum and specified_limits member are used by the
      parser to store which user limits were specified in GRANT statement.
@@ -238,7 +262,7 @@ typedef struct  user_conn {
   /* Total length of the key. */
   uint len;
   /* Current amount of concurrent connections for this account. */
-  uint connections;
+  int connections;
   /*
      Current number of connections per hour, number of updating statements
      per hour and total number of statements per hour for this account.
diff --git a/sql/table.cc b/sql/table.cc
index 5bcc85e3cc8..9174168bd68 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -24,6 +24,8 @@
 #include "create_options.h"
 #include <m_ctype.h>
 #include "my_md5.h"
+#include "my_bit.h"
+#include "sql_select.h"
 
 /* INFORMATION_SCHEMA name */
 LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")};
@@ -776,8 +778,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->db_record_offset= 1;
   if (db_create_options & HA_OPTION_LONG_BLOB_PTR)
     share->blob_ptr_size= portable_sizeof_char_ptr;
-  /* Set temporarily a good value for db_low_byte_first */
-  share->db_low_byte_first= test(legacy_db_type != DB_TYPE_ISAM);
   error=4;
   share->max_rows= uint4korr(head+18);
   share->min_rows= uint4korr(head+22);
@@ -1493,7 +1493,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           key_part->null_bit= field->null_bit;
           key_part->store_length+=HA_KEY_NULL_LENGTH;
           keyinfo->flags|=HA_NULL_PART_KEY;
-          keyinfo->extra_length+= HA_KEY_NULL_LENGTH;
           keyinfo->key_length+= HA_KEY_NULL_LENGTH;
         }
         if (field->type() == MYSQL_TYPE_BLOB ||
@@ -1505,7 +1504,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
             key_part->key_part_flag|= HA_BLOB_PART;
           else
             key_part->key_part_flag|= HA_VAR_LENGTH_PART;
-          keyinfo->extra_length+=HA_KEY_BLOB_LENGTH;
           key_part->store_length+=HA_KEY_BLOB_LENGTH;
           keyinfo->key_length+= HA_KEY_BLOB_LENGTH;
         }
@@ -1578,19 +1576,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                                 share->table_name.str,
                                 share->table_name.str);
             share->crashed= 1;                // Marker for CHECK TABLE
-            goto to_be_deleted;
+            continue;
           }
 #endif
           key_part->key_part_flag|= HA_PART_KEY_SEG;
         }
-
-	to_be_deleted:
-
-        /*
-          If the field can be NULL, don't optimize away the test
-          key_part_column = expression from the WHERE clause
-          as we need to test for NULL = NULL.
-        */
         if (field->real_maybe_null())
           key_part->key_part_flag|= HA_NULL_PART;
         /*
@@ -1705,7 +1695,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->can_cmp_whole_record= (share->blob_fields == 0 &&
                                 share->varchar_fields == 0);
 
-  share->db_low_byte_first= handler_file->low_byte_first();
   share->column_bitmap_size= bitmap_buffer_size(share->fields);
 
   if (!(bitmaps= (my_bitmap_map*) alloc_root(&share->mem_root,
@@ -1852,7 +1841,9 @@ bool fix_vcol_expr(THD *thd,
   save_use_only_table_context= thd->lex->use_only_table_context;
   thd->lex->use_only_table_context= TRUE;
   /* Fix fields referenced to by the virtual column function */
+  thd->lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_VCOL_EXPR;
   error= func_expr->fix_fields(thd, (Item**)0);
+  thd->lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_VCOL_EXPR;
   /* Restore the original context*/
   thd->lex->use_only_table_context= save_use_only_table_context;
   context->table_list= save_table_list;
@@ -2003,7 +1994,12 @@ bool unpack_vcol_info_from_frm(THD *thd,
   vcol_arena= table->expr_arena;
   if (!vcol_arena)
   {
-    Query_arena expr_arena(&table->mem_root, Query_arena::INITIALIZED);
+    /*
+      We need to use CONVENTIONAL_EXECUTION here to ensure that
+      any new items created by fix_fields() are not reverted.
+    */
+    Query_arena expr_arena(&table->mem_root,
+                           Query_arena::CONVENTIONAL_EXECUTION);
     if (!(vcol_arena= (Query_arena *) alloc_root(&table->mem_root,
                                                  sizeof(Query_arena))))
       goto err;
@@ -2351,7 +2347,7 @@ partititon_err:
   /* Allocate bitmaps */
 
   bitmap_size= share->column_bitmap_size;
-  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*4)))
+  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*5)))
     goto err;
   bitmap_init(&outparam->def_read_set,
               (my_bitmap_map*) bitmaps, share->fields, FALSE);
@@ -2361,6 +2357,8 @@ partititon_err:
               (my_bitmap_map*) (bitmaps+bitmap_size*2), share->fields, FALSE);
   bitmap_init(&outparam->tmp_set,
               (my_bitmap_map*) (bitmaps+bitmap_size*3), share->fields, FALSE);
+  bitmap_init(&outparam->eq_join_set,
+              (my_bitmap_map*) (bitmaps+bitmap_size*4), share->fields, FALSE);
   outparam->default_column_bitmaps();
 
   /* The table struct is now initialized;  Open the table */
@@ -2463,7 +2461,7 @@ int closefrm(register TABLE *table, bool free_share)
   {
     if (table->s->deleting)
       table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
-    error=table->file->close();
+    error=table->file->ha_close();
   }
   table->alias.free();
   if (table->expr_arena)
@@ -2946,7 +2944,7 @@ File create_frm(THD *thd, const char *name, const char *db,
   if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
     create_flags|= O_EXCL | O_NOFOLLOW;
 
-  /* Fix this when we have new .frm files;  Current limit is 4G rows (QQ) */
+  /* Fix this when we have new .frm files;  Current limit is 4G rows (TODO) */
   if (create_info->max_rows > UINT_MAX32)
     create_info->max_rows= UINT_MAX32;
   if (create_info->min_rows > UINT_MAX32)
@@ -3516,129 +3514,112 @@ void  TABLE_LIST::calc_md5(char *buffer)
 
 
 /**
-   @brief Set underlying table for table place holder of view.
-
-   @details
-
-   Replace all views that only use one table with the table itself.  This
-   allows us to treat the view as a simple table and even update it (it is a
-   kind of optimization).
+  @brief
+  Create field translation for mergeable derived table/view.
 
-   @note 
+  @param thd  Thread handle
 
-   This optimization is potentially dangerous as it makes views
-   masquerade as base tables: Views don't have the pointer TABLE_LIST::table
-   set to non-@c NULL.
+  @details
+  Create field translation for mergeable derived table/view.
 
-   We may have the case where a view accesses tables not normally accessible
-   in the current Security_context (only in the definer's
-   Security_context). According to the table's GRANT_INFO (TABLE::grant),
-   access is fulfilled, but this is implicitly meant in the definer's security
-   context. Hence we must never look at only a TABLE's GRANT_INFO without
-   looking at the one of the referring TABLE_LIST.
+  @return FALSE ok.
+  @return TRUE an error occur.
 */
 
-void TABLE_LIST::set_underlying_merge()
+bool TABLE_LIST::create_field_translation(THD *thd)
 {
-  TABLE_LIST *tbl;
+  Item *item;
+  Field_translator *transl;
+  SELECT_LEX *select= get_single_select();
+  List_iterator_fast<Item> it(select->item_list);
+  uint field_count= 0;
+  Query_arena *arena= thd->stmt_arena, backup;
+  bool res= FALSE;
 
-  if ((tbl= merge_underlying_list))
+  used_items.empty();
+
+  if (field_translation)
   {
-    /* This is a view. Process all tables of view */
-    DBUG_ASSERT(view && effective_algorithm == VIEW_ALGORITHM_MERGE);
-    do
+    /*
+      Update items in the field translation aftet view have been prepared.
+      It's needed because some items in the select list, like IN subselects,
+      might be substituted for optimized ones.
+    */
+    if (is_view() && get_unit()->prepared && !field_translation_updated)
     {
-      if (tbl->merge_underlying_list)          // This is a view
+      while ((item= it++))
       {
-        DBUG_ASSERT(tbl->view &&
-                    tbl->effective_algorithm == VIEW_ALGORITHM_MERGE);
-        /*
-          This is the only case where set_ancestor is called on an object
-          that may not be a view (in which case ancestor is 0)
-        */
-        tbl->merge_underlying_list->set_underlying_merge();
+        field_translation[field_count++].item= item;
       }
-    } while ((tbl= tbl->next_local));
-
-    if (!multitable_view)
-    {
-      table= merge_underlying_list->table;
-      schema_table= merge_underlying_list->schema_table;
+      field_translation_updated= TRUE;
     }
+
+    return FALSE;
   }
+
+  if (arena->is_conventional())
+    arena= 0;                                   // For easier test
+  else
+    thd->set_n_backup_active_arena(arena, &backup);
+
+  /* Create view fields translation table */
+
+  if (!(transl=
+        (Field_translator*)(thd->stmt_arena->
+                            alloc(select->item_list.elements *
+                                  sizeof(Field_translator)))))
+  {
+    res= TRUE;
+    goto exit;
+  }
+
+  while ((item= it++))
+  {
+    transl[field_count].name= item->name;
+    transl[field_count++].item= item;
+  }
+  field_translation= transl;
+  field_translation_end= transl + field_count;
+
+exit:
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  return res;
 }
 
 
-/*
-  setup fields of placeholder of merged VIEW
+/**
+  @brief
+  Create field translation for mergeable derived table/view.
 
-  SYNOPSIS
-    TABLE_LIST::setup_underlying()
-    thd		    - thread handler
+  @param thd  Thread handle
 
-  DESCRIPTION
-    It is:
-    - preparing translation table for view columns
-    If there are underlying view(s) procedure first will be called for them.
+  @details
+  Create field translation for mergeable derived table/view.
 
-  RETURN
-    FALSE - OK
-    TRUE  - error
+  @return FALSE ok.
+  @return TRUE an error occur.
 */
 
 bool TABLE_LIST::setup_underlying(THD *thd)
 {
   DBUG_ENTER("TABLE_LIST::setup_underlying");
 
-  if (!field_translation && merge_underlying_list)
+  if (!view || (!field_translation && merge_underlying_list))
   {
-    Field_translator *transl;
-    SELECT_LEX *select= &view->select_lex;
-    Item *item;
-    TABLE_LIST *tbl;
-    List_iterator_fast<Item> it(select->item_list);
-    uint field_count= 0;
-
-    if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*) &field_count))
-    {
-      DBUG_RETURN(TRUE);
-    }
-
-    for (tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
-    {
-      if (tbl->merge_underlying_list &&
-          tbl->setup_underlying(thd))
-      {
-        DBUG_RETURN(TRUE);
-      }
-    }
-
-    /* Create view fields translation table */
-
-    if (!(transl=
-          (Field_translator*)(thd->stmt_arena->
-                              alloc(select->item_list.elements *
-                                    sizeof(Field_translator)))))
-    {
+    SELECT_LEX *select= get_single_select();
+    
+    if (create_field_translation(thd))
       DBUG_RETURN(TRUE);
-    }
-
-    while ((item= it++))
-    {
-      transl[field_count].name= item->name;
-      transl[field_count++].item= item;
-    }
-    field_translation= transl;
-    field_translation_end= transl + field_count;
-    /* TODO: use hash for big number of fields */
 
     /* full text function moving to current select */
-    if (view->select_lex.ftfunc_list->elements)
+    if (select->ftfunc_list->elements)
     {
       Item_func_match *ifm;
       SELECT_LEX *current_select= thd->lex->current_select;
       List_iterator_fast<Item_func_match>
-        li(*(view->select_lex.ftfunc_list));
+        li(*(select_lex->ftfunc_list));
       while ((ifm= li++))
         current_select->ftfunc_list->push_front(ifm);
     }
@@ -3648,7 +3629,7 @@ bool TABLE_LIST::setup_underlying(THD *thd)
 
 
 /*
-  Prepare where expression of view
+   Prepare where expression of derived table/view
 
   SYNOPSIS
     TABLE_LIST::prep_where()
@@ -3672,7 +3653,8 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
 
   for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
   {
-    if (tbl->view && tbl->prep_where(thd, conds, no_where_clause))
+    if (tbl->is_view_or_derived() &&
+        tbl->prep_where(thd, conds, no_where_clause))
     {
       DBUG_RETURN(TRUE);
     }
@@ -3680,6 +3662,8 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
 
   if (where)
   {
+    if (where->fixed)
+      where->update_used_tables();
     if (!where->fixed && where->fix_fields(thd, &where))
     {
       DBUG_RETURN(TRUE);
@@ -3712,7 +3696,13 @@ bool TABLE_LIST::prep_where(THD *thd, Item **conds,
         }
       }
       if (tbl == 0)
+      {
+        if (*conds && !(*conds)->fixed)
+	  (*conds)->fix_fields(thd, conds);
         *conds= and_conds(*conds, where->copy_andor_structure(thd));
+        if (*conds && !(*conds)->fixed)
+          (*conds)->fix_fields(thd, conds);        
+      }
       if (arena)
         thd->restore_active_arena(arena, &backup);
       where_processed= TRUE;
@@ -3773,10 +3763,11 @@ merge_on_conds(THD *thd, TABLE_LIST *table, bool is_cascaded)
   DBUG_PRINT("info", ("alias: %s", table->alias));
   if (table->on_expr)
     cond= table->on_expr->copy_andor_structure(thd);
-  if (!table->nested_join)
+  if (!table->view)
     DBUG_RETURN(cond);
-  List_iterator<TABLE_LIST> li(table->nested_join->join_list);
-  while (TABLE_LIST *tbl= li++)
+  for (TABLE_LIST *tbl= (TABLE_LIST*)table->view->select_lex.table_list.first;
+       tbl;
+       tbl= tbl->next_local)
   {
     if (tbl->view && !is_cascaded)
       continue;
@@ -3816,7 +3807,7 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
 {
   DBUG_ENTER("TABLE_LIST::prep_check_option");
   bool is_cascaded= check_opt_type == VIEW_CHECK_CASCADED;
-
+  TABLE_LIST *merge_underlying_list= view->select_lex.get_table_list();
   for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
   {
     /* see comment of check_opt_type parameter */
@@ -3833,7 +3824,6 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
 
     if (where)
     {
-      DBUG_ASSERT(where->fixed);
       check_option= where->copy_andor_structure(thd);
     }
     if (is_cascaded)
@@ -3929,10 +3919,14 @@ void TABLE_LIST::hide_view_error(THD *thd)
 TABLE_LIST *TABLE_LIST::find_underlying_table(TABLE *table_to_find)
 {
   /* is this real table and table which we are looking for? */
-  if (table == table_to_find && merge_underlying_list == 0)
+  if (table == table_to_find && view == 0)
     return this;
+  if (!view)
+    return 0;
 
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+       tbl;
+       tbl= tbl->next_local)
   {
     TABLE_LIST *result;
     if ((result= tbl->find_underlying_table(table_to_find)))
@@ -4014,7 +4008,12 @@ bool TABLE_LIST::check_single_table(TABLE_LIST **table_arg,
                                        table_map map,
                                        TABLE_LIST *view_arg)
 {
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  if (!select_lex)
+    return FALSE;
+  DBUG_ASSERT(is_merged_derived());
+  for (TABLE_LIST *tbl= get_single_select()->get_table_list();
+       tbl;
+       tbl= tbl->next_local)
   {
     if (tbl->table)
     {
@@ -4056,8 +4055,10 @@ bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root)
   }
   else
   {
-    DBUG_ASSERT(view && merge_underlying_list);
-    for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+    DBUG_ASSERT(is_view_or_derived() && is_merged_derived());
+    for (TABLE_LIST *tbl= (TABLE_LIST*)view->select_lex.table_list.first;
+         tbl;
+         tbl= tbl->next_local)
       if (tbl->set_insert_values(mem_root))
         return TRUE;
   }
@@ -4083,7 +4084,7 @@ bool TABLE_LIST::set_insert_values(MEM_ROOT *mem_root)
 */
 bool TABLE_LIST::is_leaf_for_name_resolution()
 {
-  return (view || is_natural_join || is_join_columns_complete ||
+  return (is_merged_derived() || is_natural_join || is_join_columns_complete ||
           !nested_join);
 }
 
@@ -4221,7 +4222,11 @@ void TABLE_LIST::register_want_access(ulong want_access)
     if (table)
       table->grant.want_privilege= want_access;
   }
-  for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+  if (!view)
+    return;
+  for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+       tbl;
+       tbl= tbl->next_local)
     tbl->register_want_access(want_access);
 }
 
@@ -4373,6 +4378,36 @@ bool TABLE_LIST::prepare_security(THD *thd)
   DBUG_RETURN(FALSE);
 }
 
+#ifndef DBUG_OFF
+void TABLE_LIST::set_check_merged()
+{
+  DBUG_ASSERT(derived);
+  /*
+    It is not simple to check all, but at least this should be checked:
+    this select is not excluded or the exclusion came from above.
+  */
+  DBUG_ASSERT(!derived->first_select()->exclude_from_table_unique_test ||
+              derived->outer_select()->
+              exclude_from_table_unique_test);
+}
+#endif
+
+void TABLE_LIST::set_check_materialized()
+{
+  DBUG_ASSERT(derived);
+  if (!derived->first_select()->exclude_from_table_unique_test)
+    derived->set_unique_exclude();
+  else
+  {
+    /*
+      The subtree should be already excluded
+    */
+    DBUG_ASSERT(!derived->first_select()->first_inner_unit() ||
+                derived->first_select()->first_inner_unit()->first_select()->
+                exclude_from_table_unique_test);
+  }
+}
+
 
 Natural_join_column::Natural_join_column(Field_translator *field_param,
                                          TABLE_LIST *tab)
@@ -4451,14 +4486,23 @@ const char *Natural_join_column::db_name()
   DBUG_ASSERT(!strcmp(table_ref->db,
                       table_ref->table->s->db.str) ||
               (table_ref->schema_table &&
-               table_ref->table->s->db.str[0] == 0));
+               table_ref->table->s->db.str[0] == 0) ||
+               table_ref->is_materialized_derived());
   return table_ref->db;
 }
 
 
 GRANT_INFO *Natural_join_column::grant()
 {
-  if (view_field)
+/*  if (view_field)
+    return &(table_ref->grant);
+  return &(table_ref->table->grant);*/
+  /*
+    Have to check algorithm because merged derived also has
+    field_translation.
+  */
+//if (table_ref->effective_algorithm == DTYPE_ALGORITHM_MERGE)
+  if (table_ref->is_merged_derived())
     return &(table_ref->grant);
   return &(table_ref->table->grant);
 }
@@ -4540,7 +4584,17 @@ Item *create_view_field(THD *thd, TABLE_LIST *view, Item **field_ref,
   {
     DBUG_RETURN(field);
   }
-  Item *item= new Item_direct_view_ref(view, field_ref, name);
+  Item *item= new Item_direct_view_ref(&view->view->select_lex.context,
+                                       field_ref, view->alias,
+                                       name, view);
+  /*
+    Force creation of nullable item for the result tmp table for outer joined
+    views/derived tables.
+  */
+  if (view->table && view->table->maybe_null)
+    item->maybe_null= TRUE;
+  /* Save item in case we will need to fall back to materialization. */
+  view->used_items.push_back(item);
   DBUG_RETURN(item);
 }
 
@@ -4594,8 +4648,7 @@ void Field_iterator_table_ref::set_field_iterator()
   /* This is a merge view, so use field_translation. */
   else if (table_ref->field_translation)
   {
-    DBUG_ASSERT(table_ref->view &&
-                table_ref->effective_algorithm == VIEW_ALGORITHM_MERGE);
+    DBUG_ASSERT(table_ref->is_merged_derived());
     field_it= &view_field_it;
     DBUG_PRINT("info", ("field_it for '%s' is Field_iterator_view",
                         table_ref->alias));
@@ -5187,6 +5240,9 @@ void st_table::mark_virtual_columns_for_write(bool insert_fl)
   if (!vfield)
     return;
 
+  if (!vfield)
+    return;
+
   for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++)
   {
     tmp_vfield= *vfield_ptr;
@@ -5220,6 +5276,175 @@ void st_table::mark_virtual_columns_for_write(bool insert_fl)
     file->column_bitmaps_signal();
 }
 
+
+/**
+  @brief
+  Allocate space for keys
+
+  @param key_count  number of keys to allocate additionally
+
+  @details
+  The function allocates memory  to fit additionally 'key_count' keys 
+  for this table.
+
+  @return FALSE   space was successfully allocated
+  @return TRUE    an error occur
+*/
+
+bool TABLE::alloc_keys(uint key_count)
+{
+  key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*(s->keys+key_count));
+  if (s->keys)
+    memmove(key_info, s->key_info, sizeof(KEY)*s->keys);
+  s->key_info= key_info;
+  max_keys= s->keys+key_count;
+  return !(key_info);
+}
+
+
+void TABLE::create_key_part_by_field(KEY *keyinfo,
+                                     KEY_PART_INFO *key_part_info,
+                                     Field *field, uint fieldnr)
+{   
+  field->flags|= PART_KEY_FLAG;
+  key_part_info->null_bit= field->null_bit;
+  key_part_info->null_offset= (uint) (field->null_ptr -
+                                      (uchar*) record[0]);
+  key_part_info->field= field;
+  key_part_info->fieldnr= fieldnr;
+  key_part_info->offset= field->offset(record[0]);
+  key_part_info->length=   (uint16) field->pack_length();
+  keyinfo->key_length+= key_part_info->length;
+  key_part_info->key_part_flag= 0;
+  /* TODO:
+    The below method of computing the key format length of the
+    key part is a copy/paste from opt_range.cc, and table.cc.
+    This should be factored out, e.g. as a method of Field.
+    In addition it is not clear if any of the Field::*_length
+    methods is supposed to compute the same length. If so, it
+    might be reused.
+  */
+  key_part_info->store_length= key_part_info->length;
+
+  if (field->real_maybe_null())
+  {
+    key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+    keyinfo->key_length+= HA_KEY_NULL_LENGTH;
+  }
+  if (field->type() == MYSQL_TYPE_BLOB || 
+      field->real_type() == MYSQL_TYPE_VARCHAR)
+  {
+    key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+    keyinfo->key_length+= HA_KEY_BLOB_LENGTH; // ???
+    key_part_info->key_part_flag|=
+      field->type() == MYSQL_TYPE_BLOB ? HA_BLOB_PART: HA_VAR_LENGTH_PART;
+  }
+
+  key_part_info->type=     (uint8) field->key_type();
+  key_part_info->key_type =
+    ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
+    (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
+    (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
+    0 : FIELDFLAG_BINARY;
+}
+
+
+/**
+  @brief
+  Add one key to a temporary table
+
+  @param key            the number of the key
+  @param key_parts      number of components of the key
+  @param next_field_no  the call-back function that returns the number of
+                        the field used as the next component of the key
+  @param arg            the argument for the above function
+  @param unique         TRUE <=> it is a unique index
+
+  @details
+  The function adds a new key to the table that is assumed to be a temporary
+  table. At each its invocation the call-back function must return
+  the number of the field that is used as the next component of this key.
+
+  @return FALSE is a success
+  @return TRUE if a failure
+
+*/
+
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+                        uint (*next_field_no) (uchar *), uchar *arg,
+                        bool unique)
+{
+  DBUG_ASSERT(key < max_keys);
+
+  char buf[NAME_CHAR_LEN];
+  KEY* keyinfo;
+  Field **reg_field;
+  uint i;
+  bool key_start= TRUE;
+  KEY_PART_INFO* key_part_info=
+      (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
+  if (!key_part_info)
+    return TRUE;
+  keyinfo= key_info + key;
+  keyinfo->key_part= key_part_info;
+  keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+  keyinfo->key_length=0;
+  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+  keyinfo->flags= HA_GENERATED_KEY;
+  if (unique)
+    keyinfo->flags|= HA_NOSAME;
+  sprintf(buf, "key%i", key);
+  if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+    return TRUE;
+  keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+                                            sizeof(ulong)*key_parts);
+  if (!keyinfo->rec_per_key)
+    return TRUE;
+  bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+
+  for (i= 0; i < key_parts; i++)
+  {
+    uint fld_idx= next_field_no(arg); 
+    reg_field= field + fld_idx;
+    if (key_start)
+      (*reg_field)->key_start.set_bit(key);
+    (*reg_field)->part_of_key.set_bit(key);
+    create_key_part_by_field(keyinfo, key_part_info, *reg_field, fld_idx+1);
+    key_start= FALSE;
+    key_part_info++;
+  }
+
+  set_if_bigger(s->max_key_length, keyinfo->key_length);
+  s->keys++;
+  return FALSE;
+}
+
+/*
+  @brief
+  Drop all indexes except specified one.
+
+  @param key_to_save the key to save
+
+  @details
+  Drop all indexes on this table except 'key_to_save'. The saved key becomes
+  key #0. Memory occupied by key parts of dropped keys are freed.
+  If the 'key_to_save' is negative then all keys are freed.
+*/
+
+void TABLE::use_index(int key_to_save)
+{
+  uint i= 1;
+  DBUG_ASSERT(!created && key_to_save < (int)s->keys);
+  if (key_to_save >= 0)
+    /* Save the given key. */
+    memmove(key_info, key_info + key_to_save, sizeof(KEY));
+  else
+    /* Drop all keys; */
+    i= 0;
+
+  s->keys= i;
+}
+
 /**
   @brief Check if this is part of a MERGE table with attached children.
 
@@ -5238,6 +5463,21 @@ bool st_table::is_children_attached(void)
          (parent && parent->children_attached));
 }
 
+
+/*
+  Return TRUE if the table is filled at execution phase 
+  
+  (and so, the optimizer must not do anything that depends on the contents of
+   the table, like range analysis or constant table detection)
+*/
+
+bool st_table::is_filled_at_execution()
+{ 
+  return test(pos_in_table_list->jtbm_subselect || 
+              pos_in_table_list->is_active_sjm());
+}
+
+
 /*
   Cleanup this table for re-execution.
 
@@ -5268,6 +5508,7 @@ void TABLE_LIST::reinit_before_use(THD *thd)
          parent_embedding->nested_join->join_list.head() == embedded);
 }
 
+
 /*
   Return subselect that contains the FROM list this table is taken from
 
@@ -5539,6 +5780,300 @@ int update_virtual_fields(THD *thd, TABLE *table, bool for_write)
   DBUG_RETURN(0);
 }
 
+/*
+  @brief Reset const_table flag
+
+  @detail
+  Reset const_table flag for this table. If this table is a merged derived
+  table/view the flag is recursively reseted for all tables of the underlying
+  select.
+*/
+
+void TABLE_LIST::reset_const_table()
+{
+  table->const_table= 0;
+  if (is_merged_derived())
+  {
+    SELECT_LEX *select_lex= get_unit()->first_select();
+    TABLE_LIST *tl;
+    List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+    while ((tl= ti++))
+      tl->reset_const_table();
+  }
+}
+
+
+/*
+  @brief Run derived tables/view handling phases on underlying select_lex.
+
+  @param lex    LEX for this thread
+  @param phases derived tables/views handling phases to run
+                (set of DT_XXX constants)
+  @details
+  This function runs this derived table through specified 'phases'.
+  Underlying tables of this select are handled prior to this derived.
+  'lex' is passed as an argument to called functions.
+
+  @return TRUE on error
+  @return FALSE ok
+*/
+
+bool TABLE_LIST::handle_derived(struct st_lex *lex, uint phases)
+{
+  SELECT_LEX_UNIT *unit= get_unit();
+  if (unit)
+  {
+    for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+      if (sl->handle_derived(lex, phases))
+        return TRUE;
+    return mysql_handle_single_derived(lex, this, phases);
+  }
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Return unit of this derived table/view
+
+  @return reference to a unit  if it's a derived table/view.
+  @return 0                    when it's not a derived table/view.
+*/
+
+st_select_lex_unit *TABLE_LIST::get_unit()
+{
+  return (view ? &view->unit : derived);
+}
+
+
+/**
+  @brief
+  Return select_lex of this derived table/view
+
+  @return select_lex of this derived table/view.
+  @return 0          when it's not a derived table.
+*/
+
+st_select_lex *TABLE_LIST::get_single_select()
+{
+  SELECT_LEX_UNIT *unit= get_unit();
+  return (unit ? unit->first_select() : 0);
+}
+
+
+/**
+  @brief
+  Attach a join table list as a nested join to this TABLE_LIST.
+
+  @param join_list join table list to attach
+
+  @details
+  This function wraps 'join_list' into a nested_join of this table, thus
+  turning it to a nested join leaf.
+*/
+
+void TABLE_LIST::wrap_into_nested_join(List<TABLE_LIST> &join_list)
+{
+  TABLE_LIST *tl;
+  /*
+    Walk through derived table top list and set 'embedding' to point to
+    the nesting table.
+  */
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(join_list);
+  nested_join->join_list= join_list;
+  while ((tl= li++))
+  {
+    tl->embedding= this;
+    tl->join_list= &nested_join->join_list;
+  }
+}
+
+
+/**
+  @brief
+  Initialize this derived table/view
+
+  @param thd  Thread handle
+
+  @details
+  This function makes initial preparations of this derived table/view for
+  further processing:
+    if it's a derived table this function marks it either as mergeable or
+      materializable
+    creates temporary table for name resolution purposes
+    creates field translation for mergeable derived table/view
+
+  @return TRUE  an error occur
+  @return FALSE ok
+*/
+
+bool TABLE_LIST::init_derived(THD *thd, bool init_view)
+{
+  SELECT_LEX *first_select= get_single_select();
+  SELECT_LEX_UNIT *unit= get_unit();
+
+  if (!unit)
+    return FALSE;
+  /*
+    Check whether we can merge this derived table into main select.
+    Depending on the result field translation will or will not
+    be created.
+  */
+  TABLE_LIST *first_table= (TABLE_LIST *) first_select->table_list.first;
+  if (first_select->table_list.elements > 1 ||
+      (first_table && first_table->is_multitable()))
+    set_multitable();
+
+  unit->derived= this;
+  if (init_view && !view)
+  {
+    /* This is all what we can do for a derived table for now. */
+    set_derived();
+  }
+
+  if (!is_view())
+  {
+    /* A subquery might be forced to be materialized due to a side-effect. */
+    if (!is_materialized_derived() && first_select->is_mergeable() &&
+        optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_MERGE) &&
+        !(thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+          thd->lex->sql_command == SQLCOM_DELETE_MULTI))
+      set_merged_derived();
+    else
+      set_materialized_derived();
+  }
+  /*
+    Derived tables/view are materialized prior to UPDATE, thus we can skip
+    them from table uniqueness check
+  */
+  if (is_materialized_derived())
+  {
+    set_check_materialized();
+  }
+
+  /*
+    Create field translation for mergeable derived tables/views.
+    For derived tables field translation can be created only after
+    unit is prepared so all '*' are get unrolled.
+  */
+  if (is_merged_derived())
+  {
+    if (is_view() || unit->prepared)
+      create_field_translation(thd);
+  }
+
+  return FALSE;
+}
+
+
+/**
+  @brief
+  Retrieve number of rows in the table
+
+  @details
+  Retrieve number of rows in the table referred by this TABLE_LIST and
+  store it in the table's stats.records variable. If this TABLE_LIST refers
+  to a materialized derived table/view then the estimated number of rows of
+  the derived table/view is used instead.
+
+  @return 0          ok
+  @return non zero   error
+*/
+
+int TABLE_LIST::fetch_number_of_rows()
+{
+  int error= 0;
+  if (jtbm_subselect)
+    return 0;
+  if (is_materialized_derived() && !fill_me)
+
+  {
+    table->file->stats.records= ((select_union*)derived->result)->records;
+    set_if_bigger(table->file->stats.records, 2);
+  }
+  else
+    error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+  return error;
+}
+
+/*
+  Procedure of keys generation for result tables of materialized derived
+  tables/views.
+
+  A key is generated for each equi-join pair derived table-another table.
+  Each generated key consists of fields of derived table used in equi-join.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=t1.f3 and tt.f2.=t1.f4;
+  In this case for the derived table tt one key will be generated. It will
+  consist of two parts f1 and f2.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=t1.f3 JOIN
+                  t2 ON tt.f2=t2.f4;
+  In this case for the derived table tt two keys will be generated.
+  One key over f1 field, and another key over f2 field.
+  Currently optimizer may choose to use only one such key, thus the second
+  one will be dropped after range optimizer is finished.
+  See also JOIN::drop_unused_derived_keys function.
+  Example:
+
+    SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+                  t1 ON tt.f1=a_function(t1.f3);
+  In this case for the derived table tt one key will be generated. It will
+  consist of one field - f1.
+*/
+
+
+
+/*
+  @brief
+  Change references to underlying items of a merged derived table/view
+  for fields in derived table's result table.
+
+  @return FALSE ok
+  @return TRUE  Out of memory
+*/
+bool TABLE_LIST::change_refs_to_fields()
+{
+  List_iterator<Item> li(used_items);
+  Item_direct_ref *ref;
+  Field_iterator_view field_it;
+  THD *thd= table->in_use;
+  DBUG_ASSERT(is_merged_derived());
+
+  if (!used_items.elements)
+    return FALSE;
+
+  materialized_items= (Item**)thd->calloc(sizeof(void*) * table->s->fields);
+
+  while ((ref= (Item_direct_ref*)li++))
+  {
+    uint idx;
+    Item *orig_item= *ref->ref;
+    field_it.set(this);
+    for (idx= 0; !field_it.end_of_fields(); field_it.next(), idx++)
+    {
+      if (field_it.item() == orig_item)
+        break;
+    }
+    DBUG_ASSERT(!field_it.end_of_fields());
+    if (!materialized_items[idx])
+    {
+      materialized_items[idx]= new Item_field(table->field[idx]);
+      if (!materialized_items[idx])
+        return TRUE;
+    }
+    ref->ref= materialized_items + idx;
+  }
+
+  return FALSE;
+}
+
+
 /*****************************************************************************
 ** Instansiate templates
 *****************************************************************************/
diff --git a/sql/table.h b/sql/table.h
index fed30722d71..920d2fa7818 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -66,7 +66,8 @@ typedef struct st_order {
   bool   counter_used;                  /* parameter was counter of columns */
   Field  *field;			/* If tmp-table group */
   char	 *buff;				/* If tmp-table group */
-  table_map used, depend_map;
+  table_map used; /* NOTE: the below is only set to 0 but is still used by eq_ref_table */
+  table_map depend_map;
 } ORDER;
 
 /**
@@ -165,7 +166,7 @@ typedef struct st_filesort_info
   uchar     *addon_buf;         /* Pointer to a buffer if sorted with fields */
   size_t    addon_length;       /* Length of the buffer */
   struct st_sort_addon_field *addon_field;     /* Pointer to the fields info */
-  void    (*unpack)(struct st_sort_addon_field *, uchar *); /* To unpack back */
+  void    (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); /* To unpack back */
   uchar     *record_pointers;    /* If sorted in memory */
   ha_rows   found_records;      /* How many records in sort */
 } FILESORT_INFO;
@@ -440,7 +441,6 @@ typedef struct st_table_share
   bool null_field_first;
   bool system;                          /* Set if system table (one record) */
   bool crypted;                         /* If .frm file is crypted */
-  bool db_low_byte_first;		/* Portable row format */
   bool crashed;
   bool is_view;
   bool name_lock, replace_with_name_lock;
@@ -665,6 +665,9 @@ enum index_hint_type
   INDEX_HINT_FORCE
 };
 
+#define      CHECK_ROW_FOR_NULLS_TO_REJECT   (1 << 0)
+#define      REJECT_ROW_DUE_TO_NULL_FIELDS   (1 << 1)
+
 struct st_table {
   st_table() {}                               /* Remove gcc warning */
 
@@ -693,7 +696,7 @@ struct st_table {
     needed by the query without reading the row.
   */
   key_map covering_keys;
-  key_map quick_keys, merge_keys;
+  key_map quick_keys, merge_keys,intersect_keys;
   /*
     A set of keys that can be used in the query that references this
     table.
@@ -725,6 +728,7 @@ struct st_table {
   uchar		*null_flags;
   my_bitmap_map	*bitmap_init_value;
   MY_BITMAP     def_read_set, def_write_set, def_vcol_set, tmp_set; 
+  MY_BITMAP     eq_join_set;         /* used to mark equi-joined fields */
   MY_BITMAP     *read_set, *write_set, *vcol_set; /* Active column sets */
   /*
    The ID of the query that opened and is using this table. Has different
@@ -806,6 +810,26 @@ struct st_table {
     NULL, including columns declared as "not null" (see maybe_null).
   */
   bool null_row;
+  /*
+    No rows that contain null values can be placed into this table.
+    Currently this flag can be set to true only for a temporary table
+    that used to store the result of materialization of a subquery.
+  */
+  bool no_rows_with_nulls;
+  /*
+    This field can contain two bit flags: 
+      CHECK_ROW_FOR_NULLS_TO_REJECT
+      REJECT_ROW_DUE_TO_NULL_FIELDS
+    The first flag is set for the dynamic contexts where it is prohibited
+    to write any null into the table.
+    The second flag is set only if the first flag is set on.
+    The informs the outer scope that there was an attept to write null
+    into a field of the table in the context where it is prohibited.
+    This flag should be set off as soon as the first flag is set on.
+    Currently these flags are used only the tables tno_rows_with_nulls set
+    to true. 
+  */       
+  uint8 null_catch_flags;
 
   /*
     TODO: Each of the following flags take up 8 bits. They can just as easily
@@ -825,7 +849,7 @@ struct st_table {
     See TABLE_LIST::process_index_hints().
   */
   bool force_index_group;
-  bool distinct,const_table,no_rows;
+  bool distinct,const_table,no_rows, used_for_duplicate_elimination;
 
   /**
      If set, the optimizer has found that row retrieval should access index 
@@ -865,8 +889,10 @@ struct st_table {
   */
   bool auto_increment_field_not_null;
   bool insert_or_update;             /* Can be used by the handler */
-  bool alias_name_used;		/* true if table_name is alias */
+  bool alias_name_used;              /* true if table_name is alias */
   bool get_fields_in_item_tree;      /* Signal to fix_field */
+  bool created;    /* For tmp tables. TRUE <=> tmp table was actually created.*/
+
   /* If MERGE children attached to parent. See top comment in ha_myisammrg.cc */
   bool children_attached;
 
@@ -887,6 +913,7 @@ struct st_table {
   bool no_partitions_used; /* If true, all partitions have been pruned away */
 #endif
 
+  uint max_keys; /* Size of allocated key_info array. */
   bool fill_item_list(List<Item> *item_list) const;
   void reset_item_list(List<Item> *item_list) const;
   void clear_column_bitmaps(void);
@@ -950,6 +977,18 @@ struct st_table {
   */
   inline bool needs_reopen_or_name_lock()
   { return s->version != refresh_version; }
+  bool alloc_keys(uint key_count);
+  bool add_tmp_key(uint key, uint key_parts,
+                   uint (*next_field_no) (uchar *), uchar *arg,
+                   bool unique);
+  void create_key_part_by_field(KEY *keyinfo, KEY_PART_INFO *key_part_info,
+                                Field *field, uint fieldnr);
+  void use_index(int key_to_save);
+  void set_table_map(table_map map_arg, uint tablenr_arg)
+  {
+    map= map_arg;
+    tablenr= tablenr_arg;
+  }
   bool is_children_attached(void);
   inline void enable_keyread()
   {
@@ -959,6 +998,12 @@ struct st_table {
     file->extra(HA_EXTRA_KEYREAD);
     DBUG_VOID_RETURN;
   }
+  /*
+    Returns TRUE if the table is filled at execution phase (and so, the
+    optimizer must not do anything that depends on the contents of the table,
+    like range analysis or constant table detection)
+  */
+  bool is_filled_at_execution();
   inline void disable_keyread()
   {
     DBUG_ENTER("disable_keyread");
@@ -1101,13 +1146,52 @@ typedef struct st_schema_table
 } ST_SCHEMA_TABLE;
 
 
+/*
+  Types of derived tables. The ending part is a bitmap of phases that are
+  applicable to a derived table of the type.
+ * /
+#define VIEW_ALGORITHM_UNDEFINED        0
+#define VIEW_ALGORITHM_MERGE            1 + DT_COMMON + DT_MERGE
+#define DERIVED_ALGORITHM_MERGE         2 + DT_COMMON + DT_MERGE
+#define VIEW_ALGORITHM_TMPTABLE         3 + DT_COMMON + DT_MATERIALIZE
+#define DERIVED_ALGORITHM_MATERIALIZE   4 + DT_COMMON + DT_MATERIALIZE
+*/
+#define DTYPE_ALGORITHM_UNDEFINED    0
+#define DTYPE_VIEW                   1
+#define DTYPE_TABLE                  2
+#define DTYPE_MERGE                  4
+#define DTYPE_MATERIALIZE            8
+#define DTYPE_MULTITABLE             16
+#define DTYPE_MASK                   19
+
+/*
+  Phases of derived tables/views handling, see sql_derived.cc
+  Values are used as parts of a bitmap attached to derived table types.
+*/
+#define DT_INIT             1
+#define DT_PREPARE          2
+#define DT_OPTIMIZE         4
+#define DT_MERGE            8
+#define DT_MERGE_FOR_INSERT 16
+#define DT_CREATE           32
+#define DT_FILL             64
+#define DT_REINIT           128
+#define DT_PHASES           8
+/* Phases that are applicable to all derived tables. */
+#define DT_COMMON       (DT_INIT + DT_PREPARE + DT_REINIT + DT_OPTIMIZE)
+/* Phases that are applicable only to materialized derived tables. */
+#define DT_MATERIALIZE  (DT_CREATE + DT_FILL)
+
+#define DT_PHASES_MERGE (DT_COMMON | DT_MERGE | DT_MERGE_FOR_INSERT)
+#define DT_PHASES_MATERIALIZE (DT_COMMON | DT_MATERIALIZE)
+
+#define VIEW_ALGORITHM_UNDEFINED 0
+#define VIEW_ALGORITHM_MERGE    (DTYPE_VIEW | DTYPE_MERGE)
+#define VIEW_ALGORITHM_TMPTABLE (DTYPE_VIEW + DTYPE_MATERIALIZE )
+
 #define JOIN_TYPE_LEFT	1
 #define JOIN_TYPE_RIGHT	2
 
-#define VIEW_ALGORITHM_UNDEFINED        0
-#define VIEW_ALGORITHM_TMPTABLE         1
-#define VIEW_ALGORITHM_MERGE            2
-
 #define VIEW_SUID_INVOKER               0
 #define VIEW_SUID_DEFINER               1
 #define VIEW_SUID_DEFAULT               2
@@ -1171,6 +1255,11 @@ public:
 };
 
 
+class SJ_MATERIALIZATION_INFO;
+class Index_hint;
+class Item_in_subselect;
+
+
 /*
   Table reference in the FROM clause.
 
@@ -1182,7 +1271,7 @@ public:
   1) table (TABLE_LIST::view == NULL)
      - base table
        (TABLE_LIST::derived == NULL)
-     - subquery - TABLE_LIST::table is a temp table
+     - FROM-clause subquery - TABLE_LIST::table is a temp table
        (TABLE_LIST::derived != NULL)
      - information schema table
        (TABLE_LIST::schema_table != NULL)
@@ -1192,6 +1281,7 @@ public:
            also (TABLE_LIST::field_translation != NULL)
      - tmptable (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_TMPTABLE)
            also (TABLE_LIST::field_translation == NULL)
+  2.5) TODO: Add derived tables description here
   3) nested table reference (TABLE_LIST::nested_join != NULL)
      - table sequence - e.g. (t1, t2, t3)
        TODO: how to distinguish from a JOIN?
@@ -1201,9 +1291,12 @@ public:
        (TABLE_LIST::natural_join != NULL)
        - JOIN ... USING
          (TABLE_LIST::join_using_fields != NULL)
+     - semi-join nest (sj_on_expr!= NULL && sj_subq_pred!=NULL)
+  4) jtbm semi-join (jtbm_subselect != NULL)
 */
 
 class Index_hint;
+struct st_lex;
 struct TABLE_LIST
 {
   TABLE_LIST() {}                          /* Remove gcc warning */
@@ -1233,6 +1326,28 @@ struct TABLE_LIST
   char		*db, *alias, *table_name, *schema_table_name;
   char          *option;                /* Used by cache index  */
   Item		*on_expr;		/* Used with outer join */
+
+  Item          *sj_on_expr;
+  /*
+    (Valid only for semi-join nests) Bitmap of tables that are within the
+    semi-join (this is different from bitmap of all nest's children because
+    tables that were pulled out of the semi-join nest remain listed as
+    nest's children).
+  */
+  table_map     sj_inner_tables;
+  /* Number of IN-compared expressions */
+  uint          sj_in_exprs;
+  
+  /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */
+  Item_in_subselect  *sj_subq_pred;
+
+  /* If this is a jtbm semi-join object: corresponding subselect predicate */
+  Item_in_subselect  *jtbm_subselect;
+  /* TODO: check if this can be joined with tablenr_exec */
+  uint jtbm_table_no;
+
+  SJ_MATERIALIZATION_INFO *sj_mat_info;
+
   /*
     The structure of ON expression presented in the member above
     can be changed during certain optimizations. This member
@@ -1283,6 +1398,28 @@ struct TABLE_LIST
     filling procedure
   */
   select_union  *derived_result;
+  /* Stub used for materialized derived tables. */
+  table_map	map;                    /* ID bit of table (1,2,4,8,16...) */
+  table_map get_map()
+  {
+    return jtbm_subselect? table_map(1) << jtbm_table_no : table->map;
+  }
+  uint get_tablenr()
+  {
+    return jtbm_subselect? jtbm_table_no : table->tablenr;
+  }
+  void set_tablenr(uint new_tablenr)
+  {
+    if (jtbm_subselect)
+    {
+      jtbm_table_no= new_tablenr;
+    }
+    if (table)
+    {
+      table->tablenr= new_tablenr;
+      table->map= table_map(1) << new_tablenr;
+    }
+  }
   /*
     Reference from aux_tables to local list entry of main select of
     multi-delete statement:
@@ -1327,6 +1464,7 @@ struct TABLE_LIST
   Field_translator *field_translation;	/* array of VIEW fields */
   /* pointer to element after last one in translation table above */
   Field_translator *field_translation_end;
+  bool field_translation_updated;
   /*
     List (based on next_local) of underlying tables of this view. I.e. it
     does not include the tables of subqueries used in the view. Is set only
@@ -1341,11 +1479,20 @@ struct TABLE_LIST
   List<TABLE_LIST> *view_tables;
   /* most upper view this table belongs to */
   TABLE_LIST	*belong_to_view;
+  /* A derived table this table belongs to */
+  TABLE_LIST    *belong_to_derived;
   /*
     The view directly referencing this table
     (non-zero only for merged underlying tables of a view).
   */
   TABLE_LIST	*referencing_view;
+
+  table_map view_used_tables;
+  table_map     map_exec;
+  /* TODO: check if this can be joined with jtbm_table_no */
+  uint          tablenr_exec;
+  uint          maybe_null_exec;
+
   /* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */
   TABLE_LIST    *parent_l;
   /*
@@ -1358,13 +1505,7 @@ struct TABLE_LIST
     SQL SECURITY DEFINER)
   */
   Security_context *view_sctx;
-  /*
-    List of all base tables local to a subquery including all view
-    tables. Unlike 'next_local', this in this list views are *not*
-    leaves. Created in setup_tables() -> make_leaves_list().
-  */
   bool allowed_show;
-  TABLE_LIST	*next_leaf;
   Item          *where;                 /* VIEW WHERE clause condition */
   Item          *check_option;          /* WITH CHECK OPTION condition */
   LEX_STRING	select_stmt;		/* text of (CREATE/SELECT) statement */
@@ -1400,7 +1541,7 @@ struct TABLE_LIST
       - VIEW_ALGORITHM_MERGE
       @to do Replace with an enum 
   */
-  uint8         effective_algorithm;
+  uint8         derived_type;
   GRANT_INFO	grant;
   /* data need by some engines in query cache*/
   ulonglong     engine_data;
@@ -1427,7 +1568,6 @@ struct TABLE_LIST
   bool		skip_temporary;		/* this table shouldn't be temporary */
   /* TRUE if this merged view contain auto_increment field */
   bool          contain_auto_increment;
-  bool          multitable_view;        /* TRUE iff this is multitable view */
   bool          compact_view_format;    /* Use compact format for SHOW CREATE VIEW */
   /* view where processed */
   bool          where_processed;
@@ -1451,6 +1591,17 @@ struct TABLE_LIST
   bool          internal_tmp_table;
   bool          deleting;               /* going to delete this table */
 
+  /* TRUE <=> derived table should be filled right after optimization. */
+  bool          fill_me;
+  /* TRUE <=> view/DT is merged. */
+  bool          merged;
+  bool          merged_for_insert;
+  /* TRUE <=> don't prepare this derived table/view as it should be merged.*/
+  bool          skip_prepare_derived;
+
+  List<Item>    used_items;
+  Item          **materialized_items;
+
   /* View creation context. */
 
   View_creation_ctx *view_creation_ctx;
@@ -1488,9 +1639,10 @@ struct TABLE_LIST
   bool has_table_lookup_value;
   uint table_open_method;
   enum enum_schema_table_state schema_table_state;
+
   void calc_md5(char *buffer);
-  void set_underlying_merge();
   int view_check_option(THD *thd, bool ignore_failure);
+  bool create_field_translation(THD *thd);
   bool setup_underlying(THD *thd);
   void cleanup_items();
   bool placeholder()
@@ -1520,7 +1672,7 @@ struct TABLE_LIST
   inline bool prepare_where(THD *thd, Item **conds,
                             bool no_where_clause)
   {
-    if (effective_algorithm == VIEW_ALGORITHM_MERGE)
+    if (!view || is_merged_derived())
       return prep_where(thd, conds, no_where_clause);
     return FALSE;
   }
@@ -1586,6 +1738,62 @@ struct TABLE_LIST
     m_table_ref_version= s->get_table_ref_version();
   }
 
+  /* Set of functions returning/setting state of a derived table/view. */
+  inline bool is_non_derived()
+  {
+    return (!derived_type);
+  }
+  inline bool is_view_or_derived()
+  {
+    return (derived_type);
+  }
+  inline bool is_view()
+  {
+    return (derived_type & DTYPE_VIEW);
+  }
+  inline bool is_derived()
+  {
+    return (derived_type & DTYPE_TABLE);
+  }
+  inline void set_view()
+  {
+    derived_type= DTYPE_VIEW;
+  }
+  inline void set_derived()
+  {
+    derived_type= DTYPE_TABLE;
+  }
+  inline bool is_merged_derived()
+  {
+    return (derived_type & DTYPE_MERGE);
+  }
+  inline void set_merged_derived()
+  {
+    derived_type= ((derived_type & DTYPE_MASK) |
+                   DTYPE_TABLE | DTYPE_MERGE);
+    set_check_merged();
+  }
+  inline bool is_materialized_derived()
+  {
+    return (derived_type & DTYPE_MATERIALIZE);
+  }
+  void set_materialized_derived()
+  {
+    derived_type= ((derived_type & DTYPE_MASK) |
+                   DTYPE_TABLE | DTYPE_MATERIALIZE);
+    set_check_materialized();
+  }
+  inline bool is_multitable()
+  {
+    return (derived_type & DTYPE_MULTITABLE);
+  }
+  inline void set_multitable()
+  {
+    derived_type|= DTYPE_MULTITABLE;
+  }
+  void reset_const_table();
+  bool handle_derived(struct st_lex *lex, uint phases);
+
   /**
      @brief True if this TABLE_LIST represents an anonymous derived table,
      i.e.  the result of a subquery.
@@ -1605,13 +1813,26 @@ struct TABLE_LIST
      respectively.
    */
   char *get_table_name() { return view != NULL ? view_name.str : table_name; }
-
+  bool is_active_sjm();
+  bool is_jtbm() { return test(jtbm_subselect!=NULL); }
+  st_select_lex_unit *get_unit();
+  st_select_lex *get_single_select();
+  void wrap_into_nested_join(List<TABLE_LIST> &join_list);
+  bool init_derived(THD *thd, bool init_view);
+  int fetch_number_of_rows();
+  bool change_refs_to_fields();
 
   bool single_table_updatable();
 
 private:
   bool prep_check_option(THD *thd, uint8 check_opt_type);
   bool prep_where(THD *thd, Item **conds, bool no_where_clause);
+  void set_check_materialized();
+#ifndef DBUG_OFF
+  void set_check_merged();
+#else
+  inline void set_check_merged() {}
+#endif
   /*
     Cleanup for re-execution in a prepared statement or a stored
     procedure.
@@ -1776,6 +1997,14 @@ typedef struct st_nested_join
   */
   uint              n_tables;
   nested_join_map   nj_map;          /* Bit used to identify this nested join*/
+  /*
+    (Valid only for semi-join nests) Bitmap of tables outside the semi-join
+    that are used within the semi-join's ON condition.
+  */
+  table_map         sj_depends_on;
+  /* Outer non-trivially correlated tables */
+  table_map         sj_corr_tables;
+  List<Item>        sj_outer_expr_list;
   /**
      True if this join nest node is completely covered by the query execution
      plan. This means two things.
diff --git a/sql/time.cc b/sql/time.cc
index ebbb5d4c583..778f8b8f313 100644
--- a/sql/time.cc
+++ b/sql/time.cc
@@ -1,5 +1,6 @@
 /*
    Copyright (c) 2000-2007 MySQL AB, 2009 Sun Microsystems, Inc.
+   Copyright (c) 2009-2011 Monty Program Ab
    Use is subject to license terms.
 
    This program is free software; you can redistribute it and/or modify
@@ -226,56 +227,132 @@ ulong convert_month_to_period(ulong month)
 
 timestamp_type
 str_to_datetime_with_warn(const char *str, uint length, MYSQL_TIME *l_time,
-                          uint flags)
+                          ulong flags)
 {
   int was_cut;
   THD *thd= current_thd;
   timestamp_type ts_type;
   
   ts_type= str_to_datetime(str, length, l_time,
-                           (flags | (thd->variables.sql_mode &
-                                     (MODE_INVALID_DATES |
-                                      MODE_NO_ZERO_DATE))),
+                           (flags | (sql_mode_for_dates(thd))),
                            &was_cut);
   if (was_cut || ts_type <= MYSQL_TIMESTAMP_ERROR)
-    make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                 str, length, ts_type,  NullS);
+    make_truncated_value_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 str, length, flags & TIME_TIME_ONLY ?
+                                 MYSQL_TIMESTAMP_TIME : ts_type, NullS);
   return ts_type;
 }
 
 
+/**
+  converts a pair of numbers (integer part, microseconds) to MYSQL_TIME
+
+  @param neg           sign of the time value
+  @param nr            integer part of the number to convert
+  @param sec_part      microsecond part of the number
+  @param ltime         converted value will be written here
+  @param fuzzydate     conversion flags (TIME_FUZZY_DATE, etc)
+  @param str           original number, as a Lazy_string. For the warning
+  @param field_name    field name or NULL if not a field. For the warning
+  
+  @returns 0 for success, 1 for a failure
+*/
+static bool number_to_time_with_warn(bool neg, ulonglong nr, ulong sec_part,
+                                     MYSQL_TIME *ltime, ulong fuzzydate,
+                                     const Lazy_string *str,
+                                     const char *field_name)
+{
+  int was_cut;
+  longlong res;
+  enum_field_types f_type;
+
+  if (fuzzydate & TIME_TIME_ONLY)
+  {
+    f_type= MYSQL_TYPE_TIME;
+    res= number_to_time(neg, nr, sec_part, ltime, &was_cut);
+  }
+  else
+  {
+    f_type= MYSQL_TYPE_DATETIME;
+    res= neg ? -1 : number_to_datetime(nr, sec_part, ltime, fuzzydate, &was_cut);
+  }
+
+  if (res < 0 || (was_cut && !(fuzzydate & TIME_FUZZY_DATE)))
+  {
+    make_truncated_value_warning(current_thd,
+                                 MYSQL_ERROR::WARN_LEVEL_WARN, str,
+                                 res < 0 ? MYSQL_TIMESTAMP_ERROR
+                                         : mysql_type_to_time_type(f_type),
+                                 field_name);
+  }
+  return res < 0;
+}
+
+
+bool double_to_datetime_with_warn(double value, MYSQL_TIME *ltime,
+                                  ulong fuzzydate, const char *field_name)
+{
+  const Lazy_string_double str(value);
+  bool neg= value < 0;
+
+  if (neg)
+    value= -value;
+
+  if (value > LONGLONG_MAX)
+    value= static_cast<double>(LONGLONG_MAX);
+
+  longlong nr= static_cast<ulonglong>(floor(value));
+  uint sec_part= static_cast<ulong>((value - floor(value))*TIME_SECOND_PART_FACTOR);
+  return number_to_time_with_warn(neg, nr, sec_part, ltime, fuzzydate, &str,
+                                  field_name);
+}
+
+
+bool decimal_to_datetime_with_warn(const my_decimal *value, MYSQL_TIME *ltime,
+                                   ulong fuzzydate, const char *field_name)
+{
+  const Lazy_string_decimal str(value);
+  ulonglong nr;
+  ulong sec_part;
+  bool neg= my_decimal2seconds(value, &nr, &sec_part);
+  return number_to_time_with_warn(neg, nr, sec_part, ltime, fuzzydate, &str,
+                                  field_name);
+}
+
+
+bool int_to_datetime_with_warn(longlong value, MYSQL_TIME *ltime,
+                               ulong fuzzydate, const char *field_name)
+{
+  const Lazy_string_num str(value);
+  bool neg= value < 0;
+  return number_to_time_with_warn(neg, neg ? -value : value, 0, ltime,
+                                  fuzzydate, &str, field_name);
+}
+
+
 /*
-  Convert a datetime from broken-down MYSQL_TIME representation to corresponding 
-  TIMESTAMP value.
+  Convert a datetime from broken-down MYSQL_TIME representation to
+  corresponding TIMESTAMP value.
 
   SYNOPSIS
     TIME_to_timestamp()
       thd             - current thread
       t               - datetime in broken-down representation, 
-      in_dst_time_gap - pointer to bool which is set to true if t represents
-                        value which doesn't exists (falls into the spring 
-                        time-gap) or to false otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exists (falls into the spring time-gap).
    
   RETURN
      Number seconds in UTC since start of Unix Epoch corresponding to t.
-     0 - t contains datetime value which is out of TIMESTAMP range.
-     
+     0 - in case of ER_WARN_DATA_OUT_OF_RANGE
 */
-my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, my_bool *in_dst_time_gap)
-{
-  my_time_t timestamp;
 
-  *in_dst_time_gap= 0;
+my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, uint *error_code)
+{
   thd->time_zone_used= 1;
-
-  timestamp= thd->variables.time_zone->TIME_to_gmt_sec(t, in_dst_time_gap);
-  if (timestamp)
-  {
-    return timestamp;
-  }
-
-  /* If we are here we have range error. */
-  return(0);
+  return thd->variables.time_zone->TIME_to_gmt_sec(t, error_code);
 }
 
 
@@ -285,12 +362,19 @@ my_time_t TIME_to_timestamp(THD *thd, const MYSQL_TIME *t, my_bool *in_dst_time_
 
   NOTE
     See str_to_time() for more info.
+
+  RETURN
+    0  ok
+    1  wrong time value
 */
+
 bool
-str_to_time_with_warn(const char *str, uint length, MYSQL_TIME *l_time)
+str_to_time_with_warn(const char *str, uint length, MYSQL_TIME *l_time,
+                      ulong fuzzydate)
 {
   int warning;
-  bool ret_val= str_to_time(str, length, l_time, &warning);
+  bool ret_val= (str_to_time(str, length, l_time, fuzzydate, &warning) ==
+                 MYSQL_TIMESTAMP_ERROR);
   if (ret_val || warning)
     make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                                  str, length, MYSQL_TIMESTAMP_TIME, NullS);
@@ -659,11 +743,6 @@ KNOWN_DATE_TIME_FORMAT known_date_time_formats[6]=
 };
 
 
-/*
-   Return format string according format name.
-   If name is unknown, result is NULL
-*/
-
 const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
 				     timestamp_type type)
 {
@@ -680,51 +759,10 @@ const char *get_date_time_format_str(KNOWN_DATE_TIME_FORMAT *format,
   }
 }
 
-/****************************************************************************
-  Functions to create default time/date/datetime strings
- 
-  NOTE:
-    For the moment the DATE_TIME_FORMAT argument is ignored becasue
-    MySQL doesn't support comparing of date/time/datetime strings that
-    are not in arbutary order as dates are compared as strings in some
-    context)
-    This functions don't check that given MYSQL_TIME structure members are
-    in valid range. If they are not, return value won't reflect any 
-    valid date either. Additionally, make_time doesn't take into
-    account time->day member: it's assumed that days have been converted
-    to hours already.
-****************************************************************************/
-
-void make_time(const DATE_TIME_FORMAT *format __attribute__((unused)),
-               const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_time_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_bin);
-}
-
-
-void make_date(const DATE_TIME_FORMAT *format __attribute__((unused)),
-               const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_date_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_bin);
-}
-
-
-void make_datetime(const DATE_TIME_FORMAT *format __attribute__((unused)),
-                   const MYSQL_TIME *l_time, String *str)
-{
-  uint length= (uint) my_datetime_to_str(l_time, (char*) str->ptr());
-  str->length(length);
-  str->set_charset(&my_charset_bin);
-}
-
-
-void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
-                                  const char *str_val,
-				  uint str_length, timestamp_type time_type,
+void make_truncated_value_warning(THD *thd,
+                                  MYSQL_ERROR::enum_warning_level level,
+                                  const Lazy_string *sval,
+				  timestamp_type time_type,
                                   const char *field_name)
 {
   char warn_buff[MYSQL_ERRMSG_SIZE];
@@ -732,8 +770,7 @@ void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level leve
   CHARSET_INFO *cs= &my_charset_latin1;
   char buff[128];
   String str(buff,(uint32) sizeof(buff), system_charset_info);
-  str.copy(str_val, str_length, system_charset_info);
-  str[str_length]= 0;               // Ensure we have end 0 for snprintf
+  sval->copy_to(&str);
 
   switch (time_type) {
     case MYSQL_TIMESTAMP_DATE: 
@@ -750,32 +787,37 @@ void make_truncated_value_warning(THD *thd, MYSQL_ERROR::enum_warning_level leve
   if (field_name)
     cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
                        ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
-                       type_str, str.c_ptr(), field_name,
+                       type_str, str.c_ptr_safe(), field_name,
                        (ulong) thd->row_count);
   else
   {
     if (time_type > MYSQL_TIMESTAMP_ERROR)
       cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
                          ER(ER_TRUNCATED_WRONG_VALUE),
-                         type_str, str.c_ptr());
+                         type_str, str.c_ptr_safe());
     else
       cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
-                         ER(ER_WRONG_VALUE), type_str, str.c_ptr());
+                         ER(ER_WRONG_VALUE), type_str, str.c_ptr_safe());
   }
   push_warning(thd, level,
                ER_TRUNCATED_WRONG_VALUE, warn_buff);
 }
 
+
 /* Daynumber from year 0 to 9999-12-31 */
 #define MAX_DAY_NUMBER 3652424L
-
-bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL interval)
+#define COMBINE(X)                                                      \
+               (((((X)->day * 24LL + (X)->hour) * 60LL +                \
+                   (X)->minute) * 60LL + (X)->second)*1000000LL +       \
+                   (X)->second_part)
+#define GET_PART(X, N) X % N ## LL; X/= N ## LL
+
+bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type,
+                       INTERVAL interval)
 {
   long period, sign;
 
-  ltime->neg= 0;
-
-  sign= (interval.neg ? -1 : 1);
+  sign= (interval.neg == ltime->neg ? 1 : -1);
 
   switch (int_type) {
   case INTERVAL_SECOND:
@@ -792,35 +834,43 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
   case INTERVAL_DAY_SECOND:
   case INTERVAL_DAY_MINUTE:
   case INTERVAL_DAY_HOUR:
+  case INTERVAL_DAY:
   {
-    longlong sec, days, daynr, microseconds, extra_sec;
-    ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date
-    microseconds= ltime->second_part + sign*interval.second_part;
-    extra_sec= microseconds/1000000L;
-    microseconds= microseconds%1000000L;
-
-    sec=((ltime->day-1)*3600*24L+ltime->hour*3600+ltime->minute*60+
-	 ltime->second +
-	 sign* (longlong) (interval.day*3600*24L +
-                           interval.hour*LL(3600)+interval.minute*LL(60)+
-                           interval.second))+ extra_sec;
-    if (microseconds < 0)
+    longlong usec, daynr;
+    my_bool neg= 0;
+    enum enum_mysql_timestamp_type time_type= ltime->time_type;
+
+    if (time_type != MYSQL_TIMESTAMP_TIME)
+      ltime->day+= calc_daynr(ltime->year, ltime->month, 1) - 1;
+
+    usec= COMBINE(ltime) + sign*COMBINE(&interval);
+
+    if (usec < 0)
     {
-      microseconds+= LL(1000000);
-      sec--;
+      neg= 1;
+      usec= -usec;
     }
-    days= sec/(3600*LL(24));
-    sec-= days*3600*LL(24);
-    if (sec < 0)
+
+    ltime->second_part= GET_PART(usec, 1000000);
+    ltime->second= GET_PART(usec, 60);
+    ltime->minute= GET_PART(usec, 60);
+    ltime->neg^= neg;
+
+    if (time_type == MYSQL_TIMESTAMP_TIME)
     {
-      days--;
-      sec+= 3600*LL(24);
+      if (usec > TIME_MAX_HOUR)
+        goto invalid_date;
+      ltime->hour= static_cast<uint>(usec);
+      ltime->day= 0;
+      return 0;
     }
-    ltime->second_part= (uint) microseconds;
-    ltime->second= (uint) (sec % 60);
-    ltime->minute= (uint) (sec/60 % 60);
-    ltime->hour=   (uint) (sec/3600);
-    daynr= calc_daynr(ltime->year,ltime->month,1) + days;
+
+    if (int_type != INTERVAL_DAY)
+      ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date
+
+    ltime->hour= GET_PART(usec, 24);
+    daynr= usec;
+
     /* Day number from year 0 to 9999-12-31 */
     if ((ulonglong) daynr > MAX_DAY_NUMBER)
       goto invalid_date;
@@ -828,7 +878,6 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
                         &ltime->day);
     break;
   }
-  case INTERVAL_DAY:
   case INTERVAL_WEEK:
     period= (calc_daynr(ltime->year,ltime->month,ltime->day) +
              sign * (long) interval.day);
@@ -866,13 +915,15 @@ bool date_add_interval(MYSQL_TIME *ltime, interval_type int_type, INTERVAL inter
     goto null_date;
   }
 
-  return 0;					// Ok
+  if (ltime->time_type != MYSQL_TIMESTAMP_TIME)
+    return 0;                                   // Ok
 
 invalid_date:
   push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                       ER_DATETIME_FUNCTION_OVERFLOW,
                       ER(ER_DATETIME_FUNCTION_OVERFLOW),
-                      "datetime");
+                      ltime->time_type == MYSQL_TIMESTAMP_TIME ?
+                      "time" : "datetime");
 null_date:
   return 1;
 }
@@ -972,19 +1023,14 @@ calc_time_diff(MYSQL_TIME *l_time1, MYSQL_TIME *l_time2, int l_sign, longlong *s
 
 int my_time_compare(MYSQL_TIME *a, MYSQL_TIME *b)
 {
-  ulonglong a_t= TIME_to_ulonglong_datetime(a);
-  ulonglong b_t= TIME_to_ulonglong_datetime(b);
+  ulonglong a_t= pack_time(a);
+  ulonglong b_t= pack_time(b);
 
   if (a_t < b_t)
     return -1;
   if (a_t > b_t)
     return 1;
 
-  if (a->second_part < b->second_part)
-    return -1;
-  if (a->second_part > b->second_part)
-    return 1;
-
   return 0;
 }
 
diff --git a/sql/tztime.cc b/sql/tztime.cc
index 6bb295b98c6..eed575957b6 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -817,9 +817,11 @@ sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
     TIME_to_gmt_sec()
       t               - pointer to structure for broken down represenatation
       sp              - pointer to struct with time zone description
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exists (falls into the spring time-gap).
 
   DESCRIPTION
     This is mktime analog for MySQL. It is essentially different
@@ -881,20 +883,23 @@ sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
     Seconds in UTC since Epoch.
     0 in case of error.
 */
+
 static my_time_t
-TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
-                my_bool *in_dst_time_gap)
+TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp, uint *error_code)
 {
   my_time_t local_t;
   uint saved_seconds;
   uint i;
   int shift= 0;
-
   DBUG_ENTER("TIME_to_gmt_sec");
 
   if (!validate_timestamp_range(t))
+  {
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     DBUG_RETURN(0);
+  }
 
+  *error_code= 0;
 
   /* We need this for correct leap seconds handling */
   if (t->second < SECS_PER_MIN)
@@ -938,6 +943,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
       This means that source time can't be represented as my_time_t due to
       limited my_time_t range.
     */
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     DBUG_RETURN(0);
   }
 
@@ -954,6 +960,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
     if (local_t > (my_time_t) (TIMESTAMP_MAX_VALUE - shift * SECS_PER_DAY +
                                sp->revtis[i].rt_offset - saved_seconds))
     {
+      *error_code= ER_WARN_DATA_OUT_OF_RANGE;
       DBUG_RETURN(0);                           /* my_time_t overflow */
     }
     local_t+= shift * SECS_PER_DAY;
@@ -967,7 +974,7 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
       Now we are returning my_time_t value corresponding to the
       beginning of the gap.
     */
-    *in_dst_time_gap= 1;
+    *error_code= ER_WARN_INVALID_TIMESTAMP;
     local_t= sp->revts[i] - sp->revtis[i].rt_offset + saved_seconds;
   }
   else
@@ -975,7 +982,10 @@ TIME_to_gmt_sec(const MYSQL_TIME *t, const TIME_ZONE_INFO *sp,
 
   /* check for TIMESTAMP_MAX_VALUE was already done above */
   if (local_t < TIMESTAMP_MIN_VALUE)
+  {
     local_t= 0;
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
+  }
 
   DBUG_RETURN(local_t);
 }
@@ -1009,8 +1019,7 @@ class Time_zone_system : public Time_zone
 {
 public:
   Time_zone_system() {}                       /* Remove gcc warning */
-  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 };
@@ -1024,9 +1033,11 @@ public:
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time in
                         broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exists (falls into the spring time-gap).
 
   DESCRIPTION
     This method uses system function (localtime_r()) for conversion
@@ -1042,10 +1053,10 @@ public:
     Corresponding my_time_t value or 0 in case of error
 */
 my_time_t
-Time_zone_system::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_system::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   long not_used;
-  return my_system_gmt_sec(t, &not_used, in_dst_time_gap);
+  return my_system_gmt_sec(t, &not_used, error_code);
 }
 
 
@@ -1105,7 +1116,7 @@ class Time_zone_utc : public Time_zone
 public:
   Time_zone_utc() {}                          /* Remove gcc warning */
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+                                    uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 };
@@ -1114,14 +1125,6 @@ public:
 /*
   Convert UTC time from MYSQL_TIME representation to its my_time_t representation.
 
-  SYNOPSIS
-    TIME_to_gmt_sec()
-      t               - pointer to MYSQL_TIME structure with local time
-                        in broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
-
   DESCRIPTION
     Since Time_zone_utc is used only internally for my_time_t -> TIME
     conversions, this function of Time_zone interface is not implemented for
@@ -1131,10 +1134,11 @@ public:
     0
 */
 my_time_t
-Time_zone_utc::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_utc::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   /* Should be never called */
   DBUG_ASSERT(0);
+  *error_code= ER_WARN_DATA_OUT_OF_RANGE;
   return 0;
 }
 
@@ -1194,8 +1198,7 @@ class Time_zone_db : public Time_zone
 {
 public:
   Time_zone_db(TIME_ZONE_INFO *tz_info_arg, const String * tz_name_arg);
-  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+  virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const;
   virtual void gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
 private:
@@ -1232,9 +1235,11 @@ Time_zone_db::Time_zone_db(TIME_ZONE_INFO *tz_info_arg,
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time
                         in broken-down representation.
-      in_dst_time_gap - pointer to bool which is set to true if datetime
-                        value passed doesn't really exist (i.e. falls into
-                        spring time-gap) and is not touched otherwise.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exists (falls into the spring time-gap).
 
   DESCRIPTION
     Please see ::TIME_to_gmt_sec for function description and
@@ -1244,9 +1249,9 @@ Time_zone_db::Time_zone_db(TIME_ZONE_INFO *tz_info_arg,
     Corresponding my_time_t value or 0 in case of error
 */
 my_time_t
-Time_zone_db::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_db::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
-  return ::TIME_to_gmt_sec(t, tz_info, in_dst_time_gap);
+  return ::TIME_to_gmt_sec(t, tz_info, error_code);
 }
 
 
@@ -1292,7 +1297,7 @@ class Time_zone_offset : public Time_zone
 public:
   Time_zone_offset(long tz_offset_arg);
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t,
-                                    my_bool *in_dst_time_gap) const;
+                                    uint *error_code) const;
   virtual void   gmt_sec_to_TIME(MYSQL_TIME *tmp, my_time_t t) const;
   virtual const String * get_name() const;
   /*
@@ -1334,17 +1339,19 @@ Time_zone_offset::Time_zone_offset(long tz_offset_arg):
     TIME_to_gmt_sec()
       t               - pointer to MYSQL_TIME structure with local time
                         in broken-down representation.
-      in_dst_time_gap - pointer to bool which should be set to true if
-                        datetime  value passed doesn't really exist
-                        (i.e. falls into spring time-gap) and is not
-                        touched otherwise.
-                        It is not really used in this class.
+      error_code      - 0, if the conversion was successful;
+                        ER_WARN_DATA_OUT_OF_RANGE, if t contains datetime value
+                           which is out of TIMESTAMP range;
+                        ER_WARN_INVALID_TIMESTAMP, if t represents value which
+                           doesn't exists (falls into the spring time-gap).
 
   RETURN VALUE
-    Corresponding my_time_t value or 0 in case of error
+    Corresponding my_time_t value or 0 in case of error. In case of error
+    *in_dst_time_gap is also set.
 */
+
 my_time_t
-Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap) const
+Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, uint *error_code) const
 {
   my_time_t local_t;
   int shift= 0;
@@ -1354,7 +1361,11 @@ Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap)
     us to make all validation checks here.
   */
   if (!validate_timestamp_range(t))
+  {
+    *error_code= ER_WARN_DATA_OUT_OF_RANGE;
     return 0;
+  }
+  *error_code= 0;
 
   /*
     Do a temporary shift of the boundary dates to avoid
@@ -1378,6 +1389,7 @@ Time_zone_offset::TIME_to_gmt_sec(const MYSQL_TIME *t, my_bool *in_dst_time_gap)
     return local_t;
 
   /* range error*/
+  *error_code= ER_WARN_DATA_OUT_OF_RANGE;
   return 0;
 }
 
@@ -2745,7 +2757,7 @@ main(int argc, char **argv)
             for (time_tmp.second=0; time_tmp.second<60; time_tmp.second+=25)
             {
               long not_used;
-              my_bool not_used_2;
+              uint not_used_2;
               t= (time_t)my_system_gmt_sec(&time_tmp, &not_used, &not_used_2);
               t1= (time_t)TIME_to_gmt_sec(&time_tmp, &tz_info, &not_used_2);
               if (t != t1)
diff --git a/sql/tztime.h b/sql/tztime.h
index daedbb14fca..2b3e9c04aa6 100644
--- a/sql/tztime.h
+++ b/sql/tztime.h
@@ -36,11 +36,11 @@ public:
   /**
     Converts local time in broken down MYSQL_TIME representation to 
     my_time_t (UTC seconds since Epoch) represenation.
-    Returns 0 in case of error. Sets in_dst_time_gap to true if date provided
-    falls into spring time-gap (or lefts it untouched otherwise).
+    Returns 0 in case of error. May set error_code to ER_WARN_DATA_OUT_OF_RANGE
+    or ER_WARN_INVALID_TIMESTAMP, see TIME_to_timestamp())
   */
   virtual my_time_t TIME_to_gmt_sec(const MYSQL_TIME *t, 
-                                    my_bool *in_dst_time_gap) const = 0;
+                                    uint *error_code) const = 0;
   /**
     Converts time in my_time_t representation to local time in
     broken down MYSQL_TIME representation.
diff --git a/sql/uniques.cc b/sql/uniques.cc
index fd6056ae8a6..0d0c253df5d 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -36,7 +36,6 @@
 #include "mysql_priv.h"
 #include "sql_sort.h"
 
-
 int unique_write_to_file(uchar* key, element_count count, Unique *unique)
 {
   /*
@@ -48,6 +47,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
   return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
 }
 
+int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
+{
+  return my_b_write(&unique->file, key, unique->size) ||
+         my_b_write(&unique->file, &count, sizeof(element_count)) ? 1 : 0;
+}
+
 int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
 {
   memcpy(unique->record_pointers, key, unique->size);
@@ -55,10 +60,28 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
   return 0;
 }
 
+int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique)
+{
+  if (count >= unique->min_dupl_count)
+  {
+    memcpy(unique->record_pointers, key, unique->size);
+    unique->record_pointers+=unique->size;
+  }
+  else
+    unique->filtered_out_elems++;
+  return 0;
+}
+
+
 Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
-	       uint size_arg, ulonglong max_in_memory_size_arg)
+	       uint size_arg, ulonglong max_in_memory_size_arg,
+               uint min_dupl_count_arg)
   :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
 {
+  min_dupl_count= min_dupl_count_arg;
+  full_size= size;
+  if (min_dupl_count_arg)
+    full_size+= sizeof(element_count);
   my_b_clear(&file);
   init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0,
             NULL, comp_func_fixed_arg);
@@ -126,7 +149,8 @@ inline double log2_n_fact(double x)
 */
 
 static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
-                                     uint *first, uint *last)
+                                     uint *first, uint *last,
+                                     uint compare_factor)
 {
   uint total_buf_elems= 0;
   for (uint *pbuf= first; pbuf <= last; pbuf++)
@@ -137,7 +161,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
 
   /* Using log2(n)=log(n)/log(2) formula */
   return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
-     total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
+     total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2);
 }
 
 
@@ -170,7 +194,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
 
 static double get_merge_many_buffs_cost(uint *buffer,
                                         uint maxbuffer, uint max_n_elems,
-                                        uint last_n_elems, int elem_size)
+                                        uint last_n_elems, int elem_size,
+                                        uint compare_factor)
 {
   register int i;
   double total_cost= 0.0;
@@ -197,19 +222,22 @@ static double get_merge_many_buffs_cost(uint *buffer,
       {
         total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                            buff_elems + i,
-                                           buff_elems + i + MERGEBUFF-1);
+                                           buff_elems + i + MERGEBUFF-1,
+                                           compare_factor);
 	lastbuff++;
       }
       total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                          buff_elems + i,
-                                         buff_elems + maxbuffer);
+                                         buff_elems + maxbuffer,
+                                         compare_factor);
       maxbuffer= lastbuff;
     }
   }
 
   /* Simulate final merge_buff call. */
   total_cost += get_merge_buffers_cost(buff_elems, elem_size,
-                                       buff_elems, buff_elems + maxbuffer);
+                                       buff_elems, buff_elems + maxbuffer,
+                                       compare_factor);
   return total_cost;
 }
 
@@ -224,7 +252,11 @@ static double get_merge_many_buffs_cost(uint *buffer,
                 to get # bytes needed.
       nkeys     #of elements in Unique
       key_size  size of each elements in bytes
-      max_in_memory_size amount of memory Unique will be allowed to use
+      max_in_memory_size   amount of memory Unique will be allowed to use
+      compare_factor   used to calculate cost of one comparison
+      write_fl  if the result must be saved written to disk
+      in_memory_elems  OUT estimate of the number of elements in memory
+                           if disk is not used  
 
   RETURN
     Cost in disk seeks.
@@ -261,15 +293,17 @@ static double get_merge_many_buffs_cost(uint *buffer,
       these will be random seeks.
 */
 
-double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
-                            ulonglong max_in_memory_size)
+double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+                            ulonglong max_in_memory_size,
+                            uint compare_factor,
+                            bool intersect_fl, bool *in_memory)
 {
-  ulong max_elements_in_tree;
-  ulong last_tree_elems;
+  size_t max_elements_in_tree;
+  size_t last_tree_elems;
   int   n_full_trees; /* number of trees in unique - 1 */
   double result;
 
-  max_elements_in_tree= ((ulong) max_in_memory_size /
+  max_elements_in_tree= ((size_t) max_in_memory_size /
                          ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
 
   n_full_trees=    nkeys / max_elements_in_tree;
@@ -279,11 +313,15 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
   result= 2*log2_n_fact(last_tree_elems + 1.0);
   if (n_full_trees)
     result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
-  result /= TIME_FOR_COMPARE_ROWID;
+  result /= compare_factor;
+
+  DBUG_PRINT("info",("unique trees sizes: %u=%u*%u + %u", (uint)nkeys,
+                     (uint)n_full_trees, 
+                     (uint)(n_full_trees?max_elements_in_tree:0),
+                     (uint)last_tree_elems));
 
-  DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
-                     n_full_trees, n_full_trees?max_elements_in_tree:0,
-                     last_tree_elems));
+  if (in_memory)
+    *in_memory= !n_full_trees;
 
   if (!n_full_trees)
     return result;
@@ -298,12 +336,12 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
   result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);
 
   /* Cost of merge */
+  if (intersect_fl)
+    key_size+= sizeof(element_count);
   double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
                                                max_elements_in_tree,
-                                               last_tree_elems, key_size);
-  if (merge_cost < 0.0)
-    return merge_cost;
-
+                                               last_tree_elems, key_size,
+                                               compare_factor);
   result += merge_cost;
   /*
     Add cost of reading the resulting sequence, assuming there were no
@@ -330,7 +368,10 @@ bool Unique::flush()
   file_ptr.count=tree.elements_in_tree;
   file_ptr.file_pos=my_b_tell(&file);
 
-  if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
+  tree_walk_action action= min_dupl_count ?
+		           (tree_walk_action) unique_write_to_file_with_count :
+		           (tree_walk_action) unique_write_to_file;
+  if (tree_walk(&tree, action,
 		(void*) this, left_root_right) ||
       insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
     return 1;
@@ -360,6 +401,7 @@ Unique::reset()
     reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
   }
   elements= 0;
+  tree.flag= 0;
 }
 
 /*
@@ -426,7 +468,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
   if (end <= begin ||
       merge_buffer_size < (ulong) (key_length * (end - begin + 1)) ||
       init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
-                 buffpek_compare, &compare_context))
+                 buffpek_compare, &compare_context, 0, 0))
     return 1;
   /* we need space for one key when a piece of merge buffer is re-read */
   merge_buffer_size-= key_length;
@@ -471,7 +513,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
     */
     top->key+= key_length;
     if (--top->mem_count)
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
     else /* next piece should be read */
     {
       /* save old_key not to overwrite it in read_to_buffer */
@@ -481,14 +523,14 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
       if (bytes_read == (uint) (-1))
         goto end;
       else if (bytes_read > 0)      /* top->key, top->mem_count are reset */
-        queue_replaced(&queue);     /* in read_to_buffer */
+        queue_replace_top(&queue);             /* in read_to_buffer */
       else
       {
         /*
           Tree for old 'top' element is empty: remove it from the queue and
           give all its memory to the nearest tree.
         */
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
         reuse_freed_buff(&queue, top, key_length);
       }
     }
@@ -579,15 +621,19 @@ bool Unique::get(TABLE *table)
 {
   SORTPARAM sort_param;
   table->sort.found_records=elements+tree.elements_in_tree;
-
   if (my_b_tell(&file) == 0)
   {
     /* Whole tree is in memory;  Don't use disk if you don't need to */
     if ((record_pointers=table->sort.record_pointers= (uchar*)
 	 my_malloc(size * tree.elements_in_tree, MYF(0))))
     {
-      (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
+      tree_walk_action action= min_dupl_count ?
+		         (tree_walk_action) unique_intersect_write_to_ptrs :
+		         (tree_walk_action) unique_write_to_ptrs;
+      filtered_out_elems= 0;
+      (void) tree_walk(&tree, action,
 		       this, left_root_right);
+      table->sort.found_records-= filtered_out_elems;
       return 0;
     }
   }
@@ -617,7 +663,9 @@ bool Unique::get(TABLE *table)
   sort_param.max_rows= elements;
   sort_param.sort_form=table;
   sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
-    size;
+   full_size;
+  sort_param.min_dupl_count= min_dupl_count;
+  sort_param.res_length= 0;
   sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
   sort_param.not_killable=1;
 
@@ -638,8 +686,9 @@ bool Unique::get(TABLE *table)
   if (flush_io_cache(&file) ||
       reinit_io_cache(&file,READ_CACHE,0L,0,0))
     goto err;
-  if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr,
-		    file_ptr, file_ptr+maxbuffer,0))
+  sort_param.res_length= sort_param.rec_length-
+                         (min_dupl_count ? sizeof(min_dupl_count) : 0);
+  if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile))
     goto err;
   error=0;
 err:
@@ -654,3 +703,5 @@ err:
   outfile->end_of_file=save_pos;
   return error;
 }
+
+
diff --git a/sql/unireg.cc b/sql/unireg.cc
index dde755462dd..589833541d9 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -1046,7 +1046,6 @@ static bool make_empty_rec(THD *thd, File file,enum legacy_db_type table_type,
   }
 
   table.in_use= thd;
-  table.s->db_low_byte_first= handler->low_byte_first();
   table.s->blob_ptr_size= portable_sizeof_char_ptr;
 
   null_count=0;
diff --git a/sql/unireg.h b/sql/unireg.h
index 33cd5d1006d..22195f52d97 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -74,10 +74,13 @@
 #define MAX_BIT_FIELD_LENGTH    64      /* Max length in bits for bit fields */
 
 #define MAX_DATE_WIDTH		10	/* YYYY-MM-DD */
-#define MAX_TIME_WIDTH		23	/* -DDDDDD HH:MM:SS.###### */
+#define MIN_TIME_WIDTH          10      /* -HHH:MM:SS */
+#define MAX_TIME_WIDTH          16      /* -DDDDDD HH:MM:SS */
+#define MAX_TIME_FULL_WIDTH     23      /* -DDDDDD HH:MM:SS.###### */
 #define MAX_DATETIME_FULL_WIDTH 29	/* YYYY-MM-DD HH:MM:SS.###### AM */
 #define MAX_DATETIME_WIDTH	19	/* YYYY-MM-DD HH:MM:SS */
 #define MAX_DATETIME_COMPRESSED_WIDTH 14  /* YYYYMMDDHHMMSS */
+#define MAX_DATETIME_PRECISION  6
 
 #define MAX_TABLES	(sizeof(table_map)*8-3)	/* Max tables in join */
 #define PARAM_TABLE_BIT	(((table_map) 1) << (sizeof(table_map)*8-3))
@@ -91,7 +94,7 @@
 #define MAX_SELECT_NESTING (sizeof(nesting_map)*8-1)
 
 #define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
-#define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)
+#define MIN_SORT_MEMORY (1024-MALLOC_OVERHEAD)
 
 /* Memory allocated when parsing a statement / saving a statement */
 #define MEM_ROOT_BLOCK_SIZE       8192