Diffstat (limited to 'sql')
109 files changed, 19825 insertions(+), 1974 deletions(-)
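One pattern worth calling out before the hunks themselves: the new Item_splocal item (see item.h/item.cc below) never stores a value of its own; each val*() call looks up the item currently sitting at its frame offset in the running routine's context (thd->spcont->get_item(m_offset)), evaluates that item, and copies back null_value. A minimal standalone sketch of that deferral pattern, using hypothetical names rather than the server's real THD/Item/sp_rcontext classes:

```cpp
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for the server's Item hierarchy.
struct Value {
  virtual ~Value() = default;
  virtual double val() = 0;   // analogous to Item::val()
  bool null_value = false;
};

struct DoubleValue : Value {
  explicit DoubleValue(double v) : v_(v) {}
  double val() override { return v_; }
  double v_;
};

// Runtime frame of a stored routine: one slot per parameter/local variable
// (the role played by sp_rcontext in the patch).
struct RuntimeContext {
  std::vector<std::unique_ptr<Value>> frame;
  Value *get_item(size_t offset) const { return frame[offset].get(); }
};

// Proxy used wherever the variable's name appears in an expression.
// Like Item_splocal, it keeps only a name and a frame offset and defers
// every evaluation to whatever value currently occupies that slot.
struct LocalRef : Value {
  LocalRef(std::string name, size_t offset, RuntimeContext *ctx)
      : name_(std::move(name)), offset_(offset), ctx_(ctx) {}

  double val() override {
    Value *it = ctx_->get_item(offset_);  // this_item() in the patch
    double ret = it->val();
    null_value = it->null_value;          // propagate NULL-ness, as the patch does
    return ret;
  }

  std::string name_;
  size_t offset_;
  RuntimeContext *ctx_;  // the patch fetches this from current_thd->spcont at call time
};

int main() {
  RuntimeContext ctx;
  ctx.frame.push_back(std::make_unique<DoubleValue>(42.0));
  LocalRef x("x", 0, &ctx);
  return x.val() == 42.0 ? 0 : 1;  // evaluating the proxy reads slot 0
}
```

Assigning a new value to the slot (as the stored-procedure interpreter does on SET) changes what every existing LocalRef for that offset evaluates to, which is exactly why Item_splocal carries an offset instead of a cached value.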
diff --git a/sql/Makefile.am b/sql/Makefile.am index 007239f2e8c..a1eada6e3d2 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -59,7 +59,9 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ log_event.h sql_repl.h slave.h \ stacktrace.h sql_sort.h sql_cache.h set_var.h \ spatial.h gstream.h client_settings.h tzfile.h \ - tztime.h examples/ha_example.h examples/ha_archive.h + tztime.h examples/ha_example.h examples/ha_archive.h \ + sp_head.h sp_pcontext.h sp_rcontext.h sp.h sp_cache.h \ + parse_file.h mysqld_SOURCES = sql_lex.cc sql_handler.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ @@ -90,8 +92,9 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ stacktrace.c repl_failsafe.h repl_failsafe.cc \ gstream.cc spatial.cc sql_help.cc protocol_cursor.cc \ tztime.cc my_time.c \ - examples/ha_example.cc examples/ha_archive.cc - + examples/ha_example.cc examples/ha_archive.cc \ + sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \ + sp_cache.cc parse_file.cc gen_lex_hash_SOURCES = gen_lex_hash.cc gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc diff --git a/sql/filesort.cc b/sql/filesort.cc index a84fa4fe6f4..fe2b3850197 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -48,7 +48,8 @@ static int merge_index(SORTPARAM *param,uchar *sort_buffer, BUFFPEK *buffpek, uint maxbuffer,IO_CACHE *tempfile, IO_CACHE *outfile); -static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count); +static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, + FILESORT_INFO *table_sort); static uint sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset); static SORT_ADDON_FIELD *get_addon_fields(THD *thd, Field **ptabfield, @@ -106,8 +107,16 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, #ifdef SKIP_DBUG_IN_FILESORT DBUG_PUSH(""); /* No DBUG here */ #endif - - outfile= table->sort.io_cache; + FILESORT_INFO table_sort; + /* + Don't use table->sort in filesort as it is also used by + QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end + when index_merge select has finished with it. 
+ */ + memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO)); + table->sort.io_cache= NULL; + + outfile= table_sort.io_cache; my_b_clear(&tempfile); my_b_clear(&buffpek_pointers); buffpek=0; @@ -128,14 +137,15 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, param.sort_length, ¶m.addon_length); } - table->sort.addon_buf= 0; - table->sort.addon_length= param.addon_length; - table->sort.addon_field= param.addon_field; - table->sort.unpack= unpack_addon_fields; + + table_sort.addon_buf= 0; + table_sort.addon_length= param.addon_length; + table_sort.addon_field= param.addon_field; + table_sort.unpack= unpack_addon_fields; if (param.addon_field) { param.res_length= param.addon_length; - if (!(table->sort.addon_buf= (byte *) my_malloc(param.addon_length, + if (!(table_sort.addon_buf= (byte *) my_malloc(param.addon_length, MYF(MY_WME)))) goto err; } @@ -212,7 +222,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, if (maxbuffer == 0) // The whole set is in memory { - if (save_index(¶m,sort_keys,(uint) records)) + if (save_index(¶m,sort_keys,(uint) records, &table_sort)) goto err; } else @@ -275,6 +285,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, #ifdef SKIP_DBUG_IN_FILESORT DBUG_POP(); /* Ok to DBUG */ #endif + memcpy(&table->sort, &table_sort, sizeof(FILESORT_INFO)); DBUG_PRINT("exit",("records: %ld",records)); DBUG_RETURN(error ? HA_POS_ERROR : records); } /* filesort */ @@ -349,7 +360,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, byte *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH]; my_off_t record; TABLE *sort_form; - volatile my_bool *killed= ¤t_thd->killed; + volatile THD::killed_state *killed= ¤t_thd->killed; handler *file; DBUG_ENTER("find_all_keys"); DBUG_PRINT("info",("using: %s",(select?select->quick?"ranges":"where":"every row"))); @@ -378,12 +389,24 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, current_thd->variables.read_buff_size); } + READ_RECORD read_record_info; + if (quick_select) + { + if (select->quick->reset()) + DBUG_RETURN(HA_POS_ERROR); + init_read_record(&read_record_info, current_thd, select->quick->head, + select, 1, 1); + } + for (;;) { if (quick_select) { - if ((error=select->quick->get_next())) - break; + if ((error= read_record_info.read_record(&read_record_info))) + { + error= HA_ERR_END_OF_FILE; + break; + } file->position(sort_form->record[0]); } else /* Not quick-select */ @@ -411,6 +434,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, if (error && error != HA_ERR_RECORD_DELETED) break; } + if (*killed) { DBUG_PRINT("info",("Sort killed by user")); @@ -434,9 +458,21 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, else file->unlock_row(); } - (void) file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */ - if (!next_pos) - file->ha_rnd_end(); + if (quick_select) + { + /* + index_merge quick select uses table->sort when retrieving rows, so free + resoures it has allocated. 
+ */ + end_read_record(&read_record_info); + } + else + { + (void) file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */ + if (!next_pos) + file->ha_rnd_end(); + } + DBUG_PRINT("test",("error: %d indexpos: %d",error,indexpos)); if (error != HA_ERR_END_OF_FILE) { @@ -691,8 +727,8 @@ static void make_sortkey(register SORTPARAM *param, return; } - -static bool save_index(SORTPARAM *param, uchar **sort_keys, uint count) +static bool save_index(SORTPARAM *param, uchar **sort_keys, uint count, + FILESORT_INFO *table_sort) { uint offset,res_length; byte *to; @@ -703,7 +739,7 @@ static bool save_index(SORTPARAM *param, uchar **sort_keys, uint count) offset= param->rec_length-res_length; if ((ha_rows) count > param->max_rows) count=(uint) param->max_rows; - if (!(to= param->sort_form->sort.record_pointers= + if (!(to= table_sort->record_pointers= (byte*) my_malloc(res_length*count, MYF(MY_WME)))) DBUG_RETURN(1); /* purecov: inspected */ for (uchar **end= sort_keys+count ; sort_keys != end ; sort_keys++) @@ -783,6 +819,39 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, } /* read_to_buffer */ +/* + Put all room used by freed buffer to use in adjacent buffer. Note, that + we can't simply distribute memory evenly between all buffers, because + new areas must not overlap with old ones. + SYNOPSYS + reuse_freed_buff() + queue IN list of non-empty buffers, without freed buffer + reuse IN empty buffer + key_length IN key length +*/ + +void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length) +{ + uchar *reuse_end= reuse->base + reuse->max_keys * key_length; + for (uint i= 0; i < queue->elements; ++i) + { + BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i); + if (bp->base + bp->max_keys * key_length == reuse->base) + { + bp->max_keys+= reuse->max_keys; + return; + } + else if (bp->base == reuse_end) + { + bp->base= reuse->base; + bp->max_keys+= reuse->max_keys; + return; + } + } + DBUG_ASSERT(0); +} + + /* Merge buffers to one buffer */ @@ -798,18 +867,18 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ha_rows max_rows,org_max_rows; my_off_t to_start_filepos; uchar *strpos; - BUFFPEK *buffpek,**refpek; + BUFFPEK *buffpek; QUEUE queue; qsort2_cmp cmp; - volatile my_bool *killed= ¤t_thd->killed; - my_bool not_killable; + volatile THD::killed_state *killed= ¤t_thd->killed; + THD::killed_state not_killable; DBUG_ENTER("merge_buffers"); statistic_increment(filesort_merge_passes, &LOCK_status); if (param->not_killable) { killed= ¬_killable; - not_killable= 0; + not_killable= THD::NOT_KILLED; } error=0; @@ -909,29 +978,8 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, if (!(error= (int) read_to_buffer(from_file,buffpek, rec_length))) { - uchar *base= buffpek->base; - ulong max_keys= buffpek->max_keys; - VOID(queue_remove(&queue,0)); - - /* Put room used by buffer to use in other buffer */ - for (refpek= (BUFFPEK**) &queue_top(&queue); - refpek <= (BUFFPEK**) &queue_end(&queue); - refpek++) - { - buffpek= *refpek; - if (buffpek->base+buffpek->max_keys*rec_length == base) - { - buffpek->max_keys+= max_keys; - break; - } - else if (base+max_keys*rec_length == buffpek->base) - { - buffpek->base= base; - buffpek->max_keys+= max_keys; - break; - } - } + reuse_freed_buff(&queue, buffpek, rec_length); break; /* One buffer have been removed */ } else if (error == -1) diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 4bed33af15b..e25640280a3 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -2148,7 +2148,7 @@ static void print_msg(THD *thd, const char 
*table_name, const char *op_name, protocol->store(msg_type); protocol->store(msgbuf); if (protocol->write()) - thd->killed=1; + thd->killed=THD::KILL_CONNECTION; } #endif @@ -2530,4 +2530,29 @@ ha_rows ha_berkeley::estimate_number_of_rows() return share->rows + HA_BERKELEY_EXTRA_ROWS; } +int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) +{ + if (hidden_primary_key) + return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH); + + int result; + Field *field; + KEY *key_info=table->key_info+table->primary_key; + KEY_PART_INFO *key_part=key_info->key_part; + KEY_PART_INFO *end=key_part+key_info->key_parts; + + for (; key_part != end; key_part++) + { + field= key_part->field; + result= field->pack_cmp((const char*)ref1, (const char*)ref2, + key_part->length); + if (result) + return result; + ref1 += field->packed_col_length((const char*)ref1, key_part->length); + ref2 += field->packed_col_length((const char*)ref2, key_part->length); + } + + return 0; +} + #endif /* HAVE_BERKELEY_DB */ diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 52116710726..3213a81fadf 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -163,6 +163,8 @@ class ha_berkeley: public handler longlong get_auto_increment(); void print_error(int error, myf errflag); uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } + bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool berkeley_shared_data; diff --git a/sql/ha_heap.h b/sql/ha_heap.h index f05146acdc4..9d0b9999300 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -91,5 +91,10 @@ class ha_heap: public handler THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); - + int cmp_ref(const byte *ref1, const byte *ref2) + { + HEAP_PTR ptr1=*(HEAP_PTR*)ref1; + HEAP_PTR ptr2=*(HEAP_PTR*)ref2; + return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); + } }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 6eae315e443..9f14e22ce38 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -750,6 +750,8 @@ ha_innobase::init_table_handle_for_HANDLER(void) prebuilt->read_just_key = FALSE; prebuilt->used_in_HANDLER = TRUE; + + prebuilt->keep_other_fields_on_keyread = FALSE; } /************************************************************************* @@ -2052,12 +2054,15 @@ build_template( update field->query_id so that the formula thd->query_id == field->query_id did not work. 
*/ - if (templ_type == ROW_MYSQL_REC_FIELDS - && !(fetch_all_in_key - && dict_index_contains_col_or_prefix(index, i)) - && !(fetch_primary_key_cols - && dict_table_col_in_clustered_key(index->table, i)) - && thd->query_id != field->query_id) { + ibool index_contains_field= + dict_index_contains_col_or_prefix(index, i); + + if (templ_type == ROW_MYSQL_REC_FIELDS && + ((prebuilt->read_just_key && !index_contains_field) || + (!(fetch_all_in_key && index_contains_field) && + !(fetch_primary_key_cols && + dict_table_col_in_clustered_key(index->table, i)) && + thd->query_id != field->query_id))) { /* This field is not needed in the query, skip it */ @@ -4559,9 +4564,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_RESET_STATE: + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_NO_KEYREAD: @@ -4580,6 +4587,9 @@ ha_innobase::extra( case HA_EXTRA_KEYREAD: prebuilt->read_just_key = 1; break; + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + prebuilt->keep_other_fields_on_keyread = 1; + break; default:/* Do nothing */ ; } @@ -4628,6 +4638,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -4720,6 +4731,7 @@ ha_innobase::external_lock( prebuilt->hint_need_to_fetch_extra_cols = 0; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (lock_type == F_WRLCK) { @@ -5188,6 +5200,55 @@ innobase_store_binlog_offset_and_flush_log( log_buffer_flush_to_disk(); } + +int +ha_innobase::cmp_ref( + const mysql_byte *ref1, + const mysql_byte *ref2) +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + enum_field_types mysql_type; + Field* field; + int result; + + if (prebuilt->clust_index_was_generated) + return memcmp(ref1, ref2, DATA_ROW_ID_LEN); + + /* Do type-aware comparison of Primary Key members. 
PK members + are always NOT NULL, so no checks for NULL are performed */ + KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part; + KEY_PART_INFO *key_part_end= + key_part + table->key_info[table->primary_key].key_parts; + for (; key_part != key_part_end; ++key_part) { + field = key_part->field; + mysql_type = field->type(); + if (mysql_type == FIELD_TYPE_TINY_BLOB + || mysql_type == FIELD_TYPE_MEDIUM_BLOB + || mysql_type == FIELD_TYPE_BLOB + || mysql_type == FIELD_TYPE_LONG_BLOB) { + + ut_a(!ref1[1]); + ut_a(!ref2[1]); + byte len1= *ref1; + byte len2= *ref2; + ref1 += 2; + ref2 += 2; + result = + ((Field_blob*)field)->cmp((const char*)ref1, len1, + (const char*)ref2, len2); + } else { + result = + field->cmp((const char*)ref1, (const char*)ref2); + } + + if (result) + return result; + ref1 += key_part->length; + ref2 += key_part->length; + } + return 0; +} + char *ha_innobase::get_mysql_bin_log_name() { return trx_sys_mysql_bin_log_name; diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 7e05488289e..0a86f4adf3c 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -166,9 +166,10 @@ class ha_innobase: public handler void init_table_handle_for_HANDLER(); longlong get_auto_increment(); uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } - - static char *get_mysql_bin_log_name(); + static char *get_mysql_bin_log_name(); static ulonglong get_mysql_bin_log_pos(); + bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern uint innobase_init_flags, innobase_lock_type; diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 77615d68fe4..f7e966a9d16 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -87,9 +87,9 @@ static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, extern "C" { -volatile my_bool *killed_ptr(MI_CHECK *param) +int *killed_ptr(void *thd) { - return &(((THD *)(param->thd))->killed); + return (int*)&((THD *)thd)->killed; } void mi_check_print_error(MI_CHECK *param, const char *fmt,...) 
diff --git a/sql/handler.h b/sql/handler.h index 506038fccfd..13874fa6bb0 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -497,10 +497,18 @@ public: /* Type of table for caching query */ virtual uint8 table_cache_type() { return HA_CACHE_TBL_NONTRANSACT; } - /* - Is query with this table cachable (have sense only for ASKTRANSACT - tables) - */ + + /* + RETURN + true Primary key (if there is one) is clustered key covering all fields + false otherwise + */ + virtual bool primary_key_is_clustered() { return FALSE; } + + virtual int cmp_ref(const byte *ref1, const byte *ref2) + { + return memcmp(ref1, ref2, ref_length); + } }; /* Some extern variables used with handlers */ diff --git a/sql/item.cc b/sql/item.cc index 658f5c42a43..8dfb8fb5587 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -22,6 +22,7 @@ #include "mysql_priv.h" #include <m_ctype.h> #include "my_dir.h" +#include "sp_rcontext.h" static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current, @@ -41,7 +42,7 @@ void item_init(void) } Item::Item(): - fixed(0) + name_length(0), fixed(0) { marker= 0; maybe_null=null_value=with_sum_func=unsigned_flag=0; @@ -174,6 +175,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) { /* Empty string, used by AS or internal function like last_insert_id() */ name= (char*) str; + name_length= 0; return; } while (length && !my_isgraph(cs,*str)) @@ -184,12 +186,12 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) if (!my_charset_same(cs, system_charset_info)) { uint32 res_length; - name= sql_strmake_with_convert(str, length, cs, + name= sql_strmake_with_convert(str, name_length= length, cs, MAX_ALIAS_NAME, system_charset_info, &res_length); } else - name=sql_strmake(str, min(length,MAX_ALIAS_NAME)); + name= sql_strmake(str, (name_length= min(length,MAX_ALIAS_NAME))); } @@ -260,6 +262,34 @@ CHARSET_INFO *Item::default_charset() return current_thd->variables.collation_connection; } + +Item * +Item_splocal::this_item() +{ + THD *thd= current_thd; + + return thd->spcont->get_item(m_offset); +} + +Item * +Item_splocal::this_const_item() const +{ + THD *thd= current_thd; + + return thd->spcont->get_item(m_offset); +} + +Item::Type +Item_splocal::type() const +{ + THD *thd= current_thd; + + if (thd->spcont) + return thd->spcont->get_item(m_offset)->type(); + return NULL_ITEM; // Anything but SUBSELECT_ITEM +} + + bool DTCollation::aggregate(DTCollation &dt) { if (!my_charset_same(collation, dt.collation)) diff --git a/sql/item.h b/sql/item.h index e1bed6bd1d8..a169fc32a0b 100644 --- a/sql/item.h +++ b/sql/item.h @@ -111,6 +111,7 @@ public: my_string name; /* Name from select */ Item *next; uint32 max_length; + uint name_length; /* Length of name */ uint8 marker,decimals; my_bool maybe_null; /* If item may be null */ my_bool null_value; /* if item is null */ @@ -257,6 +258,9 @@ public: virtual bool remove_dependence_processor(byte * arg) { return 0; } virtual bool remove_fixed(byte * arg) { fixed= 0; return 0; } + virtual Item *this_item() { return this; } /* For SPs mostly. */ + virtual Item *this_const_item() const { return const_cast<Item*>(this); } /* For SPs mostly. */ + // Row emulation virtual uint cols() { return 1; } virtual Item* el(uint i) { return this; } @@ -279,6 +283,104 @@ public: }; +// A local SP variable (incl. 
parameters), used in runtime +class Item_splocal : public Item +{ +private: + + uint m_offset; + LEX_STRING m_name; + +public: + + Item_splocal(LEX_STRING name, uint offset) + : m_offset(offset), m_name(name) + { + Item::maybe_null= TRUE; + } + + Item *this_item(); + Item *this_const_item() const; + + inline uint get_offset() + { + return m_offset; + } + + // Abstract methods inherited from Item. Just defer the call to + // the item in the frame + enum Type type() const; + + inline double val() + { + Item *it= this_item(); + double ret= it->val(); + Item::null_value= it->null_value; + return ret; + } + + inline longlong val_int() + { + Item *it= this_item(); + longlong ret= it->val_int(); + Item::null_value= it->null_value; + return ret; + } + + inline String *val_str(String *sp) + { + Item *it= this_item(); + String *ret= it->val_str(sp); + Item::null_value= it->null_value; + return ret; + } + + inline bool is_null() + { + Item *it= this_item(); + bool ret= it->is_null(); + Item::null_value= it->null_value; + return ret; + } + + inline void make_field(Send_field *field) + { + Item *it= this_item(); + + it->set_name(m_name.str, m_name.length, system_charset_info); + it->make_field(field); + } + + inline Item_result result_type() const + { + return this_const_item()->result_type(); + } + + inline bool const_item() const + { + return TRUE; + } + + inline int save_in_field(Field *field, bool no_conversions) + { + return this_item()->save_in_field(field, no_conversions); + } + + inline void print(String *str) + { + str->reserve(m_name.length+8); + str->append(m_name.str, m_name.length); + str->append('@'); + str->qs_append(m_offset); + } + + inline bool send(Protocol *protocol, String *str) + { + return this_item()->send(protocol, str); + } +}; + + class Item_num: public Item { public: diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 7c96226b08a..23ac82e79e6 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -233,6 +233,40 @@ public: Item *neg_transformer(THD *thd); }; + +/* + The class Item_func_trig_cond is used for guarded predicates + which are employed only for internal purposes. + A guarded predicates is an object consisting of an a regular or + a guarded predicate P and a pointer to a boolean guard variable g. + A guarded predicate P/g is evaluated to true if the value of the + guard g is false, otherwise it is evaluated to the same value that + the predicate P: val(P/g)= g ? val(P):true. + Guarded predicates allow us to include predicates into a conjunction + conditionally. Currently they are utilized for pushed down predicates + in queries with outer join operations. + + In the future, probably, it makes sense to extend this class to + the objects consisting of three elements: a predicate P, a pointer + to a variable g and a firing value s with following evaluation + rule: val(P/g,s)= g==s? val(P) : true. It will allow us to build only + one item for the objects of the form P/g1/g2... + + Objects of this class are built only for query execution after + the execution plan has been already selected. That's why this + class needs only val_int out of generic methods. +*/ + +class Item_func_trig_cond: public Item_bool_func +{ + bool *trig_var; +public: + Item_func_trig_cond(Item *a, bool *f) : Item_bool_func(a) { trig_var= f; } + longlong val_int() { return *trig_var ? 
args[0]->val_int() : 1; } + enum Functype functype() const { return TRIG_COND_FUNC; }; + const char *func_name() const { return "trigcond"; }; +}; + class Item_func_not_all :public Item_func_not { bool abort_on_null; @@ -817,7 +851,7 @@ public: } const char *func_name() const { return "isnotnull"; } optimize_type select_optimize() const { return OPTIMIZE_NULL; } - table_map not_null_tables() const { return 0; } + table_map not_null_tables() const { return used_tables(); } Item *neg_transformer(THD *thd); void print(String *str); CHARSET_INFO *compare_collation() { return args[0]->collation.collation; } diff --git a/sql/item_create.cc b/sql/item_create.cc index 4290a25e348..e2c85af49bc 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -18,10 +18,6 @@ #include "mysql_priv.h" -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - Item *create_func_abs(Item* a) { return new Item_func_abs(a); @@ -317,11 +313,6 @@ Item *create_func_current_user() system_charset_info); } -Item *create_func_quarter(Item* a) -{ - return new Item_func_quarter(a); -} - Item *create_func_radians(Item *a) { return new Item_func_units((char*) "radians",a,M_PI/180,0.0); diff --git a/sql/item_create.h b/sql/item_create.h index 19f0c9133f2..18c5b3239f3 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -73,7 +73,6 @@ Item *create_func_period_diff(Item* a, Item *b); Item *create_func_pi(void); Item *create_func_pow(Item* a, Item *b); Item *create_func_current_user(void); -Item *create_func_quarter(Item* a); Item *create_func_radians(Item *a); Item *create_func_release_lock(Item* a); Item *create_func_repeat(Item* a, Item *b); diff --git a/sql/item_func.cc b/sql/item_func.cc index e3874d8e4fa..c2c93586af8 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -28,6 +28,9 @@ #include <time.h> #include <ft_global.h> +#include "sp_head.h" +#include "sp_rcontext.h" +#include "sp.h" bool check_reserved_words(LEX_STRING *name) { @@ -782,7 +785,7 @@ double Item_func_log2::val() double value=args[0]->val(); if ((null_value=(args[0]->null_value || value <= 0.0))) return 0.0; - return log(value) / log(2.0); + return log(value) / M_LN2; } double Item_func_log10::val() @@ -1570,11 +1573,16 @@ udf_handler::fix_fields(THD *thd, TABLE_LIST *tables, Item_result_field *func, const_item_cache&=item->const_item(); f_args.arg_type[i]=item->result_type(); } + //TODO: why all folowing memory is not allocated with 1 call of sql_alloc? 
if (!(buffers=new String[arg_count]) || !(f_args.args= (char**) sql_alloc(arg_count * sizeof(char *))) || - !(f_args.lengths=(ulong*) sql_alloc(arg_count * sizeof(long))) || - !(f_args.maybe_null=(char*) sql_alloc(arg_count * sizeof(char))) || - !(num_buffer= (char*) sql_alloc(ALIGN_SIZE(sizeof(double))*arg_count))) + !(f_args.lengths= (ulong*) sql_alloc(arg_count * sizeof(long))) || + !(f_args.maybe_null= (char*) sql_alloc(arg_count * sizeof(char))) || + !(num_buffer= (char*) sql_alloc(arg_count * + ALIGN_SIZE(sizeof(double)))) || + !(f_args.attributes= (char**) sql_alloc(arg_count * sizeof(char *))) || + !(f_args.attribute_lengths= (ulong*) sql_alloc(arg_count * + sizeof(long)))) { free_udf(u_d); DBUG_RETURN(1); @@ -1593,8 +1601,10 @@ udf_handler::fix_fields(THD *thd, TABLE_LIST *tables, Item_result_field *func, for (uint i=0; i < arg_count; i++) { f_args.args[i]=0; - f_args.lengths[i]=arguments[i]->max_length; - f_args.maybe_null[i]=(char) arguments[i]->maybe_null; + f_args.lengths[i]= arguments[i]->max_length; + f_args.maybe_null[i]= (char) arguments[i]->maybe_null; + f_args.attributes[i]= arguments[i]->name; + f_args.attribute_lengths[i]= arguments[i]->name_length; switch(arguments[i]->type()) { case Item::STRING_ITEM: // Constant string ! @@ -3236,6 +3246,137 @@ longlong Item_func_is_used_lock::val_int() } +longlong Item_func_row_count::val_int() +{ + DBUG_ASSERT(fixed == 1); + THD *thd= current_thd; + + return thd->row_count_func; +} + + +Item_func_sp::Item_func_sp(sp_name *name) + :Item_func(), m_name(name), m_sp(NULL) +{ + m_name->init_qname(current_thd); +} + +Item_func_sp::Item_func_sp(sp_name *name, List<Item> &list) + :Item_func(list), m_name(name), m_sp(NULL) +{ + m_name->init_qname(current_thd); +} + +const char * +Item_func_sp::func_name() const +{ + return m_name->m_name.str; +} + +int +Item_func_sp::execute(Item **itp) +{ + DBUG_ENTER("Item_func_sp::execute"); + THD *thd= current_thd; + int res; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + st_sp_security_context save_ctx; +#endif + + if (! m_sp) + m_sp= sp_find_function(thd, m_name); + if (! m_sp) + { + my_printf_error(ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), MYF(0), + "FUNCTION", m_name->m_qname); + DBUG_RETURN(-1); + } + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + sp_change_security_context(thd, m_sp, &save_ctx); +#endif + + res= m_sp->execute_function(thd, args, arg_count, itp); + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + sp_restore_security_context(thd, m_sp, &save_ctx); +#endif + + DBUG_RETURN(res); +} + +enum enum_field_types +Item_func_sp::field_type() const +{ + DBUG_ENTER("Item_func_sp::field_type"); + + if (! m_sp) + m_sp= sp_find_function(current_thd, m_name); + if (m_sp) + { + DBUG_PRINT("info", ("m_returns = %d", m_sp->m_returns)); + DBUG_RETURN(m_sp->m_returns); + } + my_printf_error(ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), MYF(0), + "FUNCTION", m_name->m_qname); + DBUG_RETURN(MYSQL_TYPE_STRING); +} + +Item_result +Item_func_sp::result_type() const +{ + DBUG_ENTER("Item_func_sp::result_type"); + DBUG_PRINT("info", ("m_sp = %p", m_sp)); + + if (! m_sp) + m_sp= sp_find_function(current_thd, m_name); + if (m_sp) + { + DBUG_RETURN(m_sp->result()); + } + my_printf_error(ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), MYF(0), + "FUNCTION", m_name->m_qname); + DBUG_RETURN(STRING_RESULT); +} + +void +Item_func_sp::fix_length_and_dec() +{ + DBUG_ENTER("Item_func_sp::fix_length_and_dec"); + + if (! m_sp) + m_sp= sp_find_function(current_thd, m_name); + if (! 
m_sp) + { + my_printf_error(ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), MYF(0), + "FUNCTION", m_name->m_qname); + } + else + { + switch (m_sp->result()) { + case STRING_RESULT: + maybe_null= 1; + max_length= MAX_BLOB_WIDTH; + break; + case REAL_RESULT: + decimals= NOT_FIXED_DEC; + max_length= float_length(decimals); + break; + case INT_RESULT: + decimals= 0; + max_length= 21; + break; + case ROW_RESULT: + default: + // This case should never be choosen + DBUG_ASSERT(0); + break; + } + } + DBUG_VOID_RETURN; +} + + longlong Item_func_found_rows::val_int() { DBUG_ASSERT(fixed == 1); diff --git a/sql/item_func.h b/sql/item_func.h index c05c1b01259..8636bcd6507 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -47,7 +47,9 @@ public: SP_CONTAINS_FUNC,SP_OVERLAPS_FUNC, SP_STARTPOINT,SP_ENDPOINT,SP_EXTERIORRING, SP_POINTN,SP_GEOMETRYN,SP_INTERIORRINGN, - NOT_FUNC, NOT_ALL_FUNC, NOW_FUNC}; + NOT_FUNC, NOT_ALL_FUNC, + NOW_FUNC, TRIG_COND_FUNC, + GUSERVAR_FUNC}; enum optimize_type { OPTIMIZE_NONE,OPTIMIZE_KEY,OPTIMIZE_OP, OPTIMIZE_NULL }; enum Type type() const { return FUNC_ITEM; } virtual enum Functype functype() const { return UNKNOWN_FUNC; } @@ -955,6 +957,8 @@ class Item_func_get_user_var :public Item_func public: Item_func_get_user_var(LEX_STRING a): Item_func(), name(a) {} + enum Functype functype() const { return GUSERVAR_FUNC; } + LEX_STRING get_name() { return name; } double val(); longlong val_int(); String *val_str(String* str); @@ -1073,6 +1077,82 @@ enum Cast_target }; +class Item_func_row_count :public Item_int_func +{ +public: + Item_func_row_count() :Item_int_func() {} + longlong val_int(); + const char *func_name() const { return "row_count"; } + void fix_length_and_dec() { decimals= 0; maybe_null=0; } +}; + + +/* + * + * Stored FUNCTIONs + * + */ + +class sp_head; +class sp_name; + +class Item_func_sp :public Item_func +{ +private: + sp_name *m_name; + mutable sp_head *m_sp; + + int execute(Item **itp); + +public: + + Item_func_sp(sp_name *name); + + Item_func_sp(sp_name *name, List<Item> &list); + + virtual ~Item_func_sp() + {} + + const char *func_name() const; + + enum enum_field_types field_type() const; + + Item_result result_type() const; + + longlong val_int() + { + return (longlong)Item_func_sp::val(); + } + + double val() + { + Item *it; + + if (execute(&it)) + { + null_value= 1; + return 0.0; + } + return it->val(); + } + + String *val_str(String *str) + { + Item *it; + + if (execute(&it)) + { + null_value= 1; + return NULL; + } + return it->val_str(str); + } + + void fix_length_and_dec(); + +}; + + class Item_func_found_rows :public Item_int_func { public: diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 4c1ffeecc97..91a31502780 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -125,7 +125,7 @@ bool Item_subselect::fix_fields(THD *thd_param, TABLE_LIST *tables, Item **ref) { DBUG_ASSERT(fixed == 0); engine->set_thd((thd= thd_param)); - stmt= thd->current_statement; + arena= thd->current_arena; char const *save_where= thd->where; int res= engine->prepare(); @@ -336,8 +336,8 @@ Item_singlerow_subselect::select_transformer(JOIN *join) if (join->conds || join->having) { Item *cond; - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); + if (arena) + thd->set_n_backup_item_arena(arena, &backup); if (!join->having) cond= join->conds; @@ -350,15 +350,15 @@ Item_singlerow_subselect::select_transformer(JOIN *join) new Item_null()))) goto err; } - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + 
thd->restore_backup_item_arena(arena, &backup); return RES_REDUCE; } return RES_OK; err: - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); return RES_ERROR; } @@ -638,8 +638,8 @@ Item_in_subselect::single_value_transformer(JOIN *join, Statement backup; thd->where= "scalar IN/ALL/ANY subquery"; - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); + if (arena) + thd->set_n_backup_item_arena(arena, &backup); if (select_lex->item_list.elements > 1) { @@ -852,21 +852,21 @@ Item_in_subselect::single_value_transformer(JOIN *join, push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_SELECT_REDUCED, warn_buff); } - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(RES_REDUCE); } } } ok: - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(RES_OK); err: - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(RES_ERROR); } @@ -884,8 +884,8 @@ Item_in_subselect::row_value_transformer(JOIN *join) Item *item= 0; thd->where= "row IN/ALL/ANY subquery"; - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); + if (arena) + thd->set_n_backup_item_arena(arena, &backup); SELECT_LEX *select_lex= join->select_lex; @@ -969,13 +969,13 @@ Item_in_subselect::row_value_transformer(JOIN *join) if (join->conds->fix_fields(thd, join->tables_list, 0)) goto err; } - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(RES_OK); err: - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(RES_ERROR); } @@ -1037,13 +1037,7 @@ subselect_single_select_engine(st_select_lex *select, { select_lex= select; SELECT_LEX_UNIT *unit= select_lex->master_unit(); - unit->offset_limit_cnt= unit->global_parameters->offset_limit; - unit->select_limit_cnt= unit->global_parameters->select_limit+ - unit->global_parameters ->offset_limit; - if (unit->select_limit_cnt < unit->global_parameters->select_limit) - unit->select_limit_cnt= HA_POS_ERROR; // no limit - if (unit->select_limit_cnt == HA_POS_ERROR) - select_lex->options&= ~OPTION_FOUND_ROWS; + unit->set_limit(unit->global_parameters, select_lex); unit->item= item; this->select_lex= select_lex; } diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 364781de362..019d43e4819 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -26,7 +26,7 @@ class JOIN; class select_subselect; class subselect_engine; class Item_bool_func2; -class Statement; +class Item_arena; /* base class for subselects */ @@ -36,8 +36,8 @@ class Item_subselect :public Item_result_field protected: /* thread handler, will be assigned in fix_fields only */ THD *thd; - /* prepared statement, or 0 */ - Statement *stmt; + /* Item_arena used or 0 */ + Item_arena *arena; /* substitution instead of subselect in case of optimization */ Item *substitution; /* unit of subquery */ diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 7a8e15e0a9d..50864132932 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -77,15 +77,15 @@ Item_sum::Item_sum(THD *thd, Item_sum *item): */ bool Item_sum::save_args_for_prepared_statements(THD *thd) { - if (thd->current_statement) - return save_args(thd->current_statement); + if (thd->current_arena && 
args_copy == 0) + return save_args(thd->current_arena); return 0; } -bool Item_sum::save_args(Statement* stmt) +bool Item_sum::save_args(Item_arena* arena) { - if (!(args_copy= (Item**) stmt->alloc(sizeof(Item*)*arg_count))) + if (!(args_copy= (Item**) arena->alloc(sizeof(Item*)*arg_count))) return 1; memcpy(args_copy, args, sizeof(Item*)*arg_count); return 0; @@ -330,6 +330,122 @@ double Item_sum_sum::val() } +/* Item_sum_sum_distinct */ + +Item_sum_sum_distinct::Item_sum_sum_distinct(Item *item) + :Item_sum_num(item), sum(0.0), tree(0) +{ + /* + quick_group is an optimizer hint, which means that GROUP BY can be + handled with help of index on grouped columns. + By setting quick_group to zero we force creation of temporary table + to perform GROUP BY. + */ + quick_group= 0; +} + + +Item_sum_sum_distinct::Item_sum_sum_distinct(THD *thd, + Item_sum_sum_distinct *original) + :Item_sum_num(thd, original), sum(0.0), tree(0) +{ + quick_group= 0; +} + + +Item_sum_sum_distinct::~Item_sum_sum_distinct() +{ + delete tree; +} + + +Item * +Item_sum_sum_distinct::copy_or_same(THD *thd) +{ + return new (&thd->mem_root) Item_sum_sum_distinct(thd, this); +} + +C_MODE_START + +static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2) +{ + return memcmp(key1, key2, *(uint *) arg); +} + +C_MODE_END + +bool Item_sum_sum_distinct::setup(THD *thd) +{ + SELECT_LEX *select_lex= thd->lex->current_select; + /* what does it mean??? */ + if (select_lex->linkage == GLOBAL_OPTIONS_TYPE) + return 1; + + DBUG_ASSERT(tree == 0); /* setup can not be called twice */ + + /* + Uniques handles all unique elements in a tree until they can't fit in. + Then thee tree is dumped to the temporary file. + See class Unique for details. + */ + null_value= maybe_null= 1; + /* + TODO: if underlying item result fits in 4 bytes we can take advantage + of it and have tree of long/ulong. It gives 10% performance boost + */ + static uint key_length= sizeof(double); + tree= new Unique(simple_raw_key_cmp, &key_length, key_length, + thd->variables.max_heap_table_size); + return tree == 0; +} + +void Item_sum_sum_distinct::clear() +{ + DBUG_ASSERT(tree); /* we always have a tree */ + null_value= 1; + tree->reset(); +} + +bool Item_sum_sum_distinct::add() +{ + /* args[0]->val() may reset args[0]->null_value */ + double val= args[0]->val(); + if (!args[0]->null_value) + { + DBUG_ASSERT(tree); + null_value= 0; + if (val) + return tree->unique_add(&val); + } + return 0; +} + +C_MODE_START + +static int sum_sum_distinct(void *element, element_count num_of_dups, + void *item_sum_sum_distinct) +{ + ((Item_sum_sum_distinct *) + (item_sum_sum_distinct))->add(* (double *) element); + return 0; +} + +C_MODE_END + +double Item_sum_sum_distinct::val() +{ + /* + We don't have a tree only if 'setup()' hasn't been called; + this is the case of sql_select.cc:return_zero_rows. 
+ */ + sum= 0.0; + if (tree) + tree->walk(sum_sum_distinct, (void *) this); + return sum; +} + +/* end of Item_sum_sum_distinct */ + Item *Item_sum_count::copy_or_same(THD* thd) { return new (&thd->mem_root) Item_sum_count(thd, this); @@ -1121,11 +1237,6 @@ String *Item_variance_field::val_str(String *str) #include "sql_select.h" -int simple_raw_key_cmp(void* arg, byte* key1, byte* key2) -{ - return memcmp(key1, key2, *(uint*) arg); -} - int simple_str_key_cmp(void* arg, byte* key1, byte* key2) { Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg; diff --git a/sql/item_sum.h b/sql/item_sum.h index 507a36194de..2dde6f73425 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -23,13 +23,15 @@ #include <my_tree.h> +class Item_arena; + class Item_sum :public Item_result_field { public: enum Sumfunctype - { COUNT_FUNC,COUNT_DISTINCT_FUNC,SUM_FUNC,AVG_FUNC,MIN_FUNC, - MAX_FUNC, UNIQUE_USERS_FUNC,STD_FUNC,VARIANCE_FUNC,SUM_BIT_FUNC, - UDF_SUM_FUNC, GROUP_CONCAT_FUNC + { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, + MIN_FUNC, MAX_FUNC, UNIQUE_USERS_FUNC, STD_FUNC, VARIANCE_FUNC, + SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC }; Item **args, *tmp_args[2]; @@ -93,7 +95,7 @@ public: virtual void make_unique() {} Item *get_tmp_table_item(THD *thd); bool save_args_for_prepared_statements(THD *); - bool save_args(Statement* stmt); + bool save_args(Item_arena* stmt); bool walk (Item_processor processor, byte *argument); }; @@ -150,6 +152,39 @@ class Item_sum_sum :public Item_sum_num }; +/* + Item_sum_sum_distinct - SELECT SUM(DISTINCT expr) FROM ... + support. See also: MySQL manual, chapter 'Adding New Functions To MySQL' + and comments in item_sum.cc. +*/ + +class Unique; + +class Item_sum_sum_distinct :public Item_sum_num +{ + double sum; + Unique *tree; +private: + Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct *item); +public: + Item_sum_sum_distinct(Item *item_par); + ~Item_sum_sum_distinct(); + + bool setup(THD *thd); + void clear(); + bool add(); + double val(); + + inline void add(double val) { sum+= val; } + enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; } + void reset_field() {} // not used + void update_field() {} // not used + const char *func_name() const { return "sum_distinct"; } + Item *copy_or_same(THD* thd); + virtual void no_rows_in_result() {} +}; + + class Item_sum_count :public Item_sum_int { longlong count; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 786bcf434ed..3baf55abc0c 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -986,9 +986,15 @@ static bool get_interval_value(Item *args,interval_type int_type, case INTERVAL_YEAR: interval->year= (ulong) value; break; + case INTERVAL_QUARTER: + interval->month=value*3; + break; case INTERVAL_MONTH: interval->month= (ulong) value; break; + case INTERVAL_WEEK: + interval->day=value*7; + break; case INTERVAL_DAY: interval->day= (ulong) value; break; @@ -1750,6 +1756,7 @@ bool Item_date_add_interval::get_date(TIME *ltime, uint fuzzy_date) break; } case INTERVAL_DAY: + case INTERVAL_WEEK: period= (calc_daynr(ltime->year,ltime->month,ltime->day) + sign * (long) interval.day); /* Daynumber from year 0 to 9999-12-31 */ @@ -1766,6 +1773,7 @@ bool Item_date_add_interval::get_date(TIME *ltime, uint fuzzy_date) ltime->day=28; // Was leap-year break; case INTERVAL_YEAR_MONTH: + case INTERVAL_QUARTER: case INTERVAL_MONTH: period= (ltime->year*12 + sign * (long) interval.year*12 + ltime->month-1 + sign * (long) interval.month); @@ -1829,12 +1837,13 @@ 
longlong Item_date_add_interval::val_int() static const char *interval_names[]= { - "year", "month", "day", "hour", "minute", - "second", "microsecond", "year_month", - "day_hour", "day_minute", "day_second", - "hour_minute", "hour_second", "minute_second", - "day_microsecond", "hour_microsecond", - "minute_microsecond", "second_microsecond" + "year", "quarter", "month", "day", "hour", + "minute", "week", "second", "microsecond", + "year_month", "day_hour", "day_minute", + "day_second", "hour_minute", "hour_second", + "minute_second", "day_microsecond", + "hour_microsecond", "minute_microsecond", + "second_microsecond" }; void Item_date_add_interval::print(String *str) @@ -1865,7 +1874,9 @@ void Item_extract::fix_length_and_dec() switch (int_type) { case INTERVAL_YEAR: max_length=4; date_value=1; break; case INTERVAL_YEAR_MONTH: max_length=6; date_value=1; break; + case INTERVAL_QUARTER: max_length=2; date_value=1; break; case INTERVAL_MONTH: max_length=2; date_value=1; break; + case INTERVAL_WEEK: max_length=2; date_value=1; break; case INTERVAL_DAY: max_length=2; date_value=1; break; case INTERVAL_DAY_HOUR: max_length=9; date_value=0; break; case INTERVAL_DAY_MINUTE: max_length=11; date_value=0; break; @@ -1889,6 +1900,8 @@ longlong Item_extract::val_int() { DBUG_ASSERT(fixed == 1); TIME ltime; + uint year; + ulong week_format; long neg; if (date_value) { @@ -1910,7 +1923,13 @@ longlong Item_extract::val_int() switch (int_type) { case INTERVAL_YEAR: return ltime.year; case INTERVAL_YEAR_MONTH: return ltime.year*100L+ltime.month; + case INTERVAL_QUARTER: return ltime.month/3 + 1; case INTERVAL_MONTH: return ltime.month; + case INTERVAL_WEEK: + { + week_format= current_thd->variables.default_week_format; + return calc_week(<ime, week_mode(week_format), &year); + } case INTERVAL_DAY: return ltime.day; case INTERVAL_DAY_HOUR: return (long) (ltime.day*100L+ltime.hour)*neg; case INTERVAL_DAY_MINUTE: return (long) (ltime.day*10000L+ @@ -2301,6 +2320,79 @@ void Item_func_add_time::print(String *str) /* + SYNOPSIS + calc_time_diff() + l_time1 TIME/DATE/DATETIME value + l_time2 TIME/DATE/DATETIME value + l_sign Can be 1 (operation of addition) + or -1 (substraction) + seconds_out Returns count of seconds bitween + l_time1 and l_time2 + microseconds_out Returns count of microseconds bitween + l_time1 and l_time2. + + DESCRIPTION + Calculates difference in seconds(seconds_out) + and microseconds(microseconds_out) + bitween two TIME/DATE/DATETIME values. + + RETURN VALUES + Rertuns sign of difference. + 1 means negative result + 0 means positive result + +*/ + +bool calc_time_diff(TIME *l_time1,TIME *l_time2, int l_sign, + longlong *seconds_out, long *microseconds_out) +{ + long days; + bool neg; + longlong seconds= *seconds_out; + long microseconds= *microseconds_out; + + /* + We suppose that if first argument is MYSQL_TIMESTAMP_TIME + the second argument should be TIMESTAMP_TIME also. + We should check it before calc_time_diff call. 
+ */ + if (l_time1->time_type == MYSQL_TIMESTAMP_TIME) // Time value + days= l_time1->day - l_sign*l_time2->day; + else // DateTime value + days= (calc_daynr((uint) l_time1->year, + (uint) l_time1->month, + (uint) l_time1->day) - + l_sign*calc_daynr((uint) l_time2->year, + (uint) l_time2->month, + (uint) l_time2->day)); + + microseconds= l_time1->second_part - l_sign*l_time2->second_part; + seconds= ((longlong) days*86400L + l_time1->hour*3600L + + l_time1->minute*60L + l_time1->second + microseconds/1000000L - + (longlong)l_sign*(l_time2->hour*3600L+l_time2->minute*60L+l_time2->second)); + + neg= 0; + if (seconds < 0) + { + seconds= -seconds; + neg= 1; + } + else if (seconds == 0 && microseconds < 0) + { + microseconds= -microseconds; + neg= 1; + } + if (microseconds < 0) + { + microseconds+= 1000000L; + seconds--; + } + *seconds_out= seconds; + *microseconds_out= microseconds; + return neg; +} + +/* TIMEDIFF(t,s) is a time function that calculates the time value between a start and end time. @@ -2313,7 +2405,6 @@ String *Item_func_timediff::val_str(String *str) DBUG_ASSERT(fixed == 1); longlong seconds; long microseconds; - long days; int l_sign= 1; TIME l_time1 ,l_time2, l_time3; @@ -2326,40 +2417,16 @@ String *Item_func_timediff::val_str(String *str) if (l_time1.neg != l_time2.neg) l_sign= -l_sign; - if (l_time1.time_type == MYSQL_TIMESTAMP_TIME) // Time value - days= l_time1.day - l_sign*l_time2.day; - else // DateTime value - days= (calc_daynr((uint) l_time1.year, - (uint) l_time1.month, - (uint) l_time1.day) - - l_sign*calc_daynr((uint) l_time2.year, - (uint) l_time2.month, - (uint) l_time2.day)); - - microseconds= l_time1.second_part - l_sign*l_time2.second_part; - seconds= ((longlong) days*86400L + l_time1.hour*3600L + - l_time1.minute*60L + l_time1.second + microseconds/1000000L - - (longlong)l_sign*(l_time2.hour*3600L+l_time2.minute*60L+ - l_time2.second)); + l_time3.neg= calc_time_diff(&l_time1,&l_time2, l_sign, + &seconds, µseconds); - l_time3.neg= 0; - if (seconds < 0) - { - seconds= -seconds; - l_time3.neg= 1; - } - else if (seconds == 0 && microseconds < 0) - { - microseconds= -microseconds; - l_time3.neg= 1; - } - if (microseconds < 0) - { - microseconds+= 1000000L; - seconds--; - } + /* + For MYSQL_TIMESTAMP_TIME only: + If both argumets are negative values and diff between them + is negative we need to swap sign as result should be positive. + */ if ((l_time2.neg == l_time1.neg) && l_time1.neg) - l_time3.neg= l_time3.neg ? 
0 : 1; + l_time3.neg= 1-l_time3.neg; // Swap sign of result calc_time_from_sec(&l_time3, (long) seconds, microseconds); @@ -2428,6 +2495,166 @@ longlong Item_func_microsecond::val_int() } +longlong Item_func_timestamp_diff::val_int() +{ + TIME ltime1, ltime2; + longlong seconds; + long microseconds; + long months= 0; + int neg= 1; + + null_value= 0; + if (args[0]->get_date(<ime1, 0) || + args[1]->get_date(<ime2, 0)) + goto null_date; + + if (calc_time_diff(<ime2,<ime1, 1, + &seconds, µseconds)) + neg= -1; + + if (int_type == INTERVAL_YEAR || + int_type == INTERVAL_QUARTER || + int_type == INTERVAL_MONTH) + { + uint year; + uint year_beg, year_end, month_beg, month_end; + uint diff_days= (uint) (seconds/86400L); + uint diff_months= 0; + uint diff_years= 0; + if (neg == -1) + { + year_beg= ltime2.year; + year_end= ltime1.year; + month_beg= ltime2.month; + month_end= ltime1.month; + } + else + { + year_beg= ltime1.year; + year_end= ltime2.year; + month_beg= ltime1.month; + month_end= ltime2.month; + } + /* calc years*/ + for (year= year_beg;year < year_end; year++) + { + uint days=calc_days_in_year(year); + if (days > diff_days) + break; + diff_days-= days; + diff_years++; + } + + /* calc months; Current year is in the 'year' variable */ + month_beg--; /* Change months to be 0-11 for easier calculation */ + month_end--; + + months= 12*diff_years; + while (month_beg != month_end) + { + uint m_days= (uint) days_in_month[month_beg]; + if (month_beg == 1) + { + /* This is only calculated once so there is no reason to cache it*/ + uint leap= (uint) ((year & 3) == 0 && (year%100 || + (year%400 == 0 && year))); + m_days+= leap; + } + if (m_days > diff_days) + break; + diff_days-= m_days; + months++; + if (month_beg++ == 11) /* if we wrap to next year */ + { + month_beg= 0; + year++; + } + } + if (neg == -1) + months= -months; + } + + switch (int_type) { + case INTERVAL_YEAR: + return months/12; + case INTERVAL_QUARTER: + return months/3; + case INTERVAL_MONTH: + return months; + case INTERVAL_WEEK: + return seconds/86400L/7L*neg; + case INTERVAL_DAY: + return seconds/86400L*neg; + case INTERVAL_HOUR: + return seconds/3600L*neg; + case INTERVAL_MINUTE: + return seconds/60L*neg; + case INTERVAL_SECOND: + return seconds*neg; + case INTERVAL_MICROSECOND: + { + longlong max_sec= LONGLONG_MAX/1000000; + if (max_sec > seconds || + max_sec == seconds && LONGLONG_MAX%1000000 >= microseconds) + return (longlong) (seconds*1000000L+microseconds)*neg; + goto null_date; + } + default: + break; + } + +null_date: + null_value=1; + return 0; +} + + +void Item_func_timestamp_diff::print(String *str) +{ + str->append(func_name()); + str->append('('); + + switch (int_type) { + case INTERVAL_YEAR: + str->append("YEAR"); + break; + case INTERVAL_QUARTER: + str->append("QUARTER"); + break; + case INTERVAL_MONTH: + str->append("MONTH"); + break; + case INTERVAL_WEEK: + str->append("WEEK"); + break; + case INTERVAL_DAY: + str->append("DAY"); + break; + case INTERVAL_HOUR: + str->append("HOUR"); + break; + case INTERVAL_MINUTE: + str->append("MINUTE"); + break; + case INTERVAL_SECOND: + str->append("SECOND"); + break; + case INTERVAL_MICROSECOND: + str->append("SECOND_FRAC"); + break; + default: + break; + } + + for (uint i=0 ; i < 2 ; i++) + { + str->append(','); + args[i]->print(str); + } + str->append(')'); +} + + String *Item_func_get_format::val_str(String *str) { DBUG_ASSERT(fixed == 1); diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index a7ff2924786..fe72ac6193b 100644 --- a/sql/item_timefunc.h +++ 
b/sql/item_timefunc.h @@ -579,11 +579,11 @@ public: enum interval_type { - INTERVAL_YEAR, INTERVAL_MONTH, INTERVAL_DAY, INTERVAL_HOUR, INTERVAL_MINUTE, - INTERVAL_SECOND, INTERVAL_MICROSECOND ,INTERVAL_YEAR_MONTH, - INTERVAL_DAY_HOUR, INTERVAL_DAY_MINUTE, INTERVAL_DAY_SECOND, - INTERVAL_HOUR_MINUTE, INTERVAL_HOUR_SECOND, INTERVAL_MINUTE_SECOND, - INTERVAL_DAY_MICROSECOND, INTERVAL_HOUR_MICROSECOND, + INTERVAL_YEAR, INTERVAL_QUARTER, INTERVAL_MONTH, INTERVAL_DAY, INTERVAL_HOUR, + INTERVAL_MINUTE, INTERVAL_WEEK, INTERVAL_SECOND, INTERVAL_MICROSECOND , + INTERVAL_YEAR_MONTH, INTERVAL_DAY_HOUR, INTERVAL_DAY_MINUTE, + INTERVAL_DAY_SECOND, INTERVAL_HOUR_MINUTE, INTERVAL_HOUR_SECOND, + INTERVAL_MINUTE_SECOND, INTERVAL_DAY_MICROSECOND, INTERVAL_HOUR_MICROSECOND, INTERVAL_MINUTE_MICROSECOND, INTERVAL_SECOND_MICROSECOND }; @@ -819,6 +819,23 @@ public: }; +class Item_func_timestamp_diff :public Item_int_func +{ + const interval_type int_type; +public: + Item_func_timestamp_diff(Item *a,Item *b,interval_type type_arg) + :Item_int_func(a,b), int_type(type_arg) {} + const char *func_name() const { return "timestamp_diff"; } + longlong val_int(); + void fix_length_and_dec() + { + decimals=0; + maybe_null=1; + } + void print(String *str); +}; + + enum date_time_format { USA_FORMAT, JIS_FORMAT, ISO_FORMAT, EUR_FORMAT, INTERNAL_FORMAT diff --git a/sql/lex.h b/sql/lex.h index 218a1762a5c..24a6431adf5 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -72,6 +72,7 @@ static SYMBOL symbols[] = { { "AS", SYM(AS)}, { "ASC", SYM(ASC)}, { "ASCII", SYM(ASCII_SYM)}, + { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, { "AVG", SYM(AVG_SYM)}, { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, @@ -93,6 +94,7 @@ static SYMBOL symbols[] = { { "BY", SYM(BY)}, { "BYTE", SYM(BYTE_SYM)}, { "CACHE", SYM(CACHE_SYM)}, + { "CALL", SYM(CALL_SYM)}, { "CASCADE", SYM(CASCADE)}, { "CASE", SYM(CASE_SYM)}, { "CHANGE", SYM(CHANGE)}, @@ -114,7 +116,10 @@ static SYMBOL symbols[] = { { "COMMITTED", SYM(COMMITTED_SYM)}, { "COMPRESSED", SYM(COMPRESSED_SYM)}, { "CONCURRENT", SYM(CONCURRENT)}, + { "CONDITION", SYM(CONDITION_SYM)}, + { "CONNECTION", SYM(CONNECTION_SYM)}, { "CONSTRAINT", SYM(CONSTRAINT)}, + { "CONTINUE", SYM(CONTINUE_SYM)}, { "CONVERT", SYM(CONVERT_SYM)}, { "CREATE", SYM(CREATE)}, { "CROSS", SYM(CROSS)}, @@ -123,6 +128,7 @@ static SYMBOL symbols[] = { { "CURRENT_TIME", SYM(CURTIME)}, { "CURRENT_TIMESTAMP", SYM(NOW_SYM)}, { "CURRENT_USER", SYM(CURRENT_USER)}, + { "CURSOR", SYM(CURSOR_SYM)}, { "DATA", SYM(DATA_SYM)}, { "DATABASE", SYM(DATABASE)}, { "DATABASES", SYM(DATABASES)}, @@ -136,13 +142,16 @@ static SYMBOL symbols[] = { { "DEALLOCATE", SYM(DEALLOCATE_SYM)}, { "DEC", SYM(DECIMAL_SYM)}, { "DECIMAL", SYM(DECIMAL_SYM)}, + { "DECLARE", SYM(DECLARE_SYM)}, { "DEFAULT", SYM(DEFAULT)}, + { "DEFINER", SYM(DEFINER_SYM)}, { "DELAYED", SYM(DELAYED_SYM)}, { "DELAY_KEY_WRITE", SYM(DELAY_KEY_WRITE_SYM)}, { "DELETE", SYM(DELETE_SYM)}, { "DESC", SYM(DESC)}, { "DESCRIBE", SYM(DESCRIBE)}, { "DES_KEY_FILE", SYM(DES_KEY_FILE)}, + { "DETERMINISTIC", SYM(DETERMINISTIC_SYM)}, { "DIRECTORY", SYM(DIRECTORY_SYM)}, { "DISABLE", SYM(DISABLE_SYM)}, { "DISCARD", SYM(DISCARD)}, @@ -157,6 +166,7 @@ static SYMBOL symbols[] = { { "DUPLICATE", SYM(DUPLICATE_SYM)}, { "DYNAMIC", SYM(DYNAMIC_SYM)}, { "ELSE", SYM(ELSE)}, + { "ELSEIF", SYM(ELSEIF_SYM)}, { "ENABLE", SYM(ENABLE_SYM)}, { "ENCLOSED", SYM(ENCLOSED)}, { "END", SYM(END)}, @@ -169,11 +179,13 @@ static SYMBOL symbols[] = { { "EVENTS", SYM(EVENTS_SYM)}, { "EXECUTE", SYM(EXECUTE_SYM)}, { "EXISTS", 
SYM(EXISTS)}, + { "EXIT", SYM(EXIT_SYM)}, { "EXPANSION", SYM(EXPANSION_SYM)}, { "EXPLAIN", SYM(DESCRIBE)}, { "EXTENDED", SYM(EXTENDED_SYM)}, { "FALSE", SYM(FALSE_SYM)}, { "FAST", SYM(FAST_SYM)}, + { "FETCH", SYM(FETCH_SYM)}, { "FIELDS", SYM(COLUMNS)}, { "FILE", SYM(FILE_SYM)}, { "FIRST", SYM(FIRST_SYM)}, @@ -185,10 +197,12 @@ static SYMBOL symbols[] = { { "FOR", SYM(FOR_SYM)}, { "FORCE", SYM(FORCE_SYM)}, { "FOREIGN", SYM(FOREIGN)}, + { "FOUND", SYM(FOUND_SYM)}, + { "FRAC_SECOND", SYM(FRAC_SECOND_SYM)}, { "FROM", SYM(FROM)}, { "FULL", SYM(FULL)}, { "FULLTEXT", SYM(FULLTEXT_SYM)}, - { "FUNCTION", SYM(UDF_SYM)}, + { "FUNCTION", SYM(FUNCTION_SYM)}, { "GEOMETRY", SYM(GEOMETRY_SYM)}, { "GEOMETRYCOLLECTION",SYM(GEOMETRYCOLLECTION)}, { "GET_FORMAT", SYM(GET_FORMAT)}, @@ -217,6 +231,8 @@ static SYMBOL symbols[] = { { "INNER", SYM(INNER_SYM)}, { "INNOBASE", SYM(INNOBASE_SYM)}, { "INNODB", SYM(INNOBASE_SYM)}, + { "INOUT", SYM(INOUT_SYM)}, + { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, { "INSERT", SYM(INSERT)}, { "INSERT_METHOD", SYM(INSERT_METHOD)}, { "INT", SYM(INT_SYM)}, @@ -232,12 +248,16 @@ static SYMBOL symbols[] = { { "IS", SYM(IS)}, { "ISOLATION", SYM(ISOLATION)}, { "ISSUER", SYM(ISSUER_SYM)}, + { "ITERATE", SYM(ITERATE_SYM)}, + { "INVOKER", SYM(INVOKER_SYM)}, { "JOIN", SYM(JOIN_SYM)}, { "KEY", SYM(KEY_SYM)}, { "KEYS", SYM(KEYS)}, { "KILL", SYM(KILL_SYM)}, + { "LANGUAGE", SYM(LANGUAGE_SYM)}, { "LAST", SYM(LAST_SYM)}, { "LEADING", SYM(LEADING)}, + { "LEAVE", SYM(LEAVE_SYM)}, { "LEAVES", SYM(LEAVES)}, { "LEFT", SYM(LEFT)}, { "LEVEL", SYM(LEVEL_SYM)}, @@ -255,6 +275,7 @@ static SYMBOL symbols[] = { { "LONG", SYM(LONG_SYM)}, { "LONGBLOB", SYM(LONGBLOB)}, { "LONGTEXT", SYM(LONGTEXT)}, + { "LOOP", SYM(LOOP_SYM)}, { "LOW_PRIORITY", SYM(LOW_PRIORITY)}, { "MASTER", SYM(MASTER_SYM)}, { "MASTER_CONNECT_RETRY", SYM(MASTER_CONNECT_RETRY_SYM)}, @@ -293,6 +314,7 @@ static SYMBOL symbols[] = { { "MULTILINESTRING", SYM(MULTILINESTRING)}, { "MULTIPOINT", SYM(MULTIPOINT)}, { "MULTIPOLYGON", SYM(MULTIPOLYGON)}, + { "NAME", SYM(NAME_SYM)}, { "NAMES", SYM(NAMES_SYM)}, { "NATIONAL", SYM(NATIONAL_SYM)}, { "NATURAL", SYM(NATURAL)}, @@ -318,6 +340,7 @@ static SYMBOL symbols[] = { { "OPTIONALLY", SYM(OPTIONALLY)}, { "OR", SYM(OR_SYM)}, { "ORDER", SYM(ORDER_SYM)}, + { "OUT", SYM(OUT_SYM)}, { "OUTER", SYM(OUTER)}, { "OUTFILE", SYM(OUTFILE)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, @@ -334,6 +357,7 @@ static SYMBOL symbols[] = { { "PROCESS" , SYM(PROCESS)}, { "PROCESSLIST", SYM(PROCESSLIST_SYM)}, { "PURGE", SYM(PURGE)}, + { "QUARTER", SYM(QUARTER_SYM)}, { "QUERY", SYM(QUERY_SYM)}, { "QUICK", SYM(QUICK)}, { "RAID0", SYM(RAID_0_SYM)}, @@ -353,11 +377,13 @@ static SYMBOL symbols[] = { { "REPEATABLE", SYM(REPEATABLE_SYM)}, { "REPLACE", SYM(REPLACE)}, { "REPLICATION", SYM(REPLICATION)}, + { "REPEAT", SYM(REPEAT_SYM)}, { "REQUIRE", SYM(REQUIRE_SYM)}, { "RESET", SYM(RESET_SYM)}, { "RESTORE", SYM(RESTORE_SYM)}, { "RESTRICT", SYM(RESTRICT)}, - { "RETURNS", SYM(UDF_RETURNS_SYM)}, + { "RETURN", SYM(RETURN_SYM)}, + { "RETURNS", SYM(RETURNS_SYM)}, { "REVOKE", SYM(REVOKE)}, { "RIGHT", SYM(RIGHT)}, { "RLIKE", SYM(REGEXP)}, /* Like in mSQL2 */ @@ -370,7 +396,9 @@ static SYMBOL symbols[] = { { "SAVEPOINT", SYM(SAVEPOINT_SYM)}, { "SECOND", SYM(SECOND_SYM)}, { "SECOND_MICROSECOND", SYM(SECOND_MICROSECOND_SYM)}, + { "SECURITY", SYM(SECURITY_SYM)}, { "SELECT", SYM(SELECT_SYM)}, + { "SENSITIVE", SYM(SENSITIVE_SYM)}, { "SEPARATOR", SYM(SEPARATOR_SYM)}, { "SERIAL", SYM(SERIAL_SYM)}, { "SERIALIZABLE", SYM(SERIALIZABLE_SYM)}, @@ -387,6 +415,11 @@ static 
SYMBOL symbols[] = { { "SONAME", SYM(UDF_SONAME_SYM)}, { "SOUNDS", SYM(SOUNDS_SYM)}, { "SPATIAL", SYM(SPATIAL_SYM)}, + { "SPECIFIC", SYM(SPECIFIC_SYM)}, + { "SQL", SYM(SQL_SYM)}, + { "SQLEXCEPTION", SYM(SQLEXCEPTION_SYM)}, + { "SQLSTATE", SYM(SQLSTATE_SYM)}, + { "SQLWARNING", SYM(SQLWARNING_SYM)}, { "SQL_BIG_RESULT", SYM(SQL_BIG_RESULT)}, { "SQL_BUFFER_RESULT", SYM(SQL_BUFFER_RESULT)}, { "SQL_CACHE", SYM(SQL_CACHE_SYM)}, @@ -394,6 +427,15 @@ static SYMBOL symbols[] = { { "SQL_NO_CACHE", SYM(SQL_NO_CACHE_SYM)}, { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)}, { "SQL_THREAD", SYM(SQL_THREAD)}, + { "SQL_TSI_FRAC_SECOND", SYM(FRAC_SECOND_SYM)}, + { "SQL_TSI_SECOND", SYM(SECOND_SYM)}, + { "SQL_TSI_MINUTE", SYM(MINUTE_SYM)}, + { "SQL_TSI_HOUR", SYM(HOUR_SYM)}, + { "SQL_TSI_DAY", SYM(DAY_SYM)}, + { "SQL_TSI_WEEK", SYM(WEEK_SYM)}, + { "SQL_TSI_MONTH", SYM(MONTH_SYM)}, + { "SQL_TSI_QUARTER", SYM(QUARTER_SYM)}, + { "SQL_TSI_YEAR", SYM(YEAR_SYM)}, { "SSL", SYM(SSL_SYM)}, { "START", SYM(START_SYM)}, { "STARTING", SYM(STARTING)}, @@ -414,6 +456,8 @@ static SYMBOL symbols[] = { { "THEN", SYM(THEN_SYM)}, { "TIME", SYM(TIME_SYM)}, { "TIMESTAMP", SYM(TIMESTAMP)}, + { "TIMESTAMPADD", SYM(TIMESTAMP_ADD)}, + { "TIMESTAMPDIFF", SYM(TIMESTAMP_DIFF)}, { "TINYBLOB", SYM(TINYBLOB)}, { "TINYINT", SYM(TINYINT)}, { "TINYTEXT", SYM(TINYTEXT)}, @@ -425,6 +469,7 @@ static SYMBOL symbols[] = { { "TYPE", SYM(TYPE_SYM)}, { "TYPES", SYM(TYPES_SYM)}, { "UNCOMMITTED", SYM(UNCOMMITTED_SYM)}, + { "UNDO", SYM(UNDO_SYM)}, { "UNICODE", SYM(UNICODE_SYM)}, { "UNION", SYM(UNION_SYM)}, { "UNIQUE", SYM(UNIQUE_SYM)}, @@ -449,8 +494,10 @@ static SYMBOL symbols[] = { { "VARIABLES", SYM(VARIABLES)}, { "VARYING", SYM(VARYING)}, { "WARNINGS", SYM(WARNINGS)}, + { "WEEK", SYM(WEEK_SYM)}, { "WHEN", SYM(WHEN_SYM)}, { "WHERE", SYM(WHERE)}, + { "WHILE", SYM(WHILE_SYM)}, { "WITH", SYM(WITH)}, { "WORK", SYM(WORK_SYM)}, { "WRITE", SYM(WRITE_SYM)}, @@ -634,14 +681,13 @@ static SYMBOL sql_functions[] = { { "POSITION", SYM(POSITION_SYM)}, { "POW", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_pow)}, { "POWER", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_pow)}, - { "QUARTER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_quarter)}, { "QUOTE", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_quote)}, { "RADIANS", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_radians)}, { "RAND", SYM(RAND)}, { "RELEASE_LOCK", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_release_lock)}, - { "REPEAT", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_repeat)}, { "REVERSE", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_reverse)}, { "ROUND", SYM(ROUND)}, + { "ROW_COUNT", SYM(ROW_COUNT_SYM)}, { "RPAD", F_SYM(FUNC_ARG3),0,CREATE_FUNC(create_func_rpad)}, { "RTRIM", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_rtrim)}, { "SEC_TO_TIME", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_sec_to_time)}, @@ -684,7 +730,6 @@ static SYMBOL sql_functions[] = { { "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)}, { "VARIANCE", SYM(VARIANCE_SYM)}, { "VERSION", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_version)}, - { "WEEK", SYM(WEEK_SYM)}, { "WEEKDAY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekday)}, { "WEEKOFYEAR", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekofyear)}, { "WITHIN", F_SYM(FUNC_ARG2),0,CREATE_FUNC_GEOM(create_func_within)}, diff --git a/sql/lock.cc b/sql/lock.cc index ac689495ca3..723469b255e 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -152,7 +152,7 @@ retry: thd->proc_info=0; if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); if (sql_lock) { 
mysql_unlock_tables(thd,sql_lock); diff --git a/sql/log.cc b/sql/log.cc index 124439ae9eb..225aaae0549 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -31,7 +31,7 @@ #include <stdarg.h> #include <m_ctype.h> // For test_if_number -MYSQL_LOG mysql_log,mysql_update_log,mysql_slow_log,mysql_bin_log; +MYSQL_LOG mysql_log, mysql_slow_log, mysql_bin_log; ulong sync_binlog_counter= 0; static bool test_if_number(const char *str, @@ -86,7 +86,8 @@ static int find_uniq_filename(char *name) MYSQL_LOG::MYSQL_LOG() :bytes_written(0), last_time(0), query_start(0), name(0), file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0), - need_start_event(1) + need_start_event(1), description_event_for_exec(0), + description_event_for_queue(0) { /* We don't want to initialize LOCK_Log here as such initialization depends on @@ -114,6 +115,8 @@ void MYSQL_LOG::cleanup() { inited= 0; close(LOG_CLOSE_INDEX); + delete description_event_for_queue; + delete description_event_for_exec; (void) pthread_mutex_destroy(&LOCK_log); (void) pthread_mutex_destroy(&LOCK_index); (void) pthread_cond_destroy(&update_cond); @@ -183,7 +186,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, const char *new_name, const char *index_file_name_arg, enum cache_type io_cache_type_arg, bool no_auto_events_arg, - ulong max_size_arg) + ulong max_size_arg, + bool null_created_arg) { char buff[512]; File file= -1, index_file_nr= -1; @@ -281,8 +285,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC, BIN_LOG_HEADER_SIZE)) goto err; - bytes_written += BIN_LOG_HEADER_SIZE; - write_file_name_to_index_file=1; + bytes_written+= BIN_LOG_HEADER_SIZE; + write_file_name_to_index_file= 1; } if (!my_b_inited(&index_file)) @@ -312,10 +316,51 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, } if (need_start_event && !no_auto_events) { - need_start_event=0; - Start_log_event s; + /* + In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event + even if this is not the very first binlog. + */ + Format_description_log_event s(BINLOG_VERSION); + if (!s.is_valid()) + goto err; s.set_log_pos(this); - s.write(&log_file); + if (null_created_arg) + s.created= 0; + if (s.write(&log_file)) + goto err; + bytes_written+= s.get_event_len(); + } + if (description_event_for_queue && + description_event_for_queue->binlog_version>=4) + { + /* + This is a relay log written to by the I/O slave thread. + Write the event so that others can later know the format of this relay + log. + Note that this event is very close to the original event from the + master (it has binlog version of the master, event types of the + master), so this is suitable to parse the next relay log's event. It + has been produced by + Format_description_log_event::Format_description_log_event(char* + buf,). + Why don't we want to write the description_event_for_queue if this event + is for format<4 (3.23 or 4.x): this is because in that case, the + description_event_for_queue describes the data received from the master, + but not the data written to the relay log (*conversion*), which is in + format 4 (slave's). + */ + /* + Set 'created' to 0, so that in next relay logs this event does not trigger + cleaning actions on the slave in + Format_description_log_event::exec_event(). + Set 'log_pos' to 0 to show that it's an artificial event. 
+ */ + description_event_for_queue->created= 0; + description_event_for_queue->log_pos= 0; + + if (description_event_for_queue->write(&log_file)) + goto err; + bytes_written+= description_event_for_queue->get_event_len(); } if (flush_io_cache(&log_file) || my_sync(log_file.file, MYF(MY_WME))) @@ -611,7 +656,7 @@ bool MYSQL_LOG::reset_logs(THD* thd) if (!thd->slave_thread) need_start_event=1; open(save_name, save_log_type, 0, index_file_name, - io_cache_type, no_auto_events, max_size); + io_cache_type, no_auto_events, max_size, 0); my_free((gptr) save_name, MYF(0)); err: @@ -1001,8 +1046,17 @@ void MYSQL_LOG::new_file(bool need_lock) Note that at this point, log_type != LOG_CLOSED (important for is_open()). */ + /* + new_file() is only used for rotation (in FLUSH LOGS or because size > + max_binlog_size or max_relay_log_size). + If this is a binary log, the Format_description_log_event at the beginning of + the new file should have created=0 (to distinguish with the + Format_description_log_event written at server startup, which should + trigger temp tables deletion on slaves. + */ + open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type, - no_auto_events, max_size); + no_auto_events, max_size, 1); if (this == &mysql_bin_log) report_pos_in_innodb(); my_free(old_name,MYF(0)); @@ -1086,7 +1140,7 @@ err: /* Write to normal (not rotable) log - This is the format for the 'normal', 'slow' and 'update' logs. + This is the format for the 'normal' log. */ bool MYSQL_LOG::write(THD *thd,enum enum_server_command command, @@ -1177,11 +1231,15 @@ bool MYSQL_LOG::write(THD *thd,enum enum_server_command command, inline bool sync_binlog(IO_CACHE *cache) { - return (sync_binlog_period && - (sync_binlog_period == ++sync_binlog_counter) && - (sync_binlog_counter= 0, my_sync(cache->file, MYF(MY_WME)))); + if (sync_binlog_period == ++sync_binlog_counter && sync_binlog_period) + { + sync_binlog_counter= 0; + return my_sync(cache->file, MYF(MY_WME)); + } + return 0; } + /* Write an event to the binary log */ @@ -1349,6 +1407,12 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", } #endif +#if MYSQL_VERSION_ID < 50000 + /* + In 5.0 this is not needed anymore as we store the value of + FOREIGN_KEY_CHECKS in a binary way in the Query event's header. + The code below was enabled in 4.0 and 4.1. + */ /* If the user has set FOREIGN_KEY_CHECKS=0 we wrap every SQL command in the binlog inside: @@ -1364,6 +1428,7 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", if (e.write(file)) goto err; } +#endif } /* Write the SQL command */ @@ -1374,6 +1439,7 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", /* Write log events to reset the 'run environment' of the SQL command */ +#if MYSQL_VERSION_ID < 50000 if (thd && thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { Query_log_event e(thd, "SET FOREIGN_KEY_CHECKS=1", 24, 0); @@ -1381,6 +1447,7 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", if (e.write(file)) goto err; } +#endif /* Tell for transactional table handlers up to which position in the @@ -1646,8 +1713,7 @@ err: /* - Write update log in a format suitable for incremental backup - This is also used by the slow query log. + Write to the slow query log. 
*/ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, @@ -1663,11 +1729,6 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, int tmp_errno=0; char buff[80],*end; end=buff; - if (!(thd->options & OPTION_UPDATE_LOG)) - { - VOID(pthread_mutex_unlock(&LOCK_log)); - return 0; - } if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT) || query_start_arg) { current_time=time(NULL); @@ -1834,6 +1895,7 @@ void MYSQL_LOG::close(uint exiting) Stop_log_event s; s.set_log_pos(this); s.write(&log_file); + bytes_written+= s.get_event_len(); signal_update(); } #endif /* HAVE_REPLICATION */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 315b0f670dd..3b69e1c3176 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -225,9 +225,25 @@ static char *str_to_hex(char *to, char *from, uint len) return p; // pointer to end 0 of 'to' } +/* + Prints a "session_var=value" string. Used by mysqlbinlog to print some SET + commands just before it prints a query. +*/ + +static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, + uint32 flags, const char* name, bool* need_comma) +{ + if (bits_changed & option) + { + if (*need_comma) + fprintf(file,", "); + fprintf(file,"%s=%d", name, (bool)(flags & option)); + *need_comma= 1; + } +} /************************************************************************** - Log_event methods + Log_event methods (= the parent class of all events) **************************************************************************/ /* @@ -237,7 +253,7 @@ static char *str_to_hex(char *to, char *from, uint len) const char* Log_event::get_type_str() { switch(get_type_code()) { - case START_EVENT: return "Start"; + case START_EVENT_V3: return "Start_v3"; case STOP_EVENT: return "Stop"; case QUERY_EVENT: return "Query"; case ROTATE_EVENT: return "Rotate"; @@ -251,6 +267,7 @@ const char* Log_event::get_type_str() case EXEC_LOAD_EVENT: return "Exec_load"; case RAND_EVENT: return "RAND"; case USER_VAR_EVENT: return "User var"; + case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; default: return "Unknown"; /* impossible */ } } @@ -294,24 +311,66 @@ Log_event::Log_event() Log_event::Log_event() */ -Log_event::Log_event(const char* buf, bool old_format) +Log_event::Log_event(const char* buf, + const Format_description_log_event* description_event) :temp_buf(0), cached_event_len(0), cache_stmt(0) { +#ifndef MYSQL_CLIENT + thd = 0; +#endif when = uint4korr(buf); server_id = uint4korr(buf + SERVER_ID_OFFSET); - if (old_format) + if (description_event->binlog_version==1) { - log_pos=0; - flags=0; + log_pos= 0; + flags= 0; + return; } - else + /* 4.0 or newer */ + log_pos= uint4korr(buf + LOG_POS_OFFSET); + /* + If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL + thread or a 4.0 master binlog read by the I/O thread), log_pos is the + beginning of the event: we transform it into the end of the event, which is + more useful. + But how do you know that the log is 4.0: you know it if description_event is + version 3 *and* you are not reading a Format_desc (remember that mysqlbinlog + starts by assuming that 5.0 logs are in 4.0 format, until it finds a + Format_desc). + */ + if (description_event->binlog_version==3 && + buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && + /* + If log_pos=0, don't change it. log_pos==0 is a marker to mean + "don't change rli->group_master_log_pos" (see + inc_group_relay_log_pos()). As it is unreal log_pos, adding the event + len's is nonsense. 
For example, a fake Rotate event should + not have its log_pos (which is 0) changed or it will modify + Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value of + (a non-zero offset which does not exist in the master's binlog, so which + will cause problems if the user uses this value in CHANGE MASTER). + */ + log_pos) + log_pos+= uint4korr(buf + EVENT_LEN_OFFSET); + flags= uint2korr(buf + FLAGS_OFFSET); + if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) || + (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT)) { - log_pos = uint4korr(buf + LOG_POS_OFFSET); - flags = uint2korr(buf + FLAGS_OFFSET); + /* + These events always have a header which stops here (i.e. their header is + FROZEN). + */ + /* + Initialization to zero of all other Log_event members as they're not + specified. Currently there are no such members; in the future there will + be an event UID (but Format_description and Rotate don't need this UID, as + they are not propagated through --log-slave-updates (remember the UID is + used to not play a query twice when you have two masters which are slaves + of a 3rd master). Then we are done. + */ + return; } -#ifndef MYSQL_CLIENT - thd = 0; -#endif + /* otherwise, go on with reading the header from buf (nothing now) */ } #ifndef MYSQL_CLIENT @@ -364,10 +423,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli) has already been updated. */ if ((thd->options & OPTION_BEGIN) && opt_using_transactions) - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); else { - rli->inc_group_relay_log_pos(get_event_len(),log_pos); + rli->inc_group_relay_log_pos(log_pos); flush_relay_log_info(rli); /* Note that Rotate_log_event::exec_event() does not call this function, @@ -429,7 +488,7 @@ void Log_event::init_show_field_list(List<Item>* field_list) field_list->push_back(new Item_empty_string("Event_type", 20)); field_list->push_back(new Item_return_int("Server_id", 10, MYSQL_TYPE_LONG)); - field_list->push_back(new Item_return_int("Orig_log_pos", 11, + field_list->push_back(new Item_return_int("End_log_pos", 11, MYSQL_TYPE_LONGLONG)); field_list->push_back(new Item_empty_string("Info", 20)); } @@ -452,6 +511,12 @@ int Log_event::write(IO_CACHE* file) int Log_event::write_header(IO_CACHE* file) { + /* + Header will be of size LOG_EVENT_HEADER_LEN for all events, except for + FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT, where it will be + LOG_EVENT_MINIMAL_HEADER_LEN (remember these 2 have a frozen header, because + we read them before knowing the format). + */ char buf[LOG_EVENT_HEADER_LEN]; char* pos = buf; int4store(pos, (ulong) when); // timestamp @@ -459,19 +524,45 @@ int Log_event::write_header(IO_CACHE* file) *pos++ = get_type_code(); // event type code int4store(pos, server_id); pos += 4; - long tmp=get_data_size() + LOG_EVENT_HEADER_LEN; + long tmp; // total length of the event + switch (get_type_code()) + { + case FORMAT_DESCRIPTION_EVENT: + case ROTATE_EVENT: + tmp= get_data_size() + LOG_EVENT_MINIMAL_HEADER_LEN; + break; + default: + tmp= get_data_size() + LOG_EVENT_HEADER_LEN; + break; + } int4store(pos, tmp); pos += 4; int4store(pos, log_pos); pos += 4; int2store(pos, flags); pos += 2; + switch (get_type_code()) + { + case FORMAT_DESCRIPTION_EVENT: + case ROTATE_EVENT: + break; + default: + /* + Other data to print in the header (nothing now); in that case increment + pos. 
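For orientation, here is a minimal standalone sketch of that fixed header layout, in the same field order as the int4store()/int2store() calls above; the struct and function names are invented for illustration, and the 19-byte size and little-endian encoding are assumptions taken from the description of LOG_EVENT_MINIMAL_HEADER_LEN elsewhere in this change, not code from the tree:

#include <stdint.h>

/* Illustrative mirror of the 19-byte common header: timestamp, event type
   code, server id, total event length, end-of-event position, flags. */
struct common_header_sketch
{
  uint32_t when;        /* seconds since epoch */
  uint8_t  type_code;   /* e.g. QUERY_EVENT, FORMAT_DESCRIPTION_EVENT */
  uint32_t server_id;
  uint32_t event_len;   /* total length, header included */
  uint32_t log_pos;     /* end position of the event in the log */
  uint16_t flags;
};

static uint32_t sketch_uint4(const unsigned char *p)
{
  return (uint32_t) p[0] | ((uint32_t) p[1] << 8) |
         ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
}

/* Assumes 'buf' holds at least 19 bytes laid out in the order written above. */
static struct common_header_sketch
sketch_parse_common_header(const unsigned char *buf)
{
  struct common_header_sketch h;
  h.when=      sketch_uint4(buf);
  h.type_code= buf[4];
  h.server_id= sketch_uint4(buf + 5);
  h.event_len= sketch_uint4(buf + 9);
  h.log_pos=   sketch_uint4(buf + 13);
  h.flags=     (uint16_t) (buf[17] | ((uint16_t) buf[18] << 8));
  return h;
}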
+ */ + break; + } return (my_b_safe_write(file, (byte*) buf, (uint) (pos - buf))); } /* Log_event::read_log_event() + + This needn't be format-tolerant, because we only read + LOG_EVENT_MINIMAL_HEADER_LEN (we just want to read the event's length). + */ #ifndef MYSQL_CLIENT @@ -480,7 +571,7 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, { ulong data_len; int result=0; - char buf[LOG_EVENT_HEADER_LEN]; + char buf[LOG_EVENT_MINIMAL_HEADER_LEN]; DBUG_ENTER("read_log_event"); if (log_lock) @@ -500,24 +591,25 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, goto end; } data_len= uint4korr(buf + EVENT_LEN_OFFSET); - if (data_len < LOG_EVENT_HEADER_LEN || + if (data_len < LOG_EVENT_MINIMAL_HEADER_LEN || data_len > current_thd->variables.max_allowed_packet) { DBUG_PRINT("error",("data_len: %ld", data_len)); - result= ((data_len < LOG_EVENT_HEADER_LEN) ? LOG_READ_BOGUS : + result= ((data_len < LOG_EVENT_MINIMAL_HEADER_LEN) ? LOG_READ_BOGUS : LOG_READ_TOO_LARGE); goto end; } packet->append(buf, sizeof(buf)); - data_len-= LOG_EVENT_HEADER_LEN; + data_len-= LOG_EVENT_MINIMAL_HEADER_LEN; if (data_len) { if (packet->append(file, data_len)) { /* - Here we should never hit EOF in a non-error condition. + Here if we hit EOF it's really an error: as data_len is >=0 + there's supposed to be more bytes available. EOF means we are reading the event partially, which should - never happen. + never happen: either we read badly or the binlog is truncated. */ result= file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO; /* Implicit goto end; */ @@ -545,24 +637,42 @@ end: Log_event::read_log_event() NOTE: - Allocates memory; The caller is responsible for clean-up + Allocates memory; The caller is responsible for clean-up. */ #ifndef MYSQL_CLIENT Log_event* Log_event::read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock, - bool old_format) + const Format_description_log_event *description_event) #else -Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format) +Log_event* Log_event::read_log_event(IO_CACHE* file, + const Format_description_log_event *description_event) #endif { - char head[LOG_EVENT_HEADER_LEN]; - uint header_size= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + DBUG_ASSERT(description_event); + char head[LOG_EVENT_MINIMAL_HEADER_LEN]; + /* + First we only want to read at most LOG_EVENT_MINIMAL_HEADER_LEN, just to + check the event for sanity and to know its length; no need to really parse + it. We say "at most" because this could be a 3.23 master, which has header + of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's "minimal" + over the set {MySQL >=4.0}). + */ + uint header_size= min(description_event->common_header_len, + LOG_EVENT_MINIMAL_HEADER_LEN); LOCK_MUTEX; + DBUG_PRINT("info", ("my_b_tell=%lu", my_b_tell(file))); if (my_b_read(file, (byte *) head, header_size)) { + DBUG_PRINT("info", ("Log_event::read_log_event(IO_CACHE*,Format_desc*) \ +failed my_b_read")); UNLOCK_MUTEX; + /* + No error here; it could be that we are at the file's end. However if the + next my_b_read() fails (below), it will be an error as we were able to + read the first bytes. 
+ */ return 0; } @@ -596,7 +706,8 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format) error = "read error"; goto err; } - if ((res = read_log_event(buf, data_len, &error, old_format))) + if ((res= read_log_event(buf, data_len, &error, + description_event))) res->register_temp_buf(buf); err: @@ -623,13 +734,17 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", /* Log_event::read_log_event() + Binlog format tolerance is in (buf, event_len, description_event) + constructors. */ -Log_event* Log_event::read_log_event(const char* buf, int event_len, - const char **error, bool old_format) +Log_event* Log_event::read_log_event(const char* buf, uint event_len, + const char **error, + const Format_description_log_event *description_event) { - DBUG_ENTER("Log_event::read_log_event"); - + DBUG_ENTER("Log_event::read_log_event(char*,...)"); + DBUG_ASSERT(description_event); + DBUG_PRINT("info", ("binlog_version=%d", description_event->binlog_version)); if (event_len < EVENT_LEN_OFFSET || (uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET)) { @@ -641,64 +756,76 @@ Log_event* Log_event::read_log_event(const char* buf, int event_len, switch(buf[EVENT_TYPE_OFFSET]) { case QUERY_EVENT: - ev = new Query_log_event(buf, event_len, old_format); + ev = new Query_log_event(buf, event_len, description_event); break; case LOAD_EVENT: - ev = new Create_file_log_event(buf, event_len, old_format); + ev = new Create_file_log_event(buf, event_len, description_event); break; case NEW_LOAD_EVENT: - ev = new Load_log_event(buf, event_len, old_format); + ev = new Load_log_event(buf, event_len, description_event); break; case ROTATE_EVENT: - ev = new Rotate_log_event(buf, event_len, old_format); + ev = new Rotate_log_event(buf, event_len, description_event); break; #ifdef HAVE_REPLICATION - case SLAVE_EVENT: + case SLAVE_EVENT: /* can never happen (unused event) */ ev = new Slave_log_event(buf, event_len); break; #endif /* HAVE_REPLICATION */ case CREATE_FILE_EVENT: - ev = new Create_file_log_event(buf, event_len, old_format); + ev = new Create_file_log_event(buf, event_len, description_event); break; case APPEND_BLOCK_EVENT: - ev = new Append_block_log_event(buf, event_len); + ev = new Append_block_log_event(buf, event_len, description_event); break; case DELETE_FILE_EVENT: - ev = new Delete_file_log_event(buf, event_len); + ev = new Delete_file_log_event(buf, event_len, description_event); break; case EXEC_LOAD_EVENT: - ev = new Execute_load_log_event(buf, event_len); + ev = new Execute_load_log_event(buf, event_len, description_event); break; - case START_EVENT: - ev = new Start_log_event(buf, old_format); + case START_EVENT_V3: /* this is sent only by MySQL <=4.x */ + ev = new Start_log_event_v3(buf, description_event); break; #ifdef HAVE_REPLICATION case STOP_EVENT: - ev = new Stop_log_event(buf, old_format); + ev = new Stop_log_event(buf, description_event); break; #endif /* HAVE_REPLICATION */ case INTVAR_EVENT: - ev = new Intvar_log_event(buf, old_format); + ev = new Intvar_log_event(buf, description_event); break; case RAND_EVENT: - ev = new Rand_log_event(buf, old_format); + ev = new Rand_log_event(buf, description_event); break; case USER_VAR_EVENT: - ev = new User_var_log_event(buf, old_format); + ev = new User_var_log_event(buf, description_event); + break; + case FORMAT_DESCRIPTION_EVENT: + ev = new Format_description_log_event(buf, event_len, description_event); break; default: break; } + /* + is_valid() are small event-specific sanity tests which are important; 
for + example there are some my_malloc() in constructors + (e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc() + fail we can't return an error out of the constructor (because constructor is + "void") ; so instead we leave the pointer we wanted to allocate + (e.g. 'query') to 0 and we test it in is_valid(). Same for + Format_description_log_event, member 'post_header_len'. + */ if (!ev || !ev->is_valid()) { delete ev; #ifdef MYSQL_CLIENT - if (!force_opt) + if (!force_opt) /* then mysqlbinlog dies */ { *error= "Found invalid event in binary log"; DBUG_RETURN(0); } - ev= new Unknown_log_event(buf, old_format); + ev= new Unknown_log_event(buf, description_event); #else *error= "Found invalid event in binary log"; DBUG_RETURN(0); @@ -719,7 +846,7 @@ void Log_event::print_header(FILE* file) char llbuff[22]; fputc('#', file); print_timestamp(file); - fprintf(file, " server id %d log_pos %s ", server_id, + fprintf(file, " server id %d end_log_pos %s ", server_id, llstr(log_pos,llbuff)); } @@ -753,13 +880,36 @@ void Log_event::print_timestamp(FILE* file, time_t* ts) /* Log_event::set_log_pos() + Only used when we are writing an event which we created, to the BINlog. That + is, when we have parsed and executed a query; we then want to set the event's + log_pos to what it is going to be in the binlog after we write it. Note that + this is the position of the END of the event. */ #ifndef MYSQL_CLIENT void Log_event::set_log_pos(MYSQL_LOG* log) { + /* + Note that with a SEQ_READ_APPEND cache, my_b_tell() does not work well. + So this will give slightly wrong positions for the Format_desc/Rotate/Stop + events which the slave writes to its relay log. For example, the initial + Format_desc will have end_log_pos=91 instead of 95. Because after writing + the first 4 bytes of the relay log, my_b_tell() still reports 0. Because + my_b_append() does not update the counter which my_b_tell() later uses (one + should probably use my_b_append_tell() to work around this). + To get right positions even when writing to the relay log, we use the (new) + my_b_safe_tell(). + Note that this raises a question on the correctness of all these + DBUG_ASSERT(my_b_tell()=rli->event_relay_log_pos). + If in a transaction, the log_pos which we calculate below is not very good + (because then my_b_safe_tell() returns start position of the BEGIN, so it's + like the statement was at the BEGIN's place), but it's not a very serious + problem (as the slave, when it is in a transaction, does not take those + end_log_pos into account (as it calls inc_event_relay_log_pos()). To be + fixed later, so that it looks less strange. But not bug. + */ if (!log_pos) - log_pos = my_b_tell(&log->log_file); + log_pos = my_b_safe_tell(&log->log_file)+get_event_len(); } #endif /* !MYSQL_CLIENT */ @@ -772,10 +922,15 @@ void Log_event::set_log_pos(MYSQL_LOG* log) /* Query_log_event::pack_info() + This (which is used only for SHOW BINLOG EVENTS) could be updated to + print SET @@session_var=. But this is not urgent, as SHOW BINLOG EVENTS is + only an information, it does not produce suitable queries to replay (for + example it does not print LOAD DATA INFILE). */ void Query_log_event::pack_info(Protocol *protocol) { + // TODO: show the catalog ?? 
char *buf, *pos; if (!(buf= my_malloc(9 + db_len + q_len, MYF(MY_WME)))) return; @@ -813,7 +968,7 @@ int Query_log_event::write(IO_CACHE* file) int Query_log_event::write_data(IO_CACHE* file) { - char buf[QUERY_HEADER_LEN]; + uchar buf[QUERY_HEADER_LEN+1+4+1+8+1+1+FN_REFLEN], *start; if (!query) return -1; @@ -859,10 +1014,61 @@ int Query_log_event::write_data(IO_CACHE* file) int4store(buf + Q_EXEC_TIME_OFFSET, exec_time); buf[Q_DB_LEN_OFFSET] = (char) db_len; int2store(buf + Q_ERR_CODE_OFFSET, error_code); + int2store(buf + Q_STATUS_VARS_LEN_OFFSET, status_vars_len); - return (my_b_safe_write(file, (byte*) buf, QUERY_HEADER_LEN) || - my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || - my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0; + /* + You MUST always write status vars in increasing order of code. This + guarantees that a slightly older slave will be able to parse those he + knows. + */ + start= buf+QUERY_HEADER_LEN; + if (flags2_inited) + { + *(start++)= Q_FLAGS2_CODE; + int4store(start, flags2); + start+= 4; + } + if (sql_mode_inited) + { + *(start++)= Q_SQL_MODE_CODE; + int8store(start, sql_mode); + start+= 8; + } + if (catalog) + { + *(start++)= Q_CATALOG_CODE; + *(start++)= (uchar) catalog_len; + bmove(start, catalog, catalog_len); + start+= catalog_len; + /* + We write a \0 at the end. As we also have written the length, it's + apparently useless; but in fact it enables us to just do + catalog= a_pointer_to_the_buffer_of_the_read_event + later in the slave SQL thread. + If we didn't have the \0, we would need to memdup to build the catalog in + the slave SQL thread. + And still the interest of having the length too is that in the slave SQL + thread we immediately know at which position the catalog ends (no need to + search for '\0'. In other words: length saves search, \0 saves mem alloc, + at the cost of 1 redundant byte on the disk. + Note that this is only a fix until we change 'catalog' to LEX_STRING + (then we won't need the \0). + */ + *(start++)= '\0'; + } + /* + Here there could be code like + if (command-line-option-which-says-"log_this_variable") + { + *(start++)= Q_THIS_VARIABLE_CODE; + int4store(start, this_variable); + start+= 4; + } + */ + + return (my_b_safe_write(file, (byte*) buf, (start-buf)) || + my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || + my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0; } @@ -875,60 +1081,173 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, bool using_trans) :Log_event(thd_arg, !thd_arg->tmp_table_used ? 0 : LOG_EVENT_THREAD_SPECIFIC_F, using_trans), - data_buf(0), query(query_arg), + data_buf(0), query(query_arg), catalog(thd_arg->catalog), db(thd_arg->db), q_len((uint32) query_length), - error_code(thd_arg->killed ? + error_code((thd_arg->killed != THD::NOT_KILLED) ? ((thd_arg->system_thread & SYSTEM_THREAD_DELAYED_INSERT) ? - 0 : ER_SERVER_SHUTDOWN) : thd_arg->net.last_errno), + 0 : thd->killed_errno()) : thd_arg->net.last_errno), thread_id(thd_arg->thread_id), /* save the original thread id; we already know the server id */ - slave_proxy_id(thd_arg->variables.pseudo_thread_id) + slave_proxy_id(thd_arg->variables.pseudo_thread_id), + flags2_inited(1), sql_mode_inited(1), flags2(0), sql_mode(0) { time_t end_time; time(&end_time); exec_time = (ulong) (end_time - thd->start_time); + catalog_len = (catalog) ? (uint32) strlen(catalog) : 0; + status_vars_len= 1+4+1+8+1+1+catalog_len+1; db_len = (db) ? 
(uint32) strlen(db) : 0; + /* + If we don't use flags2 for anything else than options contained in + thd->options, it would be more efficient to flags2=thd_arg->options + (OPTIONS_WRITTEN_TO_BINLOG would be used only at reading time). + But it's likely that we don't want to use 32 bits for 3 bits; in the future + we will probably want to reclaim the 29 bits. So we need the &. + */ + flags2= thd_arg->options & OPTIONS_WRITTEN_TO_BIN_LOG; + sql_mode= thd_arg->variables.sql_mode; + DBUG_PRINT("info",("Query_log_event has flags2=%lu sql_mode=%lu",flags2,sql_mode)); } #endif /* MYSQL_CLIENT */ /* Query_log_event::Query_log_event() + This is used by the SQL slave thread to prepare the event before execution. */ -Query_log_event::Query_log_event(const char* buf, int event_len, - bool old_format) - :Log_event(buf, old_format),data_buf(0), query(NULL), db(NULL) +Query_log_event::Query_log_event(const char* buf, uint event_len, + const Format_description_log_event *description_event) + :Log_event(buf, description_event), data_buf(0), query(NULL), + catalog(NULL), db(NULL), catalog_len(0), status_vars_len(0), + flags2_inited(0), sql_mode_inited(0) { ulong data_len; - if (old_format) + uint8 common_header_len, post_header_len; + DBUG_ENTER("Query_log_event::Query_log_event(char*,...)"); + common_header_len= description_event->common_header_len; + post_header_len= description_event->post_header_len[QUERY_EVENT-1]; + DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d", + event_len, common_header_len, post_header_len)); + + /* + We test if the event's length is sensible, and if so we compute data_len. + We cannot rely on QUERY_HEADER_LEN here as it would not be format-tolerant. + We use QUERY_HEADER_MINIMAL_LEN which is the same for 3.23, 4.0 & 5.0. + */ + if (event_len < (uint)(common_header_len + post_header_len)) + return; + data_len = event_len - (common_header_len + post_header_len); + buf+= common_header_len; + + slave_proxy_id= thread_id = uint4korr(buf + Q_THREAD_ID_OFFSET); + exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET); + db_len = (uint)buf[Q_DB_LEN_OFFSET]; + error_code = uint2korr(buf + Q_ERR_CODE_OFFSET); + + /* + 5.0 format starts here. + Depending on the format, we may or not have affected/warnings etc + The remnent post-header to be parsed has length: + */ + uint32 tmp= post_header_len - QUERY_HEADER_MINIMAL_LEN; + if (tmp) { - if ((uint)event_len < OLD_HEADER_LEN + QUERY_HEADER_LEN) - return; - data_len = event_len - (QUERY_HEADER_LEN + OLD_HEADER_LEN); - buf += OLD_HEADER_LEN; + status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET); + DBUG_PRINT("info", ("Query_log_event has status_vars_len=%d", + status_vars_len)); + tmp-= 2; } - else + /* we have parsed everything we know in the post header */ +#ifndef DBUG_OFF + if (tmp) /* this is probably a master newer than us */ + DBUG_PRINT("info", ("Query_log_event has longer post header than we know\ + (%d more bytes)", tmp)); +#endif + + /* + Suppose you have a 4.0 master with --ansi and a 5.0 slave with --ansi. + The slave sets flags2 to 0 (because that's a 4.0 event); if we simply use + this value of 0, we will cancel --ansi on the slave, which is + unwanted. In this example 0 means "unset", not really "set to 0". + Fortunately we have flags2_inited==0 to distinguish between "unset" and + "set to 0". See below. 
+ */ + + /* variable-part: the status vars; only in MySQL 5.0 */ + + const uchar *start= (uchar*) (buf+post_header_len); + const uchar *end= (uchar*) (start+status_vars_len); + /* + The place from which we will start string duplication. + */ + const uchar *start_dup= end; + + for (const uchar* pos=start;pos<end;) { - if ((uint)event_len < QUERY_EVENT_OVERHEAD) - return; - data_len = event_len - QUERY_EVENT_OVERHEAD; - buf += LOG_EVENT_HEADER_LEN; + switch (*pos++) { + case Q_FLAGS2_CODE: + flags2_inited= 1; + flags2= uint4korr(pos); + DBUG_PRINT("info",("In Query_log_event, read flags2: %lu", flags2)); + pos+= 4; + break; + case Q_SQL_MODE_CODE: + { +#ifndef DBUG_OFF + char buff[22]; +#endif + sql_mode_inited= 1; + sql_mode= (ulong) uint8korr(pos); // QQ: Fix when sql_mode is ulonglong + DBUG_PRINT("info",("In Query_log_event, read sql_mode: %s", + llstr(sql_mode, buff))); + pos+= 8; + break; + } + case Q_CATALOG_CODE: + catalog_len= *pos; + /* + Now 'pos' points to beginning of catalog - 1. + The catalog must be included in the string which we will duplicate + later. If string status vars having a smaller code had been seen before + and so marked to-be-duplicated, start_dup would be != end and we would + not need (and want) to change start_dup (because this would cut the + previously marked status vars). + */ + if (start_dup==end) + start_dup= ++pos; + pos+= catalog_len+1; + break; + default: + /* That's why you must write status vars in growing order of code */ + DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\ + code: %u), skipping the rest of them", (uint) *(pos-1))); + pos= end; + } } - - exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET); - error_code = uint2korr(buf + Q_ERR_CODE_OFFSET); - - if (!(data_buf = (char*) my_malloc(data_len + 1, MYF(MY_WME)))) + + /* A 2nd variable part; this is common to all versions */ + + data_len-= (uint) (start_dup-start); /* cut not-to-be-duplicated head */ + if (!(data_buf = (char*) my_strdup_with_length((byte*) start_dup, + data_len, + MYF(MY_WME)))) return; - - memcpy(data_buf, buf + Q_DATA_OFFSET, data_len); - slave_proxy_id= thread_id= uint4korr(buf + Q_THREAD_ID_OFFSET); - db = data_buf; - db_len = (uint)buf[Q_DB_LEN_OFFSET]; - query=data_buf + db_len + 1; - q_len = data_len - 1 - db_len; - *((char*)query+q_len) = 0; + + const char* tmp_buf= data_buf; + /* Now set event's pointers to point to bits of the new string */ + if (catalog_len) + { + catalog= tmp_buf; + tmp_buf+= (uint) (end-start_dup); /* "seek" to db */ + } + db= tmp_buf; + query= tmp_buf + db_len + 1; + q_len = data_buf + data_len - query; + /* This is used to detect wrong parsing. Could be removed in the future. */ + DBUG_PRINT("info", ("catalog_len:%d catalog: '%s' db: '%s' q_len: %d", + catalog_len, catalog, db, q_len)); + DBUG_VOID_RETURN; } @@ -937,8 +1256,10 @@ Query_log_event::Query_log_event(const char* buf, int event_len, */ #ifdef MYSQL_CLIENT -void Query_log_event::print(FILE* file, bool short_form, char* last_db) +void Query_log_event::print(FILE* file, bool short_form, + LAST_EVENT_INFO* last_event_info) { + // TODO: print the catalog ?? 
char buff[40],*end; // Enough for SET TIMESTAMP if (!short_form) { @@ -949,10 +1270,10 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) bool different_db= 1; - if (db && last_db) + if (db && last_event_info->db) { - if (different_db= memcmp(last_db, db, db_len + 1)) - memcpy(last_db, db, db_len + 1); + if ((different_db = memcmp(last_event_info->db, db, db_len + 1))) + memcpy(last_event_info->db, db, db_len + 1); } if (db && db[0] && different_db) @@ -963,6 +1284,67 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME)); if (flags & LOG_EVENT_THREAD_SPECIFIC_F) fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id); + /* + Now the session variables; + it's more efficient to pass SQL_MODE as a number instead of a + comma-separated list. + FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only + variables (they have no global version; they're not listed in sql_class.h), + The tests below work for pure binlogs or pure relay logs. Won't work for + mixed relay logs but we don't create mixed relay logs (that is, there is no + relay log with a format change except within the 3 first events, which + mysqlbinlog handles gracefully). So this code should always be good. + */ + + uint32 tmp; + + if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */ + { + /* tmp is a bitmask of bits which have changed. */ + if (likely(last_event_info->flags2_inited)) + /* All bits which have changed */ + tmp= (last_event_info->flags2) ^ flags2; + else /* that's the first Query event we read */ + { + last_event_info->flags2_inited= 1; + tmp= ~((uint32)0); /* all bits have changed */ + } + + if (unlikely(tmp)) /* some bits have changed */ + { + bool need_comma= 0; + fprintf(file, "SET "); + print_set_option(file, tmp, OPTION_NO_FOREIGN_KEY_CHECKS, ~flags2, + "@@session.foreign_key_checks", &need_comma); + print_set_option(file, tmp, OPTION_AUTO_IS_NULL, flags2, + "@@session.sql_auto_is_null", &need_comma); + print_set_option(file, tmp, OPTION_RELAXED_UNIQUE_CHECKS, ~flags2, + "@@session.unique_checks", &need_comma); + fprintf(file,";\n"); + last_event_info->flags2= flags2; + } + } + + /* + If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print + (remember we don't produce mixed relay logs so there cannot be 5.0 events + before that one so there is nothing to reset). + */ + + if (likely(sql_mode_inited)) + { + if (unlikely(!last_event_info->sql_mode_inited)) /* first Query event */ + { + last_event_info->sql_mode_inited= 1; + last_event_info->sql_mode= ~sql_mode; // force a difference to force write + } + if (unlikely(last_event_info->sql_mode != sql_mode)) + { + fprintf(file,"SET @@session.sql_mode=%lu;\n",(ulong)sql_mode); + last_event_info->sql_mode= sql_mode; + } + } + my_fwrite(file, (byte*) query, q_len, MYF(MY_NABP | MY_WME)); fprintf(file, ";\n"); } @@ -977,11 +1359,24 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) int Query_log_event::exec_event(struct st_relay_log_info* rli) { int expected_error,actual_error= 0; + /* + Colleagues: please never free(thd->catalog) in MySQL. This would lead to + bugs as here thd->catalog is a part of an alloced block, not an entire + alloced block (see Query_log_event::exec_event()). Same for thd->db. + Thank you. 
+ */ + thd->catalog= (char*) catalog; thd->db= (char*) rewrite_db(db); /* - InnoDB internally stores the master log position it has processed so far; - position to store is of the END of the current log event. + InnoDB internally stores the master log position it has executed so far, + i.e. the position just after the COMMIT event. + When InnoDB will want to store, the positions in rli won't have + been updated yet, so group_master_log_* will point to old BEGIN + and event_master_log* will point to the beginning of current COMMIT. + But log_pos of the COMMIT Query event is what we want, i.e. the pos of the + END of the current log event (COMMIT). We save it in rli so that InnoDB can + access it. */ #if MYSQL_VERSION_ID < 50000 rli->future_group_master_log_pos= log_pos + get_event_len() - @@ -1001,9 +1396,33 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) thd->query_id = query_id++; VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->variables.pseudo_thread_id= thread_id; // for temp tables - mysql_log.write(thd,COM_QUERY,"%s",thd->query); DBUG_PRINT("query",("%s",thd->query)); + + if (flags2_inited) + /* + all bits of thd->options which are 1 in OPTIONS_WRITTEN_TO_BIN_LOG must + take their value from flags2. + */ + thd->options= flags2|(thd->options & ~(ulong)OPTIONS_WRITTEN_TO_BIN_LOG); + /* + else, we are in a 3.23/4.0 binlog; we previously received a + Rotate_log_event which reset thd->options and sql_mode, so nothing to do. + */ + + /* + We do not replicate IGNORE_DIR_IN_CREATE. That is, if the master is a + slave which runs with SQL_MODE=IGNORE_DIR_IN_CREATE, this should not + force us to ignore the dir too. Imagine you are a ring of machines, and + one has a disk problem so that you temporarily need IGNORE_DIR_IN_CREATE + on this machine; you don't want it to propagate elsewhere (you don't want + all slaves to start ignoring the dirs). + */ + if (sql_mode_inited) + thd->variables.sql_mode= + (ulong) ((thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE) | + (sql_mode & ~(ulong) MODE_NO_DIR_IN_CREATE)); + if (ignored_error_code((expected_error= error_code)) || !check_expected_error(thd,rli,expected_error)) mysql_parse(thd, thd->query, q_len); @@ -1031,27 +1450,27 @@ START SLAVE; . Query: '%s'", expected_error, thd->query); } goto end; } - + /* If we expected a non-zero error code, and we don't get the same error code, and none of them should be ignored. */ DBUG_PRINT("info",("expected_error: %d last_errno: %d", - expected_error, thd->net.last_errno)); + expected_error, thd->net.last_errno)); if ((expected_error != (actual_error= thd->net.last_errno)) && - expected_error && - !ignored_error_code(actual_error) && - !ignored_error_code(expected_error)) + expected_error && + !ignored_error_code(actual_error) && + !ignored_error_code(expected_error)) { slave_print_error(rli, 0, - "\ + "\ Query caused different errors on master and slave. \ Error on master: '%s' (%d), Error on slave: '%s' (%d). \ Default database: '%s'. Query: '%s'", - ER_SAFE(expected_error), - expected_error, - actual_error ? thd->net.last_error: "no error", - actual_error, + ER_SAFE(expected_error), + expected_error, + actual_error ? thd->net.last_error: "no error", + actual_error, print_slave_db_safe(db), query); thd->query_error= 1; } @@ -1059,7 +1478,7 @@ Default database: '%s'. Query: '%s'", If we get the same error code as expected, or they should be ignored. 
*/ else if (expected_error == actual_error || - ignored_error_code(actual_error)) + ignored_error_code(actual_error)) { DBUG_PRINT("info",("error ignored")); clear_all_errors(thd, rli); @@ -1071,15 +1490,47 @@ Default database: '%s'. Query: '%s'", { slave_print_error(rli,actual_error, "Error '%s' on query. Default database: '%s'. Query: '%s'", - (actual_error ? thd->net.last_error : - "unexpected success or fatal error"), + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), print_slave_db_safe(db), query); thd->query_error= 1; } + + /* + TODO: compare the values of "affected rows" around here. Something + like: + if ((uint32) affected_in_event != (uint32) affected_on_slave) + { + sql_print_error("Slave: did not get the expected number of affected \ + rows running query from master - expected %d, got %d (this numbers \ + should have matched modulo 4294967296).", 0, ...); + thd->query_error = 1; + } + We may also want an option to tell the slave to ignore "affected" + mismatch. This mismatch could be implemented with a new ER_ code, and + to ignore it you would use --slave-skip-errors... + + To do the comparison we need to know the value of "affected" which the + above mysql_parse() computed. And we need to know the value of + "affected" in the master's binlog. Both will be implemented later. The + important thing is that we now have the format ready to log the values + of "affected" in the binlog. So we can release 5.0.0 before effectively + logging "affected" and effectively comparing it. + */ } /* End of if (db_ok(... */ end: VOID(pthread_mutex_lock(&LOCK_thread_count)); + /* + Probably we have set thd->query, thd->db, thd->catalog to point to places + in the data_buf of this event. Now the event is going to be deleted + probably, so data_buf will be freed, so the thd->... listed above will be + pointers to freed memory. + So we must set them to 0, so that those bad pointers values are not later + used. Note that "cleanup" queries (automatic DO RELEASE_LOCK() and DROP + TEMPORARY TABLE don't suffer from these assignments to 0 as DROP TEMPORARY + TABLE uses the db.table syntax). 
+ */ thd->db= 0; // prevent db from being freed thd->query= 0; // just to be sure thd->query_length= 0; @@ -1096,15 +1547,23 @@ end: /************************************************************************** - Start_log_event methods + Start_log_event_v3 methods **************************************************************************/ +#ifndef MYSQL_CLIENT +Start_log_event_v3::Start_log_event_v3() :Log_event(), binlog_version(BINLOG_VERSION) +{ + created= when; + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); +} +#endif + /* - Start_log_event::pack_info() + Start_log_event_v3::pack_info() */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -void Start_log_event::pack_info(Protocol *protocol) +void Start_log_event_v3::pack_info(Protocol *protocol) { char buf[12 + ST_SERVER_VER_LEN + 14 + 22], *pos; pos= strmov(buf, "Server ver: "); @@ -1117,35 +1576,43 @@ void Start_log_event::pack_info(Protocol *protocol) /* - Start_log_event::print() + Start_log_event_v3::print() */ #ifdef MYSQL_CLIENT -void Start_log_event::print(FILE* file, bool short_form, char* last_db) +void Start_log_event_v3::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { - if (short_form) - return; - - print_header(file); - fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version, - server_version); - print_timestamp(file); - if (created) - fprintf(file," at startup"); - fputc('\n', file); + if (!short_form) + { + print_header(file); + fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version, + server_version); + print_timestamp(file); + if (created) + fprintf(file," at startup"); + fputc('\n', file); + } +#ifdef WHEN_WE_HAVE_THE_RESET_CONNECTION_SQL_COMMAND + /* + This is for mysqlbinlog: like in replication, we want to delete the stale + tmp files left by an unclean shutdown of mysqld (temporary tables). Probably + this can be done with RESET CONNECTION (syntax to be defined). + */ + fprintf(file,"RESET CONNECTION;\n"); +#endif fflush(file); } #endif /* MYSQL_CLIENT */ /* - Start_log_event::Start_log_event() + Start_log_event_v3::Start_log_event_v3() */ -Start_log_event::Start_log_event(const char* buf, - bool old_format) - :Log_event(buf, old_format) +Start_log_event_v3::Start_log_event_v3(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? 
OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + buf += description_event->common_header_len; binlog_version = uint2korr(buf+ST_BINLOG_VER_OFFSET); memcpy(server_version, buf+ST_SERVER_VER_OFFSET, ST_SERVER_VER_LEN); @@ -1154,12 +1621,12 @@ Start_log_event::Start_log_event(const char* buf, /* - Start_log_event::write_data() + Start_log_event_v3::write_data() */ -int Start_log_event::write_data(IO_CACHE* file) +int Start_log_event_v3::write_data(IO_CACHE* file) { - char buff[START_HEADER_LEN]; + char buff[START_V3_HEADER_LEN]; int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); int4store(buff + ST_CREATED_OFFSET,created); @@ -1167,7 +1634,7 @@ int Start_log_event::write_data(IO_CACHE* file) } /* - Start_log_event::exec_event() + Start_log_event_v3::exec_event() The master started @@ -1186,23 +1653,29 @@ int Start_log_event::write_data(IO_CACHE* file) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Start_log_event::exec_event(struct st_relay_log_info* rli) +int Start_log_event_v3::exec_event(struct st_relay_log_info* rli) { - DBUG_ENTER("Start_log_event::exec_event"); - + DBUG_ENTER("Start_log_event_v3::exec_event"); /* If the I/O thread has not started, mi->old_format is BINLOG_FORMAT_CURRENT (that's what the MASTER_INFO constructor does), so the test below is not perfect at all. */ - switch (rli->mi->old_format) { - case BINLOG_FORMAT_CURRENT: - /* - This is 4.x, so a Start_log_event is only at master startup, - so we are sure the master has restarted and cleared his temp tables. + switch (rli->relay_log.description_event_for_exec->binlog_version) + { + case 3: + case 4: + /* + This can either be 4.x (then a Start_log_event_v3 is only at master + startup so we are sure the master has restarted and cleared his temp + tables; the event always has 'created'>0) or 5.0 (then we have to test + 'created'). */ - close_temporary_tables(thd); - cleanup_load_tmpdir(); + if (created) + { + close_temporary_tables(thd); + cleanup_load_tmpdir(); + } /* As a transaction NEVER spans on 2 or more binlogs: if we have an active transaction at this point, the master died while @@ -1210,8 +1683,12 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli) cache to the binlog. As the write was started, the transaction had been committed on the master, so we lack of information to replay this transaction on the slave; all we can do is stop with error. + Note: this event could be sent by the master to inform us of the format + of its binlog; in other words maybe it is not at its original place when + it comes to us; we'll know this by checking log_pos ("artificial" events + have log_pos == 0). */ - if (thd->options & OPTION_BEGIN) + if (log_pos && (thd->options & OPTION_BEGIN)) { slave_print_error(rli, 0, "\ Rolling back unfinished transaction (no COMMIT or ROLLBACK) from relay log. \ @@ -1225,33 +1702,270 @@ binary log."); Now the older formats; in that case load_tmpdir is cleaned up by the I/O thread. */ - case BINLOG_FORMAT_323_LESS_57: - /* - Cannot distinguish a Start_log_event generated at master startup and - one generated by master FLUSH LOGS, so cannot be sure temp tables - have to be dropped. So do nothing. - */ - break; - case BINLOG_FORMAT_323_GEQ_57: + case 1: + if (strncmp(rli->relay_log.description_event_for_exec->server_version, + "3.23.57",7) >= 0 && created) + { + /* + Can distinguish, based on the value of 'created': this event was + generated at master startup. 
+ */ + close_temporary_tables(thd); + } /* - Can distinguish, based on the value of 'created', - which was generated at master startup. + Otherwise, can't distinguish a Start_log_event generated at master startup + and one generated by master FLUSH LOGS, so cannot be sure temp tables have + to be dropped. So do nothing. */ - if (created) - close_temporary_tables(thd); break; default: /* this case is impossible */ - return 1; + DBUG_RETURN(1); } - DBUG_RETURN(Log_event::exec_event(rli)); } #endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */ -/************************************************************************** - Load_log_event methods -**************************************************************************/ +/*************************************************************************** + Format_description_log_event methods +****************************************************************************/ + +/* + Format_description_log_event 1st ctor. + + SYNOPSIS + Format_description_log_event::Format_description_log_event + binlog_version the binlog version for which we want to build + an event. Can be 1 (=MySQL 3.23), 3 (=4.0.x + x>=2 and 4.1) or 4 (MySQL 5.0). Note that the + old 4.0 (binlog version 2) is not supported; + it should not be used for replication with + 5.0. + + DESCRIPTION + Ctor. Can be used to create the event to write to the binary log (when the + server starts or when FLUSH LOGS), or to create artificial events to parse + binlogs from MySQL 3.23 or 4.x. + When in a client, only the 2nd use is possible. + + TODO + Update this code with the new event for LOAD DATA, once they are pushed (in + 4.1 or 5.0). If it's in 5.0, only the "case 4" block should be updated. + +*/ + +Format_description_log_event::Format_description_log_event(uint8 binlog_ver, + const char* server_ver) + : Start_log_event_v3() +{ + created= when; + binlog_version= binlog_ver; + switch(binlog_ver) + { + case 4: /* MySQL 5.0 */ + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); + common_header_len= LOG_EVENT_HEADER_LEN; + number_of_event_types= LOG_EVENT_TYPES; + /* we'll catch my_malloc() error in is_valid() */ + post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8), + MYF(0)); + /* + This long list of assignments is not beautiful, but I see no way to + make it nicer, as the right members are #defines, not array members, so + it's impossible to write a loop. + */ + if (post_header_len) + { + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN; + post_header_len[STOP_EVENT-1]= 0; + post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= 0; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= 0; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; + post_header_len[RAND_EVENT-1]= 0; + post_header_len[USER_VAR_EVENT-1]= 0; + post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; + } + break; + + case 1: /* 3.23 */ + case 3: /* 4.0.x x>=2 */ + /* + We build an artificial (i.e. not sent by the master) event, which + describes what those old master versions send. + */ + if (binlog_ver==1) + strmov(server_version, server_ver ? 
server_ver : "3.23"); + else + strmov(server_version, server_ver ? server_ver : "4.0"); + common_header_len= binlog_ver==1 ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN; + /* + The first new event in binlog version 4 is Format_desc. So any event type + after that does not exist in older versions. We use the events known by + version 3, even if version 1 had only a subset of them (this is not a + problem: it uses a few bytes for nothing but unifies code; it does not + make the slave detect less corruptions). + */ + number_of_event_types= FORMAT_DESCRIPTION_EVENT - 1; + post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8), + MYF(0)); + if (post_header_len) + { + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_MINIMAL_LEN; + post_header_len[STOP_EVENT-1]= 0; + post_header_len[ROTATE_EVENT-1]= (binlog_ver==1) ? 0 : ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= 0; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= 0; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; + post_header_len[RAND_EVENT-1]= 0; + post_header_len[USER_VAR_EVENT-1]= 0; + } + break; + default: /* Includes binlog version 2 i.e. 4.0.x x<=1 */ + post_header_len= 0; /* will make is_valid() fail */ + break; + } +} + +Format_description_log_event::Format_description_log_event(const char* buf, + uint event_len, + const + Format_description_log_event* + description_event) + /* + The problem with this constructor is that the fixed header may have a length + different from this version, but we don't know this length as we have not + read the Format_description_log_event which says it, yet. This length is in + the post-header of the event, but we don't know where the post-header + starts. + So this type of event HAS to: + - either have the header's length at the beginning (in the header, at a + fixed position which will never be changed), not in the post-header. That + would make the header be "shifted" compared to other events. + - or have a header of size LOG_EVENT_MINIMAL_HEADER_LEN (19), in all future + versions, so that we know for sure. + I (Guilhem) chose the 2nd solution. Rotate has the same constraint (because + it is sent before Format_description_log_event). + */ + + :Start_log_event_v3(buf, description_event) +{ + DBUG_ENTER("Format_description_log_event::Format_description_log_event(char*,...)"); + buf+= LOG_EVENT_MINIMAL_HEADER_LEN; + if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN) + DBUG_VOID_RETURN; /* sanity check */ + number_of_event_types= + event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1); + DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d", + common_header_len, number_of_event_types)); + /* If alloc fails, we'll detect it in is_valid() */ + post_header_len= (uint8*) my_memdup((byte*)buf+ST_COMMON_HEADER_LEN_OFFSET+1, + number_of_event_types* + sizeof(*post_header_len), + MYF(0)); + DBUG_VOID_RETURN; +} + +int Format_description_log_event::write_data(IO_CACHE* file) +{ + /* + We don't call Start_log_event_v3::write_data() because this would make 2 + my_b_safe_write(). 
+ */ + char buff[FORMAT_DESCRIPTION_HEADER_LEN]; + int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); + memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); + int4store(buff + ST_CREATED_OFFSET,created); + buff[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN; + memcpy(buff+ST_COMMON_HEADER_LEN_OFFSET+1, (byte*) post_header_len, + LOG_EVENT_TYPES); + return (my_b_safe_write(file, (byte*) buff, sizeof(buff)) ? -1 : 0); +} + +/* + SYNOPSIS + Format_description_log_event::exec_event() + + IMPLEMENTATION + Save the information which describes the binlog's format, to be able to + read all coming events. + Call Start_log_event_v3::exec_event(). +*/ + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +int Format_description_log_event::exec_event(struct st_relay_log_info* rli) +{ + DBUG_ENTER("Format_description_log_event::exec_event"); + + /* save the information describing this binlog */ + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= this; + + /* + If this event comes from ourselves, there is no cleaning task to perform, + we don't call Start_log_event_v3::exec_event() (this was just to update the + log's description event). + */ + if (server_id == (uint32) ::server_id) + { + /* + Do not modify rli->group_master_log_pos, as this event did not exist on + the master. That is, just update the *relay log* coordinates; this is done + by passing log_pos=0 to inc_group_relay_log_pos, like we do in + Stop_log_event::exec_event(). + If in a transaction, don't touch group_* coordinates. + */ + if (thd->options & OPTION_BEGIN) + rli->inc_event_relay_log_pos(); + else + { + rli->inc_group_relay_log_pos(0); + flush_relay_log_info(rli); + } + DBUG_RETURN(0); + } + + /* + If the event was not requested by the slave i.e. the master sent it while + the slave asked for a position >4, the event will make + rli->group_master_log_pos advance. Say that the slave asked for position + 1000, and the Format_desc event's end is 95. Then in the beginning of + replication rli->group_master_log_pos will be 0, then 95, then jump to first + really asked event (which is >95). So this is ok. + */ + DBUG_RETURN(Start_log_event_v3::exec_event(rli)); +} +#endif + + /************************************************************************** + Load_log_event methods + General note about Load_log_event: the binlogging of LOAD DATA INFILE is + going to be changed in 5.0 (or maybe in 4.1; not decided yet). + However, the 5.0 slave could still have to read such events (from a 4.x + master), convert them (which just means maybe expand the header, when 5.0 + servers have a UID in events) (remember that whatever is after the header + will be like in 4.x, as this event's format is not modified in 5.0 as we + will use new types of events to log the new LOAD DATA INFILE features). + To be able to read/convert, we just need to not assume that the common + header is of length LOG_EVENT_HEADER_LEN (we must use the description + event). + Note that I (Guilhem) manually tested replication of a big LOAD DATA INFILE + between 3.23 and 5.0, and between 4.0 and 5.0, and it works fine (and the + positions displayed in SHOW SLAVE STATUS then are fine too). + **************************************************************************/ /* Load_log_event::pack_info() @@ -1486,15 +2200,25 @@ Load_log_event::Load_log_event(THD *thd_arg, sql_exchange *ex, constructed event. 
*/ -Load_log_event::Load_log_event(const char *buf, int event_len, - bool old_format) - :Log_event(buf, old_format), num_fields(0), fields(0), - field_lens(0), field_block_len(0), +Load_log_event::Load_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event) + :Log_event(buf, description_event), num_fields(0), fields(0), + field_lens(0),field_block_len(0), table_name(0), db(0), fname(0), local_fname(FALSE) { DBUG_ENTER("Load_log_event"); - if (event_len) // derived class, will call copy_log_event() itself - copy_log_event(buf, event_len, old_format); + /* + I (Guilhem) manually tested replication of LOAD DATA INFILE for 3.23->5.0, + 4.0->5.0 and 5.0->5.0 and it works. + */ + if (event_len) + copy_log_event(buf, event_len, + ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? + LOAD_HEADER_LEN + + description_event->common_header_len : + LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN), + description_event); + /* otherwise it's a derived class, will call copy_log_event() itself */ DBUG_VOID_RETURN; } @@ -1504,14 +2228,14 @@ Load_log_event::Load_log_event(const char *buf, int event_len, */ int Load_log_event::copy_log_event(const char *buf, ulong event_len, - bool old_format) + int body_offset, + const Format_description_log_event *description_event) { + DBUG_ENTER("Load_log_event::copy_log_event"); uint data_len; char* buf_end = (char*)buf + event_len; - uint header_len= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - const char* data_head = buf + header_len; - DBUG_ENTER("Load_log_event::copy_log_event"); - + /* this is the beginning of the post-header */ + const char* data_head = buf + description_event->common_header_len; slave_proxy_id= thread_id= uint4korr(data_head + L_THREAD_ID_OFFSET); exec_time = uint4korr(data_head + L_EXEC_TIME_OFFSET); skip_lines = uint4korr(data_head + L_SKIP_LINES_OFFSET); @@ -1519,21 +2243,17 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, db_len = (uint)data_head[L_DB_LEN_OFFSET]; num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET); - int body_offset = ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? - LOAD_HEADER_LEN + header_len : - get_data_body_offset()); - if ((int) event_len < body_offset) DBUG_RETURN(1); /* Sql_ex.init() on success returns the pointer to the first byte after the sql_ex structure, which is the start of field lengths array. 
*/ - if (!(field_lens=(uchar*)sql_ex.init((char*)buf + body_offset, - buf_end, - buf[EVENT_TYPE_OFFSET] != LOAD_EVENT))) + if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset, + buf_end, + buf[EVENT_TYPE_OFFSET] != LOAD_EVENT))) DBUG_RETURN(1); - + data_len = event_len - body_offset; if (num_fields > data_len) // simple sanity check against corruption DBUG_RETURN(1); @@ -1546,6 +2266,12 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, fname = db + db_len + 1; fname_len = strlen(fname); // null termination is accomplished by the caller doing buf[event_len]=0 + + /* + In 5.0 this event will have the same format, as we are planning to log LOAD + DATA INFILE in a completely different way (as a plain-text query) since 4.1 + or 5.0 (Dmitri's WL#874) + */ DBUG_RETURN(0); } @@ -1555,13 +2281,13 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, */ #ifdef MYSQL_CLIENT -void Load_log_event::print(FILE* file, bool short_form, char* last_db) +void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { - print(file, short_form, last_db, 0); + print(file, short_form, last_event_info, 0); } -void Load_log_event::print(FILE* file, bool short_form, char* last_db, +void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented) { DBUG_ENTER("Load_log_event::print"); @@ -1573,7 +2299,7 @@ void Load_log_event::print(FILE* file, bool short_form, char* last_db, } bool different_db= 1; - if (db && last_db) + if (db && last_event_info->db) { /* If the database is different from the one of the previous statement, we @@ -1581,9 +2307,9 @@ void Load_log_event::print(FILE* file, bool short_form, char* last_db, But if commented, the "use" is going to be commented so we should not update the last_db. */ - if ((different_db= memcmp(last_db, db, db_len + 1)) && + if ((different_db= memcmp(last_event_info->db, db, db_len + 1)) && !commented) - memcpy(last_db, db, db_len + 1); + memcpy(last_event_info->db, db, db_len + 1); } if (db && db[0] && different_db) @@ -1704,6 +2430,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, clear_all_errors(thd, rli); if (!use_rli_only_for_errors) { + /* Saved for InnoDB, see comment in Query_log_event::exec_event() */ #if MYSQL_VERSION_ID < 50000 rli->future_group_master_log_pos= log_pos + get_event_len() - (rli->mi->old_format ? (LOG_EVENT_HEADER_LEN - OLD_HEADER_LEN) : 0); @@ -1711,8 +2438,8 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, rli->future_group_master_log_pos= log_pos; #endif } - - /* + + /* We test replicate_*_db rules. Note that we have already prepared the file to load, even if we are going to ignore and delete it now. So it is possible that we did a lot of disk writes for nothing. 
In other words, a @@ -1830,7 +2557,8 @@ Slave: load data infile on table '%s' at log position %s in log \ } thd->net.vio = 0; - thd->db= 0; // prevent db from being freed + /* Same reason as in Query_log_event::exec_event() */ + thd->db= thd->catalog= 0; close_thread_tables(thd); if (thd->query_error) { @@ -1892,17 +2620,17 @@ void Rotate_log_event::pack_info(Protocol *protocol) */ #ifdef MYSQL_CLIENT -void Rotate_log_event::print(FILE* file, bool short_form, char* last_db) +void Rotate_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char buf[22]; + if (short_form) return; - print_header(file); fprintf(file, "\tRotate to "); if (new_log_ident) my_fwrite(file, (byte*) new_log_ident, (uint)ident_len, - MYF(MY_NABP | MY_WME)); + MYF(MY_NABP | MY_WME)); fprintf(file, " pos: %s", llstr(pos, buf)); fputc('\n', file); fflush(file); @@ -1914,31 +2642,22 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db) Rotate_log_event::Rotate_log_event() */ -Rotate_log_event::Rotate_log_event(const char* buf, int event_len, - bool old_format) - :Log_event(buf, old_format),new_log_ident(NULL),alloced(0) +Rotate_log_event::Rotate_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) ,new_log_ident(NULL),alloced(0) { + DBUG_ENTER("Rotate_log_event::Rotate_log_event(char*,...)"); // The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET - int header_size = (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + uint8 header_size= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1]; uint ident_offset; - DBUG_ENTER("Rotate_log_event"); - if (event_len < header_size) DBUG_VOID_RETURN; - buf += header_size; - if (old_format) - { - ident_len = (uint)(event_len - OLD_HEADER_LEN); - pos = 4; - ident_offset = 0; - } - else - { - ident_len = (uint)(event_len - ROTATE_EVENT_OVERHEAD); - pos = uint8korr(buf + R_POS_OFFSET); - ident_offset = ROTATE_HEADER_LEN; - } + pos = post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4; + ident_len = (uint)(event_len - + (header_size+post_header_len)); + ident_offset = post_header_len; set_if_smaller(ident_len,FN_REFLEN-1); if (!(new_log_ident= my_strdup_with_length((byte*) buf + ident_offset, @@ -1966,14 +2685,15 @@ int Rotate_log_event::write_data(IO_CACHE* file) /* Rotate_log_event::exec_event() - Got a rotate log even from the master + Got a rotate log event from the master IMPLEMENTATION This is mainly used so that we can later figure out the logname and position for the master. - We can't rotate the slave as this will cause infinitive rotations + We can't rotate the slave's BINlog as this will cause infinitive rotations in a A -> B -> A setup. + The NOTES below is a wrong comment which will disappear when 4.1 is merged. RETURN VALUES 0 ok @@ -1985,7 +2705,7 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) DBUG_ENTER("Rotate_log_event::exec_event"); pthread_mutex_lock(&rli->data_lock); - rli->event_relay_log_pos += get_event_len(); + rli->event_relay_log_pos= my_b_tell(rli->cur_log); /* If we are in a transaction: the only normal case is when the I/O thread was copying a big transaction, then it was stopped and restarted: we have this @@ -1997,15 +2717,28 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) COMMIT or ROLLBACK In that case, we don't want to touch the coordinates which correspond to the beginning of the transaction. 
+ Starting from 5.0.0, there also are some rotates from the slave itself, in + the relay log. */ if (!(thd->options & OPTION_BEGIN)) { memcpy(rli->group_master_log_name, new_log_ident, ident_len+1); rli->notify_group_master_log_name_update(); - rli->group_master_log_pos = pos; - rli->group_relay_log_pos = rli->event_relay_log_pos; - DBUG_PRINT("info", ("group_master_log_pos: %lu", + rli->group_master_log_pos= pos; + rli->group_relay_log_pos= rli->event_relay_log_pos; + DBUG_PRINT("info", ("group_master_log_name: '%s' group_master_log_pos:\ +%lu", + rli->group_master_log_name, (ulong) rli->group_master_log_pos)); + /* + Reset thd->options and sql_mode, because this could be the signal of a + master's downgrade from 5.0 to 4.0. + However, no need to reset description_event_for_exec: indeed, if the next + master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next + master is 4.0 then the events are in the slave's format (conversion). + */ + set_slave_thread_options(thd); + thd->variables.sql_mode= global_system_variables.sql_mode; } pthread_mutex_unlock(&rli->data_lock); pthread_cond_broadcast(&rli->data_cond); @@ -2039,12 +2772,13 @@ void Intvar_log_event::pack_info(Protocol *protocol) Intvar_log_event::Intvar_log_event() */ -Intvar_log_event::Intvar_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +Intvar_log_event::Intvar_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - type = buf[I_TYPE_OFFSET]; - val = uint8korr(buf+I_VAL_OFFSET); + buf+= description_event->common_header_len; + type= buf[I_TYPE_OFFSET]; + val= uint8korr(buf+I_VAL_OFFSET); } @@ -2080,7 +2814,7 @@ int Intvar_log_event::write_data(IO_CACHE* file) */ #ifdef MYSQL_CLIENT -void Intvar_log_event::print(FILE* file, bool short_form, char* last_db) +void Intvar_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22]; const char *msg; @@ -2123,7 +2857,7 @@ int Intvar_log_event::exec_event(struct st_relay_log_info* rli) thd->next_insert_id = val; break; } - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif @@ -2146,12 +2880,13 @@ void Rand_log_event::pack_info(Protocol *protocol) #endif -Rand_log_event::Rand_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +Rand_log_event::Rand_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? 
OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - seed1 = uint8korr(buf+RAND_SEED1_OFFSET); - seed2 = uint8korr(buf+RAND_SEED2_OFFSET); + buf+= description_event->common_header_len; + seed1= uint8korr(buf+RAND_SEED1_OFFSET); + seed2= uint8korr(buf+RAND_SEED2_OFFSET); } @@ -2165,7 +2900,7 @@ int Rand_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT -void Rand_log_event::print(FILE* file, bool short_form, char* last_db) +void Rand_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22],llbuff2[22]; if (!short_form) @@ -2185,7 +2920,7 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) { thd->rand.seed1= (ulong) seed1; thd->rand.seed2= (ulong) seed2; - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif /* !MYSQL_CLIENT */ @@ -2256,10 +2991,11 @@ void User_var_log_event::pack_info(Protocol* protocol) #endif /* !MYSQL_CLIENT */ -User_var_log_event::User_var_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +User_var_log_event::User_var_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf+= (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + buf+= description_event->common_header_len; name_len= uint4korr(buf); name= (char *) buf + UV_NAME_LEN_SIZE; buf+= UV_NAME_LEN_SIZE + name_len; @@ -2333,7 +3069,7 @@ int User_var_log_event::write_data(IO_CACHE* file) */ #ifdef MYSQL_CLIENT -void User_var_log_event::print(FILE* file, bool short_form, char* last_db) +void User_var_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (!short_form) { @@ -2465,7 +3201,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli) e.update_hash(val, val_len, type, charset, DERIVATION_NONE); free_root(&thd->mem_root,0); - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif /* !MYSQL_CLIENT */ @@ -2477,7 +3213,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli) #ifdef HAVE_REPLICATION #ifdef MYSQL_CLIENT -void Unknown_log_event::print(FILE* file, bool short_form, char* last_db) +void Unknown_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -2507,7 +3243,7 @@ void Slave_log_event::pack_info(Protocol *protocol) #ifndef MYSQL_CLIENT Slave_log_event::Slave_log_event(THD* thd_arg, struct st_relay_log_info* rli) - :Log_event(thd_arg, 0, 0), mem_pool(0), master_host(0) + :Log_event(thd_arg, 0, 0) , mem_pool(0), master_host(0) { DBUG_ENTER("Slave_log_event"); if (!rli->inited) // QQ When can this happen ? 
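
[Editor's note] The constructors changed above (Rotate, Intvar, Rand, User_var) all drop the hard-coded "old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN" choice and instead take every offset from the Format_description_log_event they are passed. The snippet below is a minimal, self-contained sketch of that pattern written for this review, not MySQL source: DescriptionEvent, IntvarEvent, parse_intvar and read_u64 are invented stand-ins for Format_description_log_event, Intvar_log_event and uint8korr.

#include <cstdint>
#include <cstddef>

// Stand-in for Format_description_log_event: the binlog reader keeps one of
// these and uses it to decode every later event in the same file.
struct DescriptionEvent
{
  uint8_t common_header_len;     // 13 for a 3.23 binlog, 19 for 4.x/5.0
  uint8_t post_header_len[32];   // indexed by (event_type - 1)
};

// Little-endian 64-bit read, comparable in spirit to uint8korr().
static uint64_t read_u64(const unsigned char *p)
{
  uint64_t v = 0;
  for (int i = 7; i >= 0; i--)
    v = (v << 8) | p[i];
  return v;
}

struct IntvarEvent { uint8_t type; uint64_t val; };

// Format-tolerant parsing: instead of branching on "old_format", the parser
// asks the description event how long the common header is, so the same code
// can read events written by 3.23, 4.x and 5.0 masters.
static bool parse_intvar(const unsigned char *buf, size_t len,
                         const DescriptionEvent *fd, IntvarEvent *ev)
{
  if (len < fd->common_header_len + 9u)   // 1 byte type + 8 byte value
    return false;
  buf += fd->common_header_len;           // skip the version-dependent header
  ev->type = buf[0];                      // type lives at offset 0 of the body
  ev->val  = read_u64(buf + 1);           // value at offset 1
  return true;
}

The Rotate constructor above uses the same idea for its post-header: when post_header_len[ROTATE_EVENT-1] is 0 (an artificial description of a 3.23 master) there is no 8-byte position field, so pos falls back to 4.
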
@@ -2548,7 +3284,7 @@ Slave_log_event::~Slave_log_event() #ifdef MYSQL_CLIENT -void Slave_log_event::print(FILE* file, bool short_form, char* last_db) +void Slave_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22]; if (short_form) @@ -2594,12 +3330,13 @@ void Slave_log_event::init_from_mem_pool(int data_size) } -Slave_log_event::Slave_log_event(const char* buf, int event_len) - :Log_event(buf,0),mem_pool(0),master_host(0) +/* This code is not used, so has not been updated to be format-tolerant */ +Slave_log_event::Slave_log_event(const char* buf, uint event_len) + :Log_event(buf,0) /*unused event*/ ,mem_pool(0),master_host(0) { - event_len -= LOG_EVENT_HEADER_LEN; - if (event_len < 0) + if (event_len < LOG_EVENT_HEADER_LEN) return; + event_len -= LOG_EVENT_HEADER_LEN; if (!(mem_pool = (char*) my_malloc(event_len + 1, MYF(MY_WME)))) return; memcpy(mem_pool, buf + LOG_EVENT_HEADER_LEN, event_len); @@ -2627,7 +3364,7 @@ int Slave_log_event::exec_event(struct st_relay_log_info* rli) */ #ifdef MYSQL_CLIENT -void Stop_log_event::print(FILE* file, bool short_form, char* last_db) +void Stop_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -2649,7 +3386,7 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db) We used to clean up slave_load_tmpdir, but this is useless as it has been cleared at the end of LOAD DATA INFILE. So we have nothing to do here. - The place were we must do this cleaning is in Start_log_event::exec_event(), + The place were we must do this cleaning is in Start_log_event_v3::exec_event(), not here. Because if we come here, the master was sane. */ @@ -2663,8 +3400,13 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli) could give false triggers in MASTER_POS_WAIT() that we have reached the target position when in fact we have not. */ - rli->inc_group_relay_log_pos(get_event_len(), 0); - flush_relay_log_info(rli); + if (thd->options & OPTION_BEGIN) + rli->inc_event_relay_log_pos(); + else + { + rli->inc_group_relay_log_pos(0); + flush_relay_log_info(rli); + } return 0; } #endif /* !MYSQL_CLIENT */ @@ -2744,28 +3486,42 @@ int Create_file_log_event::write_base(IO_CACHE* file) Create_file_log_event ctor */ -Create_file_log_event::Create_file_log_event(const char* buf, int len, - bool old_format) - :Load_log_event(buf,0,old_format),fake_base(0),block(0),inited_from_old(0) +Create_file_log_event::Create_file_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Load_log_event(buf,0,description_event),fake_base(0),block(0),inited_from_old(0) { - int block_offset; - DBUG_ENTER("Create_file_log_event"); - - /* - We must make copy of 'buf' as this event may have to live over a - rotate log entry when used in mysqlbinlog - */ + DBUG_ENTER("Create_file_log_event::Create_file_log_event(char*,...)"); + uint block_offset; + uint header_len= description_event->common_header_len; + uint8 load_header_len= description_event->post_header_len[LOAD_EVENT-1]; + uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1]; if (!(event_buf= my_memdup((byte*) buf, len, MYF(MY_WME))) || - (copy_log_event(event_buf, len, old_format))) + copy_log_event(event_buf,len, + ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? + load_header_len + header_len : + (fake_base ? 
(header_len+load_header_len) : + (header_len+load_header_len) + + create_file_header_len)), + description_event)) DBUG_VOID_RETURN; - - if (!old_format) + if (description_event->binlog_version!=1) { - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + - + LOAD_HEADER_LEN + CF_FILE_ID_OFFSET); + file_id= uint4korr(buf + + header_len + + load_header_len + CF_FILE_ID_OFFSET); // + 1 for \0 terminating fname - block_offset = (LOG_EVENT_HEADER_LEN + Load_log_event::get_data_size() + - CREATE_FILE_HEADER_LEN + 1); + /* + Note that it's ok to use get_data_size() below, because it is computed + with values we have already read from this event (because we called + copy_log_event()); we are not using slave's format info to decode master's + format, we are really using master's format info. + Anyway, both formats should be identical (except the common_header_len) as + these Load events are not changed between 4.0 and 5.0 (as logging of LOAD + DATA INFILE does not use Load_log_event in 5.0). + */ + block_offset= description_event->common_header_len + + Load_log_event::get_data_size() + + create_file_header_len + 1; if (len < block_offset) return; block = (char*)buf + block_offset; @@ -2786,18 +3542,18 @@ Create_file_log_event::Create_file_log_event(const char* buf, int len, #ifdef MYSQL_CLIENT void Create_file_log_event::print(FILE* file, bool short_form, - char* last_db, bool enable_local) + LAST_EVENT_INFO* last_event_info, bool enable_local) { if (short_form) { if (enable_local && check_fname_outside_temp_buf()) - Load_log_event::print(file, 1, last_db); + Load_log_event::print(file, 1, last_event_info); return; } if (enable_local) { - Load_log_event::print(file, 1, last_db, !check_fname_outside_temp_buf()); + Load_log_event::print(file, 1, last_event_info, !check_fname_outside_temp_buf()); /* That one is for "file_id: etc" below: in mysqlbinlog we want the #, in SHOW BINLOG EVENTS we don't. 
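
[Editor's note] Throughout this patch the print() methods stop taking a bare last_db pointer and share a LAST_EVENT_INFO instead, so mysqlbinlog emits a USE (and, for the new status variables, a SET) only when something actually changed since the previous printed event. The sketch below is an illustrative, self-contained model of that idea, not the real mysqlbinlog code: LastEventInfo and print_use_db are invented names, and only the db-tracking part (the equivalent of the memcmp/memcpy in Load_log_event::print() above) is modelled.

#include <cstdio>
#include <cstring>

// Stand-in for LAST_EVENT_INFO: remembers what the printed output has already
// put into effect (here only the current database).
struct LastEventInfo
{
  char db[512];
  LastEventInfo() { db[0] = '\0'; }             // database initially unknown
};

// Emit "use <db>" only when this event's database differs from the one already
// in effect.  When the statement is printed commented out, the remembered db is
// deliberately not updated, mirroring the 'commented' handling above.
static void print_use_db(FILE *out, LastEventInfo *last, const char *db,
                         bool commented)
{
  if (!db || !db[0])
    return;
  bool different_db = (strcmp(last->db, db) != 0);
  if (!different_db)
    return;
  if (!commented)
    snprintf(last->db, sizeof(last->db), "%s", db);
  fprintf(out, "%suse %s;\n", commented ? "# " : "", db);
}

int main()
{
  LastEventInfo last;
  print_use_db(stdout, &last, "test",  false);  // prints: use test;
  print_use_db(stdout, &last, "test",  false);  // same db: prints nothing
  print_use_db(stdout, &last, "mysql", true);   // prints commented, keeps "test"
  print_use_db(stdout, &last, "mysql", false);  // prints: use mysql;
  return 0;
}

The flags2/sql_mode members of the real LAST_EVENT_INFO presumably play the same role for SET statements, so a long dump does not repeat session settings that have not changed.
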
@@ -2810,9 +3566,9 @@ void Create_file_log_event::print(FILE* file, bool short_form, void Create_file_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { - print(file,short_form,last_db,0); + print(file,short_form,last_event_info,0); } #endif /* MYSQL_CLIENT */ @@ -2926,15 +3682,20 @@ Append_block_log_event::Append_block_log_event(THD* thd_arg, const char* db_arg, Append_block_log_event ctor */ -Append_block_log_event::Append_block_log_event(const char* buf, int len) - :Log_event(buf, 0),block(0) +Append_block_log_event::Append_block_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),block(0) { - DBUG_ENTER("Append_block_log_event"); - if ((uint)len < APPEND_BLOCK_EVENT_OVERHEAD) + DBUG_ENTER("Append_block_log_event::Append_block_log_event(char*,...)"); + uint8 common_header_len= description_event->common_header_len; + uint8 append_block_header_len= + description_event->post_header_len[APPEND_BLOCK_EVENT-1]; + uint total_header_len= common_header_len+append_block_header_len; + if (len < total_header_len) DBUG_VOID_RETURN; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET); - block = (char*)buf + APPEND_BLOCK_EVENT_OVERHEAD; - block_len = len - APPEND_BLOCK_EVENT_OVERHEAD; + file_id= uint4korr(buf + common_header_len + AB_FILE_ID_OFFSET); + block= (char*)buf + total_header_len; + block_len= len - total_header_len; DBUG_VOID_RETURN; } @@ -2958,7 +3719,7 @@ int Append_block_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Append_block_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -3041,12 +3802,15 @@ Delete_file_log_event::Delete_file_log_event(THD *thd_arg, const char* db_arg, Delete_file_log_event ctor */ -Delete_file_log_event::Delete_file_log_event(const char* buf, int len) - :Log_event(buf, 0),file_id(0) +Delete_file_log_event::Delete_file_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),file_id(0) { - if ((uint)len < DELETE_FILE_EVENT_OVERHEAD) + uint8 common_header_len= description_event->common_header_len; + uint8 delete_file_header_len= description_event->post_header_len[DELETE_FILE_EVENT-1]; + if (len < (uint)(common_header_len + delete_file_header_len)) return; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET); + file_id= uint4korr(buf + common_header_len + DF_FILE_ID_OFFSET); } @@ -3068,7 +3832,7 @@ int Delete_file_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Delete_file_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -3131,12 +3895,15 @@ Execute_load_log_event::Execute_load_log_event(THD *thd_arg, const char* db_arg, Execute_load_log_event ctor */ -Execute_load_log_event::Execute_load_log_event(const char* buf, int len) - :Log_event(buf, 0), file_id(0) +Execute_load_log_event::Execute_load_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event), file_id(0) { - if ((uint)len < EXEC_LOAD_EVENT_OVERHEAD) + uint8 common_header_len= description_event->common_header_len; + uint8 exec_load_header_len= description_event->post_header_len[EXEC_LOAD_EVENT-1]; + if (len < (uint)(common_header_len+exec_load_header_len)) return; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + 
EL_FILE_ID_OFFSET); + file_id= uint4korr(buf + common_header_len + EL_FILE_ID_OFFSET); } @@ -3158,7 +3925,7 @@ int Execute_load_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Execute_load_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -3205,8 +3972,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) goto err; } if (!(lev = (Load_log_event*)Log_event::read_log_event(&file, - (pthread_mutex_t*)0, - (bool)0)) || + (pthread_mutex_t*)0, + rli->relay_log.description_event_for_exec)) || lev->get_type_code() != NEW_LOAD_EVENT) { slave_print_error(rli,0, "Error in Exec_load event: file '%s' appears corrupted", fname); diff --git a/sql/log_event.h b/sql/log_event.h index 28d1f44df92..ec23aad1717 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -34,13 +34,43 @@ #define LOG_READ_TOO_LARGE -7 #define LOG_EVENT_OFFSET 4 - -#define BINLOG_VERSION 3 + +/* + 3 is MySQL 4.x; 4 is MySQL 5.0.0. + Compared to version 3, version 4 has: + - a different Start_log_event, which includes info about the binary log + (sizes of headers); this info is included for better compatibility if the + master's MySQL version is different from the slave's. + - all events have a unique ID (the triplet (server_id, timestamp at server + start, other) to be sure an event is not executed more than once in a + multimaster setup, example: + M1 + / \ + v v + M2 M3 + \ / + v v + S + if a query is run on M1, it will arrive twice on S, so we need that S + remembers the last unique ID it has processed, to compare and know if the + event should be skipped or not. Example of ID: we already have the server id + (4 bytes), plus: + timestamp_when_the_master_started (4 bytes), a counter (a sequence number + which increments every time we write an event to the binlog) (3 bytes). + Q: how do we handle when the counter is overflowed and restarts from 0 ? + + - Query and Load (Create or Execute) events may have a more precise timestamp + (with microseconds), number of matched/affected/warnings rows + and fields of session variables: SQL_MODE, + FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, SQL_AUTO_IS_NULL, the collations and + charsets, the PASSWORD() version (old/new/...). +*/ +#define BINLOG_VERSION 4 /* We could have used SERVER_VERSION_LENGTH, but this introduces an obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH - this would have broken the replication protocol + this would break the replication protocol */ #define ST_SERVER_VER_LEN 50 @@ -49,6 +79,12 @@ TERMINATED etc). */ +/* + These are flags and structs to handle all the LOAD DATA INFILE options (LINES + TERMINATED etc). + DUMPFILE_FLAG is probably useless (DUMPFILE is a clause of SELECT, not of LOAD + DATA). +*/ #define DUMPFILE_FLAG 0x1 #define OPT_ENCLOSED_FLAG 0x2 #define REPLACE_FLAG 0x4 @@ -136,16 +172,28 @@ struct sql_ex_info #define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */ #define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */ - +/* + Fixed header length, where 4.x and 5.0 agree. That is, 5.0 may have a longer + header (it will for sure when we have the unique event's ID), but at least + the first 19 bytes are the same in 4.x and 5.0. So when we have the unique + event's ID, LOG_EVENT_HEADER_LEN will be something like 26, but + LOG_EVENT_MINIMAL_HEADER_LEN will remain 19. 
+*/ +#define LOG_EVENT_MINIMAL_HEADER_LEN 19 + /* event-specific post-header sizes */ -#define QUERY_HEADER_LEN (4 + 4 + 1 + 2) +// where 3.23, 4.x and 5.0 agree +#define QUERY_HEADER_MINIMAL_LEN (4 + 4 + 1 + 2) +// where 5.0 differs: 2 for len of N-bytes vars. +#define QUERY_HEADER_LEN (QUERY_HEADER_MINIMAL_LEN + 2) #define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4) -#define START_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) -#define ROTATE_HEADER_LEN 8 +#define START_V3_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) +#define ROTATE_HEADER_LEN 8 // this is FROZEN (the Rotate post-header is frozen) #define CREATE_FILE_HEADER_LEN 4 #define APPEND_BLOCK_HEADER_LEN 4 #define EXEC_LOAD_HEADER_LEN 4 #define DELETE_FILE_HEADER_LEN 4 +#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES) /* Event header offsets; @@ -158,11 +206,12 @@ struct sql_ex_info #define LOG_POS_OFFSET 13 #define FLAGS_OFFSET 17 -/* start event post-header */ +/* start event post-header (for v3 and v4) */ #define ST_BINLOG_VER_OFFSET 0 #define ST_SERVER_VER_OFFSET 2 #define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) +#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4) /* slave event post-header (this event is never written) */ @@ -176,7 +225,13 @@ struct sql_ex_info #define Q_EXEC_TIME_OFFSET 4 #define Q_DB_LEN_OFFSET 8 #define Q_ERR_CODE_OFFSET 9 +#define Q_STATUS_VARS_LEN_OFFSET 11 #define Q_DATA_OFFSET QUERY_HEADER_LEN +/* these are codes, not offsets; not more than 256 values (1 byte). */ +#define Q_FLAGS2_CODE 0 +#define Q_SQL_MODE_CODE 1 +#define Q_CATALOG_CODE 2 + /* Intvar event post-header */ @@ -228,16 +283,6 @@ struct sql_ex_info /* DF = "Delete File" */ #define DF_FILE_ID_OFFSET 0 -#define QUERY_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN) -#define QUERY_DATA_OFFSET (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN) -#define ROTATE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+ROTATE_HEADER_LEN) -#define LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+LOAD_HEADER_LEN) -#define CREATE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+\ - +LOAD_HEADER_LEN+CREATE_FILE_HEADER_LEN) -#define DELETE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+DELETE_FILE_HEADER_LEN) -#define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN) -#define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN) - /* 4 bytes which all binlogs should begin with */ #define BINLOG_MAGIC "\xfe\x62\x69\x6e" @@ -264,15 +309,54 @@ struct sql_ex_info */ #define LOG_EVENT_THREAD_SPECIFIC_F 0x4 +/* + OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written + to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the + Format_description_log_event, so that if later we don't want to replicate a + variable we did replicate, or the contrary, it's doable. But it should not be + too hard to decide once for all of what we replicate and what we don't, among + the fixed 32 bits of thd->options. + I (Guilhem) have read through every option's usage, and it looks like + OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter + how the query modifies the table. It's good to replicate + OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data + slower than the master, in InnoDB. + OPTION_BIG_SELECTS is not needed (the slave thread runs with + max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as + the manual says (because a too big in-memory temp table is automatically + written to disk). 
+*/ +#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \ +OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) + enum Log_event_type { - UNKNOWN_EVENT= 0, START_EVENT= 1, QUERY_EVENT= 2, STOP_EVENT= 3, - ROTATE_EVENT= 4, INTVAR_EVENT= 5, LOAD_EVENT=6, SLAVE_EVENT= 7, - CREATE_FILE_EVENT= 8, APPEND_BLOCK_EVENT= 9, EXEC_LOAD_EVENT= 10, - DELETE_FILE_EVENT= 11, NEW_LOAD_EVENT= 12, RAND_EVENT= 13, - USER_VAR_EVENT= 14 + /* + Every time you update this enum (when you add a type), you have to + update the code of Format_description_log_event::Format_description_log_event(). + Make sure you always insert new types ***BEFORE*** ENUM_END_EVENT. + */ + UNKNOWN_EVENT= 0, START_EVENT_V3, QUERY_EVENT, STOP_EVENT, ROTATE_EVENT, + INTVAR_EVENT, LOAD_EVENT, SLAVE_EVENT, CREATE_FILE_EVENT, + APPEND_BLOCK_EVENT, EXEC_LOAD_EVENT, DELETE_FILE_EVENT, + /* + NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer sql_ex, + allowing multibyte TERMINATED BY etc; both types share the same class + (Load_log_event) + */ + NEW_LOAD_EVENT, + RAND_EVENT, USER_VAR_EVENT, + FORMAT_DESCRIPTION_EVENT, + ENUM_END_EVENT /* end marker */ }; +/* + The number of types we handle in Format_description_log_event (UNKNOWN_EVENT + is not to be handled, it does not exist in binlogs, it does not have a + format). +*/ +#define LOG_EVENT_TYPES (ENUM_END_EVENT-1) + enum Int_event_type { INVALID_INT_EVENT = 0, LAST_INSERT_ID_EVENT = 1, INSERT_ID_EVENT = 2 @@ -285,8 +369,33 @@ class MYSQL_LOG; class THD; #endif +class Format_description_log_event; + struct st_relay_log_info; +#ifdef MYSQL_CLIENT +/* + A structure for mysqlbinlog to remember the last db, flags2, sql_mode etc; it + is passed to events' print() methods, so that they print only the necessary + USE and SET commands. +*/ +typedef struct st_last_event_info +{ + // TODO: have the last catalog here ?? + char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is + bool flags2_inited; + uint32 flags2; + bool sql_mode_inited; + ulong sql_mode; /* must be same as THD.variables.sql_mode */ + st_last_event_info() + : flags2_inited(0), flags2(0), sql_mode_inited(0), sql_mode(0) + { + db[0]= 0; /* initially, the db is unknown */ + } +} LAST_EVENT_INFO; +#endif + + /***************************************************************************** Log_event class @@ -337,21 +446,26 @@ public: uint16 flags; bool cache_stmt; + #ifndef MYSQL_CLIENT THD* thd; - Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); Log_event(); + Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); /* read_log_event() functions read an event from a binlog or relay log; used by SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's binlog), the slave IO thread (reads the event sent by binlog_dump), the slave SQL thread (reads the event from the relay log). + If mutex is 0, the read will proceed without mutex. + We need the description_event to be able to parse the event (to know the + post-header's size); in fact in read_log_event we detect the event's type, + then call the specific event's constructor and pass description_event as an + argument. */ - // if mutex is 0, the read will proceed without mutex static Log_event* read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock, - bool old_format); + const Format_description_log_event *description_event); static int read_log_event(IO_CACHE* file, String* packet, pthread_mutex_t* log_lock); /* set_log_pos() is used to fill log_pos with tell(log). */ @@ -379,10 +493,12 @@ public: return thd ? 
thd->db : 0; } #else + Log_event() : temp_buf(0) {} // avoid having to link mysqlbinlog against libpthread - static Log_event* read_log_event(IO_CACHE* file, bool old_format); + static Log_event* read_log_event(IO_CACHE* file, + const Format_description_log_event *description_event); /* print*() functions are used by mysqlbinlog */ - virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0; + virtual void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0) = 0; void print_timestamp(FILE* file, time_t *ts = 0); void print_header(FILE* file); #endif @@ -405,9 +521,9 @@ public: virtual int write_data_body(IO_CACHE* file __attribute__((unused))) { return 0; } virtual Log_event_type get_type_code() = 0; - virtual bool is_valid() = 0; + virtual const bool is_valid() = 0; inline bool get_cache_stmt() { return cache_stmt; } - Log_event(const char* buf, bool old_format); + Log_event(const char* buf, const Format_description_log_event* description_event); virtual ~Log_event() { free_temp_buf();} void register_temp_buf(char* buf) { temp_buf = buf; } void free_temp_buf() @@ -419,18 +535,37 @@ public: } } virtual int get_data_size() { return 0;} - virtual int get_data_body_offset() { return 0; } int get_event_len() { - return (cached_event_len ? cached_event_len : - (cached_event_len = LOG_EVENT_HEADER_LEN + get_data_size())); + /* + We don't re-use the cached event's length anymore (we did in 4.x) because + this leads to nasty problems: when the 5.0 slave reads an event from a 4.0 + master, it caches the event's length, then this event is converted before + it goes into the relay log, so it would be written to the relay log with + its old length, which is garbage. + */ + return (cached_event_len=(LOG_EVENT_HEADER_LEN + get_data_size())); } - static Log_event* read_log_event(const char* buf, int event_len, - const char **error, bool old_format); + static Log_event* read_log_event(const char* buf, uint event_len, + const char **error, + const Format_description_log_event + *description_event); /* returns the human readable name of the event's type */ const char* get_type_str(); }; +/* + One class for each type of event. + Two constructors for each class: + - one to create the event for logging (when the server acts as a master), + called after an update to the database is done, + which accepts parameters like the query, the database, the options for LOAD + DATA INFILE... + - one to create the event from a packet (when the server acts as a slave), + called before reproducing the update, which accepts parameters (like a + buffer). Used to read from the master, from the relay log, and in + mysqlbinlog. This constructor must be format-tolerant. +*/ /***************************************************************************** @@ -445,6 +580,7 @@ protected: char* data_buf; public: const char* query; + const char* catalog; const char* db; /* If we already know the length of the query string @@ -462,6 +598,52 @@ public: BUG#1686). */ ulong slave_proxy_id; + + /* + Binlog format 3 and 4 start to differ (as far as class members are + concerned) from here. + */ + + uint catalog_len; /* <= 255 char */ + + /* + We want to be able to store a variable number of N-bit status vars: + (generally N=32; but N=64 for SQL_MODE) a user may want to log the number of + affected rows (for debugging) while another does not want to lose 4 bytes in + this. 
+ The storage on disk is the following: + status_vars_len is part of the post-header, + status_vars are in the variable-length part, after the post-header, before + the db & query. + status_vars on disk is a sequence of pairs (code, value) where 'code' means + 'sql_mode', 'affected' etc. Sometimes 'value' must be a short string, so its + first byte is its length. For now the order of status vars is: + flags2 - sql_mode - catalog. + We should add the same thing to Load_log_event, but in fact + LOAD DATA INFILE is going to be logged with a new type of event (logging of + the plain text query), so Load_log_event would be frozen, so no need. The + new way of logging LOAD DATA INFILE would use a derived class of + Query_log_event, so automatically benefit from the work already done for + status variables in Query_log_event. + */ + uint16 status_vars_len; + + /* + 'flags2' is a second set of flags (on top of those in Log_event), for + session variables. These are thd->options which is & against a mask + (OPTIONS_WRITTEN_TO_BINLOG). + flags2_inited helps make a difference between flags2==0 (3.23 or 4.x + master, we don't know flags2, so use the slave server's global options) and + flags2==0 (5.0 master, we know this has a meaning of flags all down which + must influence the query). + */ + bool flags2_inited; + bool sql_mode_inited; + + uint32 flags2; + /* In connections sql_mode is 32 bits now but will be 64 bits soon */ + ulong sql_mode; + #ifndef MYSQL_CLIENT Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, @@ -472,10 +654,11 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Query_log_event(const char* buf, int event_len, bool old_format); + Query_log_event(const char* buf, uint event_len, + const Format_description_log_event *description_event); ~Query_log_event() { if (data_buf) @@ -486,14 +669,11 @@ public: Log_event_type get_type_code() { return QUERY_EVENT; } int write(IO_CACHE* file); int write_data(IO_CACHE* file); // returns 0 on success, -1 on error - bool is_valid() { return query != 0; } + const bool is_valid() { return query != 0; } int get_data_size() { - return (q_len + db_len + 2 - + 4 // thread_id - + 4 // exec_time - + 2 // error_code - ); + /* Note that the "1" below is the db's length. */ + return (q_len + db_len + 1 + status_vars_len + QUERY_HEADER_LEN); } }; @@ -504,6 +684,7 @@ public: Slave Log Event class Note that this class is currently not used at all; no code writes a Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event). + So it's not a problem if this code is not maintained. 
****************************************************************************/ class Slave_log_event: public Log_event @@ -524,13 +705,13 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Slave_log_event(const char* buf, int event_len); + Slave_log_event(const char* buf, uint event_len); ~Slave_log_event(); int get_data_size(); - bool is_valid() { return master_host != 0; } + const bool is_valid() { return master_host != 0; } Log_event_type get_type_code() { return SLAVE_EVENT; } int write_data(IO_CACHE* file ); }; @@ -546,12 +727,18 @@ public: class Load_log_event: public Log_event { protected: - int copy_log_event(const char *buf, ulong event_len, bool old_format); + int copy_log_event(const char *buf, ulong event_len, + int body_offset, const Format_description_log_event* description_event); public: ulong thread_id; ulong slave_proxy_id; uint32 table_name_len; + /* + No need to have a catalog, as these events can only come from 4.x. + TODO: this may become false if Dmitri pushes his new LOAD DATA INFILE in + 5.0 only (not in 4.x). + */ uint32 db_len; uint32 fname_len; uint32 num_fields; @@ -599,11 +786,18 @@ public: bool use_rli_only_for_errors); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool commented); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info = 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented); #endif - Load_log_event(const char* buf, int event_len, bool old_format); + /* + Note that for all the events related to LOAD DATA (Load_log_event, + Create_file/Append/Exec/Delete, we pass description_event; however as + logging of LOAD DATA is going to be changed in 4.1 or 5.0, this is only used + for the common_header_len (post_header_len will not be changed). + */ + Load_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Load_log_event() {} Log_event_type get_type_code() @@ -612,27 +806,31 @@ public: } int write_data_header(IO_CACHE* file); int write_data_body(IO_CACHE* file); - bool is_valid() { return table_name != 0; } + const bool is_valid() { return table_name != 0; } int get_data_size() { - return (table_name_len + 2 + db_len + 2 + fname_len - + 4 // thread_id - + 4 // exec_time - + 4 // skip_lines - + 4 // field block len + return (table_name_len + db_len + 2 + fname_len + + LOAD_HEADER_LEN + sql_ex.data_size() + field_block_len + num_fields); } - int get_data_body_offset() { return LOAD_EVENT_OVERHEAD; } }; extern char server_version[SERVER_VERSION_LENGTH]; /***************************************************************************** - Start Log Event class + Start Log Event_v3 class + Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and + 4.x). + Format_description_log_event derives from Start_log_event_v3; it is the + Start_log_event of binlog format 4 (MySQL 5.0), that is, the event that + describes the other events' header/postheader lengths. This event is sent by + MySQL 5.0 whenever it starts sending a new binlog if the requested position + is >4 (otherwise if ==4 the event will be sent naturally). 
+ ****************************************************************************/ -class Start_log_event: public Log_event +class Start_log_event_v3: public Log_event { public: /* @@ -660,27 +858,81 @@ public: char server_version[ST_SERVER_VER_LEN]; #ifndef MYSQL_CLIENT - Start_log_event() :Log_event(), binlog_version(BINLOG_VERSION) - { - created = (time_t) when; - memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); - } + Start_log_event_v3(); #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + Start_log_event_v3() {} + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Start_log_event(const char* buf, bool old_format); - ~Start_log_event() {} - Log_event_type get_type_code() { return START_EVENT;} + Start_log_event_v3(const char* buf, + const Format_description_log_event* description_event); + ~Start_log_event_v3() {} + Log_event_type get_type_code() { return START_EVENT_V3;} int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } int get_data_size() { - return START_HEADER_LEN; + return START_V3_HEADER_LEN; //no variable-sized part + } +}; + +/* + For binlog version 4. + This event is saved by threads which read it, as they need it for future + use (to decode the ordinary events). +*/ + +class Format_description_log_event: public Start_log_event_v3 +{ +public: + /* + The size of the fixed header which _all_ events have + (for binlogs written by this version, this is equal to + LOG_EVENT_HEADER_LEN), except FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT + (those have a header of size LOG_EVENT_MINIMAL_HEADER_LEN). + */ + uint8 common_header_len; + uint8 number_of_event_types; + /* The list of post-headers' lengthes */ + uint8 *post_header_len; + + Format_description_log_event(uint8 binlog_ver, const char* server_ver=0); + +#ifndef MYSQL_CLIENT +#ifdef HAVE_REPLICATION + int exec_event(struct st_relay_log_info* rli); +#endif /* HAVE_REPLICATION */ +#endif + + Format_description_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); + ~Format_description_log_event() { my_free((gptr)post_header_len, MYF(0)); } + Log_event_type get_type_code() { return FORMAT_DESCRIPTION_EVENT;} + int write_data(IO_CACHE* file); + const bool is_valid() + { + return ((common_header_len >= ((binlog_version==1) ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN)) && + (post_header_len != NULL)); + } + int get_event_len() + { + int i= LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size(); + DBUG_PRINT("info",("event_len=%d",i)); + return i; + } + int get_data_size() + { + /* + The vector of post-header lengths is considered as part of the + post-header, because in a given version it never changes (contrary to the + query in a Query_log_event). 
+ */ + return FORMAT_DESCRIPTION_HEADER_LEN; } }; @@ -707,23 +959,26 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Intvar_log_event(const char* buf, bool old_format); + Intvar_log_event(const char* buf, const Format_description_log_event* description_event); ~Intvar_log_event() {} Log_event_type get_type_code() { return INTVAR_EVENT;} const char* get_var_type_name(); int get_data_size() { return 9; /* sizeof(type) + sizeof(val) */;} int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** Rand Log Event class - Logs random seed used by the next RAND(), and by PASSWORD() in 4.1. + Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0. + 4.1.1 does not need it (it's repeatable again) so this event needn't be + written in 4.1.1 for PASSWORD() (but the fact that it is written is just a + waste, it does not cause bugs). ****************************************************************************/ class Rand_log_event: public Log_event @@ -741,15 +996,15 @@ class Rand_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Rand_log_event(const char* buf, bool old_format); + Rand_log_event(const char* buf, const Format_description_log_event* description_event); ~Rand_log_event() {} Log_event_type get_type_code() { return RAND_EVENT;} int get_data_size() { return 16; /* sizeof(ulonglong) * 2*/ } int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** @@ -759,6 +1014,9 @@ class Rand_log_event: public Log_event Every time a query uses the value of a user variable, a User_var_log_event is written before the Query_log_event, to set the user variable. + Every time a query uses the value of a user variable, a User_var_log_event is + written before the Query_log_event, to set the user variable. 
+ ****************************************************************************/ class User_var_log_event: public Log_event { @@ -780,10 +1038,10 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - User_var_log_event(const char* buf, bool old_format); + User_var_log_event(const char* buf, const Format_description_log_event* description_event); ~User_var_log_event() {} Log_event_type get_type_code() { return USER_VAR_EVENT;} int get_data_size() @@ -793,7 +1051,7 @@ public: UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE + val_len); } int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** @@ -811,15 +1069,15 @@ public: {} int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Stop_log_event(const char* buf, bool old_format): - Log_event(buf, old_format) + Stop_log_event(const char* buf, const Format_description_log_event* description_event): + Log_event(buf, description_event) {} ~Stop_log_event() {} Log_event_type get_type_code() { return STOP_EVENT;} - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; #endif /* HAVE_REPLICATION */ @@ -852,18 +1110,23 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Rotate_log_event(const char* buf, int event_len, bool old_format); + Rotate_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Rotate_log_event() { if (alloced) my_free((gptr) new_log_ident, MYF(0)); } Log_event_type get_type_code() { return ROTATE_EVENT;} + int get_event_len() + { + return (LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size()); + } int get_data_size() { return ident_len + ROTATE_HEADER_LEN;} - bool is_valid() { return new_log_ident != 0; } + const bool is_valid() { return new_log_ident != 0; } int write_data(IO_CACHE* file); }; @@ -902,11 +1165,12 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool enable_local); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local); #endif - Create_file_log_event(const char* buf, int event_len, bool old_format); + Create_file_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Create_file_log_event() { my_free((char*) event_buf, MYF(MY_ALLOW_ZERO_PTR)); @@ -922,12 +1186,7 @@ public: Load_log_event::get_data_size() + 4 + 1 + block_len); } - int get_data_body_offset() - { - return (fake_base ? 
LOAD_EVENT_OVERHEAD: - LOAD_EVENT_OVERHEAD + CREATE_FILE_HEADER_LEN); - } - bool is_valid() { return inited_from_old || block != 0; } + const bool is_valid() { return inited_from_old || block != 0; } int write_data_header(IO_CACHE* file); int write_data_body(IO_CACHE* file); /* @@ -969,14 +1228,15 @@ public: void pack_info(Protocol* protocol); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Append_block_log_event(const char* buf, int event_len); + Append_block_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Append_block_log_event() {} Log_event_type get_type_code() { return APPEND_BLOCK_EVENT;} int get_data_size() { return block_len + APPEND_BLOCK_HEADER_LEN ;} - bool is_valid() { return block != 0; } + const bool is_valid() { return block != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -999,15 +1259,16 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool enable_local); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local); #endif - Delete_file_log_event(const char* buf, int event_len); + Delete_file_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Delete_file_log_event() {} Log_event_type get_type_code() { return DELETE_FILE_EVENT;} int get_data_size() { return DELETE_FILE_HEADER_LEN ;} - bool is_valid() { return file_id != 0; } + const bool is_valid() { return file_id != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -1030,14 +1291,15 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Execute_load_log_event(const char* buf, int event_len); + Execute_load_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Execute_load_log_event() {} Log_event_type get_type_code() { return EXEC_LOAD_EVENT;} int get_data_size() { return EXEC_LOAD_HEADER_LEN ;} - bool is_valid() { return file_id != 0; } + const bool is_valid() { return file_id != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -1046,13 +1308,18 @@ public: class Unknown_log_event: public Log_event { public: - Unknown_log_event(const char* buf, bool old_format): - Log_event(buf, old_format) + /* + Even if this is an unknown event, we still pass description_event to + Log_event's ctor, this way we can extract maximum information from the + event's header (the unique ID for example). 
+ */ + Unknown_log_event(const char* buf, const Format_description_log_event* description_event): + Log_event(buf, description_event) {} ~Unknown_log_event() {} - void print(FILE* file, bool short_form= 0, char* last_db= 0); + void print(FILE* file, bool short_form= 0, LAST_EVENT_INFO* last_event_info= 0); Log_event_type get_type_code() { return UNKNOWN_EVENT;} - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; #endif diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 72ac3af70ff..792b1a1189f 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -14,6 +14,15 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + Mostly this file is used in the server. But a little part of it is used in + mysqlbinlog too (definition of SELECT_DISTINCT and others). + The consequence is that 90% of the file is wrapped in #ifndef MYSQL_CLIENT, + except the part which must be in the server and in the client. +*/ + +#ifndef MYSQL_CLIENT + #include <my_global.h> #include <mysql_version.h> #include <mysql_embed.h> @@ -55,7 +64,7 @@ char *sql_strmake_with_convert(const char *str, uint32 arg_length, CHARSET_INFO *from_cs, uint32 max_res_length, CHARSET_INFO *to_cs, uint32 *result_length); -void kill_one_thread(THD *thd, ulong id); +void kill_one_thread(THD *thd, ulong id, bool only_kill_query); bool net_request_file(NET* net, const char* fname); char* query_table_status(THD *thd,const char *db,const char *table_name); @@ -112,6 +121,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define TIME_FOR_COMPARE 5 // 5 compares == one read /* + Number of comparisons of table rowids equivalent to reading one row from a + table. +*/ +#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2) + +/* + For sequential disk seeks the cost formula is: + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip + + The cost of average seek + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0. +*/ +#define DISK_SEEK_BASE_COST ((double)0.5) + +#define BLOCKS_IN_AVG_SEEK 128 + +#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK) + + +/* Number of rows in a reference table when refereed through a not unique key. This value is only used when we don't know anything about the key distribution. @@ -173,8 +202,15 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define TEST_SIGINT 1024 /* Allow sigint on threads */ #define TEST_SYNCHRONIZATION 2048 /* get server to do sleep in some places */ +#endif + +/* + This is included in the server and in the client. + Options for select set by the yacc parser (stored in lex->options). + None of the 32 defines below should have its value changed, or this will + break replication. 
+*/ -/* options for select set by the yacc parser (stored in lex->options) */ #define SELECT_DISTINCT (1L << 0) #define SELECT_STRAIGHT_JOIN (1L << 1) #define SELECT_DESCRIBE (1L << 2) @@ -212,6 +248,9 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27) #define SELECT_NO_UNLOCK (1L << 28) +/* The rest of the file is included in the server only */ +#ifndef MYSQL_CLIENT + /* Bits for different SQL modes modes (including ANSI mode) */ #define MODE_REAL_AS_FLOAT 1 #define MODE_PIPES_AS_CONCAT 2 @@ -344,6 +383,7 @@ inline THD *_current_thd(void) #include "sql_list.h" #include "sql_map.h" #include "handler.h" +#include "parse_file.h" #include "table.h" #include "field.h" /* Field definitions */ #include "protocol.h" @@ -434,7 +474,7 @@ bool mysql_test_parse_for_slave(THD *thd,char *inBuf,uint length); bool is_update_query(enum enum_sql_command command); bool alloc_query(THD *thd, char *packet, ulong packet_length); void mysql_init_select(LEX *lex); -void mysql_init_query(THD *thd); +void mysql_init_query(THD *thd, bool lexonly=0); bool mysql_new_select(LEX *lex, bool move_down); void create_select_for_variable(const char *var_name); void mysql_init_multi_delete(LEX *lex); @@ -445,7 +485,7 @@ extern "C" pthread_handler_decl(handle_one_connection,arg); extern "C" pthread_handler_decl(handle_bootstrap,arg); void end_thread(THD *thd,bool put_in_cache); void flush_thread_cache(); -void mysql_execute_command(THD *thd); +int mysql_execute_command(THD *thd); bool do_command(THD *thd); bool dispatch_command(enum enum_server_command command, THD *thd, char* packet, uint packet_length); @@ -652,6 +692,7 @@ void mysql_stmt_reset(THD *thd, char *packet); void mysql_stmt_get_longdata(THD *thd, char *pos, ulong packet_length); int check_insert_fields(THD *thd,TABLE *table,List<Item> &fields, List<Item> &values, ulong counter); +void reset_stmt_for_execute(THD *thd, LEX *lex); /* sql_error.cc */ MYSQL_ERROR *push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level, uint code, @@ -760,6 +801,8 @@ extern "C" pthread_handler_decl(handle_manager, arg); void print_where(COND *cond,const char *info); void print_cached_tables(void); void TEST_filesort(SORT_FIELD *sortorder,uint s_length); +void print_plan(JOIN* join, double read_time, double record_count, + uint idx, const char *info); #endif void mysql_print_status(THD *thd); /* key.cc */ @@ -828,6 +871,7 @@ extern char language[LIBLEN],reg_ext[FN_EXTLEN]; extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char log_error_file[FN_REFLEN]; +extern double last_query_cost; extern double log_10[32]; extern ulonglong log_10_int[20]; extern ulonglong keybuff_size; @@ -873,6 +917,7 @@ extern bool using_update_log, opt_large_files, server_id_supplied; extern bool opt_log, opt_update_log, opt_bin_log, opt_slow_log, opt_error_log; extern bool opt_disable_networking, opt_skip_show_db; extern bool volatile abort_loop, shutdown_in_progress, grant_option; +extern bool mysql_proc_table_exists; extern uint volatile thread_count, thread_running, global_read_lock; extern my_bool opt_sql_bin_update, opt_safe_user_create, opt_no_mix_types; extern my_bool opt_safe_show_db, opt_local_infile; @@ -1195,3 +1240,5 @@ bool check_stack_overrun(THD *thd,char *dummy); inline void kill_delayed_threads(void) {} #define check_stack_overrun(A, B) 0 #endif + +#endif /* MYSQL_CLIENT */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 
436f693d734..564a8090b23 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -23,6 +23,7 @@ #include "repl_failsafe.h" #include "stacktrace.h" #include "mysqld_suffix.h" +#include "mysys_err.h" #ifdef HAVE_BERKELEY_DB #include "ha_berkeley.h" #endif @@ -40,6 +41,8 @@ #include <thr_alarm.h> #include <ft_global.h> #include <errmsg.h> +#include "sp_rcontext.h" +#include "sp_cache.h" #define mysqld_charset &my_charset_latin1 @@ -309,6 +312,7 @@ ulong expire_logs_days = 0; ulong rpl_recovery_rank=0; ulong my_bind_addr; /* the address we bind to */ volatile ulong cached_thread_count= 0; +double last_query_cost= -1; /* -1 denotes that no query was compiled yet */ double log_10[32]; /* 10 potences */ time_t start_time; @@ -620,7 +624,7 @@ static void close_connections(void) { DBUG_PRINT("quit",("Informing thread %ld that it's time to die", tmp->thread_id)); - tmp->killed=1; + tmp->killed= THD::KILL_CONNECTION; if (tmp->mysys_var) { tmp->mysys_var->abort=1; @@ -803,6 +807,7 @@ static void __cdecl kill_server(int sig_ptr) unireg_abort(1); /* purecov: inspected */ else unireg_end(); + #ifdef __NETWARE__ pthread_join(select_thread, NULL); // wait for main thread #endif /* __NETWARE__ */ @@ -893,7 +898,6 @@ void clean_up(bool print_message) mysql_log.cleanup(); mysql_slow_log.cleanup(); - mysql_update_log.cleanup(); mysql_bin_log.cleanup(); #ifdef HAVE_REPLICATION @@ -1424,7 +1428,7 @@ extern "C" sig_handler abort_thread(int sig __attribute__((unused))) THD *thd=current_thd; DBUG_ENTER("abort_thread"); if (thd) - thd->killed=1; + thd->killed= THD::KILL_CONNECTION; DBUG_VOID_RETURN; } #endif @@ -2068,6 +2072,10 @@ extern "C" int my_message_sql(uint error, const char *str, DBUG_PRINT("error", ("Message: '%s'", str)); if ((thd= current_thd)) { + if (thd->spcont && thd->spcont->find_handler(error)) + { + DBUG_RETURN(0); + } /* thd->lex->current_select == 0 if lex structure is not inited (not query command (COM_QUERY)) @@ -2191,7 +2199,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname, } return log->open(opt_name, type, 0, index_file_name, (read_append) ? SEQ_READ_APPEND : WRITE_CACHE, - no_auto_events, max_size); + no_auto_events, max_size, 0); } @@ -2282,7 +2290,6 @@ static int init_common_variables(const char *conf_file_name, int argc, before MY_INIT(). So we do it here. */ mysql_log.init_pthread_objects(); - mysql_update_log.init_pthread_objects(); mysql_slow_log.init_pthread_objects(); mysql_bin_log.init_pthread_objects(); @@ -2417,6 +2424,7 @@ static int init_thread_environment() (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); (void) pthread_cond_init(&COND_rpl_status, NULL); #endif + sp_cache_init(); /* Parameter for threads created for connections */ (void) pthread_attr_init(&connection_attrib); (void) pthread_attr_setdetachstate(&connection_attrib, @@ -2475,9 +2483,55 @@ static int init_server_components() LOG_NORMAL, 0, 0, 0); if (opt_update_log) { - open_log(&mysql_update_log, glob_hostname, opt_update_logname, "", - NullS, LOG_NEW, 0, 0, 0); - using_update_log=1; + /* + Update log is removed since 5.0. But we still accept the option. + The idea is if the user already uses the binlog and the update log, + we completely ignore any option/variable related to the update log, like + if the update log did not exist. But if the user uses only the update log, + then we translate everything into binlog for him (with warnings). 
+ Implementation of the above : + - If mysqld is started with --log-update and --log-bin, + ignore --log-update (print a warning), push a warning when SQL_LOG_UPDATE + is used, and turn off --sql-bin-update-same. + This will completely ignore SQL_LOG_UPDATE + - If mysqld is started with --log-update only, + change it to --log-bin (with the filename passed to log-update, + plus '-bin') (print a warning), push a warning when SQL_LOG_UPDATE is + used, and turn on --sql-bin-update-same. + This will translate SQL_LOG_UPDATE to SQL_LOG_BIN. + + Note that we tell the user that --sql-bin-update-same is deprecated and + does nothing, and we don't take into account if he used this option or + not; but internally we give this variable a value to have the behaviour we + want (i.e. have SQL_LOG_UPDATE influence SQL_LOG_BIN or not). + As sql-bin-update-same, log-update and log-bin cannot be changed by the user + after starting the server (they are not variables), the user will not + later interfere with the settings we do here. + */ + if (opt_bin_log) + { + opt_sql_bin_update= 0; + sql_print_error("The update log is no longer supported by MySQL in \ +version 5.0 and above. It is replaced by the binary log."); + } + else + { + opt_sql_bin_update= 1; + opt_bin_log= 1; + if (opt_update_logname) + { + // as opt_bin_log==0, no need to free opt_bin_logname + if (!(opt_bin_logname= my_strdup(opt_update_logname, MYF(MY_WME)))) + exit(EXIT_OUT_OF_MEMORY); + sql_print_error("The update log is no longer supported by MySQL in \ +version 5.0 and above. It is replaced by the binary log. Now starting MySQL \ +with --log-bin='%s' instead.",opt_bin_logname); + } + else + sql_print_error("The update log is no longer supported by MySQL in \ +version 5.0 and above. It is replaced by the binary log. 
Now starting MySQL \ +with --log-bin instead."); + } } if (opt_slow_log) open_log(&mysql_slow_log, glob_hostname, opt_slow_logname, "-slow.log", @@ -3022,7 +3076,8 @@ default_service_handling(char **argv, const char *servicename, const char *displayname, const char *file_path, - const char *extra_opt) + const char *extra_opt, + const char *account_name) { char path_and_service[FN_REFLEN+FN_REFLEN+32], *pos, *end; end= path_and_service + sizeof(path_and_service)-3; @@ -3041,12 +3096,12 @@ default_service_handling(char **argv, if (Service.got_service_option(argv, "install")) { - Service.Install(1, servicename, displayname, path_and_service); + Service.Install(1, servicename, displayname, path_and_service, account_name); return 0; } if (Service.got_service_option(argv, "install-manual")) { - Service.Install(0, servicename, displayname, path_and_service); + Service.Install(0, servicename, displayname, path_and_service, account_name); return 0; } if (Service.got_service_option(argv, "remove")) @@ -3081,7 +3136,7 @@ int main(int argc, char **argv) if (argc == 2) { if (!default_service_handling(argv, MYSQL_SERVICENAME, MYSQL_SERVICENAME, - file_path, "")) + file_path, "", NULL)) return 0; if (Service.IsService(argv[1])) /* Start an optional service */ { @@ -3100,7 +3155,7 @@ int main(int argc, char **argv) } else if (argc == 3) /* install or remove any optional service */ { - if (!default_service_handling(argv, argv[2], argv[2], file_path, "")) + if (!default_service_handling(argv, argv[2], argv[2], file_path, "", NULL)) return 0; if (Service.IsService(argv[2])) { @@ -3118,15 +3173,39 @@ int main(int argc, char **argv) return 0; } } - else if (argc == 4) + else if (argc >= 4) { - /* - Install an optional service with optional config file - mysqld --install-manual mysqldopt --defaults-file=c:\miguel\my.ini - */ - if (!default_service_handling(argv, argv[2], argv[2], file_path, - argv[3])) - return 0; + const char *defaults_file = "--defaults-file"; + const char *service = "--local-service"; + char extra_opt[FN_REFLEN] = ""; + char *account_name = NULL; + char *option; + int index; + for (index = 3; index < argc; index++) + { + option= argv[index]; + /* + Install an optional service with optional config file + mysqld --install-manual mysqldopt --defaults-file=c:\miguel\my.ini + */ + if (strncmp(option, defaults_file, strlen(defaults_file)) == 0) + { + strmov(extra_opt, option); + } + else + /* + Install an optional service as local service + mysqld --install-manual mysqldopt --local-service + */ + if (strncmp(option, service, strlen(service)) == 0) + { + account_name=(char*)malloc(27); + strmov(account_name, "NT AUTHORITY\\LocalService\0"); + } + } + + if (!default_service_handling(argv, argv[2], argv[2], file_path, extra_opt, account_name)) + return 0; } else if (argc == 1 && Service.IsService(MYSQL_SERVICENAME)) { @@ -3267,7 +3346,7 @@ static void create_new_thread(THD *thd) ("Can't create thread to handle request (error %d)", error)); thread_count--; - thd->killed=1; // Safety + thd->killed= THD::KILL_CONNECTION; // Safety (void) pthread_mutex_unlock(&LOCK_thread_count); statistic_increment(aborted_connects,&LOCK_status); net_printf(thd,ER_CANT_CREATE_THREAD,error); @@ -3947,7 +4026,9 @@ enum options_mysqld OPT_TIME_FORMAT, OPT_DATETIME_FORMAT, OPT_LOG_QUERIES_NOT_USING_INDEXES, - OPT_DEFAULT_TIME_ZONE + OPT_DEFAULT_TIME_ZONE, + OPT_OPTIMIZER_SEARCH_DEPTH, + OPT_OPTIMIZER_PRUNE_LEVEL }; @@ -4178,7 +4259,8 @@ Disable with --skip-bdb (will save memory).", (gptr*) &myisam_log_filename, (gptr*) 
&myisam_log_filename, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"log-update", OPT_UPDATE_LOG, - "Log updates to file.# where # is a unique number if not given.", + "The update log is deprecated since version 5.0, is replaced by the binary \ +log and this option justs turns on --log-bin instead.", (gptr*) &opt_update_logname, (gptr*) &opt_update_logname, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"log-slow-queries", OPT_SLOW_QUERY_LOG, @@ -4466,9 +4548,9 @@ replicating a LOAD DATA INFILE command.", (gptr*) &mysqld_unix_port, (gptr*) &mysqld_unix_port, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"sql-bin-update-same", OPT_SQL_BIN_UPDATE_SAME, - "If set, setting SQL_LOG_BIN to a value will automatically set SQL_LOG_UPDATE to the same value and vice versa.", - (gptr*) &opt_sql_bin_update, (gptr*) &opt_sql_bin_update, 0, GET_BOOL, - NO_ARG, 0, 0, 0, 0, 0, 0}, + "The update log is deprecated since version 5.0, is replaced by the binary \ +log and this option does nothing anymore.", + 0, 0, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"sql-mode", OPT_SQL_MODE, "Syntax: sql-mode=option[,option[,option...]] where option can be one of: REAL_AS_FLOAT, PIPES_AS_CONCAT, ANSI_QUOTES, IGNORE_SPACE, ONLY_FULL_GROUP_BY, NO_UNSIGNED_SUBTRACTION.", (gptr*) &sql_mode_str, (gptr*) &sql_mode_str, 0, GET_STR, REQUIRED_ARG, 0, @@ -4846,6 +4928,16 @@ The minimum value for this variable is 4096.", "If this is not 0, then mysqld will use this value to reserve file descriptors to use with setrlimit(). If this value is 0 then mysqld will reserve max_connections*5 or max_connections + table_cache*2 (whichever is larger) number of files.", (gptr*) &open_files_limit, (gptr*) &open_files_limit, 0, GET_ULONG, REQUIRED_ARG, 0, 0, OS_FILE_LIMIT, 0, 1, 0}, + {"optimizer_prune_level", OPT_OPTIMIZER_PRUNE_LEVEL, + "Controls the heuristic(s) applied during query optimization to prune less-promising partial plans from the optimizer search space. Meaning: 0 - do not apply any heuristic, thus perform exhaustive search; 1 - prune plans based on number of retrieved rows.", + (gptr*) &global_system_variables.optimizer_prune_level, + (gptr*) &max_system_variables.optimizer_prune_level, + 0, GET_ULONG, OPT_ARG, 1, 0, 1, 0, 1, 0}, + {"optimizer_search_depth", OPT_OPTIMIZER_SEARCH_DEPTH, + "Maximum depth of search performed by the query optimizer. Values larger than the number of relations in a query result in better query plans, but take longer to compile a query. Smaller values than the number of tables in a relation result in faster optimization, but may produce very bad query plans. 
If set to 0, the system will automatically pick a reasonable value; if set to MAX_TABLES+2, the optimizer will switch to the original find_best (used for testing/comparison).", + (gptr*) &global_system_variables.optimizer_search_depth, + (gptr*) &max_system_variables.optimizer_search_depth, + 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, {"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE, "The size of the buffer that is allocated when preloading indexes", (gptr*) &global_system_variables.preload_buff_size, @@ -5148,6 +5240,7 @@ struct show_var_st status_vars[]= { SHOW_KEY_CACHE_LONG}, {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, SHOW_KEY_CACHE_LONG}, + {"Last_query_cost", (char*) &last_query_cost, SHOW_DOUBLE}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_CONST}, {"Open_files", (char*) &my_file_opened, SHOW_LONG_CONST}, diff --git a/sql/net_serv.cc b/sql/net_serv.cc index c2da47b480e..36a10370508 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -129,6 +129,7 @@ my_bool my_net_init(NET *net, Vio* vio) net->buff_end=net->buff+net->max_packet; net->vio = vio; net->no_send_ok = 0; + net->no_send_eof = 0; net->error=0; net->return_errno=0; net->return_status=0; net->pkt_nr=net->compress_pkt_nr=0; net->write_pos=net->read_pos = net->buff; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 32a0391a777..08eae68c855 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -23,6 +23,19 @@ */ +/* + Classes in this file are used in the following way: + 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects + is created. #of rows in table and index statistics are ignored at this + step. + 2. Created SEL_TREE and index stats data are used to construct a + TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read + plans may be created. + 3. The least expensive table read plan is used to create a tree of + QUICK_SELECT_I-derived objects which are later used for row retrieval. + QUICK_RANGEs are also created in this step. +*/ + #ifdef __GNUC__ #pragma implementation // gcc: Class implementation #endif @@ -167,8 +180,7 @@ public: min_value=arg->max_value; min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN; } - void store(uint length,char **min_key,uint min_key_flag, - char **max_key, uint max_key_flag) + void store_min(uint length,char **min_key,uint min_key_flag) { if ((min_flag & GEOM_FLAG) || (!(min_flag & NO_MIN_RANGE) && @@ -183,6 +195,11 @@ public: memcpy(*min_key,min_value,length); (*min_key)+= length; } + } + void store(uint length,char **min_key,uint min_key_flag, + char **max_key, uint max_key_flag) + { + store_min(length, min_key, min_key_flag); if (!(max_flag & NO_MAX_RANGE) && !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX))) { @@ -267,31 +284,77 @@ public: SEL_ARG *clone_tree(); }; +class SEL_IMERGE; + class SEL_TREE :public Sql_alloc { public: enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; SEL_TREE(enum Type type_arg) :type(type_arg) {} - SEL_TREE() :type(KEY) { bzero((char*) keys,sizeof(keys));} + SEL_TREE() :type(KEY) + { + keys_map.clear_all(); + bzero((char*) keys,sizeof(keys)); + } SEL_ARG *keys[MAX_KEY]; + key_map keys_map; /* bitmask of non-NULL elements in keys */ + + /* + Possible ways to read rows using index_merge. The list is non-empty only + if type==KEY. Currently can be non empty only if keys_map.is_clear_all(). 
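
To make the keys[]/merges split concrete, a rough illustration follows (an assumption-laden sketch, not part of the patch; single-column indexes named key1 and key2 are assumed, and the real trees also carry the ROR bookkeeping shown below):

    /*
      Illustration (simplified):

        WHERE key1 < 10 AND key2 = 5
          -> keys[] holds an interval list for key1 and one for key2,
             keys_map has both bits set, merges stays empty
             (plain range / ROR-intersect candidate).

        WHERE key1 < 10 OR key2 = 5
          -> keys_map stays empty and merges holds one SEL_IMERGE with two
             SEL_TREEs, {key1 < 10} and {key2 = 5}
             (index_merge / ROR-union candidate).
    */
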
+ */ + List<SEL_IMERGE> merges; + + /* The members below are filled/used only after get_mm_tree is done */ + key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ + uint n_ror_scans; /* number of set bits in ror_scans_map */ + + struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ + struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ + /* Note that #records for each key scan is stored in table->quick_rows */ }; typedef struct st_qsel_param { THD *thd; TABLE *table; - KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY]; + KEY_PART *key_parts,*key_parts_end; + KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ MEM_ROOT *mem_root; table_map prev_tables,read_tables,current_table; - uint baseflag, keys, max_key_part, range_count; - uint real_keynr[MAX_KEY]; + uint baseflag, max_key_part, range_count; + + uint keys; /* number of keys used in the query */ + + /* used_key_no -> table_key_no translation table */ + uint real_keynr[MAX_KEY]; + char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys COND *cond; + + uint fields_bitmap_size; + MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ + + key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ + + uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ + uint imerge_cost_buff_size; /* size of the buffer */ + + /* true if last checked tree->key can be used for ROR-scan */ + bool is_ror_scan; } PARAM; +class TABLE_READ_PLAN; + class TRP_RANGE; + class TRP_ROR_INTERSECT; + class TRP_ROR_UNION; + class TRP_ROR_INDEX_MERGE; + +struct st_ror_scan_info; + static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); @@ -299,32 +362,277 @@ static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, char *min_key,uint min_key_flag, char *max_key, uint max_key_flag); -static QUICK_SELECT *get_quick_select(PARAM *param,uint index, - SEL_ARG *key_tree); +QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index, + SEL_ARG *key_tree, + MEM_ROOT *alloc = NULL); +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time); +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + double read_time, + bool *are_all_covering); +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time); +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time); +static int get_index_merge_params(PARAM *param, key_map& needed_reg, + SEL_IMERGE *imerge, double *read_time, + ha_rows* imerge_rows); +inline double get_index_only_read_time(const PARAM* param, ha_rows records, + int keynr); + #ifndef DBUG_OFF -static void print_quick(QUICK_SELECT *quick,const key_map* needed_reg); +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg); +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end); +static void 
print_rowid(byte* val, int len); +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif + static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag); static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1); -static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key, +bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, SEL_ARG *key_tree,char *min_key,uint min_key_flag, char *max_key,uint max_key_flag); static bool eq_tree(SEL_ARG* a,SEL_ARG *b); static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); -static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length); +static bool null_part_in_key(KEY_PART *key_part, const char *key, + uint length); +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param); + + +/* + SEL_IMERGE is a list of possible ways to do index merge, i.e. it is + a condition in the following form: + (t_1||t_2||...||t_N) && (next) + + where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair + (t_i,t_j) contains SEL_ARGS for the same index. + + SEL_TREE contained in SEL_IMERGE always has merges=NULL. + + This class relies on memory manager to do the cleanup. +*/ + +class SEL_IMERGE : public Sql_alloc +{ + enum { PREALLOCED_TREES= 10}; +public: + SEL_TREE *trees_prealloced[PREALLOCED_TREES]; + SEL_TREE **trees; /* trees used to do index_merge */ + SEL_TREE **trees_next; /* last of these trees */ + SEL_TREE **trees_end; /* end of allocated space */ + + SEL_ARG ***best_keys; /* best keys to read in SEL_TREEs */ + + SEL_IMERGE() : + trees(&trees_prealloced[0]), + trees_next(trees), + trees_end(trees + PREALLOCED_TREES) + {} + int or_sel_tree(PARAM *param, SEL_TREE *tree); + int or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree); + int or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge); +}; + + +/* + Add SEL_TREE to this index_merge without any checks, + + NOTES + This function implements the following: + (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs + + RETURN + 0 - OK + -1 - Out of memory. +*/ + +int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) +{ + if (trees_next == trees_end) + { + const int realloc_ratio= 2; /* Double size for next round */ + uint old_elements= (trees_end - trees); + uint old_size= sizeof(SEL_TREE**) * old_elements; + uint new_size= old_size * realloc_ratio; + SEL_TREE **new_trees; + if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size))) + return -1; + memcpy(new_trees, trees, old_size); + trees= new_trees; + trees_next= trees + old_elements; + trees_end= trees + old_elements * realloc_ratio; + } + *(trees_next++)= tree; + return 0; +} + + +/* + Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree, + combining new_tree with one of the trees in this SEL_IMERGE if they both + have SEL_ARGs for the same key. + + SYNOPSIS + or_sel_tree_with_checks() + param PARAM from SQL_SELECT::test_quick_select + new_tree SEL_TREE with type KEY or KEY_SMALLER. + + NOTES + This does the following: + (t_1||...||t_k)||new_tree = + either + = (t_1||...||t_k||new_tree) + or + = (t_1||....||(t_j|| new_tree)||...||t_k), + + where t_i, y are SEL_TREEs. + new_tree is combined with the first t_j it has a SEL_ARG on common + key with. 
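
A small worked case of the two outcomes described above (a sketch assuming single-column indexes on key1, key2 and key3):

    /*
      Example:  imerge = ( {key1 = 1} || {key2 > 7} )

        OR-ed with {key3 < 4}:  no common key, so the tree is appended:
            ( {key1 = 1} || {key2 > 7} || {key3 < 4} )

        OR-ed with {key1 = 9}:  shares key1 with t_1, so tree_or folds it in:
            ( {key1 = 1 OR key1 = 9} || {key2 > 7} )
    */
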
As a consequence of this, choice of keys to do index_merge + read may depend on the order of conditions in WHERE part of the query. + + RETURN + 0 OK + 1 One of the trees was combined with new_tree to SEL_TREE::ALWAYS, + and (*this) should be discarded. + -1 An error occurred. +*/ + +int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) +{ + for (SEL_TREE** tree = trees; + tree != trees_next; + tree++) + { + if (sel_trees_can_be_ored(*tree, new_tree, param)) + { + *tree = tree_or(param, *tree, new_tree); + if (!*tree) + return 1; + if (((*tree)->type == SEL_TREE::MAYBE) || + ((*tree)->type == SEL_TREE::ALWAYS)) + return 1; + /* SEL_TREE::IMPOSSIBLE is impossible here */ + return 0; + } + } + + /* New tree cannot be combined with any of existing trees. */ + return or_sel_tree(param, new_tree); +} + + +/* + Perform OR operation on this index_merge and supplied index_merge list. + + RETURN + 0 - OK + 1 - One of conditions in result is always TRUE and this SEL_IMERGE + should be discarded. + -1 - An error occurred +*/ + +int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge) +{ + for (SEL_TREE** tree= imerge->trees; + tree != imerge->trees_next; + tree++) + { + if (or_sel_tree_with_checks(param, *tree)) + return 1; + } + return 0; +} + + +/* + Perform AND operation on two index_merge lists and store result in *im1. +*/ + +inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) +{ + im1->concat(im2); +} + + +/* + Perform OR operation on 2 index_merge lists, storing result in first list. + + NOTES + The following conversion is implemented: + (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) => + => (a_1||b_1). + + i.e. all conjuncts except the first one are currently dropped. + This is done to avoid producing N*K ways to do index_merge. + + If (a_1||b_1) produce a condition that is always true, NULL is returned + and index_merge is discarded (while it is actually possible to try + harder). + + As a consequence of this, choice of keys to do index_merge read may depend + on the order of conditions in WHERE part of the query. + + RETURN + 0 OK, result is stored in *im1 + other Error, both passed lists are unusable +*/ + +int imerge_list_or_list(PARAM *param, + List<SEL_IMERGE> *im1, + List<SEL_IMERGE> *im2) +{ + SEL_IMERGE *imerge= im1->head(); + im1->empty(); + im1->push_back(imerge); + + return imerge->or_sel_imerge_with_checks(param, im2->head()); +} + + +/* + Perform OR operation on index_merge list and key tree. + + RETURN + 0 OK, result is stored in *im1. 
+ other Error +*/ + +int imerge_list_or_tree(PARAM *param, + List<SEL_IMERGE> *im1, + SEL_TREE *tree) +{ + SEL_IMERGE *imerge; + List_iterator<SEL_IMERGE> it(*im1); + while((imerge= it++)) + { + if (imerge->or_sel_tree_with_checks(param, tree)) + it.remove(); + } + return im1->is_empty(); +} /*************************************************************************** -** Basic functions for SQL_SELECT and QUICK_SELECT +** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT ***************************************************************************/ /* make a select from mysql info @@ -392,11 +700,21 @@ SQL_SELECT::~SQL_SELECT() #undef index // Fix for Unixware 7 -QUICK_SELECT::QUICK_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc) - :dont_free(0),error(0),index(key_nr),max_used_key_length(0), - used_key_parts(0), head(table), it(ranges),range(0) +QUICK_SELECT_I::QUICK_SELECT_I() + :max_used_key_length(0), + used_key_parts(0) +{} + +QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, + bool no_alloc, MEM_ROOT *parent_alloc) + :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0) { - if (!no_alloc) + index= key_nr; + head= table; + key_part_info= head->key_info[index].key_part; + my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16); + + if (!no_alloc && !parent_alloc) { // Allocates everything through the internal memroot init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); @@ -404,21 +722,415 @@ QUICK_SELECT::QUICK_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc) } else bzero((char*) &alloc,sizeof(alloc)); - file=head->file; - record=head->record[0]; - init(); + file= head->file; + record= head->record[0]; } -QUICK_SELECT::~QUICK_SELECT() +int QUICK_RANGE_SELECT::init() { + DBUG_ENTER("QUICK_RANGE_SELECT::init"); + DBUG_RETURN(error= file->ha_index_init(index)); +} + +QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() +{ + DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); if (!dont_free) { if (file->inited) file->ha_index_end(); - free_root(&alloc,MYF(0)); + file->extra(HA_EXTRA_NO_KEYREAD); + delete_dynamic(&ranges); /* ranges are allocated in alloc */ + if (free_file) + { + DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, + free_file)); + file->reset(); + file->close(); + } + free_root(&alloc,MYF(0)); + } + DBUG_VOID_RETURN; +} + + +QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, + TABLE *table) + :cur_quick_it(quick_selects),pk_quick_select(NULL),unique(NULL), + thd(thd_param) +{ + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT"); + index= MAX_KEY; + head= table; + bzero(&read_record, sizeof(read_record)); + init_sql_alloc(&alloc,1024,0); + DBUG_VOID_RETURN; +} + +int QUICK_INDEX_MERGE_SELECT::init() +{ + cur_quick_it.rewind(); + cur_quick_select= cur_quick_it++; + return 0; +} + +int QUICK_INDEX_MERGE_SELECT::reset() +{ + int result; + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset"); + result= cur_quick_select->reset() || prepare_unique(); + DBUG_RETURN(result); +} + +bool +QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) +{ + /* + Save quick_select that does scan on clustered primary key as it will be + processed separately. 
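
One reason for this split, suggested by the cost comments further down: rows produced by the secondary-index scans can be checked against the clustered-PK scan's ranges so they are not returned twice (the comments refer to this per-row check as QUICK_RANGE_SELECT::row_in_ranges), while the PK scan itself streams its rows directly. A minimal self-contained sketch of that filtering idea, using simplified stand-in types rather than the server's classes:

    #include <vector>

    /* Illustrative sketch only, simplified stand-ins, not the server code.
       If a row found through a secondary-index scan falls inside one of the
       clustered-PK scan's ranges, the PK scan will return it anyway, so it
       can be skipped here to avoid duplicates. */
    struct Pk_range { long min_pk, max_pk; };

    static bool row_covered_by_pk_scan(long pk,
                                       const std::vector<Pk_range> &ranges)
    {
      for (unsigned i= 0; i < ranges.size(); i++)
        if (pk >= ranges[i].min_pk && pk <= ranges[i].max_pk)
          return true;                 /* PK scan will produce this row */
      return false;
    }
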
+ */ + if (head->file->primary_key_is_clustered() && + quick_sel_range->index == head->primary_key) + pk_quick_select= quick_sel_range; + else + return quick_selects.push_back(quick_sel_range); + return 0; +} + +QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() +{ + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT"); + delete unique; + quick_selects.delete_elements(); + delete pk_quick_select; + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + +QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, + TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows) +{ + index= MAX_KEY; + head= table; + record= head->record[0]; + if (!parent_alloc) + init_sql_alloc(&alloc,1024,0); + else + bzero(&alloc, sizeof(MEM_ROOT)); + last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, + head->file->ref_length); +} + + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::init() +{ + /* Check if last_rowid was successfully allocated in ctor */ + return !last_rowid; +} + + +/* + Initialize this quick select to be a ROR-merged scan. + + SYNOPSIS + QUICK_RANGE_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create a separate + handler object + + NOTES + This function creates and prepares for subsequent use a separate handler + object if it can't reuse head->file. The reason for this is that during + ROR-merge several key scans are performed simultaneously, and a single + handler is only capable of preserving context of a single key scan. + + In ROR-merge the quick select doing merge does full records retrieval, + merged quick selects read only keys. + + RETURN + 0 ROR child scan initialized, ok to use. + 1 error +*/ + +int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) +{ + handler *save_file= file; + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); + + if (reuse_handler) + { + DBUG_PRINT("info", ("Reusing handler %p", file)); + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) | + init() || reset()) + { + DBUG_RETURN(1); + } + else + DBUG_RETURN(0); + } + + /* Create a separate handler object for this quick select */ + if (free_file) + { + /* already have own 'handler' object. */ + DBUG_RETURN(0); } + + if (!(file= get_new_handler(head, head->db_type))) + goto failure; + DBUG_PRINT("info", ("Allocated new handler %p", file)); + if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + { + /* Caller will free the memory */ + goto failure; + } + + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) || + init() || reset()) + { + file->close(); + goto failure; + } + free_file= true; + last_rowid= file->ref; + DBUG_RETURN(0); + +failure: + file= save_file; + DBUG_RETURN(1); +} + + +/* + Initialize this quick select to be a part of a ROR-merged scan. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() + reuse_handler If true, use head->file, otherwise create separate + handler object. 
+ RETURN + 0 OK + other error code +*/ +int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler) +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan"); + + /* Initialize all merged "children" quick selects */ + DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler)); + if (!need_to_fetch_row && reuse_handler) + { + quick= quick_it++; + /* + There is no use of this->file. Use it for the first of merged range + selects. + */ + if (quick->init_ror_merged_scan(true)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + } + while((quick= quick_it++)) + { + if (quick->init_ror_merged_scan(false)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + /* All merged scans share the same record buffer in intersection. */ + quick->record= head->record[0]; + } + + if (need_to_fetch_row && head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); } + +/* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_INTERSECT_SELECT::reset() +{ + int result; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); + result= init_ror_merged_scan(true); + DBUG_RETURN(result); +} + + +/* + Add a merged quick select to this ROR-intersection quick select. + + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::push_quick_back() + quick Quick select to be added. The quick select must return + rows in rowid order. + NOTES + This call can only be made before init() is called. + + RETURN + false OK + true Out of memory. +*/ + +bool +QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) +{ + return quick_selects.push_back(quick); +} + +QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); + quick_selects.delete_elements(); + delete cpk_quick; + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + +QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, + TABLE *table) + : thd(thd_param) +{ + index= MAX_KEY; + head= table; + rowid_length= table->file->ref_length; + record= head->record[0]; + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + my_pthread_setspecific_ptr(THR_MALLOC,&alloc); +} + + +/* + Do post-constructor initialization. + SYNOPSIS + QUICK_ROR_UNION_SELECT::init() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::init() +{ + if (init_queue(&queue, quick_selects.elements, 0, + false , QUICK_ROR_UNION_SELECT::queue_cmp, + (void*) this)) + { + bzero(&queue, sizeof(QUEUE)); + return 1; + } + + if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length))) + return 1; + prev_rowid= cur_rowid + head->file->ref_length; + return 0; +} + + +/* + Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority + queue. + + SYNPOSIS + QUICK_ROR_UNION_SELECT::queue_cmp() + arg Pointer to QUICK_ROR_UNION_SELECT + val1 First merged select + val2 Second merged select +*/ +int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) +{ + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, + ((QUICK_SELECT_I*)val2)->last_rowid); +} + + +/* + Initialize quick select for row retrieval. 
+ SYNOPSIS + reset() + + RETURN + 0 OK + other Error code +*/ + +int QUICK_ROR_UNION_SELECT::reset() +{ + QUICK_SELECT_I* quick; + int error; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); + have_prev_rowid= false; + /* + Initialize scans for merged quick selects and put all merged quick + selects into the queue. + */ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->init_ror_merged_scan(false)) + DBUG_RETURN(1); + if ((error= quick->get_next())) + { + if (error == HA_ERR_END_OF_FILE) + continue; + else + DBUG_RETURN(error); + } + quick->save_last_pos(); + queue_insert(&queue, (byte*)quick); + } + + if (head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +bool +QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) +{ + return quick_selects.push_back(quick_sel_range); +} + +QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); + delete_queue(&queue); + quick_selects.delete_elements(); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + QUICK_RANGE::QUICK_RANGE() :min_key(0),max_key(0),min_length(0),max_length(0), flag(NO_MIN_RANGE | NO_MAX_RANGE) @@ -582,28 +1294,221 @@ SEL_ARG *SEL_ARG::clone_tree() return root; } + /* - Test if a key can be used in different ranges + Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived + objects from table read plans. +*/ +class TABLE_READ_PLAN +{ +public: + /* + Plan read cost, with or without cost of full row retrieval, depending + on plan creation parameters. + */ + double read_cost; + ha_rows records; /* estimate of #rows to be examined */ + + /* + If true, the scan returns rows in rowid order. This is used only for + scans that can be both ROR and non-ROR. + */ + bool is_ror; + + /* + Create quick select for this plan. + SYNOPSIS + make_quick() + param Parameter from test_quick_select + retrieve_full_rows If true, created quick select will do full record + retrieval. + parent_alloc Memory pool to use, if any. + + NOTES + retrieve_full_rows is ignored by some implementations. + + RETURN + created quick select + NULL on any error. + */ + virtual QUICK_SELECT_I *make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc=NULL) = 0; + + /* Table read plans are allocated on MEM_ROOT and are never deleted */ + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) {} +}; + +class TRP_ROR_INTERSECT; +class TRP_ROR_UNION; +class TRP_INDEX_MERGE; + + +/* + Plan for a QUICK_RANGE_SELECT scan. + TRP_RANGE::make_quick ignores retrieve_full_rows parameter because + QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full + record retrieval scans. 
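
The hierarchy rooted in TABLE_READ_PLAN follows an "estimate first, build the executor only for the winner" pattern: plans live on the MEM_ROOT, carry little more than a cost estimate, and only the cheapest one gets make_quick() called on it. A minimal self-contained sketch of that shape, with simplified stand-in types rather than the server's classes:

    #include <cstddef>

    /* Sketch only: cost-carrying plan objects; the executor is built for the
       single plan that wins. */
    struct Quick_scan { virtual int init() { return 0; } virtual ~Quick_scan() {} };

    struct Read_plan
    {
      double read_cost;                      /* estimated cost of this plan */
      virtual Quick_scan *make_quick() = 0;  /* build executor (winner only) */
      virtual ~Read_plan() {}
    };

    /* Keep the cheapest candidate and materialize only that one. */
    static Quick_scan *pick_and_build(Read_plan **first, Read_plan **last)
    {
      Read_plan *best= NULL;
      for (Read_plan **p= first; p != last; p++)
        if (*p && (!best || (*p)->read_cost < best->read_cost))
          best= *p;
      return best ? best->make_quick() : NULL;
    }
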
+*/ + +class TRP_RANGE : public TABLE_READ_PLAN +{ +public: + SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ + uint key_idx; /* key number in PARAM::key */ + + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) + : key(key_arg), key_idx(idx_arg) + {} + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + { + DBUG_ENTER("TRP_RANGE::make_quick"); + QUICK_RANGE_SELECT *quick; + if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) + { + quick->records= records; + quick->read_time= read_cost; + } + DBUG_RETURN(quick); + } +}; + + +/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */ + +class TRP_ROR_INTERSECT : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + + /* Array of pointers to ROR range scans used in this intersection */ + struct st_ror_scan_info **first_scan; + struct st_ror_scan_info **last_scan; /* End of the above array */ + struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ + bool is_covering; /* true if no row retrieval phase is necessary */ + double index_scan_costs; /* SUM(cost(index_scan)) */ +}; + + +/* + Plan for QUICK_ROR_UNION_SELECT scan. + QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_ROR_UNION : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ + TABLE_READ_PLAN **last_ror; /* end of the above array */ +}; + + +/* + Plan for QUICK_INDEX_MERGE_SELECT scan. + QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_INDEX_MERGE : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ + TRP_RANGE **range_scans_end; /* end of the array */ +}; + + +/* + Fill param->needed_fields with bitmap of fields used in the query. + SYNOPSIS + fill_used_fields_bitmap() + param Parameter from test_quick_select function. + + NOTES + Clustered PK members are not put into the bitmap as they are implicitly + present in all keys (and it is impossible to avoid reading them). + RETURN + 0 Ok + 1 Out of memory. 
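
A brief illustration of the clustered-PK exception described above (assuming an InnoDB-style clustered primary key):

    /*
      Example (clustered PK assumed):
        CREATE TABLE t (pk INT PRIMARY KEY, a INT, b INT, KEY(a));
        SELECT pk, a FROM t WHERE a < 10;

      Both pk and a are referenced, but needed_fields ends up containing only
      {a}: the pk bit is cleared because every secondary index entry already
      carries the clustered PK, so any index scan is covering with respect
      to pk.
    */
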
+*/ + +static int fill_used_fields_bitmap(PARAM *param) +{ + TABLE *table= param->table; + param->fields_bitmap_size= (table->fields/8 + 1); + uchar *tmp; + uint pk; + if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, + false)) + return 1; + + bitmap_clear_all(¶m->needed_fields); + for (uint i= 0; i < table->fields; i++) + { + if (param->thd->query_id == table->field[i]->query_id) + bitmap_set_bit(¶m->needed_fields, i+1); + } + + pk= param->table->primary_key; + if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY) + { + /* The table uses clustered PK and it is not internally generated */ + KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[pk].key_parts; + for(;key_part != key_part_end; ++key_part) + { + bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr); + } + } + return 0; +} + + +/* + Test if a key can be used in different ranges SYNOPSIS - SQL_SELECT::test_quick_select(thd,keys_to_use, prev_tables, - limit, force_quick_range) - - Updates the following in the select parameter: - needed_reg - Bits for keys with may be used if all prev regs are read - quick - Parameter to use when reading records. - In the table struct the following information is updated: - quick_keys - Which keys can be used - quick_rows - How many rows the key matches - - RETURN VALUES - -1 if impossible select - 0 if can't use quick_select - 1 if found usable range - - TODO - check if the function really needs to modify keys_to_use, and change the - code to pass it by reference if not + SQL_SELECT::test_quick_select() + thd Current thread + keys_to_use Keys to use for range retrieval + prev_tables Tables assumed to be already read when the scan is + performed (but not read at the moment of this call) + limit Query limit + force_quick_range Prefer to use range (instead of full table scan) even + if it is more expensive. + + NOTES + Updates the following in the select parameter: + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. + + In the table struct the following information is updated: + quick_keys - Which keys can be used + quick_rows - How many rows the key matches + + TODO + Check if this function really needs to modify keys_to_use, and change the + code to pass it by reference if it doesn't. + + In addition to force_quick_range other means can be (an usually are) used + to make this function prefer range over full table scan. Figure out if + force_quick_range is really needed. + + RETURN + -1 if impossible select (i.e. certainly no rows will be selected) + 0 if can't use quick_select + 1 if found usable ranges and quick select has been successfully created. 
*/ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, @@ -613,6 +1518,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint idx; double scan_time; DBUG_ENTER("test_quick_select"); + //printf("\nQUERY: %s\n", thd->query); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, (ulong) const_tables)); @@ -657,11 +1563,16 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.table=head; param.keys=0; param.mem_root= &alloc; + param.needed_reg= &needed_reg; + + param.imerge_cost_buff_size= 0; + thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc, sizeof(KEY_PART)* - head->key_parts))) + head->key_parts)) + || fill_used_fields_bitmap(¶m)) { thd->no_errors=0; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -670,7 +1581,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, key_parts= param.key_parts; old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC); my_pthread_setspecific_ptr(THR_MALLOC,&alloc); - + + /* + Make an array with description of all key parts of all table keys. + This is used in get_mm_parts function. + */ key_info= head->key_info; for (idx=0 ; idx < head->keys ; idx++, key_info++) { @@ -698,80 +1613,113 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } param.key_parts_end=key_parts; + /* Calculate cost of full index read for the shortest covering index */ + if (!head->used_keys.is_clear_all()) + { + int key_for_use= find_shortest_key(head, &head->used_keys); + double key_read_time= get_index_only_read_time(¶m, records, + key_for_use); + DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, key_read_time)); + if (key_read_time < read_time) + read_time= key_read_time; + } + if ((tree=get_mm_tree(¶m,cond))) { if (tree->type == SEL_TREE::IMPOSSIBLE) { - records=0L; // Return -1 from this function + records=0L; // Return -1 from this function read_time= (double) HA_POS_ERROR; } else if (tree->type == SEL_TREE::KEY || - tree->type == SEL_TREE::KEY_SMALLER) + tree->type == SEL_TREE::KEY_SMALLER) { - SEL_ARG **key,**end,**best_key=0; - - - for (idx=0,key=tree->keys, end=key+param.keys ; - key != end ; - key++,idx++) - { - ha_rows found_records; - double found_read_time; - if (*key) - { - uint keynr= param.real_keynr[idx]; - if ((*key)->type == SEL_ARG::MAYBE_KEY || - (*key)->maybe_flag) - needed_reg.set_bit(keynr); - - found_records=check_quick_select(¶m, idx, *key); - if (found_records != HA_POS_ERROR && found_records > 2 && - head->used_keys.is_set(keynr) && - (head->file->index_flags(keynr) & HA_KEYREAD_ONLY)) - { - /* - We can resolve this by only reading through this key. - Assume that we will read trough the whole key range - and that all key blocks are half full (normally things are - much better). 
- */ - uint keys_per_block= (head->file->block_size/2/ - (head->key_info[keynr].key_length+ - head->file->ref_length) + 1); - found_read_time=((double) (found_records+keys_per_block-1)/ - (double) keys_per_block); - } - else - found_read_time= (head->file->read_time(keynr, - param.range_count, - found_records)+ - (double) found_records / TIME_FOR_COMPARE); - DBUG_PRINT("info",("read_time: %g found_read_time: %g", - read_time, found_read_time)); - if (read_time > found_read_time && found_records != HA_POS_ERROR) - { - read_time=found_read_time; - records=found_records; - best_key=key; - } - } - } - if (best_key && records) - { - if ((quick=get_quick_select(¶m,(uint) (best_key-tree->keys), - *best_key))) - { - quick->records=records; - quick->read_time=read_time; - } - } + TABLE_READ_PLAN *best_trp; + /* + It is possible to use a quick select (but maybe it would be slower + than 'all' table scan). + */ + if (tree->merges.is_empty()) + { + double best_read_time= read_time; + TRP_ROR_INTERSECT *new_trp; + bool can_build_covering= false; + + /* Get best 'range' plan and prepare data for making other plans */ + if ((best_trp= get_key_scans_params(¶m, tree, false, + best_read_time))) + best_read_time= best_trp->read_cost; + + /* + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. + */ + if (thd->lex->sql_command != SQLCOM_DELETE) + { + /* + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection. + */ + if ((new_trp= get_best_ror_intersect(¶m, tree, best_read_time, + &can_build_covering))) + { + best_trp= new_trp; + best_read_time= best_trp->read_cost; + } + + /* + Try constructing covering ROR-intersect only if it looks possible + and worth doing. + */ + if (new_trp && !new_trp->is_covering && can_build_covering && + (new_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) + best_trp= new_trp; + } + } + else + { + /* Try creating index_merge/ROR-union scan. */ + SEL_IMERGE *imerge; + TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; + LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ + + + DBUG_PRINT("info",("No range reads possible," + " trying to construct index_merge")); + List_iterator_fast<SEL_IMERGE> it(tree->merges); + while ((imerge= it++)) + { + new_conj_trp= get_best_disjunct_quick(¶m, imerge, read_time); + if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < + best_conj_trp->read_cost)) + best_conj_trp= new_conj_trp; + } + best_trp= best_conj_trp; + } + my_pthread_setspecific_ptr(THR_MALLOC, old_root); + + /* If we got a read plan, create a quick select from it. */ + if (best_trp) + { + records= best_trp->records; + if (!(quick= best_trp->make_quick(¶m, true)) || quick->init()) + { + delete quick; + quick= NULL; + } + } } } + my_pthread_setspecific_ptr(THR_MALLOC, old_root); free_root(&alloc,MYF(0)); // Return memory & allocator - my_pthread_setspecific_ptr(THR_MALLOC,old_root); thd->no_errors=0; } - DBUG_EXECUTE("info",print_quick(quick,&needed_reg);); + + DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); + /* Assume that if the user is using 'limit' we will only need to scan limit rows if we are using a key @@ -779,6 +1727,1400 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_RETURN(records ? test(quick) : -1); } + +/* + Get cost of 'sweep' full records retrieval. 
+ SYNOPSIS + get_sweep_read_cost() + param Parameter from test_quick_select + records # of records to be retrieved + RETURN + cost of sweep +*/ + +double get_sweep_read_cost(const PARAM *param, ha_rows records) +{ + double result; + if (param->table->file->primary_key_is_clustered()) + { + result= param->table->file->read_time(param->table->primary_key, + records, records); + } + else + { + double n_blocks= + ceil((double)((longlong)param->table->file->data_file_length / IO_SIZE)); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, + busy_blocks)); + /* + Disabled: Bail out if # of blocks to read is bigger than # of blocks in + table data file. + if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) + return 1; + */ + JOIN *join= param->thd->lex->select_lex.join; + if (!join || join->tables == 1) + { + /* No join, assume reading is done in one 'sweep' */ + result= busy_blocks*(DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + else + { + /* + Possibly this is a join with source table being non-last table, so + assume that disk seeks are random here. + */ + result= busy_blocks; + } + } + DBUG_PRINT("info",("returning cost=%g", result)); + return result; +} + + +/* + Get best plan for a SEL_IMERGE disjunctive expression. + SYNOPSIS + get_best_disjunct_quick() + param Parameter from check_quick_select function + imerge Expression to use + read_time Don't create scans with cost > read_time + + NOTES + index_merge cost is calculated as follows: + index_merge_cost = + cost(index_reads) + (see #1) + cost(rowid_to_row_scan) + (see #2) + cost(unique_use) (see #3) + + 1. cost(index_reads) =SUM_i(cost(index_read_i)) + For non-CPK scans, + cost(index_read_i) = {cost of ordinary 'index only' scan} + For CPK scan, + cost(index_read_i) = {cost of non-'index only' scan} + + 2. cost(rowid_to_row_scan) + If table PK is clustered then + cost(rowid_to_row_scan) = + {cost of ordinary clustered PK scan with n_ranges=n_rows} + + Otherwise, we use the following model to calculate costs: + We need to retrieve n_rows rows from file that occupies n_blocks blocks. + We assume that offsets of rows we need are independent variates with + uniform distribution in [0..max_file_offset] range. + + We'll denote block as "busy" if it contains row(s) we need to retrieve + and "empty" if doesn't contain rows we need. + + Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this + applies to any block in file). Let x_i be a variate taking value 1 if + block #i is empty and 0 otherwise. + + Then E(x_i) = (1 - 1/n_blocks)^n_rows; + + E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) = + = n_blocks * ((1 - 1/n_blocks)^n_rows) = + ~= n_blocks * exp(-n_rows/n_blocks). + + E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) = + ~= n_blocks * (1 - exp(-n_rows/n_blocks)). + + Average size of "hole" between neighbor non-empty blocks is + E(hole_size) = n_blocks/E(n_busy_blocks). + + The total cost of reading all needed blocks in one "sweep" is: + + E(n_busy_blocks)* + (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). + + 3. Cost of Unique use is calculated in Unique::get_use_cost function. + + ROR-union cost is calculated in the same way index_merge, but instead of + Unique a priority queue is used. + + RETURN + Created read plan + NULL - Out of memory or no read scan could be built. 
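
As a rough sanity check of the model above, with made-up numbers: retrieving n_rows = 100 rows from a file of n_blocks = 1000 blocks gives

    E(n_busy_blocks) = 1000 * (1 - (1 - 1/1000)^100)
                    ~= 1000 * (1 - exp(-0.1)) ~= 95

    sweep cost      ~= 95 * (DISK_SEEK_BASE_COST +
                             DISK_SEEK_PROP_COST * 1000/95)
                    ~= 95 * (0.5 + (0.5/128) * 10.5) ~= 51

i.e. roughly half the cost of treating the 95 busy blocks as independent random seeks, which is what the non-sweep (join) branch of get_sweep_read_cost charges.
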
+*/ + +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time) +{ + SEL_TREE **ptree; + TRP_INDEX_MERGE *imerge_trp= NULL; + uint n_child_scans= imerge->trees_next - imerge->trees; + TRP_RANGE **range_scans; + TRP_RANGE **cur_child; + TRP_RANGE **cpk_scan= NULL; + bool imerge_too_expensive= false; + double imerge_cost= 0.0; + ha_rows cpk_scan_records= 0; + ha_rows non_cpk_scan_records= 0; + bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + bool all_scans_ror_able= true; + bool all_scans_rors= true; + uint unique_calc_buff_size; + TABLE_READ_PLAN **roru_read_plans; + TABLE_READ_PLAN **cur_roru_plan; + double roru_index_costs; + double blocks_in_index_read; + ha_rows roru_total_records; + double roru_intersect_part= 1.0; + double sweep_cost; + DBUG_ENTER("get_best_disjunct_quick"); + DBUG_PRINT("info", ("Full table scan cost =%g", read_time)); + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE*)* + n_child_scans))) + DBUG_RETURN(NULL); + /* + Collect best 'range' scan for each of disjuncts, and, while doing so, + analyze possibility of ROR scans. Also calculate some values needed by + other parts of the code. + */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++) + { + DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, + "tree in SEL_IMERGE");); + if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time))) + { + /* + One of index scans in this index_merge is more expensive than entire + table read for another available option. The entire index_merge (and + any possible ROR-union) will be more expensive then, too. We continue + here only to update SQL_SELECT members. + */ + imerge_too_expensive= true; + } + if (imerge_too_expensive) + continue; + + imerge_cost += (*cur_child)->read_cost; + all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); + all_scans_rors &= (*cur_child)->is_ror; + if (pk_is_clustered && + param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key) + { + cpk_scan= cur_child; + cpk_scan_records= (*cur_child)->records; + } + else + non_cpk_scan_records += (*cur_child)->records; + } + + DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost)); + if (imerge_too_expensive || (imerge_cost > read_time) || + (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) && + read_time != DBL_MAX) + { + /* + Bail out if it is obvious that both index_merge and ROR-union will be + more expensive + */ + DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " + "full table scan, bailing out")); + DBUG_RETURN(NULL); + } + if (all_scans_rors) + { + roru_read_plans= (TABLE_READ_PLAN**)range_scans; + goto skip_to_ror_scan; + } + blocks_in_index_read= imerge_cost; + if (cpk_scan) + { + /* + Add one ROWID comparison for each row retrieved on non-CPK scan. 
(it + is done in QUICK_RANGE_SELECT::row_in_ranges) + */ + imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; + } + + /* Calculate cost(rowid_to_row_scan) */ + imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records); + DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", + imerge_cost)); + if (imerge_cost > read_time) + goto build_ror_index_merge; + + /* Add Unique operations cost */ + unique_calc_buff_size= + Unique::get_cost_calc_buff_size(non_cpk_scan_records, + param->table->file->ref_length, + param->thd->variables.sortbuff_size); + if (param->imerge_cost_buff_size < unique_calc_buff_size) + { + if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, + unique_calc_buff_size))) + DBUG_RETURN(NULL); + param->imerge_cost_buff_size= unique_calc_buff_size; + } + + imerge_cost += + Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records, + param->table->file->ref_length, + param->thd->variables.sortbuff_size); + DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", + imerge_cost, read_time)); + if (imerge_cost < read_time) + { + if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) + { + imerge_trp->read_cost= imerge_cost; + imerge_trp->records= non_cpk_scan_records + cpk_scan_records; + imerge_trp->records= min(imerge_trp->records, + param->table->file->records); + imerge_trp->range_scans= range_scans; + imerge_trp->range_scans_end= range_scans + n_child_scans; + read_time= imerge_cost; + } + } + +build_ror_index_merge: + if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE) + DBUG_RETURN(imerge_trp); + + /* Ok, it is possible to build a ROR-union, try it. */ + bool dummy; + if (!(roru_read_plans= + (TABLE_READ_PLAN**)alloc_root(param->mem_root, + sizeof(TABLE_READ_PLAN*)* + n_child_scans))) + DBUG_RETURN(imerge_trp); +skip_to_ror_scan: + roru_index_costs= 0.0; + roru_total_records= 0; + cur_roru_plan= roru_read_plans; + + /* Find 'best' ROR scan for each of trees in disjunction */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++, cur_roru_plan++) + { + /* + Assume the best ROR scan is the one that has cheapest full-row-retrieval + scan cost. + Also accumulate index_only scan costs as we'll need them to calculate + overall index_intersection cost. + */ + double cost; + if ((*cur_child)->is_ror) + { + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= param->table->file-> + read_time(param->real_keynr[(*cur_child)->key_idx], 1, + (*cur_child)->records) + + rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + } + else + cost= read_time; + + TABLE_READ_PLAN *prev_plan= *cur_child; + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost, + &dummy))) + { + if (prev_plan->is_ror) + *cur_roru_plan= prev_plan; + else + DBUG_RETURN(imerge_trp); + roru_index_costs += (*cur_roru_plan)->read_cost; + } + else + roru_index_costs += + ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; + roru_total_records += (*cur_roru_plan)->records; + roru_intersect_part *= (*cur_roru_plan)->records / + param->table->file->records; + } + + /* + rows to retrieve= + SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). + This is valid because index_merge construction guarantees that conditions + in disjunction do not share key parts. 
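As a quick numeric illustration of the estimate above (numbers invented): for a 10000-row table and two disjuncts whose ROR scans are expected to return 1000 and 2000 rows, the independence assumption gives 1000 + 2000 - 10000*(1000/10000)*(2000/10000) = 3000 - 200 = 2800 expected distinct rows. That is exactly what subtracting roru_intersect_part * records from the running sum computes in the two-scan case: inclusion-exclusion with E(rows in both scans) = table_rows * P(scan1) * P(scan2).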
+ */ + roru_total_records -= (ha_rows)(roru_intersect_part* + param->table->file->records); + /* ok, got a ROR read plan for each of the disjuncts + Calculate cost: + cost(index_union_scan(scan_1, ... scan_n)) = + SUM_i(cost_of_index_only_scan(scan_i)) + + queue_use_cost(rowid_len, n) + + cost_of_row_retrieval + See get_merge_buffers_cost function for queue_use_cost formula derivation. + */ + + double roru_total_cost; + roru_total_cost= roru_index_costs + + rows2double(roru_total_records)*log(n_child_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2) + + get_sweep_read_cost(param, roru_total_records); + + DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, + n_child_scans)); + TRP_ROR_UNION* roru; + if (roru_total_cost < read_time) + { + if ((roru= new (param->mem_root) TRP_ROR_UNION)) + { + roru->first_ror= roru_read_plans; + roru->last_ror= roru_read_plans + n_child_scans; + roru->read_cost= roru_total_cost; + roru->records= roru_total_records; + DBUG_RETURN(roru); + } + } + DBUG_RETURN(imerge_trp); +} + + +/* + Calculate cost of 'index only' scan for given index and number of records. + + SYNOPSIS + get_index_only_read_time() + param parameters structure + records #of records to read + keynr key to read + + NOTES + It is assumed that we will read trough the whole key range and that all + key blocks are half full (normally things are much better). +*/ + +inline double get_index_only_read_time(const PARAM* param, ha_rows records, + int keynr) +{ + double read_time; + uint keys_per_block= (param->table->file->block_size/2/ + (param->table->key_info[keynr].key_length+ + param->table->file->ref_length) + 1); + read_time=((double) (records+keys_per_block-1)/ + (double) keys_per_block); + return read_time; +} + +typedef struct st_ror_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + ha_rows records; /* estimate of # records this scan will return */ + + /* Set of intervals over key fields that will be used for row retrieval. */ + SEL_ARG *sel_arg; + + /* Fields used in the query and covered by this ROR scan. */ + MY_BITMAP covered_fields; + uint used_fields_covered; /* # of set bits in covered_fields */ + int key_rec_length; /* length of key record (including rowid) */ + + /* + Cost of reading all index records with values in sel_arg intervals set + (assuming there is no need to access full table records) + */ + double index_read_cost; + uint first_uncovered_field; /* first unused bit in covered_fields */ + uint key_components; /* # of parts in the key */ +} ROR_SCAN_INFO; + + +/* + Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using + sel_arg set of intervals. 
+ + SYNOPSIS + make_ror_scan() + param Parameter from test_quick_select function + idx Index of key in param->keys + sel_arg Set of intervals for a given key + RETURN + NULL - out of memory + ROR scan structure containing a scan for {idx, sel_arg} +*/ + +static +ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) +{ + ROR_SCAN_INFO *ror_scan; + uchar *bitmap_buf; + uint keynr; + DBUG_ENTER("make_ror_scan"); + if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO)))) + DBUG_RETURN(NULL); + + ror_scan->idx= idx; + ror_scan->keynr= keynr= param->real_keynr[idx]; + ror_scan->key_rec_length= param->table->key_info[keynr].key_length + + param->table->file->ref_length; + ror_scan->sel_arg= sel_arg; + ror_scan->records= param->table->quick_rows[keynr]; + + if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, + param->fields_bitmap_size))) + DBUG_RETURN(NULL); + + if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, + param->fields_bitmap_size*8, false)) + DBUG_RETURN(NULL); + bitmap_clear_all(&ror_scan->covered_fields); + + KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[keynr].key_parts; + uint n_used_covered= 0; + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr)) + { + n_used_covered++; + bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr); + } + } + ror_scan->index_read_cost= + get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], + ror_scan->keynr); + DBUG_RETURN(ror_scan); +} + + +/* + Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length. + SYNOPSIS + cmp_ror_scan_info() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ +static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + double val1= rows2double((*a)->records) * (*a)->key_rec_length; + double val2= rows2double((*b)->records) * (*b)->key_rec_length; + return (val1 < val2)? -1: (val1 == val2)? 0 : 1; +} + +/* + Compare two ROR_SCAN_INFO** by + (#covered fields in F desc, + #components asc, + number of first not covered component asc) + + SYNOPSIS + cmp_ror_scan_info_covering() + a ptr to first compared value + b ptr to second compared value + + RETURN + -1 a < b + 0 a = b + 1 a > b +*/ +static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + if ((*a)->used_fields_covered > (*b)->used_fields_covered) + return -1; + if ((*a)->used_fields_covered < (*b)->used_fields_covered) + return 1; + if ((*a)->key_components < (*b)->key_components) + return -1; + if ((*a)->key_components > (*b)->key_components) + return 1; + if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) + return -1; + if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) + return 1; + return 0; +} + +/* Auxiliary structure for incremental ROR-intersection creation */ +typedef struct +{ + const PARAM *param; + MY_BITMAP covered_fields; /* union of fields covered by all scans */ + + /* true if covered_fields is a superset of needed_fields */ + bool is_covering; + + double index_scan_costs; /* SUM(cost of 'index-only' scans) */ + double total_cost; + /* + Fraction of table records that satisfies conditions of all scans. + This is the number of full records that will be retrieved if a + non-index_only index intersection will be employed. 
+ */ + double records_fract; + ha_rows index_records; /* sum(#records to look in indexes) */ +} ROR_INTERSECT_INFO; + + +/* + Re-initialize an allocated intersect info to contain zero scans. + SYNOPSIS + info Intersection info structure to re-initialize. +*/ + +static void ror_intersect_reinit(ROR_INTERSECT_INFO *info) +{ + info->is_covering= false; + info->index_scan_costs= 0.0f; + info->records_fract= 1.0f; + bitmap_clear_all(&info->covered_fields); +} + +/* + Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans. + + SYNOPSIS + ror_intersect_init() + param Parameter from test_quick_select + is_index_only If true, set ROR_INTERSECT_INFO to be covering + + RETURN + allocated structure + NULL on error +*/ + +static +ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) +{ + ROR_INTERSECT_INFO *info; + uchar* buf; + if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, + sizeof(ROR_INTERSECT_INFO)))) + return NULL; + info->param= param; + if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + return NULL; + if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, + false)) + return NULL; + ror_intersect_reinit(info); + return info; +} + + +/* + Check if adding a ROR scan to a ROR-intersection reduces its cost of + ROR-intersection and if yes, update parameters of ROR-intersection, + including its cost. + + SYNOPSIS + ror_intersect_add() + param Parameter from test_quick_select + info ROR-intersection structure to add the scan to. + ror_scan ROR scan info to add. + is_cpk_scan If true, add the scan as CPK scan (this can be inferred + from other parameters and is passed separately only to + avoid duplicating the inference code) + + NOTES + Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR- + intersection decreases. The cost of ROR-intersection is caclulated as + follows: + + cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval + + if (union of indexes used covers all needed fields) + cost_of_full_rows_retrieval= 0; + else + { + cost_of_full_rows_retrieval= + cost_of_sweep_read(E(rows_to_retrieve), rows_in_table); + } + + E(rows_to_retrieve) is caclulated as follows: + Suppose we have a condition on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key + k_21=c_21 AND k_22=c_22 AND ... // parts of second key + ... + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) + + where k_ij may be the same as any k_pq (i.e. keys may have common parts). + + A full row is retrieved iff entire cond holds. + + The recursive procedure for finding P(cond) is as follows: + + First step: + Pick 1st part of 1st key and break conjunction (1) into two parts: + cond= (k_11=c_11 AND R) + + Here R may still contain condition(s) equivalent to k_11=c_11. + Nevertheless, the following holds: + + P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11). + + Mark k_11 as fixed field (and satisfied condition) F, save P(F), + save R to be cond and proceed to recursion step. + + Recursion step: + We have a set of fixed fields/satisfied conditions) F, probability P(F), + and remaining conjunction R + Pick next key part on current key and its condition "k_ij=c_ij". + We will add "k_ij=c_ij" into F and update P(F). + Lets denote k_ij as t, R = t AND R1, where R1 may still contain t. Then + + P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2) + + (where '|' mean conditional probability, not "or") + + Consider the first multiplier in (2). 
One of the following holds: + a) F contains condition on field used in t (i.e. t AND F = F). + Then P(t|F) = 1 + + b) F doesn't contain condition on field used in t. Then F and t are + considered independent. + + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + + P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) / + #records(fields_before_t_in_key, t) + + The second multiplier is calculated by applying this step recursively. + + IMPLEMENTATION + This function calculates the result of application of the "recursion step" + described above for all fixed key members of a single key, accumulating set + of covered fields, selectivity, etc. + + The calculation is conducted as follows: + Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate + + n_{k1} n_{k_2} + --------- * --------- * .... (3) + n_{k1-1} n_{k2_1} + + where k1,k2,... are key parts which fields were not yet marked as fixed + ( this is result of application of option b) of the recursion step for + parts of a single key). + Since it is reasonable to expect that most of the fields are not marked + as fixed, we calcualate (3) as + + n_{i1} n_{i_2} + (3) = n_{max_key_part} / ( --------- * --------- * .... ) + n_{i1-1} n_{i2_1} + + where i1,i2, .. are key parts that were already marked as fixed. + + In order to minimize number of expensive records_in_range calls we group + and reduce adjacent fractions. + + RETURN + true ROR scan added to ROR-intersection, cost updated. + false It doesn't make sense to add this ROR scan to this ROR-intersection. +*/ + +bool ror_intersect_add(const PARAM *param, ROR_INTERSECT_INFO *info, + ROR_SCAN_INFO* ror_scan, bool is_cpk_scan=false) +{ + int i; + SEL_ARG *sel_arg; + KEY_PART_INFO *key_part= + info->param->table->key_info[ror_scan->keynr].key_part; + double selectivity_mult= 1.0; + byte key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */ + + DBUG_ENTER("ror_intersect_add"); + DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("Adding scan on %s", + info->param->table->key_info[ror_scan->keynr].name)); + SEL_ARG *tuple_arg= NULL; + char *key_ptr= key_val; + bool cur_covered, prev_covered= + bitmap_is_set(&info->covered_fields, key_part->fieldnr); + + ha_rows prev_records= param->table->file->records; + key_range min_range; + key_range max_range; + min_range.key= (byte*) key_val; + min_range.flag= HA_READ_KEY_EXACT; + max_range.key= (byte*) key_val; + max_range.flag= HA_READ_AFTER_KEY; + + for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; + i++, sel_arg= sel_arg->next_key_part) + { + cur_covered= bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr); + if (cur_covered != prev_covered) + { + /* create (part1val, ..., part{n-1}val) tuple. 
*/ + { + if (!tuple_arg) + { + tuple_arg= ror_scan->sel_arg; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + while (tuple_arg->next_key_part != sel_arg) + { + tuple_arg= tuple_arg->next_key_part; + tuple_arg->store_min(key_part->length, &key_ptr, 0); + } + } + ha_rows records; + min_range.length= max_range.length= key_ptr - key_val; + records= param->table->file-> + records_in_range(ror_scan->keynr, + &min_range, + &max_range); + if (cur_covered) + { + /* uncovered -> covered */ + double tmp= rows2double(records)/rows2double(prev_records); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; + prev_records= HA_POS_ERROR; + } + else + { + /* covered -> uncovered */ + prev_records= records; + } + } + prev_covered= cur_covered; + } + if (!prev_covered) + { + double tmp= rows2double(param->table->quick_rows[ror_scan->keynr]) / + rows2double(prev_records); + DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); + selectivity_mult *= tmp; + } + + if (selectivity_mult == 1.0) + { + /* Don't add this scan if it doesn't improve selectivity. */ + DBUG_PRINT("info", ("The scan doesn't improve selectivity.")); + DBUG_RETURN(false); + } + + info->records_fract *= selectivity_mult; + ha_rows cur_scan_records= info->param->table->quick_rows[ror_scan->keynr]; + if (is_cpk_scan) + { + info->index_scan_costs += rows2double(cur_scan_records)* + TIME_FOR_COMPARE_ROWID; + } + else + { + info->index_records += cur_scan_records; + info->index_scan_costs += ror_scan->index_read_cost; + bitmap_union(&info->covered_fields, &ror_scan->covered_fields); + } + + if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, + &info->covered_fields)) + { + DBUG_PRINT("info", ("ROR-intersect is covering now")); + info->is_covering= true; + } + + info->total_cost= info->index_scan_costs; + if (!info->is_covering) + { + ha_rows table_recs= info->param->table->file->records; + info->total_cost += + get_sweep_read_cost(info->param, + (ha_rows)(info->records_fract*table_recs)); + } + DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, + info->is_covering?"" : "non-")); + DBUG_RETURN(true); +} + + +/* + Get best ROR-intersection plan using non-covering ROR-intersection search + algorithm. The returned plan may be covering. + + SYNOPSIS + get_best_ror_intersect() + param Parameter from test_quick_select function. + tree Transformed restriction condition to be used to look + for ROR scans. + read_time Do not return read plans with cost > read_time. + are_all_covering [out] set to true if union of all scans covers all + fields needed by the query (and it is possible to build + a covering ROR-intersection) + + NOTES + get_key_scans_params must be called before for the same SEL_TREE before + this function can be called. + + The approximate best non-covering plan search algorithm is as follows: + + find_min_ror_intersection_scan() + { + R= select all ROR scans; + order R by (E(#records_matched) * key_record_length). + + S= first(R); -- set of scans that will be used for ROR-intersection + R= R-first(S); + min_cost= cost(S); + min_scan= make_scan(S); + while (R is not empty) + { + if (!selectivity(S + first(R) < selectivity(S))) + continue; + + S= S + first(R); + R= R - first(R); + if (cost(S) < min_cost) + { + min_cost= cost(S); + min_scan= make_scan(S); + } + } + return min_scan; + } + + See ror_intersect_add function for ROR intersection costs. 
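A toy rendering of the pseudocode above may help. In the sketch below (not part of the patch; all names are invented) each candidate scan carries a fixed selectivity instead of the records_in_range-based multiplier computed by ror_intersect_add, and the covering case is ignored; otherwise the shape is the same: order by E(#records matched) * key record length, add scans that improve selectivity, and remember the cheapest prefix seen so far.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    /* Toy candidate scan: 'rows' and 'rec_len' drive the initial ordering,
       'selectivity' is the invented fixed fraction of rows the scan keeps,
       'index_cost' is its index-only read cost. */
    struct Scan
    {
      double rows, rec_len, index_cost, selectivity;
    };

    static size_t pick_intersection(std::vector<Scan> scans, double table_rows,
                                    double row_fetch_cost, double *best_cost)
    {
      std::sort(scans.begin(), scans.end(),
                [](const Scan &a, const Scan &b)
                { return a.rows * a.rec_len < b.rows * b.rec_len; });

      double sel= 1.0, index_costs= 0.0;
      size_t used= 0, best_used= 0;
      *best_cost= 1e100;                      /* effectively "no plan yet" */

      for (const Scan &s : scans)
      {
        if (s.selectivity >= 1.0)
          continue;                           /* would not reduce E(rows), skip */
        sel*= s.selectivity;
        index_costs+= s.index_cost;
        used++;
        double cost= index_costs + sel * table_rows * row_fetch_cost;
        if (cost < *best_cost)                /* cheapest prefix found so far */
        {
          *best_cost= cost;
          best_used= used;
        }
      }
      return best_used;                       /* scans kept in the best prefix */
    }

    int main()
    {
      std::vector<Scan> s= {{1000, 20,  50, 0.10},
                            {3000, 16, 120, 0.30},
                            {9000, 24, 400, 0.90}};
      double cost;
      size_t n= pick_intersection(s, 10000, 1.0, &cost);
      printf("intersect %zu scan(s), estimated cost %g\n", n, cost);
      return 0;
    }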
+ + Special handling for Clustered PK scans + Clustered PK contains all table fields, so using it as a regular scan in + index intersection doesn't make sense: a range scan on CPK will be less + expensive in this case. + Clustered PK scan has special handling in ROR-intersection: it is not used + to retrieve rows, instead its condition is used to filter row references + we get from scans on other keys. + + RETURN + ROR-intersection table read plan + NULL if out of memory or no suitable plan found. +*/ + +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + double read_time, + bool *are_all_covering) +{ + uint idx; + double min_cost= read_time; + DBUG_ENTER("get_best_ror_intersect"); + + if (tree->n_ror_scans < 2) + DBUG_RETURN(NULL); + + /* + Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them. + Also find and save clustered PK scan if there is one. + */ + ROR_SCAN_INFO **cur_ror_scan; + ROR_SCAN_INFO *cpk_scan= NULL; + bool cpk_scan_used= false; + if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + param->keys))) + return NULL; + uint cpk_no= (param->table->file->primary_key_is_clustered())? + param->table->primary_key : MAX_KEY; + + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) + { + ROR_SCAN_INFO *scan; + if (!tree->ror_scans_map.is_set(idx)) + continue; + if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) + return NULL; + if (param->real_keynr[idx] == cpk_no) + { + cpk_scan= scan; + tree->n_ror_scans--; + } + else + *(cur_ror_scan++)= scan; + } + + tree->ror_scans_end= cur_ror_scan; + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", + tree->ror_scans, + tree->ror_scans_end);); + /* + Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized + ROR_SCAN_INFOs. + Now, get a minimal key scan using an approximate algorithm. + */ + qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", + tree->ror_scans, + tree->ror_scans_end);); + + ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ + ROR_SCAN_INFO **intersect_scans_end; + if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + tree->n_ror_scans))) + return NULL; + intersect_scans_end= intersect_scans; + + /* Create and incrementally update ROR intersection. 
*/ + ROR_INTERSECT_INFO *intersect; + if (!(intersect= ror_intersect_init(param, false))) + return NULL; + + /* [intersect_scans, intersect_scans_best) will hold the best combination */ + ROR_SCAN_INFO **intersect_scans_best; + ha_rows best_rows; + bool is_best_covering; + double best_index_scan_costs; + LINT_INIT(best_rows); /* protected by intersect_scans_best */ + LINT_INIT(is_best_covering); + LINT_INIT(best_index_scan_costs); + + cur_ror_scan= tree->ror_scans; + /* Start with one scan */ + intersect_scans_best= intersect_scans; + while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) + { + /* S= S + first(R); */ + if (ror_intersect_add(param, intersect, *cur_ror_scan)) + *(intersect_scans_end++)= *cur_ror_scan; + /* R= R - first(R); */ + cur_ror_scan++; + + if (intersect->total_cost < min_cost) + { + /* Local minimum found, save it */ + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + intersect_scans_best= intersect_scans_end; + best_index_scan_costs= intersect->index_scan_costs; + } + } + + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "best ROR-intersection", + intersect_scans, + intersect_scans_best);); + + *are_all_covering= intersect->is_covering; + uint best_num= intersect_scans_best - intersect_scans; + /* + Ok, found the best ROR-intersection of non-CPK key scans. + Check if we should add a CPK scan. + + If the obtained ROR-intersection is covering, it doesn't make sense + to add CPK scan - Clustered PK contains all fields and if we're doing + CPK scan doing other CPK scans will only add more overhead. + */ + if (cpk_scan && !intersect->is_covering) + { + /* + Handle the special case: ROR-intersect(PRIMARY, key1) is + the best, but cost(range(key1)) > cost(best_non_ror_range_scan) + */ + if (best_num == 0) + { + cur_ror_scan= tree->ror_scans; + intersect_scans_end= intersect_scans; + ror_intersect_reinit(intersect); + if (!ror_intersect_add(param, intersect, *cur_ror_scan)) + DBUG_RETURN(NULL); /* shouldn't happen actually actually */ + *(intersect_scans_end++)= *cur_ror_scan; + best_num++; + } + + if (ror_intersect_add(param, intersect, cpk_scan)) + { + cpk_scan_used= true; + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + best_index_scan_costs= intersect->index_scan_costs; + } + } + + /* Ok, return ROR-intersect plan if we have found one */ + TRP_ROR_INTERSECT *trp= NULL; + if (best_num > 1 || cpk_scan_used) + { + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + if (!(trp->first_scan= + (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)*best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= is_best_covering; + trp->read_cost= min_cost; + trp->records= best_rows? best_rows : 1; + trp->index_scan_costs= best_index_scan_costs; + trp->cpk_scan= cpk_scan; + } + DBUG_RETURN(trp); +} + + +/* + Get best covering ROR-intersection. + SYNOPSIS + get_best_covering_ror_intersect() + param Parameter from test_quick_select function. + tree SEL_TREE with sets of intervals for different keys. + read_time Don't return table read plans with cost > read_time. + + RETURN + Best covering ROR-intersection plan + NULL if no plan found. 
+ + NOTES + get_best_ror_intersect must be called for a tree before calling this + function for it. + This function invalidates tree->ror_scans member values. + + The following approximate algorithm is used: + I=set of all covering indexes + F=set of all fields to cover + S={} + + do { + Order I by (#covered fields in F desc, + #components asc, + number of first not covered component asc); + F=F-covered by first(I); + S=S+first(I); + I=I-first(I); + } while F is not empty. +*/ + +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time) +{ + ROR_SCAN_INFO **ror_scan_mark; + ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; + DBUG_ENTER("get_best_covering_ror_intersect"); + uint nbits= param->fields_bitmap_size*8; + + for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) + (*scan)->key_components= + param->table->key_info[(*scan)->keynr].key_parts; + + /* + Run covering-ROR-search algorithm. + Assume set I is [ror_scan .. ror_scans_end) + */ + + /*I=set of all covering indexes */ + ror_scan_mark= tree->ror_scans; + + uchar buf[MAX_KEY/8+1]; + MY_BITMAP covered_fields; + if (bitmap_init(&covered_fields, buf, nbits, false)) + DBUG_RETURN(0); + bitmap_clear_all(&covered_fields); + + double total_cost= 0.0f; + ha_rows records=0; + bool all_covered; + + DBUG_PRINT("info", ("Building covering ROR-intersection")); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "building covering ROR-I", + ror_scan_mark, ror_scans_end);); + do { + /* + Update changed sorting info: + #covered fields, + number of first not covered component + Calculate and save these values for each of remaining scans. + */ + for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) + { + bitmap_subtract(&(*scan)->covered_fields, &covered_fields); + (*scan)->used_fields_covered= + bitmap_bits_set(&(*scan)->covered_fields); + (*scan)->first_uncovered_field= + bitmap_get_first(&(*scan)->covered_fields); + } + + qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); + + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "remaining scans", + ror_scan_mark, ror_scans_end);); + + /* I=I-first(I) */ + total_cost += (*ror_scan_mark)->index_read_cost; + records += (*ror_scan_mark)->records; + DBUG_PRINT("info", ("Adding scan on %s", + param->table->key_info[(*ror_scan_mark)->keynr].name)); + if (total_cost > read_time) + DBUG_RETURN(NULL); + /* F=F-covered by first(I) */ + bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); + } while (!all_covered && (++ror_scan_mark < ror_scans_end)); + + if (!all_covered) + DBUG_RETURN(NULL); /* should not happen actually */ + + /* + Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with + cost total_cost. + */ + DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating covering ROR-intersect", + tree->ror_scans, ror_scan_mark);); + + /* Add priority queue use cost. 
*/ + total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2); + DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); + + if (total_cost > read_time) + DBUG_RETURN(NULL); + + TRP_ROR_INTERSECT *trp; + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + uint best_num= (ror_scan_mark - tree->ror_scans); + if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= true; + trp->read_cost= total_cost; + trp->records= records; + + DBUG_RETURN(trp); +} + + +/* + Get best "range" table read plan for given SEL_TREE. + Also update PARAM members and store ROR scans info in the SEL_TREE. + SYNOPSIS + get_key_scans_params + param parameters from test_quick_select + tree make range select for this SEL_TREE + index_read_must_be_used if true, assume 'index only' option will be set + (except for clustered PK indexes) + read_time don't create read plans with cost > read_time. + RETURN + Best range read plan + NULL if no plan found or error occurred +*/ + +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time) +{ + int idx; + SEL_ARG **key,**end, **key_to_read= NULL; + ha_rows best_records; + TRP_RANGE* read_plan= NULL; + bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + DBUG_ENTER("get_key_scans_params"); + LINT_INIT(best_records); /* protected by key_to_read */ + /* + Note that there may be trees that have type SEL_TREE::KEY but contain no + key reads at all, e.g. tree for expression "key1 is not null" where key1 + is defined as "not null". + */ + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, + "tree scans");); + tree->ror_scans_map.clear_all(); + tree->n_ror_scans= 0; + for (idx= 0,key=tree->keys, end=key+param->keys; + key != end ; + key++,idx++) + { + ha_rows found_records; + double found_read_time; + if (*key) + { + uint keynr= param->real_keynr[idx]; + if ((*key)->type == SEL_ARG::MAYBE_KEY || + (*key)->maybe_flag) + param->needed_reg->set_bit(keynr); + + bool read_index_only= index_read_must_be_used? true : + (bool)param->table->used_keys.is_set(keynr); + + found_records= check_quick_select(param, idx, *key); + if (param->is_ror_scan) + { + tree->n_ror_scans++; + tree->ror_scans_map.set_bit(idx); + } + if (found_records != HA_POS_ERROR && found_records > 2 && + read_index_only && + (param->table->file->index_flags(keynr) & HA_KEYREAD_ONLY) && + !(pk_is_clustered && keynr == param->table->primary_key)) + { + /* We can resolve this by only reading through this key. */ + found_read_time= get_index_only_read_time(param,found_records,keynr); + } + else + { + /* + cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks) + The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function. 
+ */ + found_read_time= (param->table->file->read_time(keynr, + param->range_count, + found_records)+ + (double) found_records / TIME_FOR_COMPARE); + } + DBUG_PRINT("info",("read_time: %g found_read_time: %g", + read_time, found_read_time)); + if (read_time > found_read_time && found_records != HA_POS_ERROR + /*|| read_time == DBL_MAX*/ ) + { + read_time= found_read_time; + best_records= found_records; + key_to_read= key; + } + + } + } + + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, + "ROR scans");); + if (key_to_read) + { + idx= key_to_read - tree->keys; + if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx))) + { + read_plan->records= best_records; + read_plan->is_ror= tree->ror_scans_map.is_set(idx); + read_plan->read_cost= read_time; + DBUG_PRINT("info",("Returning range plan for key %s, cost %g", + param->table->key_info[param->real_keynr[idx]].name, + read_plan->read_cost)); + } + } + else + DBUG_PRINT("info", ("No 'range' table read plan found")); + + DBUG_RETURN(read_plan); +} + + +QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_MERGE_SELECT *quick_imerge; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) + return NULL; + + quick_imerge->records= records; + quick_imerge->read_time= read_cost; + for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))|| + quick_imerge->push_quick_back(quick)) + { + delete quick; + delete quick_imerge; + return NULL; + } + } + return quick_imerge; +} + +QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_INTERSECT_SELECT *quick_intrsect; + QUICK_RANGE_SELECT *quick; + DBUG_ENTER("TRP_ROR_INTERSECT::make_quick"); + MEM_ROOT *alloc; + + if ((quick_intrsect= + new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table, + retrieve_full_rows? (!is_covering):false, + parent_alloc))) + { + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating ROR-intersect", + first_scan, last_scan);); + alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc; + for(; first_scan != last_scan;++first_scan) + { + if (!(quick= get_quick_select(param, (*first_scan)->idx, + (*first_scan)->sel_arg, alloc)) || + quick_intrsect->push_quick_back(quick)) + { + delete quick_intrsect; + DBUG_RETURN(NULL); + } + } + if (cpk_scan) + { + if (!(quick= get_quick_select(param, cpk_scan->idx, + cpk_scan->sel_arg, alloc))) + { + delete quick_intrsect; + DBUG_RETURN(NULL); + } + quick_intrsect->cpk_quick= quick; + } + quick_intrsect->records= records; + quick_intrsect->read_time= read_cost; + } + DBUG_RETURN(quick_intrsect); +} + + +QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_UNION_SELECT *quick_roru; + TABLE_READ_PLAN **scan; + QUICK_SELECT_I *quick; + DBUG_ENTER("TRP_ROR_UNION::make_quick"); + /* + It is impossible to construct a ROR-union that will not retrieve full + rows, ignore retrieve_full_rows parameter. 
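The three make_quick() implementations in this hunk follow one pattern: a compound plan builds its QUICK_* object, asks each child plan to build its own quick select, and gives up if any child fails. A stripped-down sketch of that shape, with invented types rather than the server's classes, looks like this (the unique_ptr ownership stands in for the explicit delete calls the patch has to make):

    #include <memory>
    #include <vector>

    struct Quick                               /* stand-in for QUICK_SELECT_I */
    {
      std::vector<std::unique_ptr<Quick>> children;
    };

    struct Plan                                /* stand-in for TABLE_READ_PLAN */
    {
      virtual ~Plan() = default;
      virtual std::unique_ptr<Quick> make_quick() const = 0;
    };

    struct RangePlan : Plan
    {
      std::unique_ptr<Quick> make_quick() const override
      { return std::make_unique<Quick>(); }
    };

    struct UnionPlan : Plan
    {
      std::vector<std::unique_ptr<Plan>> parts;
      std::unique_ptr<Quick> make_quick() const override
      {
        auto q= std::make_unique<Quick>();
        for (const auto &p : parts)            /* one child executor per sub-plan */
        {
          auto child= p->make_quick();
          if (!child)
            return nullptr;                    /* a failed child aborts the whole plan */
          q->children.push_back(std::move(child));
        }
        return q;
      }
    };

    int main()
    {
      UnionPlan u;
      u.parts.push_back(std::make_unique<RangePlan>());
      u.parts.push_back(std::make_unique<RangePlan>());
      std::unique_ptr<Quick> q= u.make_quick();
      return (q && q->children.size() == 2) ? 0 : 1;
    }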
+ */ + if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) + { + for(scan= first_ror; scan != last_ror; scan++) + { + if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) || + quick_roru->push_quick_back(quick)) + DBUG_RETURN(NULL); + } + quick_roru->records= records; + quick_roru->read_time= read_cost; + } + DBUG_RETURN(quick_roru); +} + +/****************************************************************************/ /* make a select tree of all keys in condition */ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) @@ -970,6 +3312,7 @@ get_mm_parts(PARAM *param, COND *cond_func, Field *field, } sel_arg->part=(uchar) key_part->part; tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg); + tree->keys_map.set_bit(key_part->key); } } @@ -981,6 +3324,7 @@ get_mm_parts(PARAM *param, COND *cond_func, Field *field, if (tree2) tree= tree_or(param,tree,tree2); } + DBUG_RETURN(tree); } @@ -1286,6 +3630,8 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) DBUG_RETURN(tree1); } + key_map result_keys; + result_keys.clear_all(); /* Join the trees key per key */ SEL_ARG **key1,**key2,**end; for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ; @@ -1302,17 +3648,59 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) if ((*key1)->type == SEL_ARG::IMPOSSIBLE) { tree1->type= SEL_TREE::IMPOSSIBLE; - break; + DBUG_RETURN(tree1); } + result_keys.set_bit(key1 - tree1->keys); #ifdef EXTRA_DEBUG (*key1)->test_use_count(*key1); #endif } } + tree1->keys_map= result_keys; + /* dispose index_merge if there is a "range" option */ + if (!result_keys.is_clear_all()) + { + tree1->merges.empty(); + DBUG_RETURN(tree1); + } + + /* ok, both trees are index_merge trees */ + imerge_list_and_list(&tree1->merges, &tree2->merges); DBUG_RETURN(tree1); } +/* + Check if two SEL_TREES can be combined into one (i.e. a single key range + read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without + using index_merge. 
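For illustration (column and index names invented): with a single index on key1, the condition key1 < 10 OR key1 > 20 yields two trees that share key1 and start at the same key part, so sel_trees_can_be_ored() returns true and key_or() simply merges the interval lists into one range scan. With separate indexes on key1 and key2, the condition key1 < 10 OR key2 > 20 has no common key, the function returns false, and tree_or() instead wraps the two trees in a SEL_IMERGE so the disjunction can still be answered by an index_merge union of the two scans.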
+*/ + +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) +{ + key_map common_keys= tree1->keys_map; + DBUG_ENTER("sel_trees_can_be_ored"); + common_keys.intersect(tree2->keys_map); + + if (common_keys.is_clear_all()) + DBUG_RETURN(false); + + /* trees have a common key, check if they refer to same key part */ + SEL_ARG **key1,**key2; + for (uint key_no=0; key_no < param->keys; key_no++) + { + if (common_keys.is_set(key_no)) + { + key1= tree1->keys + key_no; + key2= tree2->keys + key_no; + if ((*key1)->part == (*key2)->part) + { + DBUG_RETURN(true); + } + } + } + DBUG_RETURN(false); +} static SEL_TREE * tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) @@ -1329,19 +3717,62 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) if (tree2->type == SEL_TREE::MAYBE) DBUG_RETURN(tree2); - /* Join the trees key per key */ - SEL_ARG **key1,**key2,**end; - SEL_TREE *result=0; - for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ; - key1 != end ; key1++,key2++) + SEL_TREE *result= 0; + key_map result_keys; + result_keys.clear_all(); + if (sel_trees_can_be_ored(tree1, tree2, param)) { - *key1=key_or(*key1,*key2); - if (*key1) + /* Join the trees key per key */ + SEL_ARG **key1,**key2,**end; + for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ; + key1 != end ; key1++,key2++) { - result=tree1; // Added to tree1 + *key1=key_or(*key1,*key2); + if (*key1) + { + result=tree1; // Added to tree1 + result_keys.set_bit(key1 - tree1->keys); #ifdef EXTRA_DEBUG - (*key1)->test_use_count(*key1); + (*key1)->test_use_count(*key1); #endif + } + } + if (result) + result->keys_map= result_keys; + } + else + { + /* ok, two trees have KEY type but cannot be used without index merge */ + if (tree1->merges.is_empty() && tree2->merges.is_empty()) + { + SEL_IMERGE *merge; + /* both trees are "range" trees, produce new index merge structure */ + if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || + (result->merges.push_back(merge)) || + (merge->or_sel_tree(param, tree1)) || + (merge->or_sel_tree(param, tree2))) + result= NULL; + else + result->type= tree1->type; + } + else if (!tree1->merges.is_empty() && !tree2->merges.is_empty()) + { + if (imerge_list_or_list(param, &tree1->merges, &tree2->merges)) + result= new SEL_TREE(SEL_TREE::ALWAYS); + else + result= tree1; + } + else + { + /* one tree is index merge tree and another is range tree */ + if (tree1->merges.is_empty()) + swap_variables(SEL_TREE*, tree1, tree2); + + /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ + if (imerge_list_or_tree(param, &tree1->merges, tree2)) + result= new SEL_TREE(SEL_TREE::ALWAYS); + else + result= tree1; } } DBUG_RETURN(result); @@ -1390,7 +3821,6 @@ and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag) } - static SEL_ARG * key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag) { @@ -2122,7 +4552,7 @@ SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par) } - /* Test that the proporties for a red-black tree holds */ + /* Test that the properties for a red-black tree hold */ #ifdef EXTRA_DEBUG int test_rb_tree(SEL_ARG *element,SEL_ARG *parent) @@ -2210,38 +4640,116 @@ void SEL_ARG::test_use_count(SEL_ARG *root) #endif +/* + Calculate estimate of number records that will be retrieved by a range + scan on given index using given SEL_ARG intervals tree. 
+ SYNOPSIS + check_quick_select + param Parameter from test_quick_select + idx Number of index to use in PARAM::key SEL_TREE::key + tree Transformed selection condition, tree->key[idx] holds intervals + tree to be used for scanning. + NOTES + param->is_ror_scan is set to reflect if the key scan is a ROR (see + is_key_scan_ror function for more info) + param->table->quick_*, param->range_count (and maybe others) are + updated with data of given key scan, see check_quick_keys for details. + + RETURN + Estimate # of records to be retrieved. + HA_POS_ERROR if estimate calculation failed due to table handler problems. -/***************************************************************************** -** Check how many records we will find by using the found tree -*****************************************************************************/ +*/ static ha_rows check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) { ha_rows records; + bool cpk_scan; + uint key; DBUG_ENTER("check_quick_select"); - + + param->is_ror_scan= false; + if (!tree) DBUG_RETURN(HA_POS_ERROR); // Can't use it param->max_key_part=0; param->range_count=0; + key= param->real_keynr[idx]; + if (tree->type == SEL_ARG::IMPOSSIBLE) DBUG_RETURN(0L); // Impossible select. return if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0) DBUG_RETURN(HA_POS_ERROR); // Don't use tree + + enum ha_key_alg key_alg= param->table->key_info[key].algorithm; + if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) + { + /* Records are not ordered by rowid for other types of indexes. */ + cpk_scan= false; + } + else + { + /* + Clustered PK scan is a special case, check_quick_keys doesn't recognize + CPK scans as ROR scans (while actually any CPK scan is a ROR scan). + */ + cpk_scan= (param->table->primary_key == param->real_keynr[idx]) && + param->table->file->primary_key_is_clustered(); + param->is_ror_scan= !cpk_scan; + } + records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); if (records != HA_POS_ERROR) - { - uint key=param->real_keynr[idx]; + { param->table->quick_keys.set_bit(key); param->table->quick_rows[key]=records; param->table->quick_key_parts[key]=param->max_key_part+1; + + if (cpk_scan) + param->is_ror_scan= true; } DBUG_PRINT("exit", ("Records: %lu", (ulong) records)); DBUG_RETURN(records); } +/* + Recursively calculate estimate of # rows that will be retrieved by + key scan on key idx. + SYNOPSIS + check_quick_keys() + param Parameter from test_quick select function. + idx Number of key to use in PARAM::keys in list of used keys + (param->real_keynr[idx] holds the key number in table) + key_tree SEL_ARG tree being examined. + min_key Buffer with partial min key value tuple + min_key_flag + max_key Buffer with partial max key value tuple + max_key_flag + + NOTES + The function does the recursive descent on the tree via SEL_ARG::left, + SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates + are calculated using records_in_range calls at the leaf nodes and then + summed. + + param->min_key and param->max_key are used to hold prefixes of key value + tuples. + + The side effects are: + + param->max_key_part is updated to hold the maximum number of key parts used + in scan minus 1. + + param->range_count is incremented if the function finds a range that + wasn't counted by the caller. 
+ + param->is_ror_scan is cleared if the function detects that the key scan is + not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror + function for description of which key scans are ROR scans) +*/ + static ha_rows check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, char *min_key,uint min_key_flag, char *max_key, @@ -2252,6 +4760,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, param->max_key_part=max(param->max_key_part,key_tree->part); if (key_tree->left != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. + */ + param->is_ror_scan= false; records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag, max_key,max_key_flag); if (records == HA_POS_ERROR) // Impossible @@ -2260,12 +4775,25 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, uint tmp_min_flag,tmp_max_flag,keynr; char *tmp_min_key=min_key,*tmp_max_key=max_key; - + key_tree->store(param->key[idx][key_tree->part].store_length, &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag); uint min_key_length= (uint) (tmp_min_key- param->min_key); uint max_key_length= (uint) (tmp_max_key- param->max_key); + if (param->is_ror_scan) + { + /* + If the index doesn't cover entire key, mark the scan as non-ROR scan. + Actually we're cutting off some ROR scans here. + */ + uint16 fieldnr= param->table->key_info[param->real_keynr[idx]]. + key_part[key_tree->part].fieldnr - 1; + if (param->table->field[fieldnr]->key_length() != + param->key[idx][key_tree->part].length) + param->is_ror_scan= false; + } + if (key_tree->next_key_part && key_tree->next_key_part->part == key_tree->part+1 && key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) @@ -2279,6 +4807,12 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp_max_key, max_key_flag | key_tree->max_flag); goto end; // Ugly, but efficient } + else + { + /* The interval for current key part is not c1 <= keyXpartY <= c1 */ + param->is_ror_scan= false; + } + tmp_min_flag=key_tree->min_flag; tmp_max_flag=key_tree->max_flag; if (!tmp_min_flag) @@ -2307,6 +4841,24 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp=1; // Max one record else { + if (param->is_ror_scan) + { + /* + If we get here, the condition on the key was converted to form + "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND + somecond(keyXpart{key_tree->part})" + Check if + somecond is "keyXpart{key_tree->part} = const" and + uncovered "tail" of KeyX parts is either empty or is identical to + first members of clustered primary key. + */ + if (!(min_key_length == max_key_length && + !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) && + !key_tree->min_flag && !key_tree->max_flag && + is_key_scan_ror(param, keynr, key_tree->part + 1))) + param->is_ror_scan= false; + } + if (tmp_min_flag & GEOM_FLAG) { key_range min_range; @@ -2343,6 +4895,13 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, records+=tmp; if (key_tree->right != &null_element) { + /* + There are at least two intervals for current key part, i.e. condition + was converted to something like + (keyXpartY less/equals c1) OR (keyXpartY more/equals c2). + This is not a ROR scan if the key is not Clustered Primary Key. 
+ */ + param->is_ror_scan= false; tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag, max_key,max_key_flag); if (tmp == HA_POS_ERROR) @@ -2353,23 +4912,113 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, } -/**************************************************************************** -** change a tree to a structure to be used by quick_select -** This uses it's own malloc tree -****************************************************************************/ +/* + Check if key scan on given index with equality conditions on first n key + parts is a ROR scan. + + SYNOPSIS + is_key_scan_ror() + param Parameter from test_quick_select + keynr Number of key in the table. The key must not be a clustered + primary key. + nparts Number of first key parts for which equality conditions + are present. + + NOTES + ROR (Rowid Ordered Retrieval) key scan is a key scan that produces + ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function) + + An index scan is a ROR scan if it is done using a condition in form + + "key1_1=c_1 AND ... AND key1_n=c_n" (1) + + where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n]) + + and the table has a clustered Primary Key -static QUICK_SELECT * -get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree) + PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being + identical to uncovered parts ot the key being scanned (2) + + Scans on HASH indexes are not ROR scans, + any range scan on clustered primary key is ROR scan (3) + + Check (1) is made in check_quick_keys() + Check (3) is made check_quick_select() + Check (2) is made by this function. + + RETURN + true If the scan is ROR-scan + false otherwise +*/ + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) { - QUICK_SELECT *quick; + KEY *table_key= param->table->key_info + keynr; + KEY_PART_INFO *key_part= table_key->key_part + nparts; + KEY_PART_INFO *key_part_end= table_key->key_part + + table_key->key_parts; + + if (key_part == key_part_end) + return true; + uint pk_number= param->table->primary_key; + if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY) + return false; + + KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part; + KEY_PART_INFO *pk_part_end= pk_part + + param->table->key_info[pk_number].key_parts; + for(;(key_part!=key_part_end) && (pk_part != pk_part_end); + ++key_part, ++pk_part) + { + if ((key_part->field != pk_part->field) || + (key_part->length != pk_part->length)) + return false; + } + return (key_part == key_part_end); +} + + +/* + Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. + + SYNOPSIS + get_quick_select() + param + idx Index of used key in param->key. + key_tree SEL_ARG tree for the used key + parent_alloc If not NULL, use it to allocate memory for + quick select data. Otherwise use quick->alloc. + + RETURN + NULL on error + otherwise created quick select + + NOTES + The caller must call QUICK_SELCT::init for returned quick select + + CAUTION! This function may change THR_MALLOC to a MEM_ROOT which will be + deallocated when the returned quick select is deleted. 
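A concrete (invented) illustration of condition (2) in the is_key_scan_ror() comment above: with a clustered PRIMARY KEY (a, b), an equality condition on the first part of an index (c, a, b) leaves the uncovered tail (a, b), which matches the primary key part for part, so rowids come back in order and the scan qualifies as ROR; the same condition on an index (c, d) leaves the tail (d), which does not match the primary key's first part, and the scan is not classified as ROR.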
+*/ + +QUICK_RANGE_SELECT * +get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, + MEM_ROOT *parent_alloc) +{ + QUICK_RANGE_SELECT *quick; DBUG_ENTER("get_quick_select"); + + if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL) - quick=new QUICK_SELECT_GEOM(param->thd, param->table, param->real_keynr[idx], - 0); + quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table, + param->real_keynr[idx], + test(parent_alloc), + parent_alloc); else - quick=new QUICK_SELECT(param->thd, param->table, param->real_keynr[idx]); - + quick=new QUICK_RANGE_SELECT(param->thd, param->table, + param->real_keynr[idx], + test(parent_alloc), parent_alloc); + if (quick) { if (quick->error || @@ -2382,11 +5031,12 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree) else { quick->key_parts=(KEY_PART*) - memdup_root(&quick->alloc,(char*) param->key[idx], - sizeof(KEY_PART)* - param->table->key_info[param->real_keynr[idx]].key_parts); + memdup_root(parent_alloc? parent_alloc : &quick->alloc, + (char*) param->key[idx], + sizeof(KEY_PART)* + param->table->key_info[param->real_keynr[idx]].key_parts); } - } + } DBUG_RETURN(quick); } @@ -2394,9 +5044,8 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree) /* ** Fix this to get all possible sub_ranges */ - -static bool -get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key, +bool +get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, SEL_ARG *key_tree,char *min_key,uint min_key_flag, char *max_key, uint max_key_flag) { @@ -2492,7 +5141,9 @@ get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key, set_if_bigger(quick->max_used_key_length,range->min_length); set_if_bigger(quick->max_used_key_length,range->max_length); set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1); - quick->ranges.push_back(range); + if (insert_dynamic(&quick->ranges, (gptr)&range)) + return 1; + end: if (key_tree->right != &null_element) @@ -2506,12 +5157,12 @@ get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key, Return 1 if there is only one range and this uses the whole primary key */ -bool QUICK_SELECT::unique_key_range() +bool QUICK_RANGE_SELECT::unique_key_range() { if (ranges.elements == 1) { - QUICK_RANGE *tmp; - if (((tmp=ranges.head())->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE) + QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer); + if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE) { KEY *key=head->key_info+index; return ((key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME && @@ -2537,13 +5188,55 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length) } +bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields) +{ + return check_if_key_used(head, index, *fields); +} + +bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->check_if_keys_used(fields)) + return 1; + } + return 0; +} 
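The four check_if_keys_used() methods above share one small pattern: a compound quick select answers a per-index question by asking each of its child scans and reporting whether any of them matches. A miniature of that pattern with invented types (the real methods consult check_if_key_used() with the query's field list) could look like:

    #include <vector>

    struct Scan
    {
      virtual ~Scan() = default;
      virtual bool uses_key(unsigned key) const = 0;
    };

    struct SingleScan : Scan
    {
      unsigned key;
      explicit SingleScan(unsigned k) : key(k) {}
      bool uses_key(unsigned k) const override { return k == key; }
    };

    struct CompoundScan : Scan
    {
      std::vector<const Scan*> children;
      bool uses_key(unsigned k) const override
      {
        for (const Scan *c : children)
          if (c->uses_key(k))                /* one matching child is enough */
            return true;
        return false;
      }
    };

    int main()
    {
      SingleScan a(1), b(3);
      CompoundScan u;
      u.children= {&a, &b};
      return u.uses_key(3) ? 0 : 1;
    }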
+ /**************************************************************************** Create a QUICK RANGE based on a key ****************************************************************************/ -QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref) +QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, + TABLE_REF *ref) { - QUICK_SELECT *quick=new QUICK_SELECT(thd, table, ref->key, 1); + QUICK_RANGE_SELECT *quick=new QUICK_RANGE_SELECT(thd, table, ref->key, 1); KEY *key_info = &table->key_info[ref->key]; KEY_PART *key_part; QUICK_RANGE *range; @@ -2551,11 +5244,16 @@ QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref) if (!quick) return 0; /* no ranges found */ + if (quick->init()) + { + delete quick; + return 0; + } + if (cp_buffer_from_ref(ref)) { if (thd->is_fatal_error) goto err; // out of memory - return quick; // empty range } if (!(range= new QUICK_RANGE())) @@ -2579,7 +5277,7 @@ QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref) key_part->store_length= key_info->key_part[part].store_length; key_part->null_bit= key_info->key_part[part].null_bit; } - if (quick->ranges.push_back(range)) + if (insert_dynamic(&quick->ranges,(gptr)&range)) goto err; /* @@ -2598,7 +5296,7 @@ QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, TABLE_REF *ref) EQ_RANGE))) goto err; *ref->null_ref_key= 0; // Clear null byte - if (quick->ranges.push_back(null_range)) + if (insert_dynamic(&quick->ranges,(gptr)&null_range)) goto err; } @@ -2609,11 +5307,286 @@ err: return 0; } + +/* + Fetch all row ids into unique. + + If table has a clustered primary key that covers all rows (true for bdb + and innodb currently) and one of the index_merge scans is a scan on PK, + then + primary key scan rowids are not put into Unique and also + rows that will be retrieved by PK scan are not put into Unique + + RETURN + 0 OK + other error +*/ + +int QUICK_INDEX_MERGE_SELECT::prepare_unique() +{ + int result; + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::prepare_unique"); + + /* We're going to just read rowids. */ + head->file->extra(HA_EXTRA_KEYREAD); + + /* + Make innodb retrieve all PK member fields, so + * ha_innobase::position (which uses them) call works. + * We can filter out rows that will be retrieved by clustered PK. + (This also creates a deficiency - it is possible that we will retrieve + parts of key that are not used by current query at all.) 
+ */ + head->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + + cur_quick_select->init(); + + unique= new Unique(refpos_order_cmp, (void *)head->file, + head->file->ref_length, + thd->variables.sortbuff_size); + if (!unique) + DBUG_RETURN(1); + do + { + while ((result= cur_quick_select->get_next()) == HA_ERR_END_OF_FILE) + { + cur_quick_select= cur_quick_it++; + if (!cur_quick_select) + break; + + if (cur_quick_select->init()) + DBUG_RETURN(1); + + /* QUICK_RANGE_SELECT::reset never fails */ + cur_quick_select->reset(); + } + + if (result) + { + if (result != HA_ERR_END_OF_FILE) + DBUG_RETURN(result); + break; + } + + if (thd->killed) + DBUG_RETURN(1); + + /* skip row if it will be retrieved by clustered PK scan */ + if (pk_quick_select && pk_quick_select->row_in_ranges()) + continue; + + cur_quick_select->file->position(cur_quick_select->record); + result= unique->unique_add((char*)cur_quick_select->file->ref); + if (result) + DBUG_RETURN(1); + + }while(true); + + /* ok, all row ids are in Unique */ + result= unique->get(head); + doing_pk_scan= false; + init_read_record(&read_record, thd, head, NULL, 1, 1); + /* index_merge currently doesn't support "using index" at all */ + head->file->extra(HA_EXTRA_NO_KEYREAD); + + DBUG_RETURN(result); +} + + +/* + Get next row for index_merge. + NOTES + The rows are read from + 1. rowids stored in Unique. + 2. QUICK_RANGE_SELECT with clustered primary key (if any). + The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint. +*/ + +int QUICK_INDEX_MERGE_SELECT::get_next() +{ + int result; + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next"); + + if (doing_pk_scan) + DBUG_RETURN(pk_quick_select->get_next()); + + result= read_record.read_record(&read_record); + + if (result == -1) + { + result= HA_ERR_END_OF_FILE; + end_read_record(&read_record); + /* All rows from Unique have been retrieved, do a clustered PK scan */ + if(pk_quick_select) + { + doing_pk_scan= true; + if ((result= pk_quick_select->init())) + DBUG_RETURN(result); + DBUG_RETURN(pk_quick_select->get_next()); + } + } + + DBUG_RETURN(result); +} + + +/* + Retrieve next record. + SYNOPSIS + QUICK_ROR_INTERSECT_SELECT::get_next() + + NOTES + Invariant on enter/exit: all intersected selects have retrieved all index + records with rowid <= some_rowid_val and no intersected select has + retrieved any index records with rowid > some_rowid_val. + We start fresh and loop until we have retrieved the same rowid in each of + the key scans or we got an error. + + If a Clustered PK scan is present, it is used only to check if row + satisfies its condition (and never used for row retrieval). + + RETURN + 0 - Ok + other - Error code if any error occurred. 
+*/ + +int QUICK_ROR_INTERSECT_SELECT::get_next() +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + int error, cmp; + uint last_rowid_count=0; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); + + /* Get a rowid for first quick and save it as a 'candidate' */ + quick= quick_it++; + if (cpk_quick) + { + do { + error= quick->get_next(); + }while (!error && !cpk_quick->row_in_ranges()); + } + else + error= quick->get_next(); + + if (error) + DBUG_RETURN(error); + + quick->file->position(quick->record); + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + + while (last_rowid_count < quick_selects.elements) + { + if (!(quick= quick_it++)) + { + quick_it.rewind(); + quick= quick_it++; + } + + do { + if ((error= quick->get_next())) + DBUG_RETURN(error); + quick->file->position(quick->record); + cmp= head->file->cmp_ref(quick->file->ref, last_rowid); + } while (cmp < 0); + + /* Ok, current select 'caught up' and returned ref >= cur_ref */ + if (cmp > 0) + { + /* Found a row with ref > cur_ref. Make it a new 'candidate' */ + if (cpk_quick) + { + while (!cpk_quick->row_in_ranges()) + { + if ((error= quick->get_next())) + DBUG_RETURN(error); + } + } + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + } + else + { + /* current 'candidate' row confirmed by this select */ + last_rowid_count++; + } + } + + /* We get here iff we got the same row ref in all scans. */ + if (need_to_fetch_row) + error= head->file->rnd_pos(head->record[0], last_rowid); + DBUG_RETURN(error); +} + + +/* + Retrieve next record. + SYNOPSIS + QUICK_ROR_UNION_SELECT::get_next() + + NOTES + Enter/exit invariant: + For each quick select in the queue a {key,rowid} tuple has been + retrieved but the corresponding row hasn't been passed to output. + + RETURN + 0 - Ok + other - Error code if any error occurred. +*/ + +int QUICK_ROR_UNION_SELECT::get_next() +{ + int error, dup_row; + QUICK_SELECT_I *quick; + byte *tmp; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next"); + + do + { + if (!queue.elements) + DBUG_RETURN(HA_ERR_END_OF_FILE); + /* Ok, we have a queue with >= 1 scans */ + + quick= (QUICK_SELECT_I*)queue_top(&queue); + memcpy(cur_rowid, quick->last_rowid, rowid_length); + + /* put into queue rowid from the same stream as top element */ + if ((error= quick->get_next())) + { + if (error != HA_ERR_END_OF_FILE) + DBUG_RETURN(error); + queue_remove(&queue, 0); + } + else + { + quick->save_last_pos(); + queue_replaced(&queue); + } + + if (!have_prev_rowid) + { + /* No rows have been returned yet */ + dup_row= false; + have_prev_rowid= true; + } + else + dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid); + }while (dup_row); + + tmp= cur_rowid; + cur_rowid= prev_rowid; + prev_rowid= tmp; + + error= head->file->rnd_pos(quick->record, prev_rowid); + DBUG_RETURN(error); +} + /* get next possible record using quick-struct */ -int QUICK_SELECT::get_next() +int QUICK_RANGE_SELECT::get_next() { - DBUG_ENTER("get_next"); + DBUG_ENTER("QUICK_RANGE_SELECT::get_next"); for (;;) { @@ -2627,7 +5600,14 @@ int QUICK_SELECT::get_next() DBUG_RETURN(result); } - if (!(range= it++)) + if (!cur_range) + range= *(cur_range= (QUICK_RANGE**) ranges.buffer); + else + range= + (cur_range == ((QUICK_RANGE**) ranges.buffer + ranges.elements - 1)) ? 
+ (QUICK_RANGE*) 0 : *(++cur_range); + + if (!range) DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used start_key.key= (const byte*) range->min_key; @@ -2660,9 +5640,9 @@ int QUICK_SELECT::get_next() /* Get next for geometrical indexes */ -int QUICK_SELECT_GEOM::get_next() +int QUICK_RANGE_SELECT_GEOM::get_next() { - DBUG_ENTER(" QUICK_SELECT_GEOM::get_next"); + DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next"); for (;;) { @@ -2676,7 +5656,14 @@ int QUICK_SELECT_GEOM::get_next() DBUG_RETURN(result); } - if (!(range= it++)) + if (!cur_range) + range= *(cur_range= (QUICK_RANGE**) ranges.buffer); + else + range= + (cur_range == ((QUICK_RANGE**) ranges.buffer + ranges.elements - 1)) ? + (QUICK_RANGE*) 0 : *(++cur_range); + + if (!range) DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used result= file->index_read(record, @@ -2691,6 +5678,46 @@ int QUICK_SELECT_GEOM::get_next() /* + Check if current row will be retrieved by this QUICK_RANGE_SELECT + + NOTES + It is assumed that currently a scan is being done on another index + which reads all necessary parts of the index that is scanned by this + quick select. + The implementation does a binary search on sorted array of disjoint + ranges, without taking size of range into account. + + This function is used to filter out clustered PK scan rows in + index_merge quick select. + + RETURN + true if current row will be retrieved by this quick select + false if not +*/ + +bool QUICK_RANGE_SELECT::row_in_ranges() +{ + QUICK_RANGE *range; + uint min= 0; + uint max= ranges.elements - 1; + uint mid= (max + min)/2; + + while (min != max) + { + if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid))) + { + /* current row value > mid->max */ + min= mid + 1; + } + else + max= mid; + mid= (min + max) / 2; + } + range= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid); + return (!cmp_next(range) && !cmp_prev(range)); +} + +/* This is a hack: we inherit from QUICK_SELECT so that we can use the get_next() interface, but we have to hold a pointer to the original QUICK_SELECT because its data are used all over the place. What @@ -2700,16 +5727,17 @@ int QUICK_SELECT_GEOM::get_next() for now, this seems to work right at least. 
*/ -QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_SELECT *q, uint used_key_parts) - : QUICK_SELECT(*q), rev_it(rev_ranges) +QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, + uint used_key_parts) + : QUICK_RANGE_SELECT(*q), rev_it(rev_ranges) { QUICK_RANGE *r; + + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + QUICK_RANGE **last_range= pr + ranges.elements; + for (; pr!=last_range; pr++) + rev_ranges.push_front(*pr); - it.rewind(); - for (r = it++; r; r = it++) - { - rev_ranges.push_front(r); - } /* Remove EQ_RANGE flag for keys that are not using the full key */ for (r = rev_it++; r; r = rev_it++) { @@ -2802,10 +5830,51 @@ int QUICK_SELECT_DESC::get_next() /* + Compare if found key is over max-value + Returns 0 if key <= range->max_key +*/ + +int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg) +{ + if (range_arg->flag & NO_MAX_RANGE) + return 0; /* key can't be to large */ + + KEY_PART *key_part=key_parts; + uint store_length; + + for (char *key=range_arg->max_key, *end=key+range_arg->max_length; + key < end; + key+= store_length, key_part++) + { + int cmp; + store_length= key_part->store_length; + if (key_part->null_bit) + { + if (*key) + { + if (!key_part->field->is_null()) + return 1; + continue; + } + else if (key_part->field->is_null()) + return 0; + key++; // Skip null byte + store_length--; + } + if ((cmp=key_part->field->key_cmp((byte*) key, key_part->length)) < 0) + return 0; + if (cmp > 0) + return 1; + } + return (range_arg->flag & NEAR_MAX) ? 1 : 0; // Exact match +} + + +/* Returns 0 if found key is inside range (found key >= range->min_key). */ -int QUICK_SELECT_DESC::cmp_prev(QUICK_RANGE *range_arg) +int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg) { int cmp; if (range_arg->flag & NO_MIN_RANGE) @@ -2880,6 +5949,232 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, #endif +void QUICK_RANGE_SELECT::add_info_string(String *str) +{ + KEY *key_info= head->key_info + index; + str->append(key_info->name); +} + +void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str) +{ + QUICK_RANGE_SELECT *quick; + bool first= true; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + str->append("sort_union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + if (pk_quick_select) + { + str->append(','); + pk_quick_select->add_info_string(str); + } + str->append(')'); +} + +void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + str->append("intersect("); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (!first) + str->append(','); + else + first= false; + str->append(key_info->name); + } + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + str->append(','); + str->append(key_info->name); + } + str->append(')'); +} + +void QUICK_ROR_UNION_SELECT::add_info_string(String *str) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + str->append("union("); + while ((quick= it++)) + { + if (!first) + str->append(','); + else + first= false; + quick->add_info_string(str); + } + str->append(')'); +} + + +void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + KEY *key_info= head->key_info + index; + key_names->append(key_info->name); + length= longlong2str(max_used_key_length, buf, 10) - 
buf; + used_lengths->append(buf, length); +} + +void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + key_names->append(','); + used_lengths->append(','); + } + + KEY *key_info= head->key_info + quick->index; + key_names->append(key_info->name); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (pk_quick_select) + { + KEY *key_info= head->key_info + pk_quick_select->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (first) + first= false; + else + { + key_names->append(','); + used_lengths->append(','); + } + key_names->append(key_info->name); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + used_lengths->append(','); + key_names->append(','); + } + quick->add_keys_and_lengths(key_names, used_lengths); + } +} + +#ifndef DBUG_OFF + +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg) +{ + SEL_ARG **key,**end; + int idx; + char buff[1024]; + DBUG_ENTER("print_sel_tree"); + if (! _db_on_) + DBUG_VOID_RETURN; + + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (idx= 0,key=tree->keys, end=key+param->keys ; + key != end ; + key++,idx++) + { + if (tree_map->is_set(idx)) + { + uint keynr= param->real_keynr[idx]; + if (tmp.length()) + tmp.append(','); + tmp.append(param->table->key_info[keynr].name); + } + } + if (!tmp.length()) + tmp.append("(empty)"); + + DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr())); + + DBUG_VOID_RETURN; +} + +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end) +{ + DBUG_ENTER("print_ror_scans"); + if (! 
_db_on_) + DBUG_VOID_RETURN; + + char buff[1024]; + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for(;start != end; start++) + { + if (tmp.length()) + tmp.append(','); + tmp.append(table->key_info[(*start)->keynr].name); + } + if (!tmp.length()) + tmp.append("(empty)"); + DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr())); + DBUG_VOID_RETURN; +} + /***************************************************************************** ** Print a quick range for debugging ** TODO: @@ -2887,8 +6182,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, ** of locking the DEBUG stream ! *****************************************************************************/ -#ifndef DBUG_OFF - static void print_key(KEY_PART *key_part,const char *key,uint used_length) { @@ -2921,48 +6214,122 @@ print_key(KEY_PART *key_part,const char *key,uint used_length) } -static void print_quick(QUICK_SELECT *quick,const key_map* needed_reg) +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) { - QUICK_RANGE *range; char buf[MAX_KEY/8+1]; DBUG_ENTER("print_param"); if (! _db_on_ || !quick) DBUG_VOID_RETURN; + DBUG_LOCK_FILE; + + quick->dbug_dump(0, true); + fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf)); + + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} - List_iterator<QUICK_RANGE> li(quick->ranges); + +static void print_rowid(byte* val, int len) +{ + byte *pb; DBUG_LOCK_FILE; - fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n", - quick->index, needed_reg->print(buf)); - while ((range=li++)) + fputc('\"', DBUG_FILE); + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%c", *pb); + fprintf(DBUG_FILE, "\", hex: "); + + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%x ", *pb); + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; +} + +void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) +{ + fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n", + indent, "", head->key_info[index].name, max_used_key_length); + + if (verbose) { - if (!(range->flag & NO_MIN_RANGE)) + QUICK_RANGE *range; + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + QUICK_RANGE **last_range= pr + ranges.elements; + for (; pr!=last_range; ++pr) { - print_key(quick->key_parts,range->min_key,range->min_length); - if (range->flag & NEAR_MIN) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - } - fputs("X",DBUG_FILE); + fprintf(DBUG_FILE, "%*s", indent + 2, ""); + range= *pr; + if (!(range->flag & NO_MIN_RANGE)) + { + print_key(key_parts,range->min_key,range->min_length); + if (range->flag & NEAR_MIN) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + } + fputs("X",DBUG_FILE); - if (!(range->flag & NO_MAX_RANGE)) - { - if (range->flag & NEAR_MAX) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - print_key(quick->key_parts,range->max_key,range->max_length); + if (!(range->flag & NO_MAX_RANGE)) + { + if (range->flag & NEAR_MAX) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + print_key(key_parts,range->max_key,range->max_length); + } + fputs("\n",DBUG_FILE); } - fputs("\n",DBUG_FILE); } - DBUG_UNLOCK_FILE; - DBUG_VOID_RETURN; +} + +void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if 
(pk_quick_select) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + pk_quick_select->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", + indent, "", need_to_fetch_row? "":"non-"); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (cpk_quick) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + cpk_quick->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + QUICK_SELECT_I *quick; + fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + fprintf(DBUG_FILE, "%*s}\n", indent, ""); } #endif /***************************************************************************** -** Instansiate templates +** Instantiate templates *****************************************************************************/ #ifdef __GNUC__ diff --git a/sql/opt_range.h b/sql/opt_range.h index 9b2e9e45bac..122d444af31 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -66,61 +66,465 @@ class QUICK_RANGE :public Sql_alloc { }; -class QUICK_SELECT { +/* + Quick select interface. + This class is a parent for all QUICK_*_SELECT and FT_SELECT classes. +*/ + +class QUICK_SELECT_I +{ +public: + bool sorted; + ha_rows records; /* estimate of # of records to be retrieved */ + double read_time; /* time to perform this retrieval */ + TABLE *head; + /* + Index this quick select uses, or MAX_KEY for quick selects + that use several indexes + */ + uint index; + + /* + Total length of first used_key_parts parts of the key. + Applicable if index!= MAX_KEY. + */ + uint max_used_key_length; + + /* + Max. number of (first) key parts this quick select uses for retrieval. + eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2. + Applicable if index!= MAX_KEY. + */ + uint used_key_parts; + + QUICK_SELECT_I(); + virtual ~QUICK_SELECT_I(){}; + + /* + Do post-constructor initialization. + SYNOPSIS + init() + + init() performs initializations that should have been in constructor if + it was possible to return errors from constructors. The join optimizer may + create and then delete quick selects without retrieving any rows so init() + must not contain any IO or CPU intensive code. + + If init() call fails the only valid action is to delete this quick select, + reset() and get_next() must not be called. + + RETURN + 0 OK + other Error code + */ + virtual int init() = 0; + + /* + Initialize quick select for row retrieval. + SYNOPSIS + reset() + + reset() should be called when it is certain that row retrieval will be + necessary. This call may do heavyweight initialization like buffering first + N records etc. If reset() call fails get_next() must not be called. 
+ + RETURN + 0 OK + other Error code + */ + virtual int reset(void) = 0; + virtual int get_next() = 0; /* get next record to retrieve */ + virtual bool reverse_sorted() = 0; + virtual bool unique_key_range() { return false; } + + enum { + QS_TYPE_RANGE = 0, + QS_TYPE_INDEX_MERGE = 1, + QS_TYPE_RANGE_DESC = 2, + QS_TYPE_FULLTEXT = 3, + QS_TYPE_ROR_INTERSECT = 4, + QS_TYPE_ROR_UNION = 5, + }; + + /* Get type of this quick select - one of the QS_TYPE_* values */ + virtual int get_type() = 0; + + /* + Initialize this quick select as a merged scan inside a ROR-union or a ROR- + intersection scan. The caller must not additionally call init() if this + function is called. + SYNOPSIS + init_ror_merged_scan() + reuse_handler If true, the quick select may use table->handler, otherwise + it must create and use a separate handler object. + RETURN + 0 Ok + other Error + */ + virtual int init_ror_merged_scan(bool reuse_handler) + { DBUG_ASSERT(0); return 1; } + + /* + Save ROWID of last retrieved row in file->ref. This used in ROR-merging. + */ + virtual void save_last_pos(){}; + + /* + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. + This is used by select_describe. + */ + virtual void add_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + /* + Append text representation of quick select structure (what and how is + merged) to str. The result is added to "Extra" field in EXPLAIN output. + This function is implemented only by quick selects that merge other quick + selects output and/or can produce output suitable for merging. + */ + virtual void add_info_string(String *str) {}; + /* + Return 1 if any index used by this quick select + a) uses field that is listed in passed field list or + b) is automatically updated (like a timestamp) + */ + virtual bool check_if_keys_used(List<Item> *fields); + + /* + rowid of last row retrieved by this quick select. This is used only when + doing ROR-index_merge selects + */ + byte *last_rowid; + + /* + Table record buffer used by this quick select. + */ + byte *record; +#ifndef DBUG_OFF + /* + Print quick select information to DBUG_FILE. Caller is responsible + for locking DBUG_FILE before this call and unlocking it afterwards. + */ + virtual void dbug_dump(int indent, bool verbose)= 0; +#endif +}; + + +struct st_qsel_param; +class SEL_ARG; + +/* + Quick select that does a range scan on a single key. The records are + returned in key order. +*/ +class QUICK_RANGE_SELECT : public QUICK_SELECT_I +{ +protected: + bool next,dont_free; public: - bool next,dont_free,sorted; int error; - uint index, max_used_key_length, used_key_parts; - TABLE *head; +protected: handler *file; - byte *record; - List<QUICK_RANGE> ranges; - List_iterator<QUICK_RANGE> it; - QUICK_RANGE *range; - MEM_ROOT alloc; + /* + If true, this quick select has its "own" handler object which should be + closed no later then this quick select is deleted. 
+ */ + bool free_file; - KEY_PART *key_parts; +protected: + friend + QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, + struct st_table_ref *ref); + friend bool get_quick_keys(struct st_qsel_param *param, + QUICK_RANGE_SELECT *quick,KEY_PART *key, + SEL_ARG *key_tree, + char *min_key, uint min_key_flag, + char *max_key, uint max_key_flag); + friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx, + SEL_ARG *key_tree, + MEM_ROOT *alloc); + friend class QUICK_SELECT_DESC; + friend class QUICK_INDEX_MERGE_SELECT; + friend class QUICK_ROR_INTERSECT_SELECT; + + DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */ + QUICK_RANGE **cur_range; /* current element in ranges */ + + QUICK_RANGE *range; + KEY_PART *key_parts; KEY_PART_INFO *key_part_info; - ha_rows records; - double read_time; + int cmp_next(QUICK_RANGE *range); + int cmp_prev(QUICK_RANGE *range); + bool row_in_ranges(); +public: + MEM_ROOT alloc; - QUICK_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0); - virtual ~QUICK_SELECT(); - void reset(void) { next=0; it.rewind(); } - int init() + QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0, + MEM_ROOT *parent_alloc=NULL); + ~QUICK_RANGE_SELECT(); + + int reset(void) { - key_part_info= head->key_info[index].key_part; - return error=file->ha_index_init(index); + next=0; + range= NULL; + cur_range= NULL; + return 0; } - virtual int get_next(); - virtual bool reverse_sorted() { return 0; } + int init(); + int get_next(); + bool reverse_sorted() { return 0; } bool unique_key_range(); + int init_ror_merged_scan(bool reuse_handler); + void save_last_pos() + { + file->position(record); + }; + int get_type() { return QS_TYPE_RANGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif }; -class QUICK_SELECT_GEOM: public QUICK_SELECT +class QUICK_RANGE_SELECT_GEOM: public QUICK_RANGE_SELECT { public: - QUICK_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg, bool no_alloc) - :QUICK_SELECT(thd, table, index_arg, no_alloc) + QUICK_RANGE_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg, + bool no_alloc, MEM_ROOT *parent_alloc) + :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc) {}; virtual int get_next(); }; -class QUICK_SELECT_DESC: public QUICK_SELECT +/* + QUICK_INDEX_MERGE_SELECT - index_merge access method quick select. + + QUICK_INDEX_MERGE_SELECT uses + * QUICK_RANGE_SELECTs to get rows + * Unique class to remove duplicate rows + + INDEX MERGE OPTIMIZER + Current implementation doesn't detect all cases where index_merge could + be used, in particular: + * index_merge will never be used if range scan is possible (even if + range scan is more expensive) + + * index_merge+'using index' is not supported (this the consequence of + the above restriction) + + * If WHERE part contains complex nested AND and OR conditions, some ways + to retrieve rows using index_merge will not be considered. The choice + of read plan may depend on the order of conjuncts/disjuncts in WHERE + part of the query, see comments near imerge_list_or_list and + SEL_IMERGE::or_sel_tree_with_checks functions for details. + + * There is no "index_merge_ref" method (but index_merge on non-first + table in join is possible with 'range checked for each record'). + + See comments around SEL_IMERGE class and test_quick_select for more + details. 
+ + ROW RETRIEVAL ALGORITHM + + index_merge uses Unique class for duplicates removal. index_merge takes + advantage of Clustered Primary Key (CPK) if the table has one. + The index_merge algorithm consists of two phases: + + Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique): + prepare() + { + activate 'index only'; + while(retrieve next row for non-CPK scan) + { + if (there is a CPK scan and row will be retrieved by it) + skip this row; + else + put its rowid into Unique; + } + deactivate 'index only'; + } + + Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next + calls): + + fetch() + { + retrieve all rows from row pointers stored in Unique; + free Unique; + retrieve all rows for CPK scan; + } +*/ + +class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table); + ~QUICK_INDEX_MERGE_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_INDEX_MERGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* range quick selects this index_merge read consists of */ + List<QUICK_RANGE_SELECT> quick_selects; + + /* quick select which is currently used for rows retrieval */ + List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it; + QUICK_RANGE_SELECT* cur_quick_select; + + /* quick select that uses clustered primary key (NULL if none) */ + QUICK_RANGE_SELECT* pk_quick_select; + + /* true if this select is currently doing a clustered PK scan */ + bool doing_pk_scan; + + Unique *unique; + MEM_ROOT alloc; + + THD *thd; + int prepare_unique(); + + /* used to get rows collected in Unique */ + READ_RECORD read_record; +}; + + +/* + Rowid-Ordered Retrieval (ROR) index intersection quick select. + This quick select produces intersection of row sequences returned + by several QUICK_RANGE_SELECTs it "merges". + + All merged QUICK_RANGE_SELECTs must return rowids in rowid order. + QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too. + + All merged quick selects retrieve {rowid, covered_fields} tuples (not full + table records). + QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used + by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't + cover needed all fields. + + If one of the merged quick selects is a Clustered PK range scan, it is + used only to filter rowid sequence produced by other merged quick selects. 
+*/ + +class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + ~QUICK_ROR_INTERSECT_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_INTERSECT; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + int init_ror_merged_scan(bool reuse_handler); + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* + Range quick selects this intersection consists of, not including + cpk_quick. + */ + List<QUICK_RANGE_SELECT> quick_selects; + + /* + Merged quick select that uses Clustered PK, if there is one. This quick + select is not used for row retrieval, it is used for row retrieval. + */ + QUICK_RANGE_SELECT *cpk_quick; + + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ + THD *thd; /* current thread */ + bool need_to_fetch_row; /* if true, do retrieve full table records. */ +}; + + +/* + Rowid-Ordered Retrieval index union select. + This quick select produces union of row sequences returned by several + quick select it "merges". + + All merged quick selects must return rowids in rowid order. + QUICK_ROR_UNION_SELECT will return rows in rowid order, too. + + All merged quick selects are set not to retrieve full table records. + ROR-union quick select always retrieves full records. + +*/ + +class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I { public: - QUICK_SELECT_DESC(QUICK_SELECT *q, uint used_key_parts); + QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table); + ~QUICK_ROR_UNION_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_UNION; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + List<QUICK_SELECT_I> quick_selects; /* Merged quick selects */ + + QUEUE queue; /* Priority queue for merge operation */ + MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. 
*/ + + THD *thd; /* current thread */ + byte *cur_rowid; /* buffer used in get_next() */ + byte *prev_rowid; /* rowid of last row returned by get_next() */ + bool have_prev_rowid; /* true if prev_rowid has valid data */ + uint rowid_length; /* table rowid length */ +private: + static int queue_cmp(void *arg, byte *val1, byte *val2); +}; + + +class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT +{ +public: + QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts); int get_next(); bool reverse_sorted() { return 1; } + int get_type() { return QS_TYPE_RANGE_DESC; } private: - int cmp_prev(QUICK_RANGE *range); bool range_reads_after_key(QUICK_RANGE *range); #ifdef NOT_USED bool test_if_null_range(QUICK_RANGE *range, uint used_key_parts); #endif - void reset(void) { next=0; rev_it.rewind(); } + int reset(void) { next=0; rev_it.rewind(); return 0; } List<QUICK_RANGE> rev_ranges; List_iterator<QUICK_RANGE> rev_it; }; @@ -128,7 +532,7 @@ private: class SQL_SELECT :public Sql_alloc { public: - QUICK_SELECT *quick; // If quick-select used + QUICK_SELECT_I *quick; // If quick-select used COND *cond; // where condition TABLE *head; IO_CACHE file; // Positions to used records @@ -150,17 +554,14 @@ class SQL_SELECT :public Sql_alloc { }; -class FT_SELECT: public QUICK_SELECT { +class FT_SELECT: public QUICK_RANGE_SELECT { public: - FT_SELECT(THD *thd, TABLE *table, uint key): - QUICK_SELECT (thd, table, key, 1) { init(); } + FT_SELECT(THD *thd, TABLE *table, uint key) : + QUICK_RANGE_SELECT (thd, table, key, 1) { init(); } - int init() { return error= file->ft_init(); } - int get_next() { return error= file->ft_read(record); } + int init() { return error=file->ft_init(); } + int get_next() { return error=file->ft_read(record); } + int get_type() { return QS_TYPE_FULLTEXT; } }; - -QUICK_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, - struct st_table_ref *ref); - #endif diff --git a/sql/parse_file.cc b/sql/parse_file.cc new file mode 100644 index 00000000000..c2fd69e0767 --- /dev/null +++ b/sql/parse_file.cc @@ -0,0 +1,785 @@ +/* Copyright (C) 2004 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +// Text .frm files management routines + +#include "mysql_priv.h" +#include <errno.h> +#include <m_ctype.h> +#include <my_sys.h> +#include <my_dir.h> + + +/* + write string with escaping + + SYNOPSIS + write_escaped_string() + file - IO_CACHE for record + val_s - string for writing + + RETURN + FALSE - OK + TRUE - error +*/ + +static my_bool +write_escaped_string(IO_CACHE *file, LEX_STRING *val_s) +{ + char *eos= val_s->str + val_s->length; + char *ptr= val_s->str; + + for (; ptr < eos; ptr++) + { + /* + Should be in sync with read_escaped_string() and + parse_quated_escaped_string() + */ + switch(*ptr) { + case '\\': // escape character + if (my_b_append(file, "\\\\", 2)) + return TRUE; + break; + case '\n': // parameter value delimiter + if (my_b_append(file, "\\n", 2)) + return TRUE; + break; + case '\0': // problem for some string processing utilites + if (my_b_append(file, "\\0", 2)) + return TRUE; + break; + case 26: // problem for windows utilites (Ctrl-Z) + if (my_b_append(file, "\\z", 2)) + return TRUE; + break; + case '\'': // list of string delimiter + if (my_b_append(file, "\\\'", 2)) + return TRUE; + break; + default: + if (my_b_append(file, ptr, 1)) + return TRUE; + } + } + return FALSE; +} + + +/* + write parameter value to IO_CACHE + + SYNOPSIS + write_parameter() + file pointer to IO_CACHE structure for writing + base pointer to data structure + parameter pointer to parameter descriptor + old_version for returning back old version number value + + RETURN + FALSE - OK + TRUE - error +*/ + +static my_bool +write_parameter(IO_CACHE *file, gptr base, File_option *parameter, + ulonglong *old_version) +{ + char num_buf[20]; // buffer for numeric operations + // string for numeric operations + String num(num_buf, sizeof(num_buf), &my_charset_bin); + DBUG_ENTER("write_parameter"); + + switch (parameter->type) { + case FILE_OPTIONS_STRING: + { + LEX_STRING *val_s= (LEX_STRING *)(base + parameter->offset); + if (my_b_append(file, val_s->str, val_s->length)) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_ESTRING: + { + if (write_escaped_string(file, (LEX_STRING *)(base + parameter->offset))) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_ULONGLONG: + { + num.set(*((ulonglong *)(base + parameter->offset)), &my_charset_bin); + if (my_b_append(file, num.ptr(), num.length())) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_REV: + { + ulonglong *val_i= (ulonglong *)(base + parameter->offset); + *old_version= (*val_i)++; + num.set(*val_i, &my_charset_bin); + if (my_b_append(file, num.ptr(), num.length())) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_TIMESTAMP: + { + /* string have to be allocated already */ + LEX_STRING *val_s= (LEX_STRING *)(base + parameter->offset); + time_t tm= time(NULL); + + get_date(val_s->str, GETDATE_DATE_TIME|GETDATE_GMT|GETDATE_FIXEDLENGTH, + tm); + val_s->length= PARSE_FILE_TIMESTAMPLENGTH; + if (my_b_append(file, val_s->str, PARSE_FILE_TIMESTAMPLENGTH)) + DBUG_RETURN(TRUE); + break; + } + case FILE_OPTIONS_STRLIST: + { + List_iterator_fast<LEX_STRING> it(*((List<LEX_STRING>*) + (base + parameter->offset))); + bool first= 1; + LEX_STRING *str; + while ((str= it++)) + { + num.set((ulonglong)str->length, &my_charset_bin); + // ',' after string to detect list continuation + if ((!first && my_b_append(file, " ", 1)) || + 
my_b_append(file, "\'", 1) || + my_b_append(file, str->str, str->length) || + my_b_append(file, "\'", 1)) + { + DBUG_RETURN(TRUE); + } + first= 0; + } + break; + } + default: + DBUG_ASSERT(0); // never should happened + } + DBUG_RETURN(FALSE); +} + + +/* + write new .frm + + SYNOPSIS + sql_create_definition_file() + dir directory where put .frm + file .frm file name + type .frm type string (VIEW, TABLE) + base base address for parameter reading (structure like + TABLE) + parameters parameters description + max_versions number of versions to save + + RETURN + FALSE - OK + TRUE - error +*/ + +my_bool +sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name, + const LEX_STRING *type, + gptr base, File_option *parameters, + uint max_versions) +{ + File handler; + IO_CACHE file; + char path[FN_REFLEN+1]; // +1 to put temporary file name for sure + ulonglong old_version= ULONGLONG_MAX; + int path_end; + DBUG_ENTER("sql_create_definition_file"); + DBUG_PRINT("enter", ("Dir: %s, file: %s, base 0x%lx", + dir->str, file_name->str, (ulong) base)); + + fn_format(path, file_name->str, dir->str, 0, MY_UNPACK_FILENAME); + path_end= strlen(path); + + // temporary file name + path[path_end]='~'; + path[path_end+1]= '\0'; + if ((handler= my_create(path, CREATE_MODE, O_RDWR | O_TRUNC, + MYF(MY_WME))) <= 0) + { + DBUG_RETURN(TRUE); + } + + if (init_io_cache(&file, handler, 0, SEQ_READ_APPEND, 0L, 0, MYF(MY_WME))) + goto err_w_file; + + // write header (file signature) + if (my_b_append(&file, "TYPE=", 5) || + my_b_append(&file, type->str, type->length) || + my_b_append(&file, "\n", 1)) + goto err_w_file; + + // write parameters to temporary file + for (File_option *param= parameters; param->name.str; param++) + { + if (my_b_append(&file, param->name.str, param->name.length) || + my_b_append(&file, "=", 1) || + write_parameter(&file, base, param, &old_version) || + my_b_append(&file, "\n", 1)) + goto err_w_cache; + } + + if (end_io_cache(&file)) + goto err_w_file; + + if (my_close(handler, MYF(MY_WME))) + { + DBUG_RETURN(TRUE); + } + + // archive copies management + path[path_end]='\0'; + if (!access(path, F_OK)) + { + if (old_version != ULONGLONG_MAX && max_versions != 0) + { + // save buckup + char path_arc[FN_REFLEN]; + // backup old version + char path_to[FN_REFLEN]; + + // check archive directory existence + fn_format(path_arc, "arc", dir->str, "", MY_UNPACK_FILENAME); + if (access(path_arc, F_OK)) + { + if (my_mkdir(path_arc, 0777, MYF(MY_WME))) + { + DBUG_RETURN(TRUE); + } + } + + my_snprintf(path_to, FN_REFLEN, "%s/%s-%04lld", + path_arc, file_name->str, old_version); + if (my_rename(path, path_to, MYF(MY_WME))) + { + DBUG_RETURN(TRUE); + } + + // remove very old version + if (old_version > max_versions) + { + my_snprintf(path_to, FN_REFLEN, "%s/%s-%04lld", + path_arc, file_name->str, + old_version - max_versions); + if (!access(path_arc, F_OK) && my_delete(path_to, MYF(MY_WME))) + { + DBUG_RETURN(TRUE); + } + } + } + else + { + if (my_delete(path, MYF(MY_WME))) // no backups + { + DBUG_RETURN(TRUE); + } + } + } + + { + // rename temporary file + char path_to[FN_REFLEN]; + memcpy(path_to, path, path_end+1); + path[path_end]='~'; + if (my_rename(path, path_to, MYF(MY_WME))) + { + DBUG_RETURN(TRUE); + } + } + DBUG_RETURN(FALSE); +err_w_cache: + end_io_cache(&file); +err_w_file: + my_close(handler, MYF(MY_WME)); + DBUG_RETURN(TRUE); +} + + +/* + Prepare frm to parse (read to memory) + + SYNOPSIS + sql_parse_prepare() + file_name - path & filename to .frm file + mem_root - MEM_ROOT 
for buffer allocation + bad_format_errors - send errors on bad content + + RETURN + 0 - error + parser object + + NOTE + returned pointer + 1 will be type of .frm +*/ + +File_parser * +sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root, + bool bad_format_errors) +{ + MY_STAT stat_info; + uint len; + char *end, *sign; + File_parser *parser; + File file; + DBUG_ENTER("sql__parse_prepare"); + + if (!my_stat(file_name->str, &stat_info, MYF(MY_WME))) + { + DBUG_RETURN(0); + } + + if (stat_info.st_size > INT_MAX-1) + { + my_error(ER_FPARSER_TOO_BIG_FILE, MYF(0), file_name->str); + DBUG_RETURN(0); + } + + if (!(parser= new(mem_root) File_parser)) + { + DBUG_RETURN(0); + } + + if (!(parser->buff= alloc_root(mem_root, stat_info.st_size+1))) + { + DBUG_RETURN(0); + } + + if ((file= my_open(file_name->str, O_RDONLY | O_SHARE, MYF(MY_WME))) < 0) + { + DBUG_RETURN(0); + } + + if ((len= my_read(file, parser->buff, stat_info.st_size, MYF(MY_WME))) == + MY_FILE_ERROR) + { + my_close(file, MYF(MY_WME)); + DBUG_RETURN(0); + } + + if (my_close(file, MYF(MY_WME))) + { + DBUG_RETURN(0); + } + + end= parser->end= parser->buff + len; + *end= '\0'; // barriaer for more simple parsing + + // 7 = 5 (TYPE=) + 1 (leter at least of type name) + 1 ('\n') + if (len < 7 || + parser->buff[0] != 'T' || + parser->buff[1] != 'Y' || + parser->buff[2] != 'P' || + parser->buff[3] != 'E' || + parser->buff[4] != '=') + goto frm_error; + + // skip signature; + parser->file_type.str= sign= parser->buff + 5; + while (*sign >= 'A' && *sign <= 'Z' && sign < end) + sign++; + if (*sign != '\n') + goto frm_error; + parser->file_type.length= sign - parser->file_type.str; + // EOS for file signature just for safety + *sign= '\0'; + + parser->start= sign + 1; + parser->content_ok= 1; + + DBUG_RETURN(parser); + +frm_error: + if (bad_format_errors) + { + my_error(ER_FPARSER_BAD_HEADER, MYF(0), file_name->str); + DBUG_RETURN(0); + } + else + DBUG_RETURN(parser); // upper level have to check parser->ok() +} + + +/* + parse LEX_STRING + + SYNOPSIS + parse_string() + ptr - pointer on string beginning + end - pointer on symbol after parsed string end (still owned + by buffer and can be accessed + mem_root - MEM_ROOT for parameter allocation + str - pointer on string, where results should be stored + + RETURN + 0 - error + # - pointer on symbol after string +*/ + +static char * +parse_string(char *ptr, char *end, MEM_ROOT *mem_root, LEX_STRING *str) +{ + // get string length + char *eol= strchr(ptr, '\n'); + + if (eol >= end) + return 0; + + str->length= eol - ptr; + + if (!(str->str= alloc_root(mem_root, str->length+1))) + return 0; + + memcpy(str->str, ptr, str->length); + str->str[str->length]= '\0'; // just for safety + return eol+1; +} + + +/* + read escaped string from ptr to eol in already allocated str + + SYNOPSIS + parse_escaped_string() + ptr - pointer on string beginning + eol - pointer on character after end of string + str - target string + + RETURN + FALSE - OK + TRUE - error +*/ + +my_bool +read_escaped_string(char *ptr, char *eol, LEX_STRING *str) +{ + char *write_pos= str->str; + + for(; ptr < eol; ptr++, write_pos++) + { + char c= *ptr; + if (c == '\\') + { + ptr++; + if (ptr >= eol) + return TRUE; + /* + Should be in sync with write_escaped_string() and + parse_quated_escaped_string() + */ + switch(*ptr) { + case '\\': + *write_pos= '\\'; + break; + case 'n': + *write_pos= '\n'; + break; + case '0': + *write_pos= '\0'; + break; + case 'z': + *write_pos= 26; + break; + case '\'': + *write_pos= '\''; + default: + 
return TRUE; + } + } + else + *write_pos= c; + } + str->str[str->length= write_pos-str->str]= '\0'; // just for safety + return FALSE; +} + + +/* + parse \n delimited escaped string + + SYNOPSIS + parse_escaped_string() + ptr - pointer on string beginning + end - pointer on symbol after parsed string end (still owned + by buffer and can be accessed + mem_root - MEM_ROOT for parameter allocation + str - pointer on string, where results should be stored + + RETURN + 0 - error + # - pointer on symbol after string +*/ + +static char * +parse_escaped_string(char *ptr, char *end, MEM_ROOT *mem_root, LEX_STRING *str) +{ + char *eol= strchr(ptr, '\n'); + + if (eol == 0 || eol >= end || + !(str->str= alloc_root(mem_root, (eol - ptr) + 1)) || + read_escaped_string(ptr, eol, str)) + return 0; + + return eol+1; +} + + +/* + parse '' delimited escaped string + + SYNOPSIS + parse_escaped_string() + ptr - pointer on string beginning + end - pointer on symbol after parsed string end (still owned + by buffer and can be accessed + mem_root - MEM_ROOT for parameter allocation + str - pointer on string, where results should be stored + + RETURN + 0 - error + # - pointer on symbol after string +*/ + +static char * +parse_quated_escaped_string(char *ptr, char *end, + MEM_ROOT *mem_root, LEX_STRING *str) +{ + char *eol; + uint result_len= 0; + bool escaped= 0; + + // starting ' + if (*(ptr++) != '\'') + return 0; + + // find ending ' + for (eol= ptr; (*eol != '\'' || escaped) && eol < end; eol++) + { + if (!(escaped= (*eol == '\\' && !escaped))) + result_len++; + } + + // process string + if (eol >= end || + !(str->str= alloc_root(mem_root, result_len + 1)) || + read_escaped_string(ptr, eol, str)) + return 0; + + return eol+1; +} + + +/* + parse parameters + + SYNOPSIS + File_parser::parse() + base base address for parameter writing (structure like + TABLE) + mem_root MEM_ROOT for parameters allocation + parameters parameters description + required number of required parameters in above list + + RETURN + FALSE - OK + TRUE - error +*/ + +my_bool +File_parser::parse(gptr base, MEM_ROOT *mem_root, + struct File_option *parameters, uint required) +{ + uint first_param= 0, found= 0; + register char *ptr= start; + char *eol; + LEX_STRING *str; + MEM_ROOT *sql_mem; + List<LEX_STRING> *list; + bool change_mem; + DBUG_ENTER("File_parser::parse"); + + while (ptr < end && found < required) + { + char *line= ptr; + if (*ptr == '#') + { + // it is comment + if (!(ptr= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_EOF_IN_COMMENT, MYF(0), line); + DBUG_RETURN(TRUE); + } + ptr++; + } + else + { + File_option *parameter= parameters+first_param, + *parameters_end= parameters+required; + int len= 0; + for(; parameter < parameters_end; parameter++) + { + len= parameter->name.length; + // check length + if (len < (end-ptr) && ptr[len] != '=') + continue; + // check keyword + if (memcmp(parameter->name.str, ptr, len) == 0) + break; + } + + if (parameter < parameters_end) + { + found++; + /* + if we found first parameter, start search from next parameter + next time. 
+ (this small optimisation should work, because they should be + written in same order) + */ + if (parameter == parameters+first_param) + first_param++; + + // get value + ptr+= (len+1); + switch (parameter->type) { + case FILE_OPTIONS_STRING: + { + if (!(ptr= parse_string(ptr, end, mem_root, + (LEX_STRING *)(base + + parameter->offset)))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + break; + } + case FILE_OPTIONS_ESTRING: + { + if (!(ptr= parse_escaped_string(ptr, end, mem_root, + (LEX_STRING *) + (base + parameter->offset)))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + DBUG_RETURN(TRUE); + } + break; + } + case FILE_OPTIONS_ULONGLONG: + case FILE_OPTIONS_REV: + if (!(eol= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + *eol= '\0'; + *((ulonglong*)(base + parameter->offset))= atoll(ptr); + *eol= '\n'; + ptr= eol+1; + break; + case FILE_OPTIONS_TIMESTAMP: + { + /* string have to be allocated already */ + LEX_STRING *val= (LEX_STRING *)(base + parameter->offset); + /* yyyy-mm-dd HH:MM:SS = 19(PARSE_FILE_TIMESTAMPLENGTH) characters */ + if (ptr[PARSE_FILE_TIMESTAMPLENGTH] != '\n') + { + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + DBUG_RETURN(TRUE); + } + memcpy(val->str, ptr, PARSE_FILE_TIMESTAMPLENGTH); + val->str[val->length= PARSE_FILE_TIMESTAMPLENGTH]= '\0'; + ptr+= (PARSE_FILE_TIMESTAMPLENGTH+1); + break; + } + case FILE_OPTIONS_STRLIST: + { + /* + TODO: remove play with mem_root, when List will be able + to store MEM_ROOT* pointer for list elements allocation + */ + sql_mem= my_pthread_getspecific_ptr(MEM_ROOT*, THR_MALLOC); + list= (List<LEX_STRING>*)(base + parameter->offset); + if ((change_mem= (sql_mem != mem_root))) + { + change_mem= 1; + my_pthread_setspecific_ptr(THR_MALLOC, mem_root); + } + + list->empty(); + // list parsing + while (ptr < end) + { + if (!(str= (LEX_STRING*)alloc_root(mem_root, + sizeof(LEX_STRING))) || + list->push_back(str)) + goto list_err; + if(!(ptr= parse_quated_escaped_string(ptr, end, mem_root, str))) + goto list_err_w_message; + switch (*ptr) { + case '\n': + goto end_of_list; + case ' ': + // we cant go over buffer bounds, because we have \0 at the end + ptr++; + break; + default: + goto list_err_w_message; + } + } + end_of_list: + if (*(ptr++) != '\n') + goto list_err; + + if (change_mem) + my_pthread_setspecific_ptr(THR_MALLOC, sql_mem); + break; + + list_err_w_message: + my_error(ER_FPARSER_ERROR_IN_PARAMETER, MYF(0), + parameter->name.str, line); + list_err: + if (change_mem) + my_pthread_setspecific_ptr(THR_MALLOC, sql_mem); + DBUG_RETURN(TRUE); + } + default: + DBUG_ASSERT(0); // never should happened + } + } + else + { + // skip unknown parameter + if (!(ptr= strchr(ptr, '\n'))) + { + my_error(ER_FPARSER_EOF_IN_UNKNOWN_PARAMETER, MYF(0), + line); + DBUG_RETURN(TRUE); + } + ptr++; + } + } + } + DBUG_RETURN(FALSE); +} diff --git a/sql/parse_file.h b/sql/parse_file.h new file mode 100644 index 00000000000..537fd035f44 --- /dev/null +++ b/sql/parse_file.h @@ -0,0 +1,68 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2004 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _PARSE_FILE_H_ +#define _PARSE_FILE_H_ + +#define PARSE_FILE_TIMESTAMPLENGTH 19 + +typedef enum { + FILE_OPTIONS_STRING, /* String (LEX_STRING) */ + FILE_OPTIONS_ESTRING, /* Escaped string (LEX_STRING) */ + FILE_OPTIONS_ULONGLONG, /* ulonglong parapeter (ulonglong) */ + FILE_OPTIONS_REV, /* Revision version number (ulonglong) */ + FILE_OPTIONS_TIMESTAMP, /* timestamp (LEX_STRING have to be + allocated with length 20 (19+1) */ + FILE_OPTIONS_STRLIST /* list of strings (List<char*>) */ +} file_opt_type; + +struct File_option +{ + const LEX_STRING name; /* Name of the option */ + int offset; /* offset to base address of value */ + enum file_opt_type type; /* Option type */ +}; + +class File_parser; +File_parser *sql_parse_prepare(const LEX_STRING *file_name, + MEM_ROOT *mem_root); + +my_bool +sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name, + const LEX_STRING *type, + gptr base, File_option *parameters, uint versions); + +class File_parser: public Sql_alloc +{ + char *buff, *start, *end; + LEX_STRING file_type; + my_bool content_ok; +public: + File_parser() :buff(0), start(0), end(0), content_ok(0) + { file_type.str= 0; file_type.length= 0; } + + my_bool ok() { return content_ok; } + LEX_STRING *type() { return &file_type; } + my_bool parse(gptr base, MEM_ROOT *mem_root, + struct File_option *parameters, uint required); + + friend File_parser *sql_parse_prepare(const LEX_STRING *file_name, + MEM_ROOT *mem_root, + bool bad_format_errors); +}; + +#endif /* _PARSE_FILE_H_ */ diff --git a/sql/protocol.cc b/sql/protocol.cc index 7738349c742..065fcd3d4af 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -24,6 +24,7 @@ #endif #include "mysql_priv.h" +#include "sp_rcontext.h" #include <stdarg.h> static const unsigned int PACKET_BUFFER_EXTRA_ALLOC= 1024; @@ -64,6 +65,10 @@ void send_error(THD *thd, uint sql_errno, const char *err) err ? err : net->last_error[0] ? net->last_error : "NULL")); + if (thd->spcont && thd->spcont->find_handler(sql_errno)) + { + DBUG_VOID_RETURN; + } #ifndef EMBEDDED_LIBRARY /* TODO query cache in embedded library*/ query_cache_abort(net); #endif @@ -147,6 +152,10 @@ void send_error(THD *thd, uint sql_errno, const char *err) void send_warning(THD *thd, uint sql_errno, const char *err) { DBUG_ENTER("send_warning"); + if (thd->spcont && thd->spcont->find_handler(sql_errno)) + { + DBUG_VOID_RETURN; + } push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, sql_errno, err ? err : ER(sql_errno)); send_ok(thd); @@ -177,6 +186,10 @@ net_printf(THD *thd, uint errcode, ...) 
DBUG_ENTER("net_printf"); DBUG_PRINT("enter",("message: %u",errcode)); + if (thd->spcont && thd->spcont->find_handler(errcode)) + { + DBUG_VOID_RETURN; + } thd->query_error= 1; // needed to catch query errors during replication #ifndef EMBEDDED_LIBRARY query_cache_abort(net); // Safety @@ -344,7 +357,7 @@ send_eof(THD *thd, bool no_flush) { NET *net= &thd->net; DBUG_ENTER("send_eof"); - if (net->vio != 0) + if (net->vio != 0 && !net->no_send_eof) { if (thd->client_capabilities & CLIENT_PROTOCOL_41) { @@ -1150,12 +1163,3 @@ bool Protocol_prep::store_time(TIME *tm) buff[0]=(char) length; // Length is stored first return packet->append(buff, length+1, PACKET_BUFFER_EXTRA_ALLOC); } - -#ifdef EMBEDDED_LIBRARY -/* Should be removed when we define the Protocol_cursor's future */ -bool Protocol_cursor::write() -{ - return Protocol_simple::write(); -} -#endif - diff --git a/sql/protocol.h b/sql/protocol.h index d7ce5425ad1..079c06ae155 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -50,17 +50,13 @@ public: Protocol(THD *thd_arg) { init(thd_arg); } virtual ~Protocol() {} void init(THD* thd_arg); - bool send_fields(List<Item> *list, uint flag); + virtual bool send_fields(List<Item> *list, uint flag); bool send_records_num(List<Item> *list, ulonglong records); bool store(I_List<i_string> *str_list); bool store(const char *from, CHARSET_INFO *cs); String *storage_packet() { return packet; } inline void free() { packet->free(); } -#ifndef EMBEDDED_LIBRARY - bool write(); -#else virtual bool write(); -#endif inline bool store(uint32 from) { return store_long((longlong) from); } inline bool store(longlong from) @@ -161,6 +157,7 @@ public: Protocol_cursor(THD *thd_arg, MEM_ROOT *ini_alloc) :Protocol_simple(thd_arg), alloc(ini_alloc) {} bool prepare_for_send(List<Item> *item_list) { + row_count= 0; fields= NULL; data= NULL; prev_record= &data; @@ -168,6 +165,7 @@ public: } bool send_fields(List<Item> *list, uint flag); bool write(); + uint get_field_count() { return field_count; } }; void send_warning(THD *thd, uint sql_errno, const char *err=0); diff --git a/sql/protocol_cursor.cc b/sql/protocol_cursor.cc index 5f35552c562..749b66785d4 100644 --- a/sql/protocol_cursor.cc +++ b/sql/protocol_cursor.cc @@ -51,6 +51,7 @@ bool Protocol_cursor::send_fields(List<Item> *list, uint flag) client_field->name= strdup_root(alloc, server_field.col_name); client_field->org_table= strdup_root(alloc, server_field.org_table_name); client_field->org_name= strdup_root(alloc, server_field.org_col_name); + client_field->catalog= strdup_root(alloc, ""); client_field->length= server_field.length; client_field->type= server_field.type; client_field->flags= server_field.flags; @@ -60,6 +61,7 @@ bool Protocol_cursor::send_fields(List<Item> *list, uint flag) client_field->name_length= strlen(client_field->name); client_field->org_name_length= strlen(client_field->org_name); client_field->org_table_length= strlen(client_field->org_table); + client_field->catalog_length= 0; client_field->charsetnr= server_field.charsetnr; if (INTERNAL_NUM_FIELD(client_field)) @@ -100,17 +102,17 @@ bool Protocol_cursor::write() byte *to; new_record= (MYSQL_ROWS *)alloc_root(alloc, - sizeof(MYSQL_ROWS) + (field_count + 1)*sizeof(char *) + packet->length()); + sizeof(MYSQL_ROWS) + (field_count + 1)*sizeof(char *) + packet->length()); if (!new_record) goto err; data_tmp= (byte **)(new_record + 1); new_record->data= (char **)data_tmp; - to= (byte *)(fields + field_count + 1); + to= (byte *)data_tmp + (field_count + 1)*sizeof(char *); for (; cur_field < 
fields_end; ++cur_field, ++data_tmp) { - if ((len=net_field_length((uchar **)&cp))) + if ((len= net_field_length((uchar **)&cp)) == 0) { *data_tmp= 0; } @@ -121,6 +123,7 @@ bool Protocol_cursor::write() // TODO error signal send_error(thd, CR_MALFORMED_PACKET); return TRUE; } + *data_tmp= to; memcpy(to,(char*) cp,len); to[len]=0; to+=len+1; @@ -129,6 +132,7 @@ bool Protocol_cursor::write() cur_field->max_length=len; } } + *data_tmp= 0; *prev_record= new_record; prev_record= &new_record->next; @@ -139,5 +143,3 @@ bool Protocol_cursor::write() // TODO error signal send_error(thd, ER_OUT_OF_RESOURCES); return TRUE; } - - diff --git a/sql/records.cc b/sql/records.cc index 94634d30759..37fbc7570ed 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -177,7 +177,7 @@ static int rr_sequential(READ_RECORD *info) { if (info->thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + info->thd->send_kill_message(); return 1; } if (tmp != HA_ERR_RECORD_DELETED) diff --git a/sql/set_var.cc b/sql/set_var.cc index 840a7ae075a..614d615e37b 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -76,7 +76,7 @@ TYPELIB delay_key_write_typelib= array_elements(delay_key_write_type_names)-1, "", delay_key_write_type_names }; -static int sys_check_charset(THD *thd, set_var *var); +static int sys_check_charset(THD *thd, set_var *var); static bool sys_update_charset(THD *thd, set_var *var); static void sys_set_default_charset(THD *thd, enum_var_type type); static int sys_check_ftb_syntax(THD *thd, set_var *var); @@ -91,6 +91,7 @@ static bool set_option_autocommit(THD *thd, set_var *var); static int check_log_update(THD *thd, set_var *var); static bool set_log_update(THD *thd, set_var *var); static int check_pseudo_thread_id(THD *thd, set_var *var); +static bool set_log_bin(THD *thd, set_var *var); static void fix_low_priority_updates(THD *thd, enum_var_type type); static void fix_tx_isolation(THD *thd, enum_var_type type); static void fix_net_read_timeout(THD *thd, enum_var_type type); @@ -262,6 +263,10 @@ sys_var_thd_ulong sys_net_retry_count("net_retry_count", 0, fix_net_retry_count); sys_var_thd_bool sys_new_mode("new", &SV::new_mode); sys_var_thd_bool sys_old_passwords("old_passwords", &SV::old_passwords); +sys_var_thd_ulong sys_optimizer_prune_level("optimizer_prune_level", + &SV::optimizer_prune_level); +sys_var_thd_ulong sys_optimizer_search_depth("optimizer_search_depth", + &SV::optimizer_search_depth); sys_var_thd_ulong sys_preload_buff_size("preload_buffer_size", &SV::preload_buff_size); sys_var_thd_ulong sys_read_buff_size("read_buffer_size", @@ -385,8 +390,8 @@ static sys_var_thd_bit sys_log_update("sql_log_update", OPTION_UPDATE_LOG); static sys_var_thd_bit sys_log_binlog("sql_log_bin", check_log_update, - set_log_update, - OPTION_BIN_LOG); + set_log_bin, + OPTION_BIN_LOG); static sys_var_thd_bit sys_sql_warnings("sql_warnings", 0, set_option_bit, OPTION_WARNINGS); @@ -539,6 +544,8 @@ sys_var *sys_variables[]= &sys_net_write_timeout, &sys_new_mode, &sys_old_passwords, + &sys_optimizer_prune_level, + &sys_optimizer_search_depth, &sys_preload_buff_size, &sys_pseudo_thread_id, &sys_query_alloc_block_size, @@ -756,6 +763,10 @@ struct show_var_st init_vars[]= { {sys_new_mode.name, (char*) &sys_new_mode, SHOW_SYS}, {sys_old_passwords.name, (char*) &sys_old_passwords, SHOW_SYS}, {"open_files_limit", (char*) &open_files_limit, SHOW_LONG}, + {sys_optimizer_prune_level.name, (char*) &sys_optimizer_prune_level, + SHOW_SYS}, + {sys_optimizer_search_depth.name,(char*) &sys_optimizer_search_depth, + SHOW_SYS}, 
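/*
  Editor's note (not part of the original patch): the entries added just above
  expose the new join-optimizer knobs both to SET (via sys_variables[]) and to
  SHOW VARIABLES (via the init_vars[] rows here).  As I understand them,
  optimizer_search_depth bounds how many tables the exhaustive join-order
  search considers at a time (0 letting the server choose a depth itself), and
  optimizer_prune_level toggles heuristic pruning of partial plans; both are
  per-session sys_var_thd_ulong values tied to &SV:: members, so each
  connection can tune them independently.
*/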
{"pid_file", (char*) pidfile_name, SHOW_CHAR}, {"port", (char*) &mysqld_port, SHOW_INT}, {sys_preload_buff_size.name, (char*) &sys_preload_buff_size, SHOW_SYS}, @@ -2485,6 +2496,30 @@ static int check_log_update(THD *thd, set_var *var) static bool set_log_update(THD *thd, set_var *var) { + /* + The update log is not supported anymore since 5.0. + See sql/mysqld.cc/, comments in function init_server_components() for an + explaination of the different warnings we send below + */ + + if (opt_sql_bin_update) + { + ((sys_var_thd_bit*) var->var)->bit_flag|= (OPTION_BIN_LOG | + OPTION_UPDATE_LOG); + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_UPDATE_LOG_DEPRECATED_TRANSLATED, + ER(ER_UPDATE_LOG_DEPRECATED_TRANSLATED)); + } + else + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_UPDATE_LOG_DEPRECATED_IGNORED, + ER(ER_UPDATE_LOG_DEPRECATED_IGNORED)); + set_option_bit(thd, var); + return 0; +} + +static bool set_log_bin(THD *thd, set_var *var) +{ if (opt_sql_bin_update) ((sys_var_thd_bit*) var->var)->bit_flag|= (OPTION_BIN_LOG | OPTION_UPDATE_LOG); diff --git a/sql/share/czech/errmsg.txt b/sql/share/czech/errmsg.txt index 2d377929229..75cbf12f60c 100644 --- a/sql/share/czech/errmsg.txt +++ b/sql/share/czech/errmsg.txt @@ -228,8 +228,8 @@ character-set=latin2 "Deadlock found when trying to get lock; try restarting transaction", "The used table type doesn't support FULLTEXT indexes", "Cannot add foreign key constraint", -"Cannot add a child row: a foreign key constraint fails", -"Cannot delete a parent row: a foreign key constraint fails", +"Cannot add or update a child row: a foreign key constraint fails", +"Cannot delete or update a parent row: a foreign key constraint fails", "Error connecting to master: %-.128s", "Error running query on master: %-.128s", "Error when executing command %s: %-.128s", @@ -290,21 +290,21 @@ character-set=latin2 "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format", "Field or reference '%-.64s%s%-.64s%s%-.64s' of SELECT #%d was resolved in SELECT #%d", "Incorrect parameter or combination of parameters for START SLAVE UNTIL", -"It is recommended to run with --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you are not safe in case of unexpected slave's mysqld restart", +"It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart", "SQL thread is not to be started so UNTIL options are ignored", "Incorrect index name '%-.100s'", "Incorrect catalog name '%-.100s'", -"Query cache failed to set size %lu, new query cache size is %lu", +"Query cache failed to set size %lu; new query cache size is %lu", "Column '%-.64s' cannot be part of FULLTEXT index", "Unknown key cache '%-.100s'", -"MySQL is started in --skip-name-resolve mode. 
You need to restart it without this switch for this grant to work", +"MySQL is started in --skip-name-resolve mode; you must restart it without this switch for this grant to work", "Unknown table engine '%s'", -"'%s' is deprecated, use '%s' instead", +"'%s' is deprecated; use '%s' instead", "The target table %-.100s of the %s is not updatable", -"The '%s' feature was disabled; you need MySQL built with '%s' to have it working", +"The '%s' feature is disabled; you need MySQL built with '%s' to have it working", "The MySQL server is running with the %s option so it cannot execute this statement", "Column '%-.100s' has duplicated value '%-.64s' in %s" -"Truncated wrong %-.32s value: '%-.128s'" +"Truncated incorrect %-.32s value: '%-.128s'" "Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause" "Invalid ON UPDATE clause for '%-.64s' column", "This command is not supported in the prepared statement protocol yet", @@ -312,3 +312,45 @@ character-set=latin2 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/danish/errmsg.txt b/sql/share/danish/errmsg.txt index af7b8263e6b..b4b42b41969 100644 --- a/sql/share/danish/errmsg.txt +++ b/sql/share/danish/errmsg.txt @@ -306,3 +306,45 @@ character-set=latin1 "Modtog temporary fejl %d '%-.100s' fra %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable 
%s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/dutch/errmsg.txt b/sql/share/dutch/errmsg.txt index aa20996680e..af0c3a1bc0b 100644 --- a/sql/share/dutch/errmsg.txt +++ b/sql/share/dutch/errmsg.txt @@ -314,3 +314,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter 
'%-.64s'" diff --git a/sql/share/english/errmsg.txt b/sql/share/english/errmsg.txt index b5a7f7962cf..7706257a55d 100644 --- a/sql/share/english/errmsg.txt +++ b/sql/share/english/errmsg.txt @@ -303,3 +303,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/estonian/errmsg.txt b/sql/share/estonian/errmsg.txt index 0cc6e06ab26..ace966b693a 100644 --- a/sql/share/estonian/errmsg.txt +++ b/sql/share/estonian/errmsg.txt @@ -308,3 +308,45 @@ character-set=latin7 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH 
variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/french/errmsg.txt b/sql/share/french/errmsg.txt index 2e23db62ddb..63568b9ebd7 100644 --- a/sql/share/french/errmsg.txt +++ b/sql/share/french/errmsg.txt @@ -303,3 +303,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/german/errmsg.txt b/sql/share/german/errmsg.txt index c63162c84f6..d2588b10092 100644 --- a/sql/share/german/errmsg.txt +++ b/sql/share/german/errmsg.txt @@ -315,3 +315,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are 
not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s, expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/greek/errmsg.txt b/sql/share/greek/errmsg.txt index fa94b0f5107..2d099e149f6 100644 --- a/sql/share/greek/errmsg.txt +++ b/sql/share/greek/errmsg.txt @@ -303,3 +303,45 @@ character-set=greek "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/hungarian/errmsg.txt b/sql/share/hungarian/errmsg.txt index 56fae82c438..ac9539b35cf 100644 --- 
a/sql/share/hungarian/errmsg.txt +++ b/sql/share/hungarian/errmsg.txt @@ -305,3 +305,45 @@ character-set=latin2 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/italian/errmsg.txt b/sql/share/italian/errmsg.txt index 31768f172b4..33ea2cb2a9d 100644 --- a/sql/share/italian/errmsg.txt +++ b/sql/share/italian/errmsg.txt @@ -303,3 +303,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate 
cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/japanese/errmsg.txt b/sql/share/japanese/errmsg.txt index 4385f25c991..6bb82cc9749 100644 --- a/sql/share/japanese/errmsg.txt +++ b/sql/share/japanese/errmsg.txt @@ -304,4 +304,46 @@ character-set=ujis "Got NDB error %d '%-.100s'", "Got temporary NDB error %d '%-.100s'", "Unknown or incorrect time zone: '%-.64s'", -"Invalid TIMESTAMP value in column '%s' at row %ld", ++ "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/korean/errmsg.txt b/sql/share/korean/errmsg.txt index a6e84fad01e..884ce735815 100644 --- a/sql/share/korean/errmsg.txt +++ b/sql/share/korean/errmsg.txt @@ -303,3 +303,45 @@ character-set=euckr "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The 
update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/norwegian-ny/errmsg.txt b/sql/share/norwegian-ny/errmsg.txt index eaf7b3482ee..27ebe984c7e 100644 --- a/sql/share/norwegian-ny/errmsg.txt +++ b/sql/share/norwegian-ny/errmsg.txt @@ -305,3 +305,45 @@ character-set=latin1 "Mottok temporary feil %d '%-.100s' fra %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/norwegian/errmsg.txt b/sql/share/norwegian/errmsg.txt index 692c10db58f..764032da9da 100644 --- 
a/sql/share/norwegian/errmsg.txt +++ b/sql/share/norwegian/errmsg.txt @@ -305,3 +305,45 @@ character-set=latin1 "Mottok temporary feil %d '%-.100s' fra %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/polish/errmsg.txt b/sql/share/polish/errmsg.txt index 19f2c1c6983..68464d0b8f1 100644 --- a/sql/share/polish/errmsg.txt +++ b/sql/share/polish/errmsg.txt @@ -307,3 +307,45 @@ character-set=latin2 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate 
cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/portuguese/errmsg.txt b/sql/share/portuguese/errmsg.txt index c77d10d83de..7561f12cb7c 100644 --- a/sql/share/portuguese/errmsg.txt +++ b/sql/share/portuguese/errmsg.txt @@ -304,3 +304,45 @@ character-set=latin1 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/romanian/errmsg.txt b/sql/share/romanian/errmsg.txt index 5ee4efd0063..03dd45fb6c2 100644 --- a/sql/share/romanian/errmsg.txt +++ b/sql/share/romanian/errmsg.txt @@ -307,3 +307,45 @@ character-set=latin2 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET 
SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/russian/errmsg.txt b/sql/share/russian/errmsg.txt index 20188723f6d..3b0286ea3f8 100644 --- a/sql/share/russian/errmsg.txt +++ b/sql/share/russian/errmsg.txt @@ -305,3 +305,45 @@ character-set=koi8r "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"óÌÉÛËÏÍ ÂÏÌØÛÏÊ ËÏÎÆÉÇÕÒÁÃÉÏÎÎÙÊ ÆÁÊÌ '%-.64s'" +"îÅ×ÅÒÎÙÊ ÚÁÇÏÌÏ×ÏË ÔÉÐÁ ÆÁÊÌÁ '%-.64s'" +"îÅÏÖÉÄÁÎÎÙÊ ËÏÎÅà ÆÁÊÌÁ × ËÏÍÅÎÔÁÒÉÉ '%-.64s'" +"ïÛÉÂËÁ ÐÒÉ ÒÁÓÐÏÚÎÁ×ÁÎÉÉ ÐÁÒÁÍÅÔÒÁ '%-.64s' (ÓÔÒÏËÁ: '%-.64s')" +"îÅÏÖÉÄÁÎÎÙÊ ËÏÎÅà ÆÁÊÌÁ ÐÒÉ ÐÒÏÐÕÓËÅ ÎÅÉÚ×ÅÓÔÎÏÇÏ ÐÁÒÁÍÅÔÒÁ '%-.64s'" diff --git a/sql/share/serbian/errmsg.txt b/sql/share/serbian/errmsg.txt index cc822431464..a64d9056701 100644 --- a/sql/share/serbian/errmsg.txt +++ b/sql/share/serbian/errmsg.txt @@ -309,3 +309,45 @@ character-set=cp1250 "Got 
temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/slovak/errmsg.txt b/sql/share/slovak/errmsg.txt index ee6aac5081b..ebd64f34f88 100644 --- a/sql/share/slovak/errmsg.txt +++ b/sql/share/slovak/errmsg.txt @@ -311,3 +311,45 @@ character-set=latin2 "Got temporary error %d '%-.100s' from %s", "Unknown or incorrect time zone: '%-.64s'", "Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" 
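Editor's note (not part of the original patch): the placeholders in these new stored-routine messages follow ordinary printf conventions -- "%-.64s" is a left-justified string capped at 64 characters, "%u" an unsigned count -- and the server expands them through its my_error()/ER() machinery. A minimal standalone sketch of how one template from this list expands, using a made-up procedure name and argument counts:

#include <cstdio>

int main()
{
  char msg[256];
  /* Hypothetical arguments; in the server the values come from the SP parser/executor. */
  std::snprintf(msg, sizeof(msg),
                "Incorrect number of arguments for %s %s; expected %u, got %u",
                "PROCEDURE", "test.p1", 2u, 3u);
  std::puts(msg);   /* Incorrect number of arguments for PROCEDURE test.p1; expected 2, got 3 */
  return 0;
}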
+"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/spanish/errmsg.txt b/sql/share/spanish/errmsg.txt index 483ec7068a2..042df901a5e 100644 --- a/sql/share/spanish/errmsg.txt +++ b/sql/share/spanish/errmsg.txt @@ -125,7 +125,7 @@ character-set=latin1 "Muchos campos", "Tamaño de línea muy grande. Máximo tamaño de línea, no contando blob, es %d. Tu tienes que cambiar algunos campos para blob", "Sobrecarga de la pila de thread: Usada: %ld de una %ld pila. Use 'mysqld -O thread_stack=#' para especificar una mayor pila si necesario", -"Dependencia cruzada encontrada en OUTER JOIN; examine su condición ON", +"Dependencia cruzada encontrada en OUTER JOIN. Examine su condición ON", "Columna '%-.32s' es usada con UNIQUE o INDEX pero no está definida como NOT NULL", "No puedo cargar función '%-.64s'", "No puedo inicializar función '%-.64s'; %-.80s", @@ -167,7 +167,7 @@ character-set=latin1 "Obtenido timeout leyendo paquetes de comunicación", "Obtenido un error de escribiendo paquetes de comunicación", "Obtenido timeout escribiendo paquetes de comunicación", -"La string resultante es mayor que 'max_allowed_packet'", +"La string resultante es mayor que max_allowed_packet", "El tipo de tabla usada no permite soporte para columnas BLOB/TEXT", "El tipo de tabla usada no permite soporte para columnas AUTO_INCREMENT", "INSERT DELAYED no puede ser usado con tablas '%-.64s', porque esta bloqueada con LOCK TABLES", @@ -248,7 +248,7 @@ character-set=latin1 "Referencia de llave y referencia de tabla no coinciden", "Operando debe tener %d columna(s)", "Subconsulta retorna mas que 1 línea", -"Desconocido preparado comando handler (%.*s) dado para %s", +"Desconocido preparado comando handler (%ld) dado para %s", "Base de datos Help está corrupto o no existe", "Cíclica referencia en subconsultas", "Convirtiendo columna '%s' de %s para %s", @@ -257,15 +257,15 @@ character-set=latin1 "Select %u fué reducido durante optimización", "Tabla '%-.64s' de uno de los SELECT no puede ser usada en %-.32s", "Cliente no soporta protocolo de autenticación solicitado por el servidor; considere actualizar el cliente MySQL", -"Todas las partes de una SPATIAL index deben ser NOT NULL", +"Todas las partes de una SPATIAL KEY deben ser NOT NULL", "COLLATION '%s' no es válido para CHARACTER SET '%s'", "Slave ya está funcionando", "Slave ya fué parado", "Tamaño demasiado grande para datos descomprimidos. El máximo tamaño es %d. 
(probablemente, extensión de datos descomprimidos fué corrompida)", -"ZLIB: No suficiente memoria", -"ZLIB: No suficiente espacio en el búfer de salida (probablemente, extensión de datos descomprimidos fué corrompida)", -"ZLIB: Dato de entrada fué corrompido", -"%d línea(s) fueron cortadas por GROUP_CONCAT()", +"Z_MEM_ERROR: No suficiente memoria para zlib", +"Z_BUF_ERROR: No suficiente espacio en el búfer de salida para zlib (probablemente, extensión de datos descomprimidos fué corrompida)", +"Z_DATA_ERROR: Dato de entrada fué corrompido para zlib", +"%d línea(s) fue(fueron) cortadas por group_concat()", "Línea %ld no contiene datos para todas las columnas", "Línea %ld fué truncada; La misma contine mas datos que las que existen en las columnas de entrada", "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %ld", @@ -301,7 +301,45 @@ character-set=latin1 "Incorrecta definición de tabla; Solamente debe haber una columna TIMESTAMP con CURRENT_TIMESTAMP en DEFAULT o ON UPDATE cláusula" "Inválido ON UPDATE cláusula para campo '%-.64s'", "This command is not supported in the prepared statement protocol yet", -"Got error %d '%-.100s' from %s", -"Got temporary error %d '%-.100s' from %s", -"Unknown or incorrect time zone: '%-.64s'", -"Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/swedish/errmsg.txt b/sql/share/swedish/errmsg.txt index d9f3adf92d4..26e00bfbcca 100644 --- a/sql/share/swedish/errmsg.txt +++ b/sql/share/swedish/errmsg.txt @@ -165,7 +165,7 @@ character-set=latin1 "Fick 'timeout' vid läsning från klienten", "Fick ett fel vid skrivning till klienten", "Fick 'timeout' vid skrivning till klienten", -"Resultatsträngen är längre än 'max_allowed_packet'", +"Resultatsträngen är längre än max_allowed_packet", "Den använda tabelltypen 
kan inte hantera BLOB/TEXT-kolumner", "Den använda tabelltypen kan inte hantera AUTO_INCREMENT-kolumner", "INSERT DELAYED kan inte användas med tabell '%-.64s', emedan den är låst med LOCK TABLES", @@ -246,7 +246,7 @@ character-set=latin1 "Nyckelreferensen och tabellreferensen stämmer inte överens", "Operand should contain %d column(s)", "Subquery returnerade mer än 1 rad", -"Okänd PREPARED STATEMENT id (%.*s) var given till %s", +"Okänd PREPARED STATEMENT id (%ld) var given till %s", "Hjälpdatabasen finns inte eller är skadad", "Cyklisk referens i subqueries", "Konvertar kolumn '%s' från %s till %s", @@ -255,19 +255,19 @@ character-set=latin1 "Select %u reducerades vid optimiering", "Tabell '%-.64s' från en SELECT kan inte användas i %-.32s", "Klienten stöder inte autentiseringsprotokollet som begärts av servern; överväg uppgradering av klientprogrammet.", -"Alla delar av en SPATIAL index måste vara NOT NULL", +"Alla delar av en SPATIAL KEY måste vara NOT NULL", "COLLATION '%s' är inte tillåtet för CHARACTER SET '%s'", "Slaven har redan startat", "Slaven har redan stoppat", -"Uncompressed data size too large; the maximum size is %d (probably, length of uncompressed data was corrupted)", -"ZLIB: Not enough memory", -"ZLIB: Not enough room in the output buffer (probably, length of uncompressed data was corrupted)", -"ZLIB: Input data corrupted", -"%d rad(er) kapades av GROUP_CONCAT()", +"Too big size of uncompressed data. The maximum size is %d. (probably, length of uncompressed data was corrupted)", +"Z_MEM_ERROR: Not enough memory available for zlib", +"Z_BUF_ERROR: Not enough room in the output buffer for zlib (probably, length of uncompressed data was corrupted)", +"Z_DATA_ERROR: Input data was corrupted for zlib", +"%d rad(er) kapades av group_concat()", "Row %ld doesn't contain data for all columns", -"Row %ld was truncated; it contained more data than there were input columns", -"Data truncated; NULL supplied to NOT NULL column '%s' at row %ld", -"Data truncated; out of range for column '%s' at row %ld", +"Row %ld was truncated; It contained more data than there were input columns", +"Data truncated, NULL supplied to NOT NULL column '%s' at row %ld", +"Data truncated, out of range for column '%s' at row %ld", "Data truncated for column '%s' at row %ld", "Använder handler %s för tabell '%s'", "Illegal mix of collations (%s,%s) and (%s,%s) for operation '%s'", @@ -275,13 +275,13 @@ character-set=latin1 "Can't revoke all privileges, grant for one or more of the requested users", "Illegal mix of collations (%s,%s), (%s,%s), (%s,%s) for operation '%s'", "Illegal mix of collations for operation '%s'", -"Variable '%-.64s' is not a variable component (can't be used as XXXX.variable_name)", +"Variable '%-.64s' is not a variable component (Can't be used as XXXX.variable_name)", "Unknown collation: '%-.64s'", -"SSL parameters in CHANGE MASTER are ignored because this MySQL slave was compiled without SSL support; they can be used later if MySQL slave with SSL is started", +"SSL parameters in CHANGE MASTER are ignored because this MySQL slave was compiled without SSL support; they can be used later when MySQL slave with SSL will be started", "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format", "Field or reference '%-.64s%s%-.64s%s%-.64s' of SELECT #%d was resolved in SELECT #%d", -"Incorrect parameter or combination of parameters for START SLAVE UNTIL", -"It is recommended to run with --skip-slave-start when doing 
step-by-step replication with START SLAVE UNTIL; otherwise, you are not safe in case of unexpected slave's mysqld restart", +"Wrong parameter or combination of parameters for START SLAVE UNTIL", +"It is recommended to run with --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL, otherwise you are not safe in case of unexpected slave's mysqld restart", "SQL thread is not to be started so UNTIL options are ignored", "Felaktigt index namn '%-.100s'", "Felaktigt katalog namn '%-.100s'", @@ -296,10 +296,48 @@ character-set=latin1 "MySQL är startad med --skip-grant-tables. Pga av detta kan du inte använda detta kommando", "Column '%-.100s' has duplicated value '%-.64s' in %s" "Truncated wrong %-.32s value: '%-.128s'" -"Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause" -"Invalid ON UPDATE clause for '%-.64s' column", +"Incorrect table definition; There can only be one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause" +"Invalid ON UPDATE clause for '%-.64s' field", "This command is not supported in the prepared statement protocol yet", -"Fick felkod %d '%-.100s' från %s", -"Fick tilfällig felkod %d '%-.100s' från %s", -"Unknown or incorrect time zone: '%-.64s'", -"Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"Configuration file '%-.64s' is too big" +"Malformed file type header in file '%-.64s'" +"Unexpected end of file while parsing comment '%-.64s'" +"Error while parsing parameter '%-.64s' (line: '%-.64s')" +"Unexpected end of file while skipping unknown parameter '%-.64s'" diff --git a/sql/share/ukrainian/errmsg.txt b/sql/share/ukrainian/errmsg.txt index acf6f5121e8..05fcb3d3121 100644 --- a/sql/share/ukrainian/errmsg.txt +++ b/sql/share/ukrainian/errmsg.txt @@ -161,7 +161,7 @@ character-set=koi8u "ç¦ÌËÁ ÄÌÑ INSERT DELAYED ÎÅ ÍÏÖÅ ÏÔÒÉÍÁÔÉ ÂÌÏËÕ×ÁÎÎÑ ÄÌÑ ÔÁÂÌÉæ %-.64s", "úÁÂÁÇÁÔÏ ÚÁÔÒÉÍÁÎÉÈ Ç¦ÌÏË ×ÉËÏÒÉÓÔÏ×Õ¤ÔØÓÑ", "ðÅÒÅÒ×ÁÎÏ Ú'¤ÄÎÁÎÎÑ %ld ÄÏ ÂÁÚÉ ÄÁÎÎÉÈ: '%-.64s' ËÏÒÉÓÔÕ×ÁÞÁ: '%-.32s' (%-.64s)", -"ïÔÒÉÍÁÎÏ ÐÁËÅÔ Â¦ÌØÛÉÊ Î¦Ö 
'max_allowed_packet'", +"ïÔÒÉÍÁÎÏ ÐÁËÅÔ Â¦ÌØÛÉÊ Î¦Ö max_allowed_packet", "ïÔÒÉÍÁÎÏ ÐÏÍÉÌËÕ ÞÉÔÁÎÎÑ Ú ËÏÍÕΦËÁæÊÎÏÇÏ ËÁÎÁÌÕ", "ïÔÒÉÍÁÎÏ ÐÏÍÉÌËËÕ ×¦Ä fcntl()", "ïÔÒÉÍÁÎÏ ÐÁËÅÔÉ Õ ÎÅÎÁÌÅÖÎÏÍÕ ÐÏÒÑÄËÕ", @@ -170,7 +170,7 @@ character-set=koi8u "ïÔÒÉÍÁÎÏ ÚÁÔÒÉÍËÕ ÞÉÔÁÎÎÑ ËÏÍÕΦËÁæÊÎÉÈ ÐÁËÅÔ¦×", "ïÔÒÉÍÁÎÏ ÐÏÍÉÌËÕ ÚÁÐÉÓÕ ËÏÍÕΦËÁæÊÎÉÈ ÐÁËÅÔ¦×", "ïÔÒÉÍÁÎÏ ÚÁÔÒÉÍËÕ ÚÁÐÉÓÕ ËÏÍÕΦËÁæÊÎÉÈ ÐÁËÅÔ¦×", -"óÔÒÏËÁ ÒÅÚÕÌØÔÁÔÕ ÄÏ×ÛÁ Î¦Ö 'max_allowed_packet'", +"óÔÒÏËÁ ÒÅÚÕÌØÔÁÔÕ ÄÏ×ÛÁ Î¦Ö max_allowed_packet", "÷ÉËÏÒÉÓÔÁÎÉÊ ÔÉÐ ÔÁÂÌÉæ ΊЦÄÔÒÉÍÕ¤ BLOB/TEXT ÓÔÏ×Âæ", "÷ÉËÏÒÉÓÔÁÎÉÊ ÔÉÐ ÔÁÂÌÉæ ΊЦÄÔÒÉÍÕ¤ AUTO_INCREMENT ÓÔÏ×Âæ", "INSERT DELAYED ÎÅ ÍÏÖÅ ÂÕÔÉ ×ÉËÏÒÉÓÔÁÎÏ Ú ÔÁÂÌÉÃÅÀ '%-.64s', ÔÏÍÕ ÝÏ §§ ÚÁÂÌÏËÏ×ÁÎÏ Ú LOCK TABLES", @@ -221,7 +221,7 @@ character-set=koi8u "èÉÂÎÉÊ ÁÒÇÕÍÅÎÔ ÄÌÑ %s", "ëÏÒÉÓÔÕ×ÁÞÕ '%-.32s'@'%-.64s' ÎÅ ÄÏÚ×ÏÌÅÎÏ ÓÔ×ÏÒÀ×ÁÔÉ ÎÏ×ÉÈ ËÏÒÉÓÔÕ×ÁÞ¦×", "Incorrect table definition; all MERGE tables must be in the same database", -"Deadlock found when trying to get lock; try restarting transaction", +"Deadlock found when trying to get lock; Try restarting transaction", "÷ÉËÏÒÉÓÔÁÎÉÊ ÔÉÐ ÔÁÂÌÉæ ΊЦÄÔÒÉÍÕ¤ FULLTEXT ¦ÎÄÅËÓ¦×", "Cannot add foreign key constraint", "Cannot add a child row: a foreign key constraint fails", @@ -229,50 +229,50 @@ character-set=koi8u "Error connecting to master: %-.128s", "Error running query on master: %-.128s", "Error when executing command %s: %-.128s", -"Incorrect usage of %s and %s", +"Wrong usage of %s and %s", "The used SELECT statements have a different number of columns", "Can't execute the query because you have a conflicting read lock", "Mixing of transactional and non-transactional tables is disabled", "Option '%s' used twice in statement", "User '%-.64s' has exceeded the '%s' resource (current value: %ld)", -"Access denied; you need the %-.128s privilege for this operation", +"Access denied. 
You need the %-.128s privilege for this operation", "Variable '%-.64s' is a SESSION variable and can't be used with SET GLOBAL", "Variable '%-.64s' is a GLOBAL variable and should be set with SET GLOBAL", "Variable '%-.64s' doesn't have a default value", "Variable '%-.64s' can't be set to the value of '%-.64s'", -"Incorrect argument type to variable '%-.64s'", +"Wrong argument type to variable '%-.64s'", "Variable '%-.64s' can only be set, not read", -"Incorrect usage/placement of '%s'", +"Wrong usage/placement of '%s'", "This version of MySQL doesn't yet support '%s'", "Got fatal error %d: '%-.128s' from master when reading data from binary log", "Slave SQL thread ignored the query because of replicate-*-table rules", "Variable '%-.64s' is a %s variable", -"Incorrect foreign key definition for '%-.64s': %s", -"Key reference and table reference don't match", +"Wrong foreign key definition for '%-.64s': %s", +"Key reference and table reference doesn't match", "ïÐÅÒÁÎÄ ÍÁ¤ ÓËÌÁÄÁÔÉÓÑ Ú %d ÓÔÏ×Âæ×", "ð¦ÄÚÁÐÉÔ ÐÏ×ÅÒÔÁ¤ ¦ÌØÛ ÎiÖ 1 ÚÁÐÉÓ", -"Unknown prepared statement handler (%.*s) given to %s", +"Unknown prepared statement handler (%ld) given to %s", "Help database is corrupt or does not exist", "ãÉË̦ÞÎÅ ÐÏÓÉÌÁÎÎÑ ÎÁ ЦÄÚÁÐÉÔ", "ðÅÒÅÔ×ÏÒÅÎÎÑ ÓÔÏ×ÂÃÁ '%s' Ú %s Õ %s", "ðÏÓÉÌÁÎÎÑ '%-.64s' ÎÅ ÐiÄÔÒÉÍÕÅÔÓÑ (%s)", -"Every derived table must have its own alias", +"Every derived table must have it's own alias", "Select %u was ÓËÁÓÏ×ÁÎÏ ÐÒÉ ÏÐÔÉÍiÚÁÃii", -"Table '%-.64s' from one of the SELECTs cannot be used in %-.32s", +"Table '%-.64s' from one of SELECT's can not be used in %-.32s", "Client does not support authentication protocol requested by server; consider upgrading MySQL client", -"All parts of a SPATIAL index must be NOT NULL", +"All parts of a SPATIAL KEY must be NOT NULL", "COLLATION '%s' is not valid for CHARACTER SET '%s'", "Slave is already running", "Slave has already been stopped", -"Uncompressed data size too large; the maximum size is %d (probably, length of uncompressed data was corrupted)", -"ZLIB: Not enough memory", -"ZLIB: Not enough room in the output buffer (probably, length of uncompressed data was corrupted)", -"ZLIB: Input data corrupted", -"%d line(s) were cut by GROUP_CONCAT()", +"Too big size of uncompressed data. The maximum size is %d. 
(probably, length of uncompressed data was corrupted)", +"Z_MEM_ERROR: Not enough memory available for zlib", +"Z_BUF_ERROR: Not enough room in the output buffer for zlib (probably, length of uncompressed data was corrupted)", +"Z_DATA_ERROR: Input data was corrupted for zlib", +"%d line(s) was(were) cut by group_concat()", "Row %ld doesn't contain data for all columns", -"Row %ld was truncated; it contained more data than there were input columns", -"Data truncated; NULL supplied to NOT NULL column '%s' at row %ld", -"Data truncated; out of range for column '%s' at row %ld", +"Row %ld was truncated; It contained more data than there were input columns", +"Data truncated, NULL supplied to NOT NULL column '%s' at row %ld", +"Data truncated, out of range for column '%s' at row %ld", "Data truncated for column '%s' at row %ld", "Using storage engine %s for table '%s'", "Illegal mix of collations (%s,%s) and (%s,%s) for operation '%s'", @@ -280,13 +280,13 @@ character-set=koi8u "Can't revoke all privileges, grant for one or more of the requested users", "Illegal mix of collations (%s,%s), (%s,%s), (%s,%s) for operation '%s'", "Illegal mix of collations for operation '%s'", -"Variable '%-.64s' is not a variable component (can't be used as XXXX.variable_name)", +"Variable '%-.64s' is not a variable component (Can't be used as XXXX.variable_name)", "Unknown collation: '%-.64s'", -"SSL parameters in CHANGE MASTER are ignored because this MySQL slave was compiled without SSL support; they can be used later if MySQL slave with SSL is started", +"SSL parameters in CHANGE MASTER are ignored because this MySQL slave was compiled without SSL support; they can be used later when MySQL slave with SSL will be started", "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format", "óÔÏ×ÂÅÃØ ÁÂÏ ÐÏÓÉÌÁÎÎÑ '%-.64s%s%-.64s%s%-.64s' ¦Ú SELECTÕ #%d ÂÕÌÏ ÚÎÁÊÄÅÎÅ Õ SELECT¦ #%d", -"Incorrect parameter or combination of parameters for START SLAVE UNTIL", -"It is recommended to run with --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you are not safe in case of unexpected slave's mysqld restart", +"Wrong parameter or combination of parameters for START SLAVE UNTIL", +"It is recommended to run with --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL, otherwise you are not safe in case of unexpected slave's mysqld restart", "SQL thread is not to be started so UNTIL options are ignored", "Incorrect index name '%-.100s'", "Incorrect catalog name '%-.100s'", @@ -301,10 +301,48 @@ character-set=koi8u "The MySQL server is running with the %s option so it cannot execute this statement", "Column '%-.100s' has duplicated value '%-.64s' in %s" "Truncated wrong %-.32s value: '%-.128s'" -"Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause" -"Invalid ON UPDATE clause for '%-.64s' column", +"Incorrect table definition; There can only be one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause" +"Invalid ON UPDATE clause for '%-.64s' field", "This command is not supported in the prepared statement protocol yet", -"Got error %d '%-.100s' from %s", -"Got temporary error %d '%-.100s' from %s", -"Unknown or incorrect time zone: '%-.64s'", -"Invalid TIMESTAMP value in column '%s' at row %ld", +"Can't create a %s from within another stored routine" +"%s %s already exists" +"%s %s does not exist" +"Failed to 
DROP %s %s" +"Failed to CREATE %s %s" +"%s with no matching label: %s" +"Redefining label %s" +"End-label %s without match" +"Referring to uninitialized variable %s" +"SELECT in a stored procedure must have INTO" +"RETURN is only allowed in a FUNCTION" +"Statements like SELECT, INSERT, UPDATE (and others) are not allowed in a FUNCTION" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been ignored" +"The update log is deprecated and replaced by the binary log; SET SQL_LOG_UPDATE has been translated to SET SQL_LOG_BIN" +"Query execution was interrupted" +"Incorrect number of arguments for %s %s; expected %u, got %u" +"Undefined CONDITION: %s" +"No RETURN found in FUNCTION %s" +"FUNCTION %s ended without RETURN" +"Cursor statement must be a SELECT" +"Cursor SELECT must not have INTO" +"Undefined CURSOR: %s" +"Cursor is already open" +"Cursor is not open" +"Undeclared variable: %s" +"Incorrect number of FETCH variables" +"No data to FETCH" +"Duplicate parameter: %s" +"Duplicate variable: %s" +"Duplicate condition: %s" +"Duplicate cursor: %s" +"Failed to ALTER %s %s" +"Subselect value not supported" +"USE is not allowed in a stored procedure" +"Variable or condition declaration after cursor or handler declaration" +"Cursor declaration after handler declaration" +"Case not found for CASE statement" +"úÁÎÁÄÔÏ ×ÅÌÉËÉÊ ËÏÎƦÇÕÒÁæÊÎÉÊ ÆÁÊÌ '%-.64s'" +"îÅצÒÎÉÊ ÚÁÇÏÌÏ×ÏË ÔÉÐÕ Õ ÆÁÊ̦ '%-.64s'" +"îÅÓÐÏĦ×ÁÎÎÉÊ Ë¦ÎÅÃØ ÆÁÊÌÕ Õ ËÏÍÅÎÔÁÒ¦ '%-.64s'" +"ðÏÍÉÌËÁ × ÒÏÓЦÚÎÁ×ÁÎΦ ÐÁÒÁÍÅÔÒÕ '%-.64s' (ÒÑÄÏË: '%-.64s')" +"îÅÓÐÏĦ×ÁÎÎÉÊ Ë¦ÎÅÃØ ÆÁÊÌÕ Õ ÓÐÒϦ ÐÒÏÍÉÎÕÔÉ ÎÅצÄÏÍÉÊ ÐÁÒÁÍÅÔÒ '%-.64s'" diff --git a/sql/slave.cc b/sql/slave.cc index 7e46fb81053..5bc2599211e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -74,7 +74,6 @@ static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db, const char* table_name, bool overwrite); static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi); - /* Find out which replications threads are running @@ -218,6 +217,12 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, pos Position in relay log file need_data_lock Set to 1 if this functions should do mutex locks errmsg Store pointer to error message here + look_for_description_event + 1 if we should look for such an event. We only need + this when the SQL thread starts and opens an existing + relay log and has to execute it (possibly from an offset + >4); then we need to read the first event of the relay + log to be able to parse the events we have to execute. DESCRIPTION - Close old open relay log files. @@ -235,15 +240,35 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, ulonglong pos, bool need_data_lock, - const char** errmsg) + const char** errmsg, + bool look_for_description_event) { DBUG_ENTER("init_relay_log_pos"); + DBUG_PRINT("info", ("pos=%lu", pos)); *errmsg=0; pthread_mutex_t *log_lock=rli->relay_log.get_log_lock(); if (need_data_lock) pthread_mutex_lock(&rli->data_lock); + + /* + Slave threads are not the only users of init_relay_log_pos(). CHANGE MASTER + is, too, and init_slave() too; these 2 functions allocate a description + event in init_relay_log_pos, which is not freed by the terminating SQL slave + thread as that thread is not started by these functions. So we have to free + the description_event here, in case, so that there is no memory leak in + running, say, CHANGE MASTER. 
+ */ + delete rli->relay_log.description_event_for_exec; + /* + By default the relay log is in binlog format 3 (4.0). + Even if format is 4, this will work enough to read the first event + (Format_desc) (remember that format 4 is just lenghtened compared to format + 3; format 3 is a prefix of format 4). + */ + rli->relay_log.description_event_for_exec= new + Format_description_log_event(3); pthread_mutex_lock(log_lock); @@ -283,9 +308,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, In this case, we will use the same IO_CACHE pointer to read data as the IO thread is using to write data. */ - rli->cur_log= rli->relay_log.get_log_file(); - if (my_b_tell(rli->cur_log) == 0 && - check_binlog_magic(rli->cur_log, errmsg)) + my_b_seek((rli->cur_log=rli->relay_log.get_log_file()), (off_t)0); + if (check_binlog_magic(rli->cur_log,errmsg)) goto err; rli->cur_log_old_open_count=rli->relay_log.get_open_count(); } @@ -299,8 +323,85 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, goto err; rli->cur_log = &rli->cache_buf; } - if (pos >= BIN_LOG_HEADER_SIZE) + /* + In all cases, check_binlog_magic() has been called so we're at offset 4 for + sure. + */ + if (pos > BIN_LOG_HEADER_SIZE) /* If pos<=4, we stay at 4 */ + { + Log_event* ev; + while (look_for_description_event) + { + /* + Read the possible Format_description_log_event; if position was 4, no need, it will + be read naturally. + */ + DBUG_PRINT("info",("looking for a Format_description_log_event")); + + if (my_b_tell(rli->cur_log) >= pos) + break; + + /* + Because of we have rli->data_lock and log_lock, we can safely read an + event + */ + if (!(ev=Log_event::read_log_event(rli->cur_log,0, + rli->relay_log.description_event_for_exec))) + { + DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d", + rli->cur_log->error)); + if (rli->cur_log->error) /* not EOF */ + { + *errmsg= "I/O error reading event at position 4"; + goto err; + } + break; + } + else if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) + { + DBUG_PRINT("info",("found Format_description_log_event")); + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= (Format_description_log_event*) ev; + /* + As ev was returned by read_log_event, it has passed is_valid(), so + my_malloc() in ctor worked, no need to check again. + */ + /* + Ok, we found a Format_description event. But it is not sure that this + describes the whole relay log; indeed, one can have this sequence + (starting from position 4): + Format_desc (of slave) + Rotate (of master) + Format_desc (of slave) + So the Format_desc which really describes the rest of the relay log is + the 3rd event (it can't be further than that, because we rotate the + relay log when we queue a Rotate event from the master). + But what describes the Rotate is the first Format_desc. + So what we do is: + go on searching for Format_description events, until you exceed the + position (argument 'pos') or until you find another event than Rotate + or Format_desc. 
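+    In short: the last Format_desc found before reaching 'pos' (skipping over
+    any Rotate events met on the way) is the one kept in
+    description_event_for_exec and used to parse everything executed from
+    'pos' onwards.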
+ */ + } + else + { + DBUG_PRINT("info",("found event of another type=%d", + ev->get_type_code())); + look_for_description_event= (ev->get_type_code() == ROTATE_EVENT); + delete ev; + } + } my_b_seek(rli->cur_log,(off_t)pos); +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + } +#endif + + } err: /* @@ -315,6 +416,8 @@ err: if (need_data_lock) pthread_mutex_unlock(&rli->data_lock); + if (!rli->relay_log.description_event_for_exec->is_valid() && !*errmsg) + *errmsg= "Invalid Format_description log event; could be out of memory"; DBUG_RETURN ((*errmsg) ? 1 : 0); } @@ -360,16 +463,15 @@ void init_slave_skip_errors(const char* arg) } -void st_relay_log_info::inc_group_relay_log_pos(ulonglong val, - ulonglong log_pos, - bool skip_lock) +void st_relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, + bool skip_lock) { if (!skip_lock) pthread_mutex_lock(&data_lock); - inc_event_relay_log_pos(val); + inc_event_relay_log_pos(); group_relay_log_pos= event_relay_log_pos; strmake(group_relay_log_name,event_relay_log_name, - sizeof(group_relay_log_name)-1); + sizeof(group_relay_log_name)-1); notify_group_relay_log_name_update(); @@ -383,6 +485,28 @@ void st_relay_log_info::inc_group_relay_log_pos(ulonglong val, not advance as it should on the non-transactional slave (it advances by big leaps, whereas it should advance by small leaps). */ + /* + In 4.x we used the event's len to compute the positions here. This is + wrong if the event was 3.23/4.0 and has been converted to 5.0, because + then the event's len is not what is was in the master's binlog, so this + will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0 + replication: Exec_master_log_pos is wrong). Only way to solve this is to + have the original offset of the end of the event the relay log. This is + what we do in 5.0: log_pos has become "end_log_pos" (because the real use + of log_pos in 4.0 was to compute the end_log_pos; so better to store + end_log_pos instead of begin_log_pos. + If we had not done this fix here, the problem would also have appeared + when the slave and master are 5.0 but with different event length (for + example the slave is more recent than the master and features the event + UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in + SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this + value which would lead to badly broken replication. + Even the relay_log_pos will be corrupted in this case, because the len is + the relay log is not "val". + With the end_log_pos solution, we avoid computations involving lengthes. + */ + DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld", + log_pos,group_master_log_pos)); if (log_pos) // 3.23 binlogs don't have log_posx { #if MYSQL_VERSION_ID < 50000 @@ -396,10 +520,10 @@ void st_relay_log_info::inc_group_relay_log_pos(ulonglong val, Yes this is a hack but it's just to make 3.23->4.x replication work; 3.23->5.0 replication is working much better. */ - group_master_log_pos= log_pos + val - + group_master_log_pos= log_pos - (mi->old_format ? 
(LOG_EVENT_HEADER_LEN - OLD_HEADER_LEN) : 0); #else - group_master_log_pos= log_pos+ val; + group_master_log_pos= log_pos; #endif /* MYSQL_VERSION_ID < 5000 */ } pthread_cond_broadcast(&data_cond); @@ -481,13 +605,15 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, sizeof(rli->group_relay_log_name)-1); strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(), sizeof(rli->event_relay_log_name)-1); - // Just first log with magic number and nothing else - rli->log_space_total= BIN_LOG_HEADER_SIZE; rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; - rli->relay_log.reset_bytes_written(); + if (count_relay_log_space(rli)) + { + *errmsg= "Error counting relay log space"; + goto err; + } if (!just_reset) error= init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, - 0 /* do not need data lock */, errmsg); + 0 /* do not need data lock */, errmsg, 0); err: #ifndef DBUG_OFF @@ -636,7 +762,7 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock, if (thd->killed) { pthread_mutex_unlock(cond_lock); - DBUG_RETURN(ER_SERVER_SHUTDOWN); + DBUG_RETURN(thd->killed_errno()); } } } @@ -756,6 +882,11 @@ static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len) second call will make the decision (because all_tables_not_ok() = !tables_ok(1st_list) && !tables_ok(2nd_list)). + Thought which arose from a question of a big customer "I want to include all + tables like "abc.%" except the "%.EFG"". This can't be done now. If we + supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). + RETURN VALUES 0 should not be logged/replicated 1 should be logged/replicated @@ -1162,29 +1293,86 @@ static int init_intvar_from_file(int* var, IO_CACHE* f, int default_val) return 1; } +/* + Note that we rely on the master's version (3.23, 4.0.14 etc) instead of + relying on the binlog's version. This is not perfect: imagine an upgrade + of the master without waiting that all slaves are in sync with the master; + then a slave could be fooled about the binlog's format. This is what happens + when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0 + slaves are fooled. So we do this only to distinguish between 3.23 and more + recent masters (it's too late to change things for 3.23). + + RETURNS + 0 ok + 1 error +*/ static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi) { const char* errmsg= 0; - + /* - Note the following switch will bug when we have MySQL branch 30 ;) + Free old description_event_for_queue (that is needed if we are in + a reconnection). */ - switch (*mysql->server_version) { - case '3': - mi->old_format = - (strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ? - BINLOG_FORMAT_323_LESS_57 : - BINLOG_FORMAT_323_GEQ_57 ; - break; - case '4': - mi->old_format = BINLOG_FORMAT_CURRENT; - break; - default: - /* 5.0 is not supported */ - errmsg = "Master reported an unrecognized MySQL version. 
Note that 4.1 \ -slaves can't replicate a 5.0 or newer master."; - break; + delete mi->rli.relay_log.description_event_for_queue; + mi->rli.relay_log.description_event_for_queue= 0; + + if (!my_isdigit(&my_charset_bin,*mysql->server_version)) + errmsg = "Master reported unrecognized MySQL version"; + else + { + /* + Note the following switch will bug when we have MySQL branch 30 ;) + */ + switch (*mysql->server_version) + { + case '0': + case '1': + case '2': + errmsg = "Master reported unrecognized MySQL version"; + break; + case '3': + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(1, mysql->server_version); + break; + case '4': + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(3, mysql->server_version); + break; + default: + /* + Master is MySQL >=5.0. Give a default Format_desc event, so that we can + take the early steps (like tests for "is this a 3.23 master") which we + have to take before we receive the real master's Format_desc which will + override this one. Note that the Format_desc we create below is garbage + (it has the format of the *slave*); it's only good to help know if the + master is 3.23, 4.0, etc. + */ + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(4, mysql->server_version); + break; + } + } + + /* + This does not mean that a 5.0 slave will be able to read a 6.0 master; but + as we don't know yet, we don't want to forbid this for now. If a 5.0 slave + can't read a 6.0 master, this will show up when the slave can't read some + events sent by the master, and there will be error messages. + */ + + if (errmsg) + { + sql_print_error(errmsg); + return 1; + } + + /* as we are here, we tried to allocate the event */ + if (!mi->rli.relay_log.description_event_for_queue) + { + sql_print_error("Slave I/O thread failed to create a default Format_description_log_event"); + return 1; } /* @@ -1522,7 +1710,7 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname, "-relay-bin", opt_relaylog_index_name, LOG_BIN, 1 /* read_append cache */, - 1 /* no auto events */, + 0 /* starting from 5.0 we want relay logs to have auto events */, max_relay_log_size ? 
max_relay_log_size : max_binlog_size)) { pthread_mutex_unlock(&rli->data_lock); @@ -1557,7 +1745,7 @@ file '%s', errno %d)", fname, my_errno); /* Init relay log with first entry in the relay index file */ if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */, - &msg)) + &msg, 0)) { sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)"); goto err; @@ -1622,7 +1810,7 @@ Failed to open the existing relay log info file '%s' (errno %d)", rli->group_relay_log_name, rli->group_relay_log_pos, 0 /* no data lock*/, - &msg)) + &msg, 0)) { char llbuf[22]; sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s)", @@ -1631,8 +1819,18 @@ Failed to open the existing relay log info file '%s' (errno %d)", goto err; } } - DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); - DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); + +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); + DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); + } +#endif + /* Now change the cache from READ to WRITE - must do this before flush_relay_log_info @@ -2516,7 +2714,7 @@ err: pthread_mutex_unlock(&data_lock); DBUG_PRINT("exit",("killed: %d abort: %d slave_running: %d \ improper_arguments: %d timed_out: %d", - (int) thd->killed, + thd->killed_errno(), (int) (init_abort_pos_wait != abort_pos_wait), (int) slave_running, (int) (error == -2), @@ -2529,6 +2727,11 @@ improper_arguments: %d timed_out: %d", DBUG_RETURN( error ? error : event_count ); } +void set_slave_thread_options(THD* thd) +{ + thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | + OPTION_AUTO_IS_NULL; +} /* init_slave_thread() @@ -2546,6 +2749,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type) thd->master_access= ~0; thd->priv_user = 0; thd->slave_thread = 1; + set_slave_thread_options(thd); /* It's nonsense to constrain the slave threads with max_join_size; if a query succeeded on master, we HAVE to execute it. So set @@ -2807,13 +3011,14 @@ bool st_relay_log_info::is_until_satisfied() if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN) { - /* - We have no cached comaprison results so we should compare log names - and cache result + /* + We have no cached comparison results so we should compare log names + and cache result. + If we are after RESET SLAVE, and the SQL slave thread has not processed + any event yet, it could be that group_master_log_name is "". In that case, + just wait for more events (as there is no sensible comparison to do). */ - DBUG_ASSERT(*log_name || log_pos == 0); - if (*log_name) { const char *basename= log_name + dirname_length(log_name); @@ -2888,28 +3093,63 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) int exec_res; /* - Skip queries originating from this server or number of - queries specified by the user in slave_skip_counter - We can't however skip event's that has something to do with the + Queries originating from this server must be skipped. + Low-level events (Format_desc, Rotate, Stop) from this server + must also be skipped. But for those we don't want to modify + group_master_log_pos, because these events did not exist on the master. + Format_desc is not completely skipped. + Skip queries specified by the user in slave_skip_counter. 
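+    (slave_skip_counter is decremented a bit further down, but never for the
+    Rotate/Format_desc/Stop events that this slave itself wrote into the
+    relay log; see the comment at the decrement.)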
+ We can't however skip events that has something to do with the log files themselves. + Filtering on own server id is extremely important, to ignore execution of + events created by the creation/rotation of the relay log (remember that + now the relay log starts with its Format_desc, has a Rotate etc). */ - - if ((ev->server_id == (uint32) ::server_id && !replicate_same_server_id) || - (rli->slave_skip_counter && type_code != ROTATE_EVENT)) + + DBUG_PRINT("info",("type_code=%d, server_id=%d",type_code,ev->server_id)); + + if ((ev->server_id == (uint32) ::server_id && + !replicate_same_server_id && + type_code != FORMAT_DESCRIPTION_EVENT) || + (rli->slave_skip_counter && + type_code != ROTATE_EVENT && type_code != STOP_EVENT && + type_code != START_EVENT_V3 && type_code!= FORMAT_DESCRIPTION_EVENT)) { - /* TODO: I/O thread should not even log events with the same server id */ - rli->inc_group_relay_log_pos(ev->get_event_len(), - type_code != STOP_EVENT ? ev->log_pos : LL(0), - 1/* skip lock*/); - flush_relay_log_info(rli); - + DBUG_PRINT("info", ("event skipped")); + if (thd->options & OPTION_BEGIN) + rli->inc_event_relay_log_pos(); + else + { + rli->inc_group_relay_log_pos((type_code == ROTATE_EVENT || + type_code == STOP_EVENT || + type_code == FORMAT_DESCRIPTION_EVENT) ? + LL(0) : ev->log_pos, + 1/* skip lock*/); + flush_relay_log_info(rli); + } + /* - Protect against common user error of setting the counter to 1 - instead of 2 while recovering from an failed auto-increment insert + Protect against common user error of setting the counter to 1 + instead of 2 while recovering from an insert which used auto_increment, + rand or user var. */ if (rli->slave_skip_counter && - !((type_code == INTVAR_EVENT || type_code == STOP_EVENT) && - rli->slave_skip_counter == 1)) + !((type_code == INTVAR_EVENT || + type_code == RAND_EVENT || + type_code == USER_VAR_EVENT) && + rli->slave_skip_counter == 1) && + /* + The events from ourselves which have something to do with the relay + log itself must be skipped, true, but they mustn't decrement + rli->slave_skip_counter, because the user is supposed to not see + these events (they are not in the master's binlog) and if we + decremented, START SLAVE would for example decrement when it sees + the Rotate, so the event which the user probably wanted to skip + would not be skipped. + */ + !(ev->server_id == (uint32) ::server_id && + (type_code == ROTATE_EVENT || type_code == STOP_EVENT || + type_code == START_EVENT_V3 || type_code == FORMAT_DESCRIPTION_EVENT))) --rli->slave_skip_counter; pthread_mutex_unlock(&rli->data_lock); delete ev; @@ -2925,7 +3165,16 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) ev->thd = thd; exec_res = ev->exec_event(rli); DBUG_ASSERT(rli->sql_thd==thd); - delete ev; + /* + Format_description_log_event should not be deleted because it will be + used to read info about the relay log's format; it will be deleted when + the SQL thread does not need it, i.e. when this thread terminates. + */ + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + { + DBUG_PRINT("info", ("Deleting the event after it has been executed")); + delete ev; + } return exec_res; } else @@ -3026,7 +3275,8 @@ connected: thd->proc_info = "Checking master version"; if (get_master_version_and_clock(mysql, mi)) goto err; - if (!mi->old_format) + + if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1) { /* Register ourselves with the master. 
@@ -3228,6 +3478,9 @@ err: pthread_mutex_lock(&mi->run_lock); mi->slave_running = 0; mi->io_thd = 0; + /* Forget the relay log's format */ + delete mi->rli.relay_log.description_event_for_queue; + mi->rli.relay_log.description_event_for_queue= 0; // TODO: make rpl_status part of MASTER_INFO change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE); mi->abort_slave = 0; // TODO: check if this is needed @@ -3323,15 +3576,38 @@ slave_begin: if (init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, - 1 /*need data lock*/, &errmsg)) + 1 /*need data lock*/, &errmsg, + 1 /*look for a description_event*/)) { sql_print_error("Error initializing relay log position: %s", errmsg); goto err; } THD_CHECK_SENTRY(thd); - DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); - DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); + /* + Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the + correct position when it's called just after my_b_seek() (the questionable + stuff is those "seek is done on next read" comments in the my_b_seek() + source code). + The crude reality is that this assertion randomly fails whereas + replication seems to work fine. And there is no easy explanation why it + fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of + init_relay_log_pos() called above). Maybe the assertion would be + meaningful if we held rli->data_lock between the my_b_seek() and the + DBUG_ASSERT(). + */ +#ifdef SHOULD_BE_CHECKED + DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); +#endif + } +#endif DBUG_ASSERT(rli->sql_thd == thd); DBUG_PRINT("master_info",("log_file_name: %s position: %s", @@ -3382,7 +3658,12 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ err: VOID(pthread_mutex_lock(&LOCK_thread_count)); - thd->query = thd->db = 0; // extra safety + /* + Some extra safety, which should not been needed (normally, event deletion + should already have done these assignments (each event which sets these + variables is supposed to set them to 0 before terminating)). + */ + thd->query= thd->db= thd->catalog= 0; thd->query_length = 0; VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->proc_info = "Waiting for slave mutex on exit"; @@ -3392,11 +3673,9 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun /* When master_pos_wait() wakes up it will check this and terminate */ rli->slave_running= 0; - /* - Going out of the transaction. Necessary to mark it, in case the user - restarts replication from a non-transactional statement (with CHANGE - MASTER). 
- */ + /* Forget the relay log's format */ + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= 0; /* Wake up master_pos_wait() */ pthread_mutex_unlock(&rli->data_lock); DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); @@ -3494,7 +3773,7 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev) if (unlikely(cev_not_written)) break; Execute_load_log_event xev(thd,0,0); - xev.log_pos = mi->master_log_pos; + xev.log_pos = cev->log_pos; if (unlikely(mi->rli.relay_log.append(&xev))) { sql_print_error("Slave I/O: error writing Exec_load event to \ @@ -3508,7 +3787,6 @@ relay log"); { cev->block = (char*)net->read_pos; cev->block_len = num_bytes; - cev->log_pos = mi->master_log_pos; if (unlikely(mi->rli.relay_log.append(cev))) { sql_print_error("Slave I/O: error writing Create_file event to \ @@ -3522,7 +3800,7 @@ relay log"); { aev.block = (char*)net->read_pos; aev.block_len = num_bytes; - aev.log_pos = mi->master_log_pos; + aev.log_pos = cev->log_pos; if (unlikely(mi->rli.relay_log.append(&aev))) { sql_print_error("Slave I/O: error writing Append_block event to \ @@ -3550,6 +3828,7 @@ err: DESCRIPTION Updates the master info with the place in the next binary log where we should start reading. + Rotate the relay log to avoid mixed-format relay logs. NOTES We assume we already locked mi->data_lock @@ -3580,21 +3859,30 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) if (disconnect_slave_event_count) events_till_disconnect++; #endif + /* + If description_event_for_queue is format <4, there is conversion in the + relay log to the slave's format (4). And Rotate can mean upgrade or + nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so + no need to reset description_event_for_queue now. And if it's nothing (same + master version as before), no need (still using the slave's format). + */ + if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4) + { + delete mi->rli.relay_log.description_event_for_queue; + /* start from format 3 (MySQL 4.0) again */ + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(3); + } + + rotate_relay_log(mi); /* will take the right mutexes */ DBUG_RETURN(0); } - /* - queue_old_event() - - Writes a 3.23 event to the relay log. - - TODO: - Test this code before release - it has to be tested on a separate - setup with 3.23 master + Reads a 3.23 event and converts it to the slave's format. This code was copied + from MySQL 4.0. */ - -static int queue_old_event(MASTER_INFO *mi, const char *buf, +static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf, ulong event_len) { const char *errmsg = 0; @@ -3602,7 +3890,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, bool ignore_event= 0; char *tmp_buf = 0; RELAY_LOG_INFO *rli= &mi->rli; - DBUG_ENTER("queue_old_event"); + DBUG_ENTER("queue_binlog_ver_1_event"); /* If we get Load event, we need to pass a non-reusable buffer @@ -3634,7 +3922,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, connected to the master). 
*/ Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg, - 1 /*old format*/ ); + mi->rli.relay_log.description_event_for_queue); if (unlikely(!ev)) { sql_print_error("Read invalid event from master: '%s',\ @@ -3644,7 +3932,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, DBUG_RETURN(1); } pthread_mutex_lock(&mi->data_lock); - ev->log_pos = mi->master_log_pos; + ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */ switch (ev->get_type_code()) { case STOP_EVENT: ignore_event= 1; @@ -3669,13 +3957,11 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, { /* We come here when and only when tmp_buf != 0 */ DBUG_ASSERT(tmp_buf); + inc_pos=event_len; + ev->log_pos+= inc_pos; int error = process_io_create_file(mi,(Create_file_log_event*)ev); delete ev; - /* - We had incremented event_len, but now when it is used to calculate the - position in the master's log, we must use the original value. - */ - mi->master_log_pos += --event_len; + mi->master_log_pos += inc_pos; DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); pthread_mutex_unlock(&mi->data_lock); my_free((char*)tmp_buf, MYF(0)); @@ -3687,6 +3973,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, } if (likely(!ignore_event)) { + if (ev->log_pos) + /* + Don't do it for fake Rotate events (see comment in + Log_event::Log_event(const char* buf...) in log_event.cc). + */ + ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */ if (unlikely(rli->relay_log.append(ev))) { delete ev; @@ -3702,10 +3994,98 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, DBUG_RETURN(0); } +/* + Reads a 4.0 event and converts it to the slave's format. This code was copied + from queue_binlog_ver_1_event(), with some affordable simplifications. +*/ +static int queue_binlog_ver_3_event(MASTER_INFO *mi, const char *buf, + ulong event_len) +{ + const char *errmsg = 0; + ulong inc_pos; + char *tmp_buf = 0; + RELAY_LOG_INFO *rli= &mi->rli; + DBUG_ENTER("queue_binlog_ver_3_event"); + + /* read_log_event() will adjust log_pos to be end_log_pos */ + Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg, + mi->rli.relay_log.description_event_for_queue); + if (unlikely(!ev)) + { + sql_print_error("Read invalid event from master: '%s',\ + master could be corrupt but a more likely cause of this is a bug", + errmsg); + my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(1); + } + pthread_mutex_lock(&mi->data_lock); + switch (ev->get_type_code()) { + case STOP_EVENT: + goto err; + case ROTATE_EVENT: + if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev))) + { + delete ev; + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(1); + } + inc_pos= 0; + break; + default: + inc_pos= event_len; + break; + } + if (unlikely(rli->relay_log.append(ev))) + { + delete ev; + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(1); + } + rli->relay_log.harvest_bytes_written(&rli->log_space_total); + delete ev; + mi->master_log_pos+= inc_pos; +err: + DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(0); +} + +/* + queue_old_event() + + Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0 + (exactly, slave's) format. To do the conversion, we create a 5.0 event from + the 3.23/4.0 bytes, then write this event to the relay log. 
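+  As an illustration (based only on the header-length constants referenced
+  elsewhere in this file, not on a traced example): a 3.23 event arrives with
+  a 13-byte common header (OLD_HEADER_LEN) and no end position; the converted
+  event is written with the slave's 19-byte header (LOG_EVENT_HEADER_LEN) and
+  a computed end_log_pos, so its length in the relay log differs from its
+  length in the master's binlog.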
+ + TODO: + Test this code before release - it has to be tested on a separate + setup with 3.23 master or 4.0 master +*/ + +static int queue_old_event(MASTER_INFO *mi, const char *buf, + ulong event_len) +{ + switch (mi->rli.relay_log.description_event_for_queue->binlog_version) + { + case 1: + return queue_binlog_ver_1_event(mi,buf,event_len); + case 3: + return queue_binlog_ver_3_event(mi,buf,event_len); + default: /* unsupported format; eg version 2 */ + DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()", + mi->rli.relay_log.description_event_for_queue->binlog_version)); + return 1; + } +} /* queue_event() + If the event is 3.23/4.0, passes it to queue_old_event() which will convert + it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is + no format conversion, it's pure read/write of bytes. + So a 5.0.0 slave's relay log can contain events in the slave's format or in + any >=5.0.0 format. */ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) @@ -3715,7 +4095,8 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) RELAY_LOG_INFO *rli= &mi->rli; DBUG_ENTER("queue_event"); - if (mi->old_format) + if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 && + buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */) DBUG_RETURN(queue_old_event(mi,buf,event_len)); pthread_mutex_lock(&mi->data_lock); @@ -3742,7 +4123,7 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) goto err; case ROTATE_EVENT: { - Rotate_log_event rev(buf,event_len,0); + Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue); if (unlikely(process_io_rotate(mi,&rev))) { error= 1; @@ -3755,6 +4136,42 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) inc_pos= 0; break; } + case FORMAT_DESCRIPTION_EVENT: + { + /* + Create an event, and save it (when we rotate the relay log, we will have + to write this event again). + */ + /* + We are the only thread which reads/writes description_event_for_queue. The + relay_log struct does not move (though some members of it can change), so + we needn't any lock (no rli->data_lock, no log lock). + */ + Format_description_log_event* tmp; + const char* errmsg; + if (!(tmp= (Format_description_log_event*) + Log_event::read_log_event(buf, event_len, &errmsg, + mi->rli.relay_log.description_event_for_queue))) + { + error= 2; + goto err; + } + delete mi->rli.relay_log.description_event_for_queue; + mi->rli.relay_log.description_event_for_queue= tmp; + /* + Though this does some conversion to the slave's format, this will + preserve the master's binlog format version, and number of event types. + */ + /* + If the event was not requested by the slave (the slave did not ask for + it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos + */ + inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0; + DBUG_PRINT("info",("binlog format is now %d", + mi->rli.relay_log.description_event_for_queue->binlog_version)); + + } + break; default: inc_pos= event_len; break; @@ -3781,23 +4198,32 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) We still want to increment, so that we won't re-read this event from the master if the slave IO thread is now stopped/restarted (more efficient if the events we are ignoring are big LOAD DATA INFILE). + But events which were generated by this slave and which do not exist in + the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment + mi->master_log_pos. 
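+    Otherwise Read_Master_Log_Pos in SHOW SLAVE STATUS would point to offsets
+    that do not exist in the master's binlog.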
*/ - mi->master_log_pos+= inc_pos; + if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT && + buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT && + buf[EVENT_TYPE_OFFSET]!=STOP_EVENT) + mi->master_log_pos+= inc_pos; DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos)); } else { /* write the event to the relay log */ - if (likely(!(error= rli->relay_log.appendv(buf,event_len,0)))) + if (likely(!(rli->relay_log.appendv(buf,event_len,0)))) { mi->master_log_pos+= inc_pos; DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); rli->relay_log.harvest_bytes_written(&rli->log_space_total); } + else + error=3; } err: pthread_mutex_unlock(&mi->data_lock); + DBUG_PRINT("info", ("error=%d", error)); DBUG_RETURN(error); } @@ -3822,6 +4248,7 @@ void end_relay_log_info(RELAY_LOG_INFO* rli) } rli->inited = 0; rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); + rli->relay_log.harvest_bytes_written(&rli->log_space_total); /* Delete the slave's temporary tables from memory. In the future there will be other actions than this, to ensure persistance @@ -4043,6 +4470,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg) relay_log_pos Current log pos pending Number of bytes already processed from the event */ + rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE); my_b_seek(cur_log,rli->event_relay_log_pos); DBUG_RETURN(cur_log); } @@ -4101,28 +4529,40 @@ Log_event* next_event(RELAY_LOG_INFO* rli) hot_log=0; // Using old binary log } } + #ifndef DBUG_OFF { + /* This is an assertion which sometimes fails, let's try to track it */ char llbuf1[22], llbuf2[22]; - DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); - /* - The next assertion sometimes (very rarely) fails, let's try to track - it - */ - DBUG_PRINT("info", ("\ -Before assert, my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", + DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", llstr(my_b_tell(cur_log),llbuf1), - llstr(rli->group_relay_log_pos,llbuf2))); - DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); + DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); } #endif /* Relay log is always in new format - if the master is 3.23, the - I/O thread will convert the format for us + I/O thread will convert the format for us. + A problem: the description event may be in a previous relay log. So if the + slave has been shutdown meanwhile, we would have to look in old relay + logs, which may even have been deleted. So we need to write this + description event at the beginning of the relay log. + When the relay log is created when the I/O thread starts, easy: the master + will send the description event and we will queue it. + But if the relay log is created by new_file(): then the solution is: + MYSQL_LOG::open() will write the buffered description event. 
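+    Either way, every relay log starts (right after the 4-byte magic) with a
+    Format_description event, so description_event_for_exec is available for
+    the read_log_event() call below.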
*/ - if ((ev=Log_event::read_log_event(cur_log,0,(bool)0 /* new format */))) + if ((ev=Log_event::read_log_event(cur_log,0, + rli->relay_log.description_event_for_exec))) + { DBUG_ASSERT(thd==rli->sql_thd); + /* + read it while we have a lock, to avoid a mutex lock in + inc_event_relay_log_pos() + */ + rli->future_event_relay_log_pos= my_b_tell(cur_log); if (hot_log) pthread_mutex_unlock(log_lock); DBUG_RETURN(ev); @@ -4340,8 +4780,9 @@ void rotate_relay_log(MASTER_INFO* mi) DBUG_ENTER("rotate_relay_log"); RELAY_LOG_INFO* rli= &mi->rli; - lock_slave_threads(mi); - pthread_mutex_lock(&rli->data_lock); + /* We don't lock rli->run_lock. This would lead to deadlocks. */ + pthread_mutex_lock(&mi->run_lock); + /* We need to test inited because otherwise, new_file() will attempt to lock LOCK_log, which may not be inited (if we're not a slave). @@ -4370,8 +4811,7 @@ void rotate_relay_log(MASTER_INFO* mi) */ rli->relay_log.harvest_bytes_written(&rli->log_space_total); end: - pthread_mutex_unlock(&rli->data_lock); - unlock_slave_threads(mi); + pthread_mutex_unlock(&mi->run_lock); DBUG_VOID_RETURN; } diff --git a/sql/slave.h b/sql/slave.h index 384436fdfcc..46fe58e1976 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -77,11 +77,6 @@ extern my_bool opt_log_slave_updates; extern ulonglong relay_log_space_limit; struct st_master_info; -enum enum_binlog_formats { - BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */ - BINLOG_FORMAT_323_LESS_57, - BINLOG_FORMAT_323_GEQ_57 }; - /**************************************************************************** Replication SQL Thread @@ -183,6 +178,8 @@ typedef struct st_relay_log_info ulonglong group_relay_log_pos; char event_relay_log_name[FN_REFLEN]; ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + /* Original log name and position of the group we're currently executing (whose coordinates are group_relay_log_name/pos in the relay log) @@ -286,12 +283,14 @@ typedef struct st_relay_log_info until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; } - inline void inc_event_relay_log_pos(ulonglong val) + inline void inc_event_relay_log_pos() { - event_relay_log_pos+= val; + event_relay_log_pos= future_event_relay_log_pos; } - void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0); + void inc_group_relay_log_pos(ulonglong log_pos, + bool skip_lock=0); + int wait_for_pos(THD* thd, String* log_name, longlong log_pos, longlong timeout); void close_temporary_tables(); @@ -368,7 +367,6 @@ typedef struct st_master_info int events_till_abort; #endif bool inited; - enum enum_binlog_formats old_format; volatile bool abort_slave, slave_running; volatile ulong slave_run_id; /* @@ -383,7 +381,7 @@ typedef struct st_master_info long clock_diff_with_master; st_master_info() - :ssl(0), fd(-1), io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT), + :ssl(0), fd(-1), io_thd(0), inited(0), abort_slave(0),slave_running(0), slave_run_id(0) { host[0] = 0; user[0] = 0; password[0] = 0; @@ -514,10 +512,12 @@ void lock_slave_threads(MASTER_INFO* mi); void unlock_slave_threads(MASTER_INFO* mi); void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse); int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos, - bool need_data_lock, const char** errmsg); + bool need_data_lock, const char** errmsg, + bool look_for_description_event); int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, const char** errmsg); +void set_slave_thread_options(THD* thd); void rotate_relay_log(MASTER_INFO* 
mi); extern "C" pthread_handler_decl(handle_slave_io,arg); diff --git a/sql/sp.cc b/sql/sp.cc new file mode 100644 index 00000000000..d70da1b1421 --- /dev/null +++ b/sql/sp.cc @@ -0,0 +1,1164 @@ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#include "mysql_priv.h" +#include "sql_acl.h" +#include "sp.h" +#include "sp_head.h" +#include "sp_cache.h" + +static bool +create_string(THD *thd, String *buf, + int sp_type, + sp_name *name, + const char *params, ulong paramslen, + const char *returns, ulong returnslen, + const char *body, ulong bodylen, + st_sp_chistics *chistics); + +/* + * + * DB storage of Stored PROCEDUREs and FUNCTIONs + * + */ + +enum +{ + MYSQL_PROC_FIELD_DB = 0, + MYSQL_PROC_FIELD_NAME, + MYSQL_PROC_FIELD_TYPE, + MYSQL_PROC_FIELD_SPECIFIC_NAME, + MYSQL_PROC_FIELD_LANGUAGE, + MYSQL_PROC_FIELD_ACCESS, + MYSQL_PROC_FIELD_DETERMINISTIC, + MYSQL_PROC_FIELD_SECURITY_TYPE, + MYSQL_PROC_FIELD_PARAM_LIST, + MYSQL_PROC_FIELD_RETURNS, + MYSQL_PROC_FIELD_BODY, + MYSQL_PROC_FIELD_DEFINER, + MYSQL_PROC_FIELD_CREATED, + MYSQL_PROC_FIELD_MODIFIED, + MYSQL_PROC_FIELD_SQL_MODE, + MYSQL_PROC_FIELD_COMMENT, + MYSQL_PROC_FIELD_COUNT +}; + +bool mysql_proc_table_exists= 1; + +/* *opened=true means we opened ourselves */ +static int +db_find_routine_aux(THD *thd, int type, sp_name *name, + enum thr_lock_type ltype, TABLE **tablep, bool *opened) +{ + TABLE *table; + byte key[64+64+1]; // db, name, type + uint keylen; + DBUG_ENTER("db_find_routine_aux"); + DBUG_PRINT("enter", ("type: %d name: %*s", + type, name->m_name.length, name->m_name.str)); + + /* + Speed up things if mysql.proc doesn't exists + mysql_proc_table_exists is set when on creates a stored procedure + or on flush privileges + */ + if (!mysql_proc_table_exists && ltype == TL_READ) + DBUG_RETURN(SP_OPEN_TABLE_FAILED); + + // Put the key used to read the row together + keylen= name->m_db.length; + if (keylen > 64) + keylen= 64; + memcpy(key, name->m_db.str, keylen); + memset(key+keylen, (int)' ', 64-keylen); // Pad with space + keylen= name->m_name.length; + if (keylen > 64) + keylen= 64; + memcpy(key+64, name->m_name.str, keylen); + memset(key+64+keylen, (int)' ', 64-keylen); // Pad with space + key[128]= type; + keylen= sizeof(key); + + for (table= thd->open_tables ; table ; table= table->next) + if (strcmp(table->table_cache_key, "mysql") == 0 && + strcmp(table->real_name, "proc") == 0) + break; + if (table) + *opened= FALSE; + else + { + TABLE_LIST tables; + + memset(&tables, 0, sizeof(tables)); + tables.db= (char*)"mysql"; + tables.real_name= tables.alias= (char*)"proc"; + if (! 
(table= open_ltable(thd, &tables, ltype))) + { + *tablep= NULL; + mysql_proc_table_exists= 0; + DBUG_RETURN(SP_OPEN_TABLE_FAILED); + } + *opened= TRUE; + } + mysql_proc_table_exists= 1; + + if (table->file->index_read_idx(table->record[0], 0, + key, keylen, + HA_READ_KEY_EXACT)) + { + *tablep= NULL; + DBUG_RETURN(SP_KEY_NOT_FOUND); + } + *tablep= table; + + DBUG_RETURN(SP_OK); +} + + +static int +db_find_routine(THD *thd, int type, sp_name *name, sp_head **sphp) +{ + extern int yyparse(void *thd); + TABLE *table; + const char *params, *returns, *body; + int ret; + bool opened; + const char *definer; + longlong created; + longlong modified; + st_sp_chistics chistics; + char *ptr; + uint length; + char buff[65]; + String str(buff, sizeof(buff), &my_charset_bin); + ulong sql_mode; + DBUG_ENTER("db_find_routine"); + DBUG_PRINT("enter", ("type: %d name: %*s", + type, name->m_name.length, name->m_name.str)); + + ret= db_find_routine_aux(thd, type, name, TL_READ, &table, &opened); + if (ret != SP_OK) + goto done; + + if (table->fields != MYSQL_PROC_FIELD_COUNT) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + + if ((ptr= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_DETERMINISTIC])) == NULL) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + bzero((char *)&chistics, sizeof(chistics)); + chistics.detistic= (ptr[0] == 'N' ? FALSE : TRUE); + + if ((ptr= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_SECURITY_TYPE])) == NULL) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + chistics.suid= (ptr[0] == 'I' ? IS_NOT_SUID : IS_SUID); + + if ((params= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_PARAM_LIST])) == NULL) + { + params= ""; + } + + if (type == TYPE_ENUM_PROCEDURE) + returns= ""; + else if ((returns= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_RETURNS])) == NULL) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + + if ((body= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_BODY])) == NULL) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + + // Get additional information + if ((definer= get_field(&thd->mem_root, + table->field[MYSQL_PROC_FIELD_DEFINER])) == NULL) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + + modified= table->field[MYSQL_PROC_FIELD_MODIFIED]->val_int(); + created= table->field[MYSQL_PROC_FIELD_CREATED]->val_int(); + + sql_mode= (ulong) table->field[MYSQL_PROC_FIELD_SQL_MODE]->val_int(); + + table->field[MYSQL_PROC_FIELD_COMMENT]->val_str(&str, &str); + + ptr= 0; + if ((length= str.length())) + ptr= thd->strmake(str.ptr(), length); + chistics.comment.str= ptr; + chistics.comment.length= length; + + if (opened) + { + opened= FALSE; + close_thread_tables(thd, 0, 1); + } + + { + String defstr; + LEX *oldlex= thd->lex; + char olddb[128]; + bool dbchanged; + enum enum_sql_command oldcmd= thd->lex->sql_command; + ulong old_sql_mode= thd->variables.sql_mode; + ha_rows select_limit= thd->variables.select_limit; + + thd->variables.sql_mode= sql_mode; + thd->variables.select_limit= HA_POS_ERROR; + + defstr.set_charset(system_charset_info); + if (!create_string(thd, &defstr, + type, + name, + params, strlen(params), + returns, strlen(returns), + body, strlen(body), + &chistics)) + { + ret= SP_INTERNAL_ERROR; + goto done; + } + + dbchanged= FALSE; + if ((ret= sp_use_new_db(thd, name->m_db.str, olddb, sizeof(olddb), + 1, &dbchanged))) + goto done; + + { + /* This is something of a kludge. 
We need to initialize some fields + * in thd->lex (the unit and master stuff), and the easiest way to + * do it is, is to call mysql_init_query(), but this unfortunately + * resets teh value_list where we keep the CALL parameters. So we + * copy the list and then restore it. + */ + List<Item> vals= thd->lex->value_list; + + mysql_init_query(thd, TRUE); + lex_start(thd, (uchar*)defstr.c_ptr(), defstr.length()); + thd->lex->value_list= vals; + } + + if (yyparse(thd) || thd->is_fatal_error || thd->lex->sphead == NULL) + { + LEX *newlex= thd->lex; + sp_head *sp= newlex->sphead; + + if (dbchanged && (ret= sp_change_db(thd, olddb, 1))) + goto done; + if (sp) + { + if (oldlex != newlex) + sp->restore_lex(thd); + delete sp; + newlex->sphead= NULL; + } + ret= SP_PARSE_ERROR; + } + else + { + if (dbchanged && (ret= sp_change_db(thd, olddb, 1))) + goto done; + *sphp= thd->lex->sphead; + (*sphp)->set_info((char *)definer, (uint)strlen(definer), + created, modified, &chistics, sql_mode); + } + thd->lex->sql_command= oldcmd; + thd->variables.sql_mode= old_sql_mode; + thd->variables.select_limit= select_limit; + } + + done: + + if (opened) + close_thread_tables(thd); + DBUG_RETURN(ret); +} + + +static int +db_create_routine(THD *thd, int type, sp_head *sp) +{ + int ret; + TABLE *table; + TABLE_LIST tables; + char definer[HOSTNAME_LENGTH+USERNAME_LENGTH+2]; + char olddb[128]; + bool dbchanged; + DBUG_ENTER("db_create_routine"); + DBUG_PRINT("enter", ("type: %d name: %*s",type,sp->m_name.length,sp->m_name.str)); + + dbchanged= FALSE; + if ((ret= sp_use_new_db(thd, sp->m_db.str, olddb, sizeof(olddb), + 0, &dbchanged))) + { + ret= SP_NO_DB_ERROR; + goto done; + } + + memset(&tables, 0, sizeof(tables)); + tables.db= (char*)"mysql"; + tables.real_name= tables.alias= (char*)"proc"; + + if (! (table= open_ltable(thd, &tables, TL_WRITE))) + ret= SP_OPEN_TABLE_FAILED; + else + { + restore_record(table, default_values); // Get default values for fields + strxmov(definer, thd->priv_user, "@", thd->priv_host, NullS); + + if (table->fields != MYSQL_PROC_FIELD_COUNT) + { + ret= SP_GET_FIELD_FAILED; + goto done; + } + table->field[MYSQL_PROC_FIELD_DB]-> + store(sp->m_db.str, sp->m_db.length, system_charset_info); + table->field[MYSQL_PROC_FIELD_NAME]-> + store(sp->m_name.str, sp->m_name.length, system_charset_info); + table->field[MYSQL_PROC_FIELD_TYPE]-> + store((longlong)type); + table->field[MYSQL_PROC_FIELD_SPECIFIC_NAME]-> + store(sp->m_name.str, sp->m_name.length, system_charset_info); + table->field[MYSQL_PROC_FIELD_DETERMINISTIC]-> + store((longlong)(sp->m_chistics->detistic ? 
1 : 2)); + if (sp->m_chistics->suid != IS_DEFAULT_SUID) + table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]-> + store((longlong)sp->m_chistics->suid); + table->field[MYSQL_PROC_FIELD_PARAM_LIST]-> + store(sp->m_params.str, sp->m_params.length, system_charset_info); + if (sp->m_retstr.str) + table->field[MYSQL_PROC_FIELD_RETURNS]-> + store(sp->m_retstr.str, sp->m_retstr.length, system_charset_info); + table->field[MYSQL_PROC_FIELD_BODY]-> + store(sp->m_body.str, sp->m_body.length, system_charset_info); + table->field[MYSQL_PROC_FIELD_DEFINER]-> + store(definer, (uint)strlen(definer), system_charset_info); + ((Field_timestamp *)table->field[MYSQL_PROC_FIELD_CREATED])->set_time(); + table->field[MYSQL_PROC_FIELD_SQL_MODE]-> + store((longlong)thd->variables.sql_mode); + if (sp->m_chistics->comment.str) + table->field[MYSQL_PROC_FIELD_COMMENT]-> + store(sp->m_chistics->comment.str, sp->m_chistics->comment.length, + system_charset_info); + + ret= SP_OK; + if (table->file->write_row(table->record[0])) + ret= SP_WRITE_ROW_FAILED; + } + +done: + close_thread_tables(thd); + if (dbchanged) + (void)sp_change_db(thd, olddb, 1); + DBUG_RETURN(ret); +} + + +static int +db_drop_routine(THD *thd, int type, sp_name *name) +{ + TABLE *table; + int ret; + bool opened; + DBUG_ENTER("db_drop_routine"); + DBUG_PRINT("enter", ("type: %d name: %*s", + type, name->m_name.length, name->m_name.str)); + + ret= db_find_routine_aux(thd, type, name, TL_WRITE, &table, &opened); + if (ret == SP_OK) + { + if (table->file->delete_row(table->record[0])) + ret= SP_DELETE_ROW_FAILED; + } + + if (opened) + close_thread_tables(thd); + DBUG_RETURN(ret); +} + + +static int +db_update_routine(THD *thd, int type, sp_name *name, + char *newname, uint newnamelen, + st_sp_chistics *chistics) +{ + TABLE *table; + int ret; + bool opened; + DBUG_ENTER("db_update_routine"); + DBUG_PRINT("enter", ("type: %d name: %*s", + type, name->m_name.length, name->m_name.str)); + + ret= db_find_routine_aux(thd, type, name, TL_WRITE, &table, &opened); + if (ret == SP_OK) + { + store_record(table,record[1]); + ((Field_timestamp *)table->field[MYSQL_PROC_FIELD_MODIFIED])->set_time(); + if (chistics->suid != IS_DEFAULT_SUID) + table->field[MYSQL_PROC_FIELD_SECURITY_TYPE]->store((longlong)chistics->suid); + if (newname) + table->field[MYSQL_PROC_FIELD_NAME]->store(newname, + newnamelen, + system_charset_info); + if (chistics->comment.str) + table->field[MYSQL_PROC_FIELD_COMMENT]->store(chistics->comment.str, + chistics->comment.length, + system_charset_info); + if ((table->file->update_row(table->record[1],table->record[0]))) + ret= SP_WRITE_ROW_FAILED; + } + if (opened) + close_thread_tables(thd); + DBUG_RETURN(ret); +} + + +struct st_used_field +{ + const char *field_name; + uint field_length; + enum enum_field_types field_type; + Field *field; +}; + +static struct st_used_field init_fields[]= +{ + { "Db", NAME_LEN, MYSQL_TYPE_STRING, 0}, + { "Name", NAME_LEN, MYSQL_TYPE_STRING, 0}, + { "Type", 9, MYSQL_TYPE_STRING, 0}, + { "Definer", 77, MYSQL_TYPE_STRING, 0}, + { "Modified", 0, MYSQL_TYPE_TIMESTAMP, 0}, + { "Created", 0, MYSQL_TYPE_TIMESTAMP, 0}, + { "Security_type", 1, MYSQL_TYPE_STRING, 0}, + { "Comment", NAME_LEN, MYSQL_TYPE_STRING, 0}, + { 0, 0, MYSQL_TYPE_STRING, 0} +}; + + +static int +print_field_values(THD *thd, TABLE *table, + struct st_used_field *used_fields, + int type, const char *wild) +{ + Protocol *protocol= thd->protocol; + + if (table->field[MYSQL_PROC_FIELD_TYPE]->val_int() == type) + { + String db_string; + String name_string; + struct 
st_used_field *used_field= used_fields; + + if (get_field(&thd->mem_root, used_field->field, &db_string)) + db_string.set_ascii("", 0); + used_field+= 1; + get_field(&thd->mem_root, used_field->field, &name_string); + + if (!wild || !wild[0] || !wild_compare(name_string.ptr(), wild, 0)) + { + protocol->prepare_for_resend(); + protocol->store(&db_string); + protocol->store(&name_string); + for (used_field++; + used_field->field_name; + used_field++) + { + switch (used_field->field_type) { + case MYSQL_TYPE_TIMESTAMP: + { + TIME tmp_time; + + bzero((char *)&tmp_time, sizeof(tmp_time)); + ((Field_timestamp *) used_field->field)->get_time(&tmp_time); + protocol->store(&tmp_time); + } + break; + default: + { + String tmp_string; + + get_field(&thd->mem_root, used_field->field, &tmp_string); + protocol->store(&tmp_string); + } + break; + } + } + if (protocol->write()) + return SP_INTERNAL_ERROR; + } + } + return SP_OK; +} + + +static int +db_show_routine_status(THD *thd, int type, const char *wild) +{ + TABLE *table; + TABLE_LIST tables; + int res; + DBUG_ENTER("db_show_routine_status"); + + memset(&tables, 0, sizeof(tables)); + tables.db= (char*)"mysql"; + tables.real_name= tables.alias= (char*)"proc"; + + if (! (table= open_ltable(thd, &tables, TL_READ))) + { + res= SP_OPEN_TABLE_FAILED; + goto done; + } + else + { + Item *item; + List<Item> field_list; + struct st_used_field *used_field; + st_used_field used_fields[array_elements(init_fields)]; + + memcpy((char*) used_fields, (char*) init_fields, sizeof(used_fields)); + /* Init header */ + for (used_field= &used_fields[0]; + used_field->field_name; + used_field++) + { + switch (used_field->field_type) { + case MYSQL_TYPE_TIMESTAMP: + field_list.push_back(item=new Item_datetime(used_field->field_name)); + break; + default: + field_list.push_back(item=new Item_empty_string(used_field->field_name, + used_field-> + field_length)); + break; + } + } + /* Print header */ + if (thd->protocol->send_fields(&field_list,1)) + { + res= SP_INTERNAL_ERROR; + goto err_case; + } + + /* Init fields */ + setup_tables(&tables); + for (used_field= &used_fields[0]; + used_field->field_name; + used_field++) + { + TABLE_LIST *not_used; + Item_field *field= new Item_field("mysql", "proc", + used_field->field_name); + if (!(used_field->field= find_field_in_tables(thd, field, &tables, + ¬_used, TRUE))) + { + res= SP_INTERNAL_ERROR; + goto err_case1; + } + } + + table->file->index_init(0); + if ((res= table->file->index_first(table->record[0]))) + { + res= (res == HA_ERR_END_OF_FILE) ? 
0 : SP_INTERNAL_ERROR; + goto err_case1; + } + if ((res= print_field_values(thd, table, used_fields, type, wild))) + goto err_case1; + while (!table->file->index_next(table->record[0])) + { + if ((res= print_field_values(thd, table, used_fields, type, wild))) + goto err_case1; + } + res= SP_OK; + } + +err_case1: + send_eof(thd); +err_case: + close_thread_tables(thd); +done: + DBUG_RETURN(res); +} + + +/* Drop all routines in database 'db' */ +int +sp_drop_db_routines(THD *thd, char *db) +{ + TABLE *table; + byte key[64]; // db + uint keylen; + int ret; + DBUG_ENTER("sp_drop_db_routines"); + DBUG_PRINT("enter", ("db: %s", db)); + + // Put the key used to read the row together + keylen= strlen(db); + if (keylen > 64) + keylen= 64; + memcpy(key, db, keylen); + memset(key+keylen, (int)' ', 64-keylen); // Pad with space + keylen= sizeof(key); + + for (table= thd->open_tables ; table ; table= table->next) + if (strcmp(table->table_cache_key, "mysql") == 0 && + strcmp(table->real_name, "proc") == 0) + break; + if (! table) + { + TABLE_LIST tables; + + memset(&tables, 0, sizeof(tables)); + tables.db= (char*)"mysql"; + tables.real_name= tables.alias= (char*)"proc"; + if (! (table= open_ltable(thd, &tables, TL_WRITE))) + DBUG_RETURN(SP_OPEN_TABLE_FAILED); + } + + ret= SP_OK; + table->file->index_init(0); + if (! table->file->index_read(table->record[0], + key, keylen, HA_READ_KEY_EXACT)) + { + int nxtres; + bool deleted= FALSE; + + do { + if (! table->file->delete_row(table->record[0])) + deleted= TRUE; /* We deleted something */ + else + { + ret= SP_DELETE_ROW_FAILED; + nxtres= 0; + break; + } + } while (! (nxtres= table->file->index_next_same(table->record[0], + key, keylen))); + if (nxtres != HA_ERR_END_OF_FILE) + ret= SP_KEY_NOT_FOUND; + if (deleted) + sp_cache_invalidate(); + } + + close_thread_tables(thd); + + DBUG_RETURN(ret); +} + + +/***************************************************************************** + PROCEDURE +******************************************************************************/ + +sp_head * +sp_find_procedure(THD *thd, sp_name *name) +{ + sp_head *sp; + DBUG_ENTER("sp_find_procedure"); + DBUG_PRINT("enter", ("name: %*s.%*s", + name->m_db.length, name->m_db.str, + name->m_name.length, name->m_name.str)); + + if (!(sp= sp_cache_lookup(&thd->sp_proc_cache, name))) + { + if (db_find_routine(thd, TYPE_ENUM_PROCEDURE, name, &sp) == SP_OK) + sp_cache_insert(&thd->sp_proc_cache, sp); + } + + DBUG_RETURN(sp); +} + + +int +sp_create_procedure(THD *thd, sp_head *sp) +{ + int ret; + DBUG_ENTER("sp_create_procedure"); + DBUG_PRINT("enter", ("name: %*s", sp->m_name.length, sp->m_name.str)); + + ret= db_create_routine(thd, TYPE_ENUM_PROCEDURE, sp); + DBUG_RETURN(ret); +} + + +int +sp_drop_procedure(THD *thd, sp_name *name) +{ + int ret; + DBUG_ENTER("sp_drop_procedure"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + sp_cache_remove(&thd->sp_proc_cache, name); + ret= db_drop_routine(thd, TYPE_ENUM_PROCEDURE, name); + DBUG_RETURN(ret); +} + + +int +sp_update_procedure(THD *thd, sp_name *name, + char *newname, uint newnamelen, + st_sp_chistics *chistics) +{ + int ret; + DBUG_ENTER("sp_update_procedure"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + sp_cache_remove(&thd->sp_proc_cache, name); + ret= db_update_routine(thd, TYPE_ENUM_PROCEDURE, name, + newname, newnamelen, chistics); + DBUG_RETURN(ret); +} + + +int +sp_show_create_procedure(THD *thd, sp_name *name) +{ + sp_head *sp; + 
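/* Find the procedure (from the cache or mysql.proc) and let it send its own CREATE PROCEDURE statement; SP_KEY_NOT_FOUND if it doesn't exist. */ +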
DBUG_ENTER("sp_show_create_procedure"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + if ((sp= sp_find_procedure(thd, name))) + { + int ret= sp->show_create_procedure(thd); + + DBUG_RETURN(ret); + } + + DBUG_RETURN(SP_KEY_NOT_FOUND); +} + + +int +sp_show_status_procedure(THD *thd, const char *wild) +{ + int ret; + DBUG_ENTER("sp_show_status_procedure"); + + ret= db_show_routine_status(thd, TYPE_ENUM_PROCEDURE, wild); + DBUG_RETURN(ret); +} + + +/***************************************************************************** + FUNCTION +******************************************************************************/ + +sp_head * +sp_find_function(THD *thd, sp_name *name) +{ + sp_head *sp; + DBUG_ENTER("sp_find_function"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + if (!(sp= sp_cache_lookup(&thd->sp_func_cache, name))) + { + if (db_find_routine(thd, TYPE_ENUM_FUNCTION, name, &sp) != SP_OK) + sp= NULL; + else + sp_cache_insert(&thd->sp_func_cache, sp); + } + DBUG_RETURN(sp); +} + + +int +sp_create_function(THD *thd, sp_head *sp) +{ + int ret; + DBUG_ENTER("sp_create_function"); + DBUG_PRINT("enter", ("name: %*s", sp->m_name.length, sp->m_name.str)); + + ret= db_create_routine(thd, TYPE_ENUM_FUNCTION, sp); + DBUG_RETURN(ret); +} + + +int +sp_drop_function(THD *thd, sp_name *name) +{ + int ret; + DBUG_ENTER("sp_drop_function"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + sp_cache_remove(&thd->sp_func_cache, name); + ret= db_drop_routine(thd, TYPE_ENUM_FUNCTION, name); + DBUG_RETURN(ret); +} + + +int +sp_update_function(THD *thd, sp_name *name, + char *newname, uint newnamelen, + st_sp_chistics *chistics) +{ + int ret; + DBUG_ENTER("sp_update_procedure"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + sp_cache_remove(&thd->sp_func_cache, name); + ret= db_update_routine(thd, TYPE_ENUM_FUNCTION, name, + newname, newnamelen, chistics); + DBUG_RETURN(ret); +} + + +int +sp_show_create_function(THD *thd, sp_name *name) +{ + sp_head *sp; + DBUG_ENTER("sp_show_create_function"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + if ((sp= sp_find_function(thd, name))) + { + int ret= sp->show_create_function(thd); + + DBUG_RETURN(ret); + } + DBUG_RETURN(SP_KEY_NOT_FOUND); +} + + +int +sp_show_status_function(THD *thd, const char *wild) +{ + int ret; + DBUG_ENTER("sp_show_status_function"); + ret= db_show_routine_status(thd, TYPE_ENUM_FUNCTION, wild); + DBUG_RETURN(ret); +} + + +bool +sp_function_exists(THD *thd, sp_name *name) +{ + TABLE *table; + bool ret= FALSE; + bool opened= FALSE; + DBUG_ENTER("sp_function_exists"); + + if (sp_cache_lookup(&thd->sp_func_cache, name) || + db_find_routine_aux(thd, TYPE_ENUM_FUNCTION, + name, TL_READ, + &table, &opened) == SP_OK) + ret= TRUE; + if (opened) + close_thread_tables(thd, 0, 1); + thd->clear_error(); + DBUG_RETURN(ret); +} + + +byte * +sp_lex_spfuns_key(const byte *ptr, uint *plen, my_bool first) +{ + LEX_STRING *lsp= (LEX_STRING *)ptr; + *plen= lsp->length; + return (byte *)lsp->str; +} + + +void +sp_add_fun_to_lex(LEX *lex, sp_name *fun) +{ + if (! 
hash_search(&lex->spfuns, + (byte *)fun->m_qname.str, fun->m_qname.length)) + { + LEX_STRING *ls= (LEX_STRING *)sql_alloc(sizeof(LEX_STRING)); + ls->str= sql_strmake(fun->m_qname.str, fun->m_qname.length); + ls->length= fun->m_qname.length; + + my_hash_insert(&lex->spfuns, (byte *)ls); + } +} + + +void +sp_merge_funs(LEX *dst, LEX *src) +{ + for (uint i=0 ; i < src->spfuns.records ; i++) + { + LEX_STRING *ls= (LEX_STRING *)hash_element(&src->spfuns, i); + + if (! hash_search(&dst->spfuns, (byte *)ls->str, ls->length)) + my_hash_insert(&dst->spfuns, (byte *)ls); + } +} + + +int +sp_cache_functions(THD *thd, LEX *lex) +{ + HASH *h= &lex->spfuns; + int ret= 0; + + for (uint i=0 ; i < h->records ; i++) + { + LEX_STRING *ls= (LEX_STRING *)hash_element(h, i); + sp_name name(*ls); + + name.m_qname= *ls; + if (! sp_cache_lookup(&thd->sp_func_cache, &name)) + { + sp_head *sp; + LEX *oldlex= thd->lex; + LEX *newlex= new st_lex; + + thd->lex= newlex; + name.m_name.str= strchr(name.m_qname.str, '.'); + name.m_db.length= name.m_name.str - name.m_qname.str; + name.m_db.str= strmake_root(&thd->mem_root, + name.m_qname.str, name.m_db.length); + name.m_name.str+= 1; + name.m_name.length= name.m_qname.length - name.m_db.length - 1; + + if (db_find_routine(thd, TYPE_ENUM_FUNCTION, &name, &sp) + == SP_OK) + { + ret= sp_cache_functions(thd, newlex); + delete newlex; + thd->lex= oldlex; + if (ret) + break; + sp_cache_insert(&thd->sp_func_cache, sp); + } + else + { + delete newlex; + thd->lex= oldlex; + net_printf(thd, ER_SP_DOES_NOT_EXIST, "FUNCTION", ls->str); + ret= 1; + break; + } + } + } + return ret; +} + +/* + * Generates the CREATE... string from the table information. + * Returns TRUE on success, FALSE on (alloc) failure. + */ +static bool +create_string(THD *thd, String *buf, + int type, + sp_name *name, + const char *params, ulong paramslen, + const char *returns, ulong returnslen, + const char *body, ulong bodylen, + st_sp_chistics *chistics) +{ + /* Make some room to begin with */ + if (buf->alloc(100 + name->m_qname.length + paramslen + returnslen + bodylen + + chistics->comment.length)) + return FALSE; + + buf->append("CREATE ", 7); + if (type == TYPE_ENUM_FUNCTION) + buf->append("FUNCTION ", 9); + else + buf->append("PROCEDURE ", 10); + append_identifier(thd, buf, name->m_db.str, name->m_db.length); + buf->append('.'); + append_identifier(thd, buf, name->m_name.str, name->m_name.length); + buf->append('('); + buf->append(params, paramslen); + buf->append(')'); + if (type == TYPE_ENUM_FUNCTION) + { + buf->append(" RETURNS ", 9); + buf->append(returns, returnslen); + } + buf->append('\n'); + if (chistics->detistic) + buf->append( " DETERMINISTIC\n", 18); + if (chistics->suid == IS_NOT_SUID) + buf->append(" SQL SECURITY INVOKER\n", 25); + if (chistics->comment.length) + { + buf->append(" COMMENT "); + append_unescaped(buf, chistics->comment.str, chistics->comment.length); + buf->append('\n'); + } + buf->append(body, bodylen); + return TRUE; +} + + +// +// Utilities... 
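+// (helpers for switching the current database when entering and leaving a routine)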
+// + +int +sp_use_new_db(THD *thd, char *newdb, char *olddb, uint olddblen, + bool no_access_check, bool *dbchangedp) +{ + bool changeit; + DBUG_ENTER("sp_use_new_db"); + DBUG_PRINT("enter", ("newdb: %s", newdb)); + + if (thd->db && thd->db[0]) + { + if (my_strcasecmp(system_charset_info, thd->db, newdb) == 0) + changeit= 0; + else + { + changeit= 1; + strnmov(olddb, thd->db, olddblen); + } + } + else + { // thd->db empty + if (newdb[0]) + changeit= 1; + else + changeit= 0; + olddb[0] = '\0'; + } + if (!changeit) + { + *dbchangedp= FALSE; + DBUG_RETURN(0); + } + else + { + int ret= sp_change_db(thd, newdb, no_access_check); + + if (! ret) + *dbchangedp= TRUE; + DBUG_RETURN(ret); + } +} + +/* + Change database. + + SYNOPSIS + sp_change_db() + thd Thread handler + name Database name + empty_is_ok True= it's ok with "" as name + no_access_check True= don't do access check + + DESCRIPTION + This is the same as mysql_change_db(), but with some extra + arguments for Stored Procedure usage; doing implicit "use" + when executing an SP in a different database. + We also use different error routines, since this might be + invoked from a function when executing a query or statement. + Note: We would have prefered to reuse mysql_change_db(), but + the error handling in particular made that too awkward, so + we (reluctantly) have a "copy" here. + + RETURN VALUES + 0 ok + 1 error +*/ + +int +sp_change_db(THD *thd, char *name, bool no_access_check) +{ + int length, db_length; + char *dbname=my_strdup((char*) name,MYF(MY_WME)); + char path[FN_REFLEN]; + ulong db_access; + HA_CREATE_INFO create; + DBUG_ENTER("sp_change_db"); + DBUG_PRINT("enter", ("db: %s, no_access_check: %d", name, no_access_check)); + + db_length= (!dbname ? 0 : strip_sp(dbname)); + if (dbname && db_length) + { + if ((db_length > NAME_LEN) || check_db_name(dbname)) + { + my_printf_error(ER_WRONG_DB_NAME, ER(ER_WRONG_DB_NAME), MYF(0), dbname); + x_free(dbname); + DBUG_RETURN(1); + } + } + + if (dbname && db_length) + { +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (! no_access_check) + { + if (test_all_bits(thd->master_access,DB_ACLS)) + db_access=DB_ACLS; + else + db_access= (acl_get(thd->host,thd->ip, thd->priv_user,dbname,0) | + thd->master_access); + if (!(db_access & DB_ACLS) && + (!grant_option || check_grant_db(thd,dbname))) + { + my_printf_error(ER_DBACCESS_DENIED_ERROR, ER(ER_DBACCESS_DENIED_ERROR), + MYF(0), + thd->priv_user, + thd->priv_host, + dbname); + mysql_log.write(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR), + thd->priv_user, + thd->priv_host, + dbname); + my_free(dbname,MYF(0)); + DBUG_RETURN(1); + } + } +#endif + (void) sprintf(path,"%s/%s",mysql_data_home,dbname); + length=unpack_dirname(path,path); // Convert if not unix + if (length && path[length-1] == FN_LIBCHAR) + path[length-1]=0; // remove ending '\' + if (access(path,F_OK)) + { + my_printf_error(ER_BAD_DB_ERROR, ER(ER_BAD_DB_ERROR), MYF(0), dbname); + my_free(dbname,MYF(0)); + DBUG_RETURN(1); + } + } + + x_free(thd->db); + thd->db=dbname; // THD::~THD will free this + thd->db_length=db_length; + + if (dbname && db_length) + { + strmov(path+unpack_dirname(path,path), MY_DB_OPT_FILE); + load_db_opt(thd, path, &create); + thd->db_charset= create.default_table_charset ? 
+ create.default_table_charset : + thd->variables.collation_server; + thd->variables.collation_database= thd->db_charset; + } + DBUG_RETURN(0); +} diff --git a/sql/sp.h b/sql/sp.h new file mode 100644 index 00000000000..783de2fe7ee --- /dev/null +++ b/sql/sp.h @@ -0,0 +1,106 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_H_ +#define _SP_H_ + +// Return codes from sp_create_*, sp_drop_*, and sp_show_*: +#define SP_OK 0 +#define SP_KEY_NOT_FOUND -1 +#define SP_OPEN_TABLE_FAILED -2 +#define SP_WRITE_ROW_FAILED -3 +#define SP_DELETE_ROW_FAILED -4 +#define SP_GET_FIELD_FAILED -5 +#define SP_PARSE_ERROR -6 +#define SP_INTERNAL_ERROR -7 +#define SP_NO_DB_ERROR -8 + +/* Drop all routines in database 'db' */ +int +sp_drop_db_routines(THD *thd, char *db); + +sp_head * +sp_find_procedure(THD *thd, sp_name *name); + +int +sp_create_procedure(THD *thd, sp_head *sp); + +int +sp_drop_procedure(THD *thd, sp_name *name); + + +int +sp_update_procedure(THD *thd, sp_name *name, + char *newname, uint newnamelen, + st_sp_chistics *chistics); + +int +sp_show_create_procedure(THD *thd, sp_name *name); + +int +sp_show_status_procedure(THD *thd, const char *wild); + +sp_head * +sp_find_function(THD *thd, sp_name *name); + +int +sp_create_function(THD *thd, sp_head *sp); + +int +sp_drop_function(THD *thd, sp_name *name); + +int +sp_update_function(THD *thd, sp_name *name, + char *newname, uint newnamelen, + st_sp_chistics *chistics); + +int +sp_show_create_function(THD *thd, sp_name *name); + +int +sp_show_status_function(THD *thd, const char *wild); + +bool +sp_function_exists(THD *thd, sp_name *name); + + +// This is needed since we have to read the functions before we +// do anything else. +void +sp_add_fun_to_lex(LEX *lex, sp_name *fun); +void +sp_merge_funs(LEX *dst, LEX *src); +int +sp_cache_functions(THD *thd, LEX *lex); + + +// +// Utilities... +// + +// Do a "use newdb". The current db is stored at olddb. +// If newdb is the same as the current one, nothing is changed. +// dbchangedp is set to true if the db was actually changed. +int +sp_use_new_db(THD *thd, char *newdb, char *olddb, uint olddbmax, + bool no_access_check, bool *dbchangedp); + +// Like mysql_change_db() but handles empty db name and the send_ok() problem. +int +sp_change_db(THD *thd, char *db, bool no_access_check); + +#endif /* _SP_H_ */ diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc new file mode 100644 index 00000000000..056ac6d7e96 --- /dev/null +++ b/sql/sp_cache.cc @@ -0,0 +1,166 @@ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "sp_cache.h" +#include "sp_head.h" + +static pthread_mutex_t Cversion_lock; +static ulong Cversion = 0; + +void +sp_cache_init() +{ + pthread_mutex_init(&Cversion_lock, MY_MUTEX_INIT_FAST); +} + +void +sp_cache_clear(sp_cache **cp) +{ + sp_cache *c= *cp; + + if (c) + { + delete c; + *cp= NULL; + } +} + +void +sp_cache_insert(sp_cache **cp, sp_head *sp) +{ + sp_cache *c= *cp; + + if (! c) + c= new sp_cache(); + if (c) + { + ulong v; + + pthread_mutex_lock(&Cversion_lock); // LOCK + v= Cversion; + pthread_mutex_unlock(&Cversion_lock); // UNLOCK + + if (c->version < v) + { + if (*cp) + c->remove_all(); + c->version= v; + } + c->insert(sp); + if (*cp == NULL) + *cp= c; + } +} + +sp_head * +sp_cache_lookup(sp_cache **cp, sp_name *name) +{ + ulong v; + sp_cache *c= *cp; + + if (! c) + return NULL; + + pthread_mutex_lock(&Cversion_lock); // LOCK + v= Cversion; + pthread_mutex_unlock(&Cversion_lock); // UNLOCK + + if (c->version < v) + { + c->remove_all(); + c->version= v; + return NULL; + } + return c->lookup(name->m_qname.str, name->m_qname.length); +} + +bool +sp_cache_remove(sp_cache **cp, sp_name *name) +{ + sp_cache *c= *cp; + bool found= FALSE; + + if (c) + { + ulong v; + + pthread_mutex_lock(&Cversion_lock); // LOCK + v= Cversion++; + pthread_mutex_unlock(&Cversion_lock); // UNLOCK + + if (c->version < v) + c->remove_all(); + else + found= c->remove(name->m_qname.str, name->m_qname.length); + c->version= v+1; + } + return found; +} + +void +sp_cache_invalidate() +{ + pthread_mutex_lock(&Cversion_lock); // LOCK + Cversion++; + pthread_mutex_unlock(&Cversion_lock); // UNLOCK +} + +static byte * +hash_get_key_for_sp_head(const byte *ptr, uint *plen, + my_bool first) +{ + sp_head *sp= (sp_head *)ptr; + + *plen= sp->m_qname.length; + return (byte*) sp->m_qname.str; +} + +static void +hash_free_sp_head(void *p) +{ + sp_head *sp= (sp_head *)p; + + delete sp; +} + +sp_cache::sp_cache() +{ + init(); +} + +sp_cache::~sp_cache() +{ + hash_free(&m_hashtable); +} + +void +sp_cache::init() +{ + hash_init(&m_hashtable, system_charset_info, 0, 0, 0, + hash_get_key_for_sp_head, hash_free_sp_head, 0); + version= 0; +} + +void +sp_cache::cleanup() +{ + hash_free(&m_hashtable); +} diff --git a/sql/sp_cache.h b/sql/sp_cache.h new file mode 100644 index 00000000000..754a987090e --- /dev/null +++ b/sql/sp_cache.h @@ -0,0 +1,107 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_CACHE_H_ +#define _SP_CACHE_H_ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +class sp_head; +class sp_cache; + +/* Initialize the SP caching once at startup */ +void sp_cache_init(); + +/* Clear the cache *cp and set *cp to NULL */ +void sp_cache_clear(sp_cache **cp); + +/* Insert an SP to cache. If 'cp' points to NULL, it's set to a new cache */ +void sp_cache_insert(sp_cache **cp, sp_head *sp); + +/* Lookup an SP in cache */ +sp_head *sp_cache_lookup(sp_cache **cp, sp_name *name); + +/* Remove an SP from cache. Returns true if something was removed */ +bool sp_cache_remove(sp_cache **cp, sp_name *name); + +/* Invalidate a cache */ +void sp_cache_invalidate(); + + +/* + * + * The cache class. Don't use this directly, use the C API above + * + */ + +class sp_cache +{ +public: + + ulong version; + + sp_cache(); + + ~sp_cache(); + + void + init(); + + void + cleanup(); + + inline void + insert(sp_head *sp) + { + my_hash_insert(&m_hashtable, (const byte *)sp); + } + + inline sp_head * + lookup(char *name, uint namelen) + { + return (sp_head *)hash_search(&m_hashtable, (const byte *)name, namelen); + } + + inline bool + remove(char *name, uint namelen) + { + sp_head *sp= lookup(name, namelen); + + if (sp) + { + hash_delete(&m_hashtable, (byte *)sp); + return TRUE; + } + return FALSE; + } + + inline void + remove_all() + { + cleanup(); + init(); + } + +private: + + HASH m_hashtable; + +}; // class sp_cache + +#endif /* _SP_CACHE_H_ */ diff --git a/sql/sp_head.cc b/sql/sp_head.cc new file mode 100644 index 00000000000..41955f12b5d --- /dev/null +++ b/sql/sp_head.cc @@ -0,0 +1,1428 @@ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "sql_acl.h" +#include "sp_head.h" +#include "sp.h" +#include "sp_pcontext.h" +#include "sp_rcontext.h" + +Item_result +sp_map_result_type(enum enum_field_types type) +{ + switch (type) + { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + case MYSQL_TYPE_INT24: + return INT_RESULT; + case MYSQL_TYPE_DECIMAL: + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + return REAL_RESULT; + default: + return STRING_RESULT; + } +} + +/* Evaluate a (presumed) func item. Always returns an item, the parameter +** if nothing else. 
+*/ +Item * +sp_eval_func_item(THD *thd, Item *it, enum enum_field_types type) +{ + DBUG_ENTER("sp_eval_func_item"); + it= it->this_item(); + DBUG_PRINT("info", ("type: %d", type)); + + if (!it->fixed && it->fix_fields(thd, 0, &it)) + { + DBUG_PRINT("info", ("fix_fields() failed")); + DBUG_RETURN(it); // Shouldn't happen? + } + + /* QQ How do we do this? Is there some better way? */ + if (type == MYSQL_TYPE_NULL) + it= new Item_null(); + else + { + switch (sp_map_result_type(type)) { + case INT_RESULT: + { + longlong i= it->val_int(); + + if (it->null_value) + { + DBUG_PRINT("info", ("INT_RESULT: null")); + it= new Item_null(); + } + else + { + DBUG_PRINT("info", ("INT_RESULT: %d", i)); + it= new Item_int(it->val_int()); + } + break; + } + case REAL_RESULT: + { + double d= it->val(); + + if (it->null_value) + { + DBUG_PRINT("info", ("REAL_RESULT: null")); + it= new Item_null(); + } + else + { + /* There's some difference between Item::new_item() and the + * constructor; the former crashes, the latter works... weird. */ + uint8 decimals= it->decimals; + uint32 max_length= it->max_length; + DBUG_PRINT("info", ("REAL_RESULT: %g", d)); + it= new Item_real(it->val()); + it->decimals= decimals; + it->max_length= max_length; + } + break; + } + default: + { + char buffer[MAX_FIELD_WIDTH]; + String tmp(buffer, sizeof(buffer), it->collation.collation); + String *s= it->val_str(&tmp); + + if (it->null_value) + { + DBUG_PRINT("info", ("default result: null")); + it= new Item_null(); + } + else + { + DBUG_PRINT("info",("default result: %*s",s->length(),s->c_ptr_quick())); + it= new Item_string(thd->strmake(s->c_ptr_quick(), s->length()), + s->length(), it->collation.collation); + } + break; + } + } + } + + DBUG_RETURN(it); +} + + +/* + * + * sp_name + * + */ + +void +sp_name::init_qname(THD *thd) +{ + m_qname.length= m_db.length+m_name.length+1; + m_qname.str= alloc_root(&thd->mem_root, m_qname.length+1); + sprintf(m_qname.str, "%*s.%*s", + m_db.length, (m_db.length ? m_db.str : ""), + m_name.length, m_name.str); +} + +sp_name * +sp_name_current_db_new(THD *thd, LEX_STRING name) +{ + sp_name *qname; + + if (! 
thd->db) + qname= new sp_name(name); + else + { + LEX_STRING db; + + db.length= strlen(thd->db); + db.str= thd->strmake(thd->db, db.length); + qname= new sp_name(db, name); + } + qname->init_qname(thd); + return qname; +} + + +/* ------------------------------------------------------------------ */ + + +/* + * + * sp_head + * + */ + +void * +sp_head::operator new(size_t size) +{ + DBUG_ENTER("sp_head::operator new"); + MEM_ROOT own_root; + sp_head *sp; + + bzero((char *)&own_root, sizeof(own_root)); + init_alloc_root(&own_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC); + sp= (sp_head *)alloc_root(&own_root, size); + sp->mem_root= own_root; + DBUG_PRINT("info", ("mem_root 0x%lx", (ulong) &sp->mem_root)); + DBUG_RETURN(sp); +} + +void +sp_head::operator delete(void *ptr, size_t size) +{ + DBUG_ENTER("sp_head::operator delete"); + MEM_ROOT own_root; + sp_head *sp= (sp_head *)ptr; + + memcpy(&own_root, (const void *)&sp->mem_root, sizeof(MEM_ROOT)); + DBUG_PRINT("info", ("mem_root 0x%lx moved to 0x%lx", + (ulong) &sp->mem_root, (ulong) &own_root)); + free_root(&own_root, MYF(0)); + + DBUG_VOID_RETURN; +} + + +sp_head::sp_head() + :Item_arena((bool)FALSE), m_returns_cs(NULL), m_has_return(FALSE), + m_simple_case(FALSE), m_multi_results(FALSE) +{ + DBUG_ENTER("sp_head::sp_head"); + + m_backpatch.empty(); + m_lex.empty(); + DBUG_VOID_RETURN; +} + + +void +sp_head::init(LEX *lex) +{ + DBUG_ENTER("sp_head::init"); + + lex->spcont= m_pcont= new sp_pcontext(); + my_init_dynamic_array(&m_instr, sizeof(sp_instr *), 16, 8); + m_param_begin= m_param_end= m_returns_begin= m_returns_end= m_body_begin= 0; + m_qname.str= m_db.str= m_name.str= m_params.str= m_retstr.str= + m_body.str= m_defstr.str= 0; + m_qname.length= m_db.length= m_name.length= m_params.length= + m_retstr.length= m_body.length= m_defstr.length= 0; + m_returns_cs= NULL; + DBUG_VOID_RETURN; +} + +void +sp_head::init_strings(THD *thd, LEX *lex, sp_name *name) +{ + DBUG_ENTER("sp_head::init_strings"); + /* During parsing, we must use thd->mem_root */ + MEM_ROOT *root= &thd->mem_root; + + DBUG_PRINT("info", ("name: %*.s%*s", + name->m_db.length, name->m_db.str, + name->m_name.length, name->m_name.str)); + /* We have to copy strings to get them into the right memroot */ + if (name->m_db.length == 0) + { + m_db.length= (thd->db ? strlen(thd->db) : 0); + m_db.str= strmake_root(root, (thd->db ? thd->db : ""), m_db.length); + } + else + { + m_db.length= name->m_db.length; + m_db.str= strmake_root(root, name->m_db.str, name->m_db.length); + } + m_name.length= name->m_name.length; + m_name.str= strmake_root(root, name->m_name.str, name->m_name.length); + + if (name->m_qname.length == 0) + name->init_qname(thd); + m_qname.length= name->m_qname.length; + m_qname.str= strmake_root(root, name->m_qname.str, m_qname.length); + + m_params.length= m_param_end- m_param_begin; + m_params.str= strmake_root(root, + (char *)m_param_begin, m_params.length); + if (m_returns_begin && m_returns_end) + { + /* QQ KLUDGE: We can't seem to cut out just the type in the parser + (without the RETURNS), so we'll have to do it here. :-( + Furthermore, if there's a character type as well, it's not include + (beyond the m_returns_end pointer), in which case we need + m_returns_cs. */ + char *p= (char *)m_returns_begin+strspn((char *)m_returns_begin,"\t\n\r "); + p+= strcspn(p, "\t\n\r "); + p+= strspn(p, "\t\n\r "); + if (p < (char *)m_returns_end) + m_returns_begin= (uchar *)p; + /* While we're at it, trim the end too. 
*/ + p= (char *)m_returns_end-1; + while (p > (char *)m_returns_begin && + (*p == '\t' || *p == '\n' || *p == '\r' || *p == ' ')) + p-= 1; + m_returns_end= (uchar *)p+1; + if (m_returns_cs) + { + String s((char *)m_returns_begin, m_returns_end - m_returns_begin, + system_charset_info); + + s.append(' '); + s.append(m_returns_cs->csname); + m_retstr.length= s.length(); + m_retstr.str= strmake_root(root, s.ptr(), m_retstr.length); + } + else + { + m_retstr.length= m_returns_end - m_returns_begin; + m_retstr.str= strmake_root(root, + (char *)m_returns_begin, m_retstr.length); + } + } + m_body.length= lex->end_of_query - m_body_begin; + m_body.str= strmake_root(root, (char *)m_body_begin, m_body.length); + m_defstr.length= lex->end_of_query - lex->buf; + m_defstr.str= strmake_root(root, (char *)lex->buf, m_defstr.length); + DBUG_VOID_RETURN; +} + +int +sp_head::create(THD *thd) +{ + DBUG_ENTER("sp_head::create"); + int ret; + + DBUG_PRINT("info", ("type: %d name: %s params: %s body: %s", + m_type, m_name.str, m_params.str, m_body.str)); +#ifndef DBUG_OFF + String s; + sp_instr *i; + uint ip= 0; + while ((i = get_instr(ip))) + { + char buf[8]; + + sprintf(buf, "%4u: ", ip); + s.append(buf); + i->print(&s); + s.append('\n'); + ip+= 1; + } + s.append('\0'); + DBUG_PRINT("info", ("Code %s\n%s", m_qname.str, s.ptr())); +#endif + + if (m_type == TYPE_ENUM_FUNCTION) + ret= sp_create_function(thd, this); + else + ret= sp_create_procedure(thd, this); + + DBUG_RETURN(ret); +} + +sp_head::~sp_head() +{ + destroy(); + if (m_thd) + restore_thd_mem_root(m_thd); +} + +void +sp_head::destroy() +{ + DBUG_ENTER("sp_head::destroy"); + DBUG_PRINT("info", ("name: %s", m_name.str)); + sp_instr *i; + LEX *lex; + + for (uint ip = 0 ; (i = get_instr(ip)) ; ip++) + delete i; + delete_dynamic(&m_instr); + m_pcont->destroy(); + free_items(free_list); + while ((lex= (LEX *)m_lex.pop())) + { + if (lex != &m_thd->main_lex) // We got interrupted and have lex'es left + delete lex; + } + DBUG_VOID_RETURN; +} + +int +sp_head::execute(THD *thd) +{ + DBUG_ENTER("sp_head::execute"); + char olddb[128]; + bool dbchanged; + sp_rcontext *ctx= thd->spcont; + int ret= 0; + uint ip= 0; + +#ifndef EMBEDDED_LIBRARY + if (check_stack_overrun(thd, olddb)) + { + DBUG_RETURN(-1); + } +#endif + + dbchanged= FALSE; + if ((ret= sp_use_new_db(thd, m_db.str, olddb, sizeof(olddb), 0, &dbchanged))) + goto done; + + if (ctx) + ctx->clear_handler(); + thd->query_error= 0; + thd->current_arena= this; + do + { + sp_instr *i; + uint hip; // Handler ip + + i = get_instr(ip); // Returns NULL when we're done. + if (i == NULL) + break; + DBUG_PRINT("execute", ("Instruction %u", ip)); + ret= i->execute(thd, &ip); + if (i->free_list) + cleanup_items(i->free_list); + // Check if an exception has occurred and a handler has been found + // Note: We havo to check even if ret==0, since warnings (and some + // errors don't return a non-zero value. 
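+ // A matching CONTINUE handler saves the variables and pushes the current ip (so hreturn can resume); any found handler then gets control by jumping to its code.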
+ if (!thd->killed && ctx) + { + uint hf; + + switch (ctx->found_handler(&hip, &hf)) + { + case SP_HANDLER_NONE: + break; + case SP_HANDLER_CONTINUE: + ctx->save_variables(hf); + ctx->push_hstack(ip); + // Fall through + default: + ip= hip; + ret= 0; + ctx->clear_handler(); + continue; + } + } + } while (ret == 0 && !thd->killed && !thd->query_error && + !thd->net.report_error); + + done: + DBUG_PRINT("info", ("ret=%d killed=%d query_error=%d", + ret, thd->killed, thd->query_error)); + + if (thd->current_arena) + cleanup_items(thd->current_arena->free_list); + thd->current_arena= 0; + + if (thd->killed || thd->query_error || thd->net.report_error) + ret= -1; + /* If the DB has changed, the pointer has changed too, but the + original thd->db will then have been freed */ + if (dbchanged) + { + if (! thd->killed) + ret= sp_change_db(thd, olddb, 0); + } + DBUG_RETURN(ret); +} + + +int +sp_head::execute_function(THD *thd, Item **argp, uint argcount, Item **resp) +{ + DBUG_ENTER("sp_head::execute_function"); + DBUG_PRINT("info", ("function %s", m_name.str)); + uint csize = m_pcont->max_framesize(); + uint params = m_pcont->params(); + uint hmax = m_pcont->handlers(); + uint cmax = m_pcont->cursors(); + sp_rcontext *octx = thd->spcont; + sp_rcontext *nctx = NULL; + uint i; + int ret; + + if (argcount != params) + { + // Need to use my_printf_error here, or it will not terminate the + // invoking query properly. + my_printf_error(ER_SP_WRONG_NO_OF_ARGS, ER(ER_SP_WRONG_NO_OF_ARGS), MYF(0), + "FUNCTION", m_name.str, params, argcount); + DBUG_RETURN(-1); + } + + // QQ Should have some error checking here? (types, etc...) + nctx= new sp_rcontext(csize, hmax, cmax); + for (i= 0 ; i < params && i < argcount ; i++) + { + sp_pvar_t *pvar = m_pcont->find_pvar(i); + + nctx->push_item(sp_eval_func_item(thd, *argp++, pvar->type)); + } + // Close tables opened for subselect in argument list + close_thread_tables(thd); + + // The rest of the frame are local variables which are all IN. + // Default all variables to null (those with default clauses will + // be set by an set instruction). + { + Item_null *nit= NULL; // Re-use this, and only create if needed + for (; i < csize ; i++) + { + if (! nit) + nit= new Item_null(); + nctx->push_item(nit); + } + } + thd->spcont= nctx; + + ret= execute(thd); + if (ret == 0) + { + Item *it= nctx->get_result(); + + if (it) + *resp= it; + else + { + my_printf_error(ER_SP_NORETURNEND, ER(ER_SP_NORETURNEND), MYF(0), + m_name.str); + ret= -1; + } + } + + nctx->pop_all_cursors(); // To avoid memory leaks after an error + thd->spcont= octx; + DBUG_RETURN(ret); +} + +int +sp_head::execute_procedure(THD *thd, List<Item> *args) +{ + DBUG_ENTER("sp_head::execute_procedure"); + DBUG_PRINT("info", ("procedure %s", m_name.str)); + int ret; + uint csize = m_pcont->max_framesize(); + uint params = m_pcont->params(); + uint hmax = m_pcont->handlers(); + uint cmax = m_pcont->cursors(); + sp_rcontext *octx = thd->spcont; + sp_rcontext *nctx = NULL; + my_bool tmp_octx = FALSE; // True if we have allocated a temporary octx + + if (args->elements != params) + { + net_printf(thd, ER_SP_WRONG_NO_OF_ARGS, "PROCEDURE", m_name.str, + params, args->elements); + DBUG_RETURN(-1); + } + + if (csize > 0 || hmax > 0 || cmax > 0) + { + Item_null *nit= NULL; // Re-use this, and only create if needed + uint i; + List_iterator_fast<Item> li(*args); + Item *it; + + nctx= new sp_rcontext(csize, hmax, cmax); + if (! 
octx) + { // Create a temporary old context + octx= new sp_rcontext(csize, hmax, cmax); + tmp_octx= TRUE; + } + // QQ: Should do type checking? + for (i = 0 ; (it= li++) && i < params ; i++) + { + sp_pvar_t *pvar = m_pcont->find_pvar(i); + + if (! pvar) + nctx->set_oindex(i, -1); // Shouldn't happen + else + { + if (pvar->mode == sp_param_out) + { + if (! nit) + nit= new Item_null(); + nctx->push_item(nit); // OUT + } + else + nctx->push_item(sp_eval_func_item(thd, it,pvar->type)); // IN or INOUT + // Note: If it's OUT or INOUT, it must be a variable. + // QQ: We can check for global variables here, or should we do it + // while parsing? + if (pvar->mode == sp_param_in) + nctx->set_oindex(i, -1); // IN + else // OUT or INOUT + nctx->set_oindex(i, static_cast<Item_splocal *>(it)->get_offset()); + } + } + // Close tables opened for subselect in argument list + close_thread_tables(thd); + + // The rest of the frame are local variables which are all IN. + // Default all variables to null (those with default clauses will + // be set by an set instruction). + for (; i < csize ; i++) + { + if (! nit) + nit= new Item_null(); + nctx->push_item(nit); + } + thd->spcont= nctx; + } + + ret= execute(thd); + + // Don't copy back OUT values if we got an error + if (ret) + { + if (thd->net.report_error) + send_error(thd, 0, NullS); + } + else if (csize > 0) + { + List_iterator_fast<Item> li(*args); + Item *it; + + // Copy back all OUT or INOUT values to the previous frame, or + // set global user variables + for (uint i = 0 ; (it= li++) && i < params ; i++) + { + int oi = nctx->get_oindex(i); + + if (oi >= 0) + { + if (! tmp_octx) + octx->set_item(nctx->get_oindex(i), nctx->get_item(i)); + else + { + // QQ Currently we just silently ignore non-user-variable arguments. + // We should check this during parsing, when setting up the call + // above + if (it->type() == Item::FUNC_ITEM) + { + Item_func *fi= static_cast<Item_func*>(it); + + if (fi->functype() == Item_func::GUSERVAR_FUNC) + { // A global user variable + Item *item= nctx->get_item(i); + Item_func_set_user_var *suv; + Item_func_get_user_var *guv= + static_cast<Item_func_get_user_var*>(fi); + + suv= new Item_func_set_user_var(guv->get_name(), item); + suv->fix_fields(thd, NULL, &item); + suv->fix_length_and_dec(); + suv->check(); + suv->update(); + } + } + } + } + } + } + + if (tmp_octx) + octx= NULL; + if (nctx) + nctx->pop_all_cursors(); // To avoid memory leaks after an error + thd->spcont= octx; + + DBUG_RETURN(ret); +} + + +// Reset lex during parsing, before we parse a sub statement. +void +sp_head::reset_lex(THD *thd) +{ + DBUG_ENTER("sp_head::reset_lex"); + LEX *sublex; + LEX *oldlex= thd->lex; + + (void)m_lex.push_front(oldlex); + thd->lex= sublex= new st_lex; + /* Reset most stuff. The length arguments doesn't matter here. */ + lex_start(thd, oldlex->buf, oldlex->end_of_query - oldlex->ptr); + sublex->yylineno= oldlex->yylineno; + /* We must reset ptr and end_of_query again */ + sublex->ptr= oldlex->ptr; + sublex->end_of_query= oldlex->end_of_query; + sublex->tok_start= oldlex->tok_start; + /* And keep the SP stuff too */ + sublex->sphead= oldlex->sphead; + sublex->spcont= oldlex->spcont; + mysql_init_query(thd, true); // Only init lex + sublex->sp_lex_in_use= FALSE; + DBUG_VOID_RETURN; +} + +// Restore lex during parsing, after we have parsed a sub statement. +void +sp_head::restore_lex(THD *thd) +{ + DBUG_ENTER("sp_head::restore_lex"); + LEX *sublex= thd->lex; + LEX *oldlex= (LEX *)m_lex.pop(); + SELECT_LEX *sl; + + if (! 
oldlex) + return; // Nothing to restore + + // Update some state in the old one first + oldlex->ptr= sublex->ptr; + oldlex->next_state= sublex->next_state; + + // Collect some data from the sub statement lex. + sp_merge_funs(oldlex, sublex); +#ifdef NOT_USED_NOW + // QQ We're not using this at the moment. + if (sublex.sql_command == SQLCOM_CALL) + { + // It would be slightly faster to keep the list sorted, but we need + // an "insert before" method to do that. + char *proc= sublex.udf.name.str; + + List_iterator_fast<char *> li(m_calls); + char **it; + + while ((it= li++)) + if (my_strcasecmp(system_charset_info, proc, *it) == 0) + break; + if (! it) + m_calls.push_back(&proc); + + } + // Merge used tables + // QQ ...or just open tables in thd->open_tables? + // This is not entirerly clear at the moment, but for now, we collect + // tables here. + for (sl= sublex.all_selects_list ; + sl ; + sl= sl->next_select()) + { + for (TABLE_LIST *tables= sl->get_table_list() ; + tables ; + tables= tables->next) + { + List_iterator_fast<char *> li(m_tables); + char **tb; + + while ((tb= li++)) + if (my_strcasecmp(system_charset_info, tables->real_name, *tb) == 0) + break; + if (! tb) + m_tables.push_back(&tables->real_name); + } + } +#endif + if (! sublex->sp_lex_in_use) + delete sublex; + thd->lex= oldlex; + DBUG_VOID_RETURN; +} + +void +sp_head::push_backpatch(sp_instr *i, sp_label_t *lab) +{ + bp_t *bp= (bp_t *)sql_alloc(sizeof(bp_t)); + + if (bp) + { + bp->lab= lab; + bp->instr= i; + (void)m_backpatch.push_front(bp); + } +} + +void +sp_head::backpatch(sp_label_t *lab) +{ + bp_t *bp; + uint dest= instructions(); + List_iterator_fast<bp_t> li(m_backpatch); + + while ((bp= li++)) + if (bp->lab == lab) + { + sp_instr_jump *i= static_cast<sp_instr_jump *>(bp->instr); + + i->set_destination(dest); + } +} + +void +sp_head::set_info(char *definer, uint definerlen, + longlong created, longlong modified, + st_sp_chistics *chistics, ulong sql_mode) +{ + char *p= strchr(definer, '@'); + uint len; + + if (! p) + p= definer; // Weird... 
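+ /* Split the 'user@host' definer string into its user and host parts */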
+ len= p-definer; + m_definer_user.str= strmake_root(&mem_root, definer, len); + m_definer_user.length= len; + len= definerlen-len-1; + m_definer_host.str= strmake_root(&mem_root, p+1, len); + m_definer_host.length= len; + m_created= created; + m_modified= modified; + m_chistics= (st_sp_chistics *)alloc_root(&mem_root, sizeof(st_sp_chistics)); + memcpy(m_chistics, chistics, sizeof(st_sp_chistics)); + if (m_chistics->comment.length == 0) + m_chistics->comment.str= 0; + else + m_chistics->comment.str= strmake_root(&mem_root, + m_chistics->comment.str, + m_chistics->comment.length); + m_sql_mode= sql_mode; +} + +void +sp_head::reset_thd_mem_root(THD *thd) +{ + DBUG_ENTER("sp_head::reset_thd_mem_root"); + m_thd_root= thd->mem_root; + thd->mem_root= mem_root; + DBUG_PRINT("info", ("mem_root 0x%lx moved to thd mem root 0x%lx", + (ulong) &mem_root, (ulong) &thd->mem_root)); + free_list= thd->free_list; // Keep the old list + thd->free_list= NULL; // Start a new one + /* Copy the db, since substatements will point to it */ + m_thd_db= thd->db; + thd->db= strmake_root(&thd->mem_root, thd->db, thd->db_length); + m_thd= thd; + DBUG_VOID_RETURN; +} + +void +sp_head::restore_thd_mem_root(THD *thd) +{ + DBUG_ENTER("sp_head::restore_thd_mem_root"); + Item *flist= free_list; // The old list + set_item_arena(thd); // Get new fre_list and mem_root + DBUG_PRINT("info", ("mem_root 0x%lx returned from thd mem root 0x%lx", + (ulong) &mem_root, (ulong) &thd->mem_root)); + thd->free_list= flist; // Restore the old one + thd->db= m_thd_db; // Restore the original db pointer + thd->mem_root= m_thd_root; + m_thd= NULL; + DBUG_VOID_RETURN; +} + + +int +sp_head::show_create_procedure(THD *thd) +{ + Protocol *protocol= thd->protocol; + char buff[2048]; + String buffer(buff, sizeof(buff), system_charset_info); + int res; + List<Item> field_list; + ulong old_sql_mode; + sys_var *sql_mode_var; + byte *sql_mode_str; + ulong sql_mode_len; + + DBUG_ENTER("sp_head::show_create_procedure"); + DBUG_PRINT("info", ("procedure %s", m_name.str)); + + old_sql_mode= thd->variables.sql_mode; + thd->variables.sql_mode= m_sql_mode; + sql_mode_var= find_sys_var("SQL_MODE", 8); + if (sql_mode_var) + { + sql_mode_str= sql_mode_var->value_ptr(thd, OPT_SESSION, 0); + sql_mode_len= strlen(sql_mode_str); + } + + field_list.push_back(new Item_empty_string("Procedure", NAME_LEN)); + if (sql_mode_var) + field_list.push_back(new Item_empty_string("sql_mode", sql_mode_len)); + // 1024 is for not to confuse old clients + field_list.push_back(new Item_empty_string("Create Procedure", + max(buffer.length(), 1024))); + if (protocol->send_fields(&field_list, 1)) + { + res= 1; + goto done; + } + protocol->prepare_for_resend(); + protocol->store(m_name.str, m_name.length, system_charset_info); + if (sql_mode_var) + protocol->store(sql_mode_str, sql_mode_len, system_charset_info); + protocol->store(m_defstr.str, m_defstr.length, system_charset_info); + res= protocol->write(); + send_eof(thd); + + done: + thd->variables.sql_mode= old_sql_mode; + DBUG_RETURN(res); +} + + +/* + Add instruction to SP + + SYNOPSIS + sp_head::add_instr() + instr Instruction +*/ + +void sp_head::add_instr(sp_instr *instr) +{ + instr->free_list= m_thd->free_list; + m_thd->free_list= 0; + insert_dynamic(&m_instr, (gptr)&instr); +} + + +int +sp_head::show_create_function(THD *thd) +{ + Protocol *protocol= thd->protocol; + char buff[2048]; + String buffer(buff, sizeof(buff), system_charset_info); + int res; + List<Item> field_list; + ulong old_sql_mode; + sys_var *sql_mode_var; + 
byte *sql_mode_str; + ulong sql_mode_len; + + DBUG_ENTER("sp_head::show_create_function"); + DBUG_PRINT("info", ("procedure %s", m_name.str)); + + old_sql_mode= thd->variables.sql_mode; + thd->variables.sql_mode= m_sql_mode; + sql_mode_var= find_sys_var("SQL_MODE", 8); + if (sql_mode_var) + { + sql_mode_str= sql_mode_var->value_ptr(thd, OPT_SESSION, 0); + sql_mode_len= strlen(sql_mode_str); + } + + field_list.push_back(new Item_empty_string("Function",NAME_LEN)); + if (sql_mode_var) + field_list.push_back(new Item_empty_string("sql_mode", sql_mode_len)); + field_list.push_back(new Item_empty_string("Create Function", + max(buffer.length(),1024))); + if (protocol->send_fields(&field_list, 1)) + { + res= 1; + goto done; + } + protocol->prepare_for_resend(); + protocol->store(m_name.str, m_name.length, system_charset_info); + if (sql_mode_var) + protocol->store(sql_mode_str, sql_mode_len, system_charset_info); + protocol->store(m_defstr.str, m_defstr.length, system_charset_info); + res= protocol->write(); + send_eof(thd); + + done: + thd->variables.sql_mode= old_sql_mode; + DBUG_RETURN(res); +} +// ------------------------------------------------------------------ + +// +// sp_instr_stmt +// +sp_instr_stmt::~sp_instr_stmt() +{ + if (m_lex) + delete m_lex; +} + +int +sp_instr_stmt::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_stmt::execute"); + DBUG_PRINT("info", ("command: %d", m_lex->sql_command)); + int res= exec_stmt(thd, m_lex); + *nextp = m_ip+1; + DBUG_RETURN(res); +} + +void +sp_instr_stmt::print(String *str) +{ + str->reserve(12); + str->append("stmt "); + str->qs_append((uint)m_lex->sql_command); +} + + +int +sp_instr_stmt::exec_stmt(THD *thd, LEX *lex) +{ + LEX *olex; // The other lex + SELECT_LEX *sl; + int res; + + olex= thd->lex; // Save the other lex + thd->lex= lex; // Use my own lex + thd->lex->thd = thd; // QQ Not reentrant! 
+ thd->lex->unit.thd= thd; // QQ Not reentrant + thd->free_list= NULL; + + VOID(pthread_mutex_lock(&LOCK_thread_count)); + thd->query_id= query_id++; + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + + reset_stmt_for_execute(thd, lex); + + res= mysql_execute_command(thd); + + lex->unit.cleanup(); + if (thd->lock || thd->open_tables || thd->derived_tables) + { + thd->proc_info="closing tables"; + close_thread_tables(thd); /* Free tables */ + } + + thd->lex= olex; // Restore the other lex + + return res; +} + +// +// sp_instr_set +// +int +sp_instr_set::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_set::execute"); + DBUG_PRINT("info", ("offset: %u", m_offset)); + thd->spcont->set_item(m_offset, sp_eval_func_item(thd, m_value, m_type)); + *nextp = m_ip+1; + DBUG_RETURN(0); +} + +void +sp_instr_set::print(String *str) +{ + str->reserve(12); + str->append("set "); + str->qs_append(m_offset); + str->append(' '); + m_value->print(str); +} + +// +// sp_instr_jump +// +int +sp_instr_jump::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_jump::execute"); + DBUG_PRINT("info", ("destination: %u", m_dest)); + + *nextp= m_dest; + DBUG_RETURN(0); +} + +void +sp_instr_jump::print(String *str) +{ + str->reserve(12); + str->append("jump "); + str->qs_append(m_dest); +} + +// +// sp_instr_jump_if +// +int +sp_instr_jump_if::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_jump_if::execute"); + DBUG_PRINT("info", ("destination: %u", m_dest)); + Item *it= sp_eval_func_item(thd, m_expr, MYSQL_TYPE_TINY); + + if (it->val_int()) + *nextp = m_dest; + else + *nextp = m_ip+1; + DBUG_RETURN(0); +} + +void +sp_instr_jump_if::print(String *str) +{ + str->reserve(12); + str->append("jump_if "); + str->qs_append(m_dest); + str->append(' '); + m_expr->print(str); +} + +// +// sp_instr_jump_if_not +// +int +sp_instr_jump_if_not::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_jump_if_not::execute"); + DBUG_PRINT("info", ("destination: %u", m_dest)); + Item *it= sp_eval_func_item(thd, m_expr, MYSQL_TYPE_TINY); + + if (! 
it->val_int()) + *nextp = m_dest; + else + *nextp = m_ip+1; + DBUG_RETURN(0); +} + +void +sp_instr_jump_if_not::print(String *str) +{ + str->reserve(16); + str->append("jump_if_not "); + str->qs_append(m_dest); + str->append(' '); + m_expr->print(str); +} + +// +// sp_instr_freturn +// +int +sp_instr_freturn::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_freturn::execute"); + thd->spcont->set_result(sp_eval_func_item(thd, m_value, m_type)); + *nextp= UINT_MAX; + DBUG_RETURN(0); +} + +void +sp_instr_freturn::print(String *str) +{ + str->reserve(12); + str->append("freturn "); + str->qs_append((uint)m_type); + str->append(' '); + m_value->print(str); +} + +// +// sp_instr_hpush_jump +// +int +sp_instr_hpush_jump::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_hpush_jump::execute"); + List_iterator_fast<sp_cond_type_t> li(m_cond); + sp_cond_type_t *p; + + while ((p= li++)) + thd->spcont->push_handler(p, m_handler, m_type, m_frame); + + *nextp= m_dest; + DBUG_RETURN(0); +} + +void +sp_instr_hpush_jump::print(String *str) +{ + str->reserve(32); + str->append("hpush_jump "); + str->qs_append(m_dest); + str->append(" t="); + str->qs_append(m_type); + str->append(" f="); + str->qs_append(m_frame); + str->append(" h="); + str->qs_append(m_handler); +} + +// +// sp_instr_hpop +// +int +sp_instr_hpop::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_hpop::execute"); + thd->spcont->pop_handlers(m_count); + *nextp= m_ip+1; + DBUG_RETURN(0); +} + +void +sp_instr_hpop::print(String *str) +{ + str->reserve(12); + str->append("hpop "); + str->qs_append(m_count); +} + +// +// sp_instr_hreturn +// +int +sp_instr_hreturn::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_hreturn::execute"); + thd->spcont->restore_variables(m_frame); + *nextp= thd->spcont->pop_hstack(); + DBUG_RETURN(0); +} + +void +sp_instr_hreturn::print(String *str) +{ + str->reserve(12); + str->append("hreturn "); + str->qs_append(m_frame); +} + +// +// sp_instr_cpush +// +int +sp_instr_cpush::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_cpush::execute"); + thd->spcont->push_cursor(m_lex); + *nextp= m_ip+1; + DBUG_RETURN(0); +} + +sp_instr_cpush::~sp_instr_cpush() +{ + if (m_lex) + delete m_lex; +} + +void +sp_instr_cpush::print(String *str) +{ + str->append("cpush"); +} + +// +// sp_instr_cpop +// +int +sp_instr_cpop::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_cpop::execute"); + thd->spcont->pop_cursors(m_count); + *nextp= m_ip+1; + DBUG_RETURN(0); +} + +void +sp_instr_cpop::print(String *str) +{ + str->reserve(12); + str->append("cpop "); + str->qs_append(m_count); +} + +// +// sp_instr_copen +// +int +sp_instr_copen::execute(THD *thd, uint *nextp) +{ + sp_cursor *c= thd->spcont->get_cursor(m_cursor); + int res; + DBUG_ENTER("sp_instr_copen::execute"); + + if (! c) + res= -1; + else + { + LEX *lex= c->pre_open(thd); + + if (! lex) + res= -1; + else + res= exec_stmt(thd, lex); + c->post_open(thd, (lex ? TRUE : FALSE)); + } + + *nextp= m_ip+1; + DBUG_RETURN(res); +} + +void +sp_instr_copen::print(String *str) +{ + str->reserve(12); + str->append("copen "); + str->qs_append(m_cursor); +} + +// +// sp_instr_cclose +// +int +sp_instr_cclose::execute(THD *thd, uint *nextp) +{ + sp_cursor *c= thd->spcont->get_cursor(m_cursor); + int res; + DBUG_ENTER("sp_instr_cclose::execute"); + + if (! 
c) + res= -1; + else + res= c->close(thd); + *nextp= m_ip+1; + DBUG_RETURN(res); +} + +void +sp_instr_cclose::print(String *str) +{ + str->reserve(12); + str->append("cclose "); + str->qs_append(m_cursor); +} + +// +// sp_instr_cfetch +// +int +sp_instr_cfetch::execute(THD *thd, uint *nextp) +{ + sp_cursor *c= thd->spcont->get_cursor(m_cursor); + int res; + DBUG_ENTER("sp_instr_cfetch::execute"); + + if (! c) + res= -1; + else + res= c->fetch(thd, &m_varlist); + *nextp= m_ip+1; + DBUG_RETURN(res); +} + +void +sp_instr_cfetch::print(String *str) +{ + List_iterator_fast<struct sp_pvar> li(m_varlist); + sp_pvar_t *pv; + + str->reserve(12); + str->append("cfetch "); + str->qs_append(m_cursor); + while ((pv= li++)) + { + str->reserve(8); + str->append(' '); + str->qs_append(pv->offset); + } +} + +// +// sp_instr_error +// +int +sp_instr_error::execute(THD *thd, uint *nextp) +{ + DBUG_ENTER("sp_instr_error::execute"); + + my_error(m_errcode, MYF(0)); + *nextp= m_ip+1; + DBUG_RETURN(-1); +} + +void +sp_instr_error::print(String *str) +{ + str->reserve(12); + str->append("error "); + str->qs_append(m_errcode); +} + +/* ------------------------------------------------------------------ */ + + +// +// Security context swapping +// + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +void +sp_change_security_context(THD *thd, sp_head *sp, st_sp_security_context *ctxp) +{ + ctxp->changed= (sp->m_chistics->suid != IS_NOT_SUID && + (strcmp(sp->m_definer_user.str, thd->priv_user) || + strcmp(sp->m_definer_host.str, thd->priv_host))); + + if (ctxp->changed) + { + ctxp->master_access= thd->master_access; + ctxp->db_access= thd->db_access; + ctxp->priv_user= thd->priv_user; + strncpy(ctxp->priv_host, thd->priv_host, sizeof(ctxp->priv_host)); + ctxp->user= thd->user; + ctxp->host= thd->host; + ctxp->ip= thd->ip; + + /* Change these just to do the acl_getroot_no_password */ + thd->user= sp->m_definer_user.str; + thd->host= thd->ip = sp->m_definer_host.str; + + if (acl_getroot_no_password(thd)) + { // Failed, run as invoker for now + ctxp->changed= FALSE; + thd->master_access= ctxp->master_access; + thd->db_access= ctxp->db_access; + thd->priv_user= ctxp->priv_user; + strncpy(thd->priv_host, ctxp->priv_host, sizeof(thd->priv_host)); + } + + /* Restore these immediately */ + thd->user= ctxp->user; + thd->host= ctxp->host; + thd->ip= ctxp->ip; + } +} + +void +sp_restore_security_context(THD *thd, sp_head *sp, st_sp_security_context *ctxp) +{ + if (ctxp->changed) + { + ctxp->changed= FALSE; + thd->master_access= ctxp->master_access; + thd->db_access= ctxp->db_access; + thd->priv_user= ctxp->priv_user; + strncpy(thd->priv_host, ctxp->priv_host, sizeof(thd->priv_host)); + } +} + +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ diff --git a/sql/sp_head.h b/sql/sp_head.h new file mode 100644 index 00000000000..fd6ecfd7320 --- /dev/null +++ b/sql/sp_head.h @@ -0,0 +1,729 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
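/*
  Editor's note -- hedged usage sketch, not part of the patch.  The
  sp_change_security_context()/sp_restore_security_context() pair above
  implements the SQL SECURITY DEFINER switch; the actual call sites are not
  shown in this hunk, but the intended pattern is a paired change/restore
  around execution of the routine body:

    st_sp_security_context save_ctx;
  #ifndef NO_EMBEDDED_ACCESS_CHECKS
    sp_change_security_context(thd, sp, &save_ctx);   // become the definer if the routine is SUID
  #endif
    // ... run the procedure/function body ...
  #ifndef NO_EMBEDDED_ACCESS_CHECKS
    sp_restore_security_context(thd, sp, &save_ctx);  // drop back to the invoker's privileges
  #endif

  Note that only the privilege fields (master_access, db_access, priv_user,
  priv_host) stay swapped; thd->user/host/ip are restored immediately after
  acl_getroot_no_password() has run.
*/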
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_HEAD_H_ +#define _SP_HEAD_H_ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +#include <stddef.h> + +// Values for the type enum. This reflects the order of the enum declaration +// in the CREATE TABLE command. +#define TYPE_ENUM_FUNCTION 1 +#define TYPE_ENUM_PROCEDURE 2 + +Item_result +sp_map_result_type(enum enum_field_types type); + +struct sp_label; +class sp_instr; +struct sp_cond_type; +struct sp_pvar; + +class sp_name : public Sql_alloc +{ +public: + + LEX_STRING m_db; + LEX_STRING m_name; + LEX_STRING m_qname; + + sp_name(LEX_STRING name) + : m_name(name) + { + m_db.str= m_qname.str= 0; + m_db.length= m_qname.length= 0; + } + + sp_name(LEX_STRING db, LEX_STRING name) + : m_db(db), m_name(name) + { + m_qname.str= 0; + m_qname.length= 0; + } + + // Init. the qualified name from the db and name. + void init_qname(THD *thd); // thd for memroot allocation + + ~sp_name() + {} +}; + +sp_name * +sp_name_current_db_new(THD *thd, LEX_STRING name); + + +class sp_head :private Item_arena +{ + sp_head(const sp_head &); /* Prevent use of these */ + void operator=(sp_head &); + +public: + + int m_type; // TYPE_ENUM_FUNCTION or TYPE_ENUM_PROCEDURE + enum enum_field_types m_returns; // For FUNCTIONs only + CHARSET_INFO *m_returns_cs; // For FUNCTIONs only + my_bool m_has_return; // For FUNCTIONs only + my_bool m_simple_case; // TRUE if parsing simple case, FALSE otherwise + my_bool m_multi_results; // TRUE if a procedure with SELECT(s) + uint m_old_cmq; // Old CLIENT_MULTI_QUERIES value + st_sp_chistics *m_chistics; + ulong m_sql_mode; // For SHOW CREATE +#if NOT_USED_NOW + // QQ We're not using this at the moment. + List<char *> m_calls; // Called procedures. + List<char *> m_tables; // Used tables. +#endif + LEX_STRING m_qname; // db.name + LEX_STRING m_db; + LEX_STRING m_name; + LEX_STRING m_params; + LEX_STRING m_retstr; // For FUNCTIONs only + LEX_STRING m_body; + LEX_STRING m_defstr; + LEX_STRING m_definer_user; + LEX_STRING m_definer_host; + longlong m_created; + longlong m_modified; + // Pointers set during parsing + uchar *m_param_begin, *m_param_end, *m_returns_begin, *m_returns_end, + *m_body_begin; + + static void * + operator new(size_t size); + + static void + operator delete(void *ptr, size_t size); + + sp_head(); + + // Initialize after we have reset mem_root + void + init(LEX *lex); + + // Initialize strings after parsing header + void + init_strings(THD *thd, LEX *lex, sp_name *name); + + int + create(THD *thd); + + virtual ~sp_head(); + + // Free memory + void + destroy(); + + int + execute_function(THD *thd, Item **args, uint argcount, Item **resp); + + int + execute_procedure(THD *thd, List<Item> *args); + + int + show_create_procedure(THD *thd); + + int + show_create_function(THD *thd); + + void + add_instr(sp_instr *instr); + + inline uint + instructions() + { + return m_instr.elements; + } + + inline sp_instr * + last_instruction() + { + sp_instr *i; + + get_dynamic(&m_instr, (gptr)&i, m_instr.elements-1); + return i; + } + + // Resets lex in 'thd' and keeps a copy of the old one. + void + reset_lex(THD *thd); + + // Restores lex in 'thd' from our copy, but keeps some status from the + // one in 'thd', like ptr, tables, fields, etc. 
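/*
  Editor's note -- hedged sketch, not part of the patch.  reset_lex() and
  restore_lex() let every statement inside a routine body be parsed into its
  own LEX; the caller (presumably the SQL parser) is not shown in this hunk,
  but the intended pattern is:

    sp->reset_lex(thd);                        // save thd->lex, switch to a fresh one
    // ... parse one statement of the body into thd->lex ...
    sp_instr_stmt *i= new sp_instr_stmt(ip);   // 'ip' = next instruction index (assumed)
    i->set_lex(thd->lex);                      // the instruction keeps that statement's lex
    sp->add_instr(i);
    sp->restore_lex(thd);                      // back to the routine-level lex
*/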
+ void + restore_lex(THD *thd); + + // Put the instruction on the backpatch list, associated with the label. + void + push_backpatch(sp_instr *, struct sp_label *); + + // Update all instructions with this label in the backpatch list to + // the current position. + void + backpatch(struct sp_label *); + + char *name(uint *lenp = 0) const + { + if (lenp) + *lenp= m_name.length; + return m_name.str; + } + + char *create_string(THD *thd, ulong *lenp); + + inline Item_result result() + { + return sp_map_result_type(m_returns); + } + + void set_info(char *definer, uint definerlen, + longlong created, longlong modified, + st_sp_chistics *chistics, ulong sql_mode); + + void reset_thd_mem_root(THD *thd); + + void restore_thd_mem_root(THD *thd); + + +private: + + MEM_ROOT m_thd_root; // Temp. store for thd's mem_root + THD *m_thd; // Set if we have reset mem_root + char *m_thd_db; // Original thd->db pointer + + sp_pcontext *m_pcont; // Parse context + List<LEX> m_lex; // Temp. store for the other lex + DYNAMIC_ARRAY m_instr; // The "instructions" + typedef struct + { + struct sp_label *lab; + sp_instr *instr; + } bp_t; + List<bp_t> m_backpatch; // Instructions needing backpatching + + inline sp_instr * + get_instr(uint i) + { + sp_instr *ip; + + if (i < m_instr.elements) + get_dynamic(&m_instr, (gptr)&ip, i); + else + ip= NULL; + return ip; + } + + int + execute(THD *thd); + +}; // class sp_head : private Item_arena + + +// +// "Instructions"... +// + +class sp_instr : public Sql_alloc +{ + sp_instr(const sp_instr &); /* Prevent use of these */ + void operator=(sp_instr &); + +public: + + Item *free_list; // My Items + + // Should give each a name or type code for debugging purposes? + sp_instr(uint ip) + :Sql_alloc(), free_list(0), m_ip(ip) + {} + + virtual ~sp_instr() + { free_items(free_list); } + + // Execute this instruction. '*nextp' will be set to the index of the next + // instruction to execute. (For most instructions this will be the + // instruction following this one.) + // Returns 0 on success, non-zero if some error occurred. + virtual int execute(THD *thd, uint *nextp) = 0; + + virtual void print(String *str) = 0; + +protected: + + uint m_ip; // My index + +}; // class sp_instr : public Sql_alloc + + +// +// Call out to some prepared SQL statement.
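/*
  Editor's note -- illustrative sketch, not part of the patch.  The private
  sp_head::execute() declared above (body not shown in this hunk) is what
  drives instructions through the '*nextp' contract: each instruction returns
  the index of its successor.  A self-contained dispatch loop of that shape,
  using a simplified stand-in for sp_instr:

    struct MiniInstr { virtual int execute(unsigned *nextp)= 0; virtual ~MiniInstr() {} };

    int run(MiniInstr **code, unsigned count)
    {
      unsigned ip= 0;
      int res= 0;
      while (res == 0 && ip < count)
        res= code[ip]->execute(&ip);   // each instruction chooses the next ip itself
      return res;
    }

  A jump sets *nextp to m_dest, a linear instruction sets it to m_ip+1, and
  sp_instr_freturn sets it to UINT_MAX to leave the loop -- exactly what the
  subclasses in sp_head.cc above do.
*/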
+// +class sp_instr_stmt : public sp_instr +{ + sp_instr_stmt(const sp_instr_stmt &); /* Prevent use of these */ + void operator=(sp_instr_stmt &); + +public: + + sp_instr_stmt(uint ip) + : sp_instr(ip), m_lex(NULL) + {} + + virtual ~sp_instr_stmt(); + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + + inline void + set_lex(LEX *lex) + { + m_lex= lex; + } + + inline LEX * + get_lex() + { + return m_lex; + } + +protected: + + int exec_stmt(THD *thd, LEX *lex); // Execute a statement + +private: + + LEX *m_lex; // My own lex + +}; // class sp_instr_stmt : public sp_instr + + +class sp_instr_set : public sp_instr +{ + sp_instr_set(const sp_instr_set &); /* Prevent use of these */ + void operator=(sp_instr_set &); + +public: + + sp_instr_set(uint ip, uint offset, Item *val, enum enum_field_types type) + : sp_instr(ip), m_offset(offset), m_value(val), m_type(type) + {} + + virtual ~sp_instr_set() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_offset; // Frame offset + Item *m_value; + enum enum_field_types m_type; // The declared type + +}; // class sp_instr_set : public sp_instr + + +class sp_instr_jump : public sp_instr +{ + sp_instr_jump(const sp_instr_jump &); /* Prevent use of these */ + void operator=(sp_instr_jump &); + +public: + + sp_instr_jump(uint ip) + : sp_instr(ip), m_dest(0) + {} + + sp_instr_jump(uint ip, uint dest) + : sp_instr(ip), m_dest(dest) + {} + + virtual ~sp_instr_jump() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + + virtual void + set_destination(uint dest) + { + if (m_dest == 0) // Don't reset + m_dest= dest; + } + +protected: + + int m_dest; // Where we will go + +}; // class sp_instr_jump : public sp_instr + + +class sp_instr_jump_if : public sp_instr_jump +{ + sp_instr_jump_if(const sp_instr_jump_if &); /* Prevent use of these */ + void operator=(sp_instr_jump_if &); + +public: + + sp_instr_jump_if(uint ip, Item *i) + : sp_instr_jump(ip), m_expr(i) + {} + + sp_instr_jump_if(uint ip, Item *i, uint dest) + : sp_instr_jump(ip, dest), m_expr(i) + {} + + virtual ~sp_instr_jump_if() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + Item *m_expr; // The condition + +}; // class sp_instr_jump_if : public sp_instr_jump + + +class sp_instr_jump_if_not : public sp_instr_jump +{ + sp_instr_jump_if_not(const sp_instr_jump_if_not &); /* Prevent use of these */ + void operator=(sp_instr_jump_if_not &); + +public: + + sp_instr_jump_if_not(uint ip, Item *i) + : sp_instr_jump(ip), m_expr(i) + {} + + sp_instr_jump_if_not(uint ip, Item *i, uint dest) + : sp_instr_jump(ip, dest), m_expr(i) + {} + + virtual ~sp_instr_jump_if_not() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + Item *m_expr; // The condition + +}; // class sp_instr_jump_if_not : public sp_instr_jump + + +class sp_instr_freturn : public sp_instr +{ + sp_instr_freturn(const sp_instr_freturn &); /* Prevent use of these */ + void operator=(sp_instr_freturn &); + +public: + + sp_instr_freturn(uint ip, Item *val, enum enum_field_types type) + : sp_instr(ip), m_value(val), m_type(type) + {} + + virtual ~sp_instr_freturn() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +protected: + + Item *m_value; + enum enum_field_types m_type; + +}; // class sp_instr_freturn : public sp_instr + + +class sp_instr_hpush_jump : public 
sp_instr_jump +{ + sp_instr_hpush_jump(const sp_instr_hpush_jump &); /* Prevent use of these */ + void operator=(sp_instr_hpush_jump &); + +public: + + sp_instr_hpush_jump(uint ip, int htype, uint fp) + : sp_instr_jump(ip), m_type(htype), m_frame(fp) + { + m_handler= ip+1; + m_cond.empty(); + } + + virtual ~sp_instr_hpush_jump() + { + m_cond.empty(); + } + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + + inline void add_condition(struct sp_cond_type *cond) + { + m_cond.push_front(cond); + } + +private: + + int m_type; // Handler type + uint m_frame; + uint m_handler; // Location of handler + List<struct sp_cond_type> m_cond; + +}; // class sp_instr_hpush_jump : public sp_instr_jump + + +class sp_instr_hpop : public sp_instr +{ + sp_instr_hpop(const sp_instr_hpop &); /* Prevent use of these */ + void operator=(sp_instr_hpop &); + +public: + + sp_instr_hpop(uint ip, uint count) + : sp_instr(ip), m_count(count) + {} + + virtual ~sp_instr_hpop() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_count; + +}; // class sp_instr_hpop : public sp_instr + + +class sp_instr_hreturn : public sp_instr +{ + sp_instr_hreturn(const sp_instr_hreturn &); /* Prevent use of these */ + void operator=(sp_instr_hreturn &); + +public: + + sp_instr_hreturn(uint ip, uint fp) + : sp_instr(ip), m_frame(fp) + {} + + virtual ~sp_instr_hreturn() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_frame; + +}; // class sp_instr_hreturn : public sp_instr + + +class sp_instr_cpush : public sp_instr +{ + sp_instr_cpush(const sp_instr_cpush &); /* Prevent use of these */ + void operator=(sp_instr_cpush &); + +public: + + sp_instr_cpush(uint ip, LEX *lex) + : sp_instr(ip), m_lex(lex) + {} + + virtual ~sp_instr_cpush(); + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + LEX *m_lex; + +}; // class sp_instr_cpush : public sp_instr + + +class sp_instr_cpop : public sp_instr +{ + sp_instr_cpop(const sp_instr_cpop &); /* Prevent use of these */ + void operator=(sp_instr_cpop &); + +public: + + sp_instr_cpop(uint ip, uint count) + : sp_instr(ip), m_count(count) + {} + + virtual ~sp_instr_cpop() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_count; + +}; // class sp_instr_cpop : public sp_instr + + +class sp_instr_copen : public sp_instr_stmt +{ + sp_instr_copen(const sp_instr_copen &); /* Prevent use of these */ + void operator=(sp_instr_copen &); + +public: + + sp_instr_copen(uint ip, uint c) + : sp_instr_stmt(ip), m_cursor(c) + {} + + virtual ~sp_instr_copen() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_cursor; // Stack index + +}; // class sp_instr_copen : public sp_instr_stmt + + +class sp_instr_cclose : public sp_instr +{ + sp_instr_cclose(const sp_instr_cclose &); /* Prevent use of these */ + void operator=(sp_instr_cclose &); + +public: + + sp_instr_cclose(uint ip, uint c) + : sp_instr(ip), m_cursor(c) + {} + + virtual ~sp_instr_cclose() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + uint m_cursor; + +}; // class sp_instr_cclose : public sp_instr + + +class sp_instr_cfetch : public sp_instr +{ + sp_instr_cfetch(const sp_instr_cfetch &); /* Prevent use of these */ + void operator=(sp_instr_cfetch &); + +public: + + 
sp_instr_cfetch(uint ip, uint c) + : sp_instr(ip), m_cursor(c) + { + m_varlist.empty(); + } + + virtual ~sp_instr_cfetch() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + + void add_to_varlist(struct sp_pvar *var) + { + m_varlist.push_back(var); + } + +private: + + uint m_cursor; + List<struct sp_pvar> m_varlist; + +}; // class sp_instr_cfetch : public sp_instr + + +class sp_instr_error : public sp_instr +{ + sp_instr_error(const sp_instr_error &); /* Prevent use of these */ + void operator=(sp_instr_error &); + +public: + + sp_instr_error(uint ip, int errcode) + : sp_instr(ip), m_errcode(errcode) + {} + + virtual ~sp_instr_error() + {} + + virtual int execute(THD *thd, uint *nextp); + + virtual void print(String *str); + +private: + + int m_errcode; + +}; // class sp_instr_error : public sp_instr + + +struct st_sp_security_context +{ + bool changed; + uint master_access; + uint db_access; + char *priv_user; + char priv_host[MAX_HOSTNAME]; + char *user; + char *host; + char *ip; +}; + +#ifndef NO_EMBEDDED_ACCESS_CHECKS +void +sp_change_security_context(THD *thd, sp_head *sp, st_sp_security_context *ctxp); +void +sp_restore_security_context(THD *thd, sp_head *sp,st_sp_security_context *ctxp); +#endif /* NO_EMBEDDED_ACCESS_CHECKS */ + +#endif /* _SP_HEAD_H_ */ diff --git a/sql/sp_pcontext.cc b/sql/sp_pcontext.cc new file mode 100644 index 00000000000..03333a2da72 --- /dev/null +++ b/sql/sp_pcontext.cc @@ -0,0 +1,247 @@ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma implementation +#endif + +#if defined(WIN32) || defined(__WIN__) +#undef SAFEMALLOC /* Problems with threads */ +#endif + +#include "mysql_priv.h" +#include "sp_pcontext.h" +#include "sp_head.h" + +sp_pcontext::sp_pcontext() + : Sql_alloc(), m_params(0), m_framesize(0), m_handlers(0), m_cursmax(0) +{ + VOID(my_init_dynamic_array(&m_pvar, sizeof(sp_pvar_t *), 16, 8)); + VOID(my_init_dynamic_array(&m_cond, sizeof(sp_cond_type_t *), 16, 8)); + VOID(my_init_dynamic_array(&m_cursor, sizeof(LEX_STRING), 16, 8)); + VOID(my_init_dynamic_array(&m_scopes, sizeof(sp_scope_t), 16, 8)); + m_label.empty(); +} + +void +sp_pcontext::destroy() +{ + delete_dynamic(&m_pvar); + delete_dynamic(&m_cond); + delete_dynamic(&m_cursor); + delete_dynamic(&m_scopes); + m_label.empty(); +} + +void +sp_pcontext::push_scope() +{ + sp_scope_t s; + + s.vars= m_pvar.elements; + s.conds= m_cond.elements; + s.curs= m_cursor.elements; + insert_dynamic(&m_scopes, (gptr)&s); +} + +void +sp_pcontext::pop_scope() +{ + (void)pop_dynamic(&m_scopes); +} + + +/* This does a linear search (from newer to older variables, in case +** we have shadowed names). +** It's possible to have a more efficient allocation and search method, +** but it might not be worth it. 
The typical number of parameters and +** variables will in most cases be low (a handfull). +** ...and, this is only called during parsing. +*/ +sp_pvar_t * +sp_pcontext::find_pvar(LEX_STRING *name, my_bool scoped) +{ + uint i = m_pvar.elements; + uint limit; + + if (! scoped || m_scopes.elements == 0) + limit= 0; + else + { + sp_scope_t s; + + get_dynamic(&m_scopes, (gptr)&s, m_scopes.elements-1); + limit= s.vars; + } + + while (i-- > limit) + { + sp_pvar_t *p; + + get_dynamic(&m_pvar, (gptr)&p, i); + if (my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)p->name.str, p->name.length) == 0) + { + return p; + } + } + return NULL; +} + +void +sp_pcontext::push_pvar(LEX_STRING *name, enum enum_field_types type, + sp_param_mode_t mode) +{ + sp_pvar_t *p= (sp_pvar_t *)sql_alloc(sizeof(sp_pvar_t)); + + if (p) + { + if (m_pvar.elements == m_framesize) + m_framesize += 1; + p->name.str= name->str; + p->name.length= name->length; + p->type= type; + p->mode= mode; + p->offset= m_pvar.elements; + p->isset= (mode == sp_param_out ? FALSE : TRUE); + p->dflt= NULL; + insert_dynamic(&m_pvar, (gptr)&p); + } +} + +sp_label_t * +sp_pcontext::push_label(char *name, uint ip) +{ + sp_label_t *lab = (sp_label_t *)sql_alloc(sizeof(sp_label_t)); + + if (lab) + { + lab->name= name; + lab->ip= ip; + lab->isbegin= FALSE; + m_label.push_front(lab); + } + return lab; +} + +sp_label_t * +sp_pcontext::find_label(char *name) +{ + List_iterator_fast<sp_label_t> li(m_label); + sp_label_t *lab; + + while ((lab= li++)) + if (my_strcasecmp(system_charset_info, name, lab->name) == 0) + return lab; + + return NULL; +} + +void +sp_pcontext::push_cond(LEX_STRING *name, sp_cond_type_t *val) +{ + sp_cond_t *p= (sp_cond_t *)sql_alloc(sizeof(sp_cond_t)); + + if (p) + { + p->name.str= name->str; + p->name.length= name->length; + p->val= val; + insert_dynamic(&m_cond, (gptr)&p); + } +} + +/* + * See comment for find_pvar() above + */ +sp_cond_type_t * +sp_pcontext::find_cond(LEX_STRING *name, my_bool scoped) +{ + uint i = m_cond.elements; + uint limit; + + if (! scoped || m_scopes.elements == 0) + limit= 0; + else + { + sp_scope_t s; + + get_dynamic(&m_scopes, (gptr)&s, m_scopes.elements-1); + limit= s.conds; + } + + while (i-- > limit) + { + sp_cond_t *p; + + get_dynamic(&m_cond, (gptr)&p, i); + if (my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)p->name.str, p->name.length) == 0) + { + return p->val; + } + } + return NULL; +} + +void +sp_pcontext::push_cursor(LEX_STRING *name) +{ + LEX_STRING n; + + n.str= name->str; + n.length= name->length; + insert_dynamic(&m_cursor, (gptr)&n); + if (m_cursor.elements > m_cursmax) + m_cursmax= m_cursor.elements; +} + +/* + * See comment for find_pvar() above + */ +my_bool +sp_pcontext::find_cursor(LEX_STRING *name, uint *poff, my_bool scoped) +{ + uint i = m_cursor.elements; + uint limit; + + if (! 
scoped || m_scopes.elements == 0) + limit= 0; + else + { + sp_scope_t s; + + get_dynamic(&m_scopes, (gptr)&s, m_scopes.elements-1); + limit= s.curs; + } + + while (i-- > limit) + { + LEX_STRING n; + + get_dynamic(&m_cursor, (gptr)&n, i); + if (my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)n.str, n.length) == 0) + { + *poff= i; + return TRUE; + } + } + return FALSE; +} diff --git a/sql/sp_pcontext.h b/sql/sp_pcontext.h new file mode 100644 index 00000000000..a82cefa2e42 --- /dev/null +++ b/sql/sp_pcontext.h @@ -0,0 +1,266 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_PCONTEXT_H_ +#define _SP_PCONTEXT_H_ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +typedef enum +{ + sp_param_in, + sp_param_out, + sp_param_inout +} sp_param_mode_t; + +typedef struct sp_pvar +{ + LEX_STRING name; + enum enum_field_types type; + sp_param_mode_t mode; + uint offset; // Offset in current frame + my_bool isset; + Item *dflt; +} sp_pvar_t; + +typedef struct sp_label +{ + char *name; + uint ip; // Instruction index + my_bool isbegin; // For ITERATE error checking +} sp_label_t; + +typedef struct sp_cond_type +{ + enum { number, state, warning, notfound, exception } type; + char sqlstate[6]; + uint mysqlerr; +} sp_cond_type_t; + +typedef struct sp_cond +{ + LEX_STRING name; + sp_cond_type_t *val; +} sp_cond_t; + +typedef struct sp_scope +{ + uint vars, conds, curs; +} sp_scope_t; + +class sp_pcontext : public Sql_alloc +{ + sp_pcontext(const sp_pcontext &); /* Prevent use of these */ + void operator=(sp_pcontext &); + + public: + + sp_pcontext(); + + // Free memory + void + destroy(); + + // For error checking of duplicate things + void + push_scope(); + + void + pop_scope(); + + // + // Parameters and variables + // + + inline uint + max_framesize() + { + return m_framesize; + } + + inline uint + current_framesize() + { + return m_pvar.elements; + } + + inline uint + params() + { + return m_params; + } + + // Set the number of parameters to the current esize + inline void + set_params() + { + m_params= m_pvar.elements; + } + + inline void + set_type(uint i, enum enum_field_types type) + { + sp_pvar_t *p= find_pvar(i); + + if (p) + p->type= type; + } + + inline void + set_isset(uint i, my_bool val) + { + sp_pvar_t *p= find_pvar(i); + + if (p) + p->isset= val; + } + + inline void + set_default(uint i, Item *it) + { + sp_pvar_t *p= find_pvar(i); + + if (p) + p->dflt= it; + } + + void + push_pvar(LEX_STRING *name, enum enum_field_types type, sp_param_mode_t mode); + + // Pop the last 'num' slots of the frame + inline void + pop_pvar(uint num = 1) + { + while (num--) + pop_dynamic(&m_pvar); + } + + // Find by name + sp_pvar_t * + find_pvar(LEX_STRING *name, my_bool scoped=0); + + // Find by index + sp_pvar_t * + find_pvar(uint i) 
+ { + sp_pvar_t *p; + + if (i < m_pvar.elements) + get_dynamic(&m_pvar, (gptr)&p, i); + else + p= NULL; + return p; + } + + // + // Labels + // + + sp_label_t * + push_label(char *name, uint ip); + + sp_label_t * + find_label(char *name); + + inline sp_label_t * + last_label() + { + return m_label.head(); + } + + inline sp_label_t * + pop_label() + { + return m_label.pop(); + } + + // + // Conditions + // + + void + push_cond(LEX_STRING *name, sp_cond_type_t *val); + + inline void + pop_cond(uint num) + { + while (num--) + pop_dynamic(&m_cond); + } + + sp_cond_type_t * + find_cond(LEX_STRING *name, my_bool scoped=0); + + // + // Handlers + // + + inline void + add_handler() + { + m_handlers+= 1; + } + + inline uint + handlers() + { + return m_handlers; + } + + // + // Cursors + // + + void + push_cursor(LEX_STRING *name); + + my_bool + find_cursor(LEX_STRING *name, uint *poff, my_bool scoped=0); + + inline void + pop_cursor(uint num) + { + while (num--) + pop_dynamic(&m_cursor); + } + + inline uint + cursors() + { + return m_cursmax; + } + +private: + + uint m_params; // The number of parameters + uint m_framesize; // The maximum framesize + uint m_handlers; // The total number of handlers + uint m_cursmax; // The maximum number of cursors + + DYNAMIC_ARRAY m_pvar; // Parameters/variables + DYNAMIC_ARRAY m_cond; // Conditions + DYNAMIC_ARRAY m_cursor; // Cursors + DYNAMIC_ARRAY m_scopes; // For error checking + + List<sp_label_t> m_label; // The label list + +}; // class sp_pcontext : public Sql_alloc + + +#endif /* _SP_PCONTEXT_H_ */ diff --git a/sql/sp_rcontext.cc b/sql/sp_rcontext.cc new file mode 100644 index 00000000000..0b2b20fe3b3 --- /dev/null +++ b/sql/sp_rcontext.cc @@ -0,0 +1,247 @@ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
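/*
  Editor's note -- illustrative sketch, not part of the patch.  How the parse
  context above lays out the frame: push_pvar() gives each parameter/variable
  the next offset and bumps m_framesize to the high-water mark, push_scope()/
  pop_scope() bracket BEGIN..END blocks, and find_pvar() searches newest-first
  so an inner declaration shadows an outer one with the same name.  A hedged
  call sequence ('ctx', 'name_a', 'name_x' are hypothetical):

    ctx.push_pvar(&name_a, MYSQL_TYPE_LONG, sp_param_in);  // parameter "a" -> offset 0
    ctx.push_scope();                                      // outer BEGIN
    ctx.push_pvar(&name_x, MYSQL_TYPE_LONG, sp_param_in);  // outer "x"     -> offset 1
    ctx.push_scope();                                      // inner BEGIN
    ctx.push_pvar(&name_x, MYSQL_TYPE_LONG, sp_param_in);  // inner "x"     -> offset 2
    ctx.find_pvar(&name_x);        // unscoped search: returns the inner "x" (offset 2)
    ctx.find_pvar(&name_x, TRUE);  // scoped: current scope only, for duplicate checks
    ctx.pop_pvar(1); ctx.pop_scope();                      // inner END
    // max_framesize() is now 3, which later sizes the runtime frame (sp_rcontext)
*/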
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma implementation +#endif + +#if defined(WIN32) || defined(__WIN__) +#undef SAFEMALLOC /* Problems with threads */ +#endif + +#include "mysql_priv.h" +#include "mysql.h" +#include "sp_head.h" +#include "sp_rcontext.h" +#include "sp_pcontext.h" + +sp_rcontext::sp_rcontext(uint fsize, uint hmax, uint cmax) + : m_count(0), m_fsize(fsize), m_result(NULL), m_hcount(0), m_hsp(0), + m_hfound(-1), m_ccount(0) +{ + m_frame= (Item **)sql_alloc(fsize * sizeof(Item*)); + m_outs= (int *)sql_alloc(fsize * sizeof(int)); + m_handler= (sp_handler_t *)sql_alloc(hmax * sizeof(sp_handler_t)); + m_hstack= (uint *)sql_alloc(hmax * sizeof(uint)); + m_cstack= (sp_cursor **)sql_alloc(cmax * sizeof(sp_cursor *)); + m_saved.empty(); +} + +void +sp_rcontext::set_item_eval(uint idx, Item *i, enum_field_types type) +{ + extern Item *sp_eval_func_item(THD *thd, Item *it, enum_field_types type); + + set_item(idx, sp_eval_func_item(current_thd, i, type)); +} + +int +sp_rcontext::find_handler(uint sql_errno) +{ + if (m_hfound >= 0) + return 1; // Already got one + + const char *sqlstate= mysql_errno_to_sqlstate(sql_errno); + int i= m_hcount, found= 0; + + while (!found && i--) + { + sp_cond_type_t *cond= m_handler[i].cond; + + switch (cond->type) + { + case sp_cond_type_t::number: + if (sql_errno == cond->mysqlerr) + found= 1; + break; + case sp_cond_type_t::state: + if (strcmp(sqlstate, cond->sqlstate) == 0) + found= 1; + break; + case sp_cond_type_t::warning: + if (sqlstate[0] == '0' && sqlstate[1] == '1') + found= 1; + break; + case sp_cond_type_t::notfound: + if (sqlstate[0] == '0' && sqlstate[1] == '2') + found= 1; + break; + case sp_cond_type_t::exception: + if (sqlstate[0] != '0' || sqlstate[1] > '2') + found= 1; + break; + } + } + if (found) + m_hfound= i; + return found; +} + +void +sp_rcontext::save_variables(uint fp) +{ + while (fp < m_count) + m_saved.push_front(m_frame[fp++]); +} + +void +sp_rcontext::restore_variables(uint fp) +{ + uint i= m_count; + + while (i-- > fp) + m_frame[i]= m_saved.pop(); +} + +void +sp_rcontext::push_cursor(LEX *lex) +{ + m_cstack[m_ccount++]= new sp_cursor(lex); +} + +void +sp_rcontext::pop_cursors(uint count) +{ + while (count--) + { + delete m_cstack[--m_ccount]; + } +} + + +/* + * + * sp_cursor + * + */ + +// We have split this in two to make it easy for sp_instr_copen +// to reuse the sp_instr::exec_stmt() code. +LEX * +sp_cursor::pre_open(THD *thd) +{ + if (m_isopen) + { + send_error(thd, ER_SP_CURSOR_ALREADY_OPEN); + return NULL; + } + + bzero((char *)&m_mem_root, sizeof(m_mem_root)); + init_alloc_root(&m_mem_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC); + if ((m_prot= new Protocol_cursor(thd, &m_mem_root)) == NULL) + return NULL; + + m_oprot= thd->protocol; // Save the original protocol + thd->protocol= m_prot; + + m_nseof= thd->net.no_send_eof; + thd->net.no_send_eof= TRUE; + return m_lex; +} + +void +sp_cursor::post_open(THD *thd, my_bool was_opened) +{ + thd->net.no_send_eof= m_nseof; // Restore the originals + thd->protocol= m_oprot; + if (was_opened) + { + m_isopen= was_opened; + m_current_row= m_prot->data; + } +} + +int +sp_cursor::close(THD *thd) +{ + if (! 
m_isopen) + { + send_error(thd, ER_SP_CURSOR_NOT_OPEN); + return -1; + } + destroy(); + return 0; +} + +void +sp_cursor::destroy() +{ + if (m_prot) + { + delete m_prot; + m_prot= NULL; + free_root(&m_mem_root, MYF(0)); + bzero((char *)&m_mem_root, sizeof(m_mem_root)); + } + m_isopen= FALSE; +} + +int +sp_cursor::fetch(THD *thd, List<struct sp_pvar> *vars) +{ + List_iterator_fast<struct sp_pvar> li(*vars); + sp_pvar_t *pv; + MYSQL_ROW row; + uint fldcount; + + if (! m_isopen) + { + send_error(thd, ER_SP_CURSOR_NOT_OPEN); + return -1; + } + if (m_current_row == NULL) + { + send_error(thd, ER_SP_FETCH_NO_DATA); + return -1; + } + + row= m_current_row->data; + for (fldcount= 0 ; (pv= li++) ; fldcount++) + { + Item *it; + const char *s; + + if (fldcount >= m_prot->get_field_count()) + { + send_error(thd, ER_SP_WRONG_NO_OF_FETCH_ARGS); + return -1; + } + s= row[fldcount]; + switch (sp_map_result_type(pv->type)) + { + case INT_RESULT: + it= new Item_int(s); + break; + case REAL_RESULT: + it= new Item_real(s, strlen(s)); + break; + default: + { + uint len= strlen(s); + it= new Item_string(thd->strmake(s, len), len, thd->db_charset); + break; + } + } + thd->spcont->set_item(pv->offset, it); + } + if (fldcount < m_prot->get_field_count()) + { + send_error(thd, ER_SP_WRONG_NO_OF_FETCH_ARGS); + return -1; + } + m_current_row= m_current_row->next; + return 0; +} diff --git a/sql/sp_rcontext.h b/sql/sp_rcontext.h new file mode 100644 index 00000000000..1b83d3518b6 --- /dev/null +++ b/sql/sp_rcontext.h @@ -0,0 +1,252 @@ +/* -*- C++ -*- */ +/* Copyright (C) 2002 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_RCONTEXT_H_ +#define _SP_RCONTEXT_H_ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +struct sp_cond_type; +class sp_cursor; +struct sp_pvar; + +#define SP_HANDLER_NONE 0 +#define SP_HANDLER_EXIT 1 +#define SP_HANDLER_CONTINUE 2 +#define SP_HANDLER_UNDO 3 + +typedef struct +{ + struct sp_cond_type *cond; + uint handler; // Location of handler + int type; + uint foffset; // Frame offset for the handlers declare level +} sp_handler_t; + +class sp_rcontext : public Sql_alloc +{ + sp_rcontext(const sp_rcontext &); /* Prevent use of these */ + void operator=(sp_rcontext &); + + public: + + sp_rcontext(uint fsize, uint hmax, uint cmax); + + ~sp_rcontext() + { + // Not needed? 
+ //sql_element_free(m_frame); + //m_saved.empty(); + } + + inline void + push_item(Item *i) + { + if (m_count < m_fsize) + m_frame[m_count++] = i; + } + + inline void + set_item(uint idx, Item *i) + { + if (idx < m_count) + m_frame[idx] = i; + } + + void + set_item_eval(uint idx, Item *i, enum_field_types type); + + inline Item * + get_item(uint idx) + { + return m_frame[idx]; + } + + inline void + set_oindex(uint idx, int oidx) + { + m_outs[idx] = oidx; + } + + inline int + get_oindex(uint idx) + { + return m_outs[idx]; + } + + inline void + set_result(Item *it) + { + m_result= it; + } + + inline Item * + get_result() + { + return m_result; + } + + inline void + push_handler(struct sp_cond_type *cond, uint h, int type, uint f) + { + m_handler[m_hcount].cond= cond; + m_handler[m_hcount].handler= h; + m_handler[m_hcount].type= type; + m_handler[m_hcount].foffset= f; + m_hcount+= 1; + } + + inline void + pop_handlers(uint count) + { + m_hcount-= count; + } + + // Returns 1 if a handler was found, 0 otherwise. + int + find_handler(uint sql_errno); + + // Returns handler type and sets *ip to location if one was found + inline int + found_handler(uint *ip, uint *fp) + { + if (m_hfound < 0) + return SP_HANDLER_NONE; + *ip= m_handler[m_hfound].handler; + *fp= m_handler[m_hfound].foffset; + return m_handler[m_hfound].type; + } + + // Clears the handler find state + inline void + clear_handler() + { + m_hfound= -1; + } + + inline void + push_hstack(uint h) + { + m_hstack[m_hsp++]= h; + } + + inline uint + pop_hstack() + { + return m_hstack[--m_hsp]; + } + + // Save variables starting at fp and up + void + save_variables(uint fp); + + // Restore variables down to fp + void + restore_variables(uint fp); + + void + push_cursor(LEX *lex); + + void + pop_cursors(uint count); + + void + pop_all_cursors() + { + pop_cursors(m_ccount); + } + + inline sp_cursor * + get_cursor(uint i) + { + return m_cstack[i]; + } + +private: + + uint m_count; + uint m_fsize; + Item **m_frame; + int *m_outs; + + Item *m_result; // For FUNCTIONs + + sp_handler_t *m_handler; + uint m_hcount; + uint *m_hstack; + uint m_hsp; + int m_hfound; // Set by find_handler; -1 if not found + List<Item> m_saved; // Saved variables + + sp_cursor **m_cstack; + uint m_ccount; + +}; // class sp_rcontext : public Sql_alloc + + +class sp_cursor : public Sql_alloc +{ +public: + + sp_cursor(LEX *lex) + : m_lex(lex), m_prot(NULL), m_isopen(0), m_current_row(NULL) + { + /* Empty */ + } + + virtual ~sp_cursor() + { + destroy(); + } + + // We have split this in two to make it easy for sp_instr_copen + // to reuse the sp_instr::exec_stmt() code. 
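/*
  Editor's note -- not part of the patch.  Putting the pieces above together:
  the OPEN path, as used by sp_instr_copen::execute() earlier in this patch, is

    LEX *lex= c->pre_open(thd);             // swap in a Protocol_cursor, suppress EOF
    if (lex)
      res= exec_stmt(thd, lex);             // run the cursor's SELECT; rows land in m_mem_root
    c->post_open(thd, lex ? TRUE : FALSE);  // restore the protocol, remember the first row

  after which fetch() walks m_current_row through the buffered MYSQL_ROWS list
  into the designated local variables, and close()/destroy() free m_mem_root.
*/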
+ LEX * + pre_open(THD *thd); + void + post_open(THD *thd, my_bool was_opened); + + int + close(THD *thd); + + inline my_bool + is_open() + { + return m_isopen; + } + + int + fetch(THD *, List<struct sp_pvar> *vars); + +private: + + MEM_ROOT m_mem_root; // My own mem_root + LEX *m_lex; + Protocol_cursor *m_prot; + my_bool m_isopen; + my_bool m_nseof; // Original no_send_eof + Protocol *m_oprot; // Original protcol + MYSQL_ROWS *m_current_row; + + void + destroy(); + +}; // class sp_cursor : public Sql_alloc + +#endif /* _SP_RCONTEXT_H_ */ diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index a03e371dd63..36926d4526d 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -140,7 +140,6 @@ my_bool acl_init(THD *org_thd, bool dont_read_acl_tables) MYSQL_LOCK *lock; my_bool return_val=1; bool check_no_resolve= specialflag & SPECIAL_NO_RESOLVE; - DBUG_ENTER("acl_init"); if (!acl_cache) @@ -153,6 +152,7 @@ my_bool acl_init(THD *org_thd, bool dont_read_acl_tables) } priv_version++; /* Privileges updated */ + mysql_proc_table_exists= 1; // Assume mysql.proc exists /* To be able to run this from boot, we allocate a temporary THD @@ -784,6 +784,86 @@ int acl_getroot(THD *thd, USER_RESOURCES *mqh, } +/* + * This is like acl_getroot() above, but it doesn't check password, + * and we don't care about the user resources. + * Used to get access rights for SQL SECURITY DEFINER invokation of + * stored procedures. + */ +int acl_getroot_no_password(THD *thd) +{ + ulong user_access= NO_ACCESS; + int res= 1; + uint i; + ACL_USER *acl_user= 0; + DBUG_ENTER("acl_getroot_no_password"); + + if (!initialized) + { + /* + here if mysqld's been started with --skip-grant-tables option. + */ + thd->priv_user= (char *) ""; // privileges for + *thd->priv_host= '\0'; // the user are unknown + thd->master_access= ~NO_ACCESS; // everything is allowed + DBUG_RETURN(0); + } + + VOID(pthread_mutex_lock(&acl_cache->lock)); + + thd->master_access= 0; + thd->db_access= 0; + + /* + Find acl entry in user database. + This is specially tailored to suit the check we do for CALL of + a stored procedure; thd->user is set to what is actually a + priv_user, which can be ''. + */ + for (i=0 ; i < acl_users.elements ; i++) + { + acl_user= dynamic_element(&acl_users,i,ACL_USER*); + if ((!acl_user->user && (!thd->user || !thd->user[0])) || + (acl_user->user && strcmp(thd->user, acl_user->user) == 0)) + { + if (compare_hostname(&acl_user->host, thd->host, thd->ip)) + { + res= 0; + break; + } + } + } + + if (acl_user) + { + for (i=0 ; i < acl_dbs.elements ; i++) + { + ACL_DB *acl_db= dynamic_element(&acl_dbs, i, ACL_DB*); + if (!acl_db->user || + (thd->user && thd->user[0] && !strcmp(thd->user, acl_db->user))) + { + if (compare_hostname(&acl_db->host, thd->host, thd->ip)) + { + if (!acl_db->db || (thd->db && !strcmp(acl_db->db, thd->db))) + { + thd->db_access= acl_db->access; + break; + } + } + } + } + thd->master_access= acl_user->access; + thd->priv_user= acl_user->user ? thd->user : (char *) ""; + + if (acl_user->host.hostname) + strmake(thd->priv_host, acl_user->host.hostname, MAX_HOSTNAME); + else + *thd->priv_host= 0; + } + VOID(pthread_mutex_unlock(&acl_cache->lock)); + DBUG_RETURN(res); +} + static byte* check_get_key(ACL_USER *buff,uint *length, my_bool not_used __attribute__((unused))) { @@ -1194,7 +1274,6 @@ bool change_password(THD *thd, const char *host, const char *user, acl_user->host.hostname ? 
acl_user->host.hostname : "", new_password)); thd->clear_error(); - mysql_update_log.write(thd, buff, query_length); Query_log_event qinfo(thd, buff, query_length, 0); mysql_bin_log.write(&qinfo); DBUG_RETURN(0); diff --git a/sql/sql_acl.h b/sql/sql_acl.h index a237b45e29c..2ea3b8f5628 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -141,6 +141,7 @@ ulong acl_get(const char *host, const char *ip, const char *user, const char *db, my_bool db_is_pattern); int acl_getroot(THD *thd, USER_RESOURCES *mqh, const char *passwd, uint passwd_len); +int acl_getroot_no_password(THD *thd); bool acl_check_host(const char *host, const char *ip); bool check_change_password(THD *thd, const char *host, const char *user); bool change_password(THD *thd, const char *host, const char *user, diff --git a/sql/sql_base.cc b/sql/sql_base.cc index b6d14092885..22f93bb05ba 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -2173,14 +2173,14 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, { if (!wild_num) return 0; - Statement *stmt= thd->current_statement, backup; + Item_arena *arena= thd->current_arena, backup; /* If we are in preparing prepared statement phase then we have change temporary mem_root to statement mem root to save changes of SELECT list */ - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); + if (arena) + thd->set_n_backup_item_arena(arena, &backup); reg2 Item *item; List_iterator<Item> it(fields); while ( wild_num && (item= it++)) @@ -2204,8 +2204,8 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, else if (insert_fields(thd,tables,((Item_field*) item)->db_name, ((Item_field*) item)->table_name, &it)) { - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); return (-1); } if (sum_func_list) @@ -2220,8 +2220,15 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, wild_num--; } } - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + { + /* make * substituting permanent */ + SELECT_LEX *select_lex= thd->lex->current_select; + select_lex->with_wild= 0; + select_lex->item_list= fields; + + thd->restore_backup_item_arena(arena, &backup); + } return 0; } @@ -2383,10 +2390,33 @@ insert_fields(THD *thd,TABLE_LIST *tables, const char *db_name, TABLE *natural_join_table= 0; thd->used_tables|=table->map; - if (!table->outer_join && - tables->natural_join && - !tables->natural_join->table->outer_join) - natural_join_table= tables->natural_join->table; + TABLE_LIST *embedded= tables; + TABLE_LIST *last= embedded; + TABLE_LIST *embedding; + + while ((embedding= embedded->embedding) && + embedding->join_list->elements != 1) + { + TABLE_LIST *next; + List_iterator_fast<TABLE_LIST> it(embedding->nested_join->join_list); + last= it++; + while ((next= it++)) + last= next; + if (last != tables) + break; + embedded= embedding; + } + + if (tables == last && + !embedded->outer_join && + embedded->natural_join && + !embedded->natural_join->outer_join) + { + embedding= embedded->natural_join; + while (embedding->nested_join) + embedding= embedding->nested_join->join_list.head(); + natural_join_table= embedding->table; + } while ((field = *ptr++)) { @@ -2434,136 +2464,166 @@ insert_fields(THD *thd,TABLE_LIST *tables, const char *db_name, int setup_conds(THD *thd,TABLE_LIST *tables,COND **conds) { table_map not_null_tables= 0; - Statement *stmt= thd->current_statement, backup; - + SELECT_LEX *select_lex= thd->lex->current_select; + Item_arena *arena= ((thd->current_arena && + 
!select_lex->conds_processed_with_permanent_arena) ? + thd->current_arena : + 0); + Item_arena backup; DBUG_ENTER("setup_conds"); + thd->set_query_id=1; - thd->lex->current_select->cond_count= 0; + select_lex->cond_count= 0; if (*conds) { thd->where="where clause"; if (!(*conds)->fixed && (*conds)->fix_fields(thd, tables, conds) || (*conds)->check_cols(1)) DBUG_RETURN(1); - not_null_tables= (*conds)->not_null_tables(); } /* Check if we are using outer joins */ for (TABLE_LIST *table=tables ; table ; table=table->next) { - if (table->on_expr) - { - /* Make a join an a expression */ - thd->where="on clause"; - - if (!table->on_expr->fixed && - table->on_expr->fix_fields(thd, tables, &table->on_expr) || - table->on_expr->check_cols(1)) - DBUG_RETURN(1); - thd->lex->current_select->cond_count++; - - /* - If it's a normal join or a LEFT JOIN which can be optimized away - add the ON/USING expression to the WHERE - */ - if (!table->outer_join || - ((table->table->map & not_null_tables) && - !(specialflag & SPECIAL_NO_NEW_FUNC))) + TABLE_LIST *embedded; + TABLE_LIST *embedding= table; + do + { + embedded= embedding; + if (embedded->on_expr) { - table->outer_join= 0; - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); - *conds= and_conds(*conds, table->on_expr); - table->on_expr=0; - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); - if ((*conds) && !(*conds)->fixed && - (*conds)->fix_fields(thd, tables, conds)) + /* Make a join an a expression */ + thd->where="on clause"; + if (embedded->on_expr->fix_fields(thd, tables, &embedded->on_expr) || + embedded->on_expr->check_cols(1)) DBUG_RETURN(1); + select_lex->cond_count++; } - } - if (table->natural_join) - { - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); - /* Make a join of all fields with have the same name */ - TABLE *t1= table->table; - TABLE *t2= table->natural_join->table; - Item_cond_and *cond_and= new Item_cond_and(); - if (!cond_and) // If not out of memory - goto err; - cond_and->top_level_item(); - - Field **t1_field, *t2_field; - for (t1_field= t1->field; (*t1_field); t1_field++) + if (embedded->natural_join) { - const char *t1_field_name= (*t1_field)->field_name; - uint not_used_field_index= NO_CACHED_FIELD_INDEX; + /* Make a join of all fields wich have the same name */ + TABLE_LIST *tab1= embedded; + TABLE_LIST *tab2= embedded->natural_join; + if (!(embedded->outer_join & JOIN_TYPE_RIGHT)) + { + while (tab1->nested_join) + { + TABLE_LIST *next; + List_iterator_fast<TABLE_LIST> it(tab1->nested_join->join_list); + tab1= it++; + while ((next= it++)) + tab1= next; + } + } + else + { + while (tab1->nested_join) + tab1= tab1->nested_join->join_list.head(); + } + if (embedded->outer_join & JOIN_TYPE_RIGHT) + { + while (tab2->nested_join) + { + TABLE_LIST *next; + List_iterator_fast<TABLE_LIST> it(tab2->nested_join->join_list); + tab2= it++; + while ((next= it++)) + tab2= next; + } + } + else + { + while (tab2->nested_join) + tab2= tab2->nested_join->join_list.head(); + } + + if (arena) + thd->set_n_backup_item_arena(arena, &backup); + TABLE *t1=tab1->table; + TABLE *t2=tab2->table; + Item_cond_and *cond_and=new Item_cond_and(); + if (!cond_and) // If not out of memory + DBUG_RETURN(1); + cond_and->top_level_item(); - if ((t2_field= find_field_in_table(thd, t2, t1_field_name, - strlen(t1_field_name), 0, 0, - ¬_used_field_index))) + Field **t1_field, *t2_field; + for (t1_field= t1->field; (*t1_field); t1_field++) { - Item_func_eq *tmp=new Item_func_eq(new Item_field(*t1_field), - new Item_field(t2_field)); - if 
(!tmp) - goto err; - /* Mark field used for table cache */ - (*t1_field)->query_id= t2_field->query_id= thd->query_id; - cond_and->list.push_back(tmp); - t1->used_keys.intersect((*t1_field)->part_of_key); - t2->used_keys.intersect(t2_field->part_of_key); + const char *t1_field_name= (*t1_field)->field_name; + uint not_used_field_index= NO_CACHED_FIELD_INDEX; + + if ((t2_field= find_field_in_table(thd, t2, t1_field_name, + strlen(t1_field_name), 0, 0, + ¬_used_field_index))) + { + Item_func_eq *tmp=new Item_func_eq(new Item_field(*t1_field), + new Item_field(t2_field)); + if (!tmp) + goto err; + /* Mark field used for table cache */ + (*t1_field)->query_id= t2_field->query_id= thd->query_id; + cond_and->list.push_back(tmp); + t1->used_keys.intersect((*t1_field)->part_of_key); + t2->used_keys.intersect(t2_field->part_of_key); + } } - } - thd->lex->current_select->cond_count+= cond_and->list.elements; + cond_and->used_tables_cache= t1->map | t2->map; + select_lex->cond_count+= cond_and->list.elements; - // to prevent natural join processing during PS re-execution - table->natural_join= 0; + // to prevent natural join processing during PS re-execution + embedding->natural_join= 0; - if (!table->outer_join) // Not left join - { - *conds= and_conds(*conds, cond_and); - // fix_fields() should be made with temporary memory pool - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); - if (*conds && !(*conds)->fixed) - { - if ((*conds)->fix_fields(thd, tables, conds)) - DBUG_RETURN(1); - } - } - else - { - table->on_expr= and_conds(table->on_expr, cond_and); - // fix_fields() should be made with temporary memory pool - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); - if (table->on_expr && !table->on_expr->fixed) - { - if (table->on_expr->fix_fields(thd, tables, &table->on_expr)) - DBUG_RETURN(1); - } + COND *on_expr= cond_and; + on_expr->fix_fields(thd, 0, &on_expr); + if (!embedded->outer_join) // Not left join + { + *conds= and_conds(*conds, cond_and); + // fix_fields() should be made with temporary memory pool + if (arena) + thd->restore_backup_item_arena(arena, &backup); + if (*conds && !(*conds)->fixed) + { + if ((*conds)->fix_fields(thd, tables, conds)) + DBUG_RETURN(1); + } + } + else + { + embedded->on_expr= and_conds(embedded->on_expr, cond_and); + // fix_fields() should be made with temporary memory pool + if (arena) + thd->restore_backup_item_arena(arena, &backup); + if (embedded->on_expr && !embedded->on_expr->fixed) + { + if (embedded->on_expr->fix_fields(thd, tables, &table->on_expr)) + DBUG_RETURN(1); + } + } } + embedding= embedded->embedding; } + while (embedding && + embedding->nested_join->join_list.head() == embedded); } - if (stmt) + if (arena) { /* We are in prepared statement preparation code => we should store WHERE clause changing for next executions. - We do this ON -> WHERE transformation only once per PS statement. + We do this ON -> WHERE transformation only once per PS/SP statement. */ - thd->lex->current_select->where= *conds; + select_lex->where= *conds; + select_lex->conds_processed_with_permanent_arena= 1; } DBUG_RETURN(test(thd->net.report_error)); err: - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(1); } @@ -2730,7 +2790,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) && ! 
in_use->killed) { - in_use->killed=1; + in_use->killed= THD::KILL_CONNECTION; pthread_mutex_lock(&in_use->mysys_var->mutex); if (in_use->mysys_var->current_cond) { diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index 5fe21d79aa0..68b4bc601fe 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -932,11 +932,16 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) /* Test if the query is a SELECT - (pre-space is removed in dispatch_command) + (pre-space is removed in dispatch_command). + + First '/' looks like comment before command it is not + frequently appeared in real lihe, consequently we can + check all such queries, too. */ - if (my_toupper(system_charset_info, sql[0]) != 'S' || - my_toupper(system_charset_info, sql[1]) != 'E' || - my_toupper(system_charset_info,sql[2]) !='L') + if ((my_toupper(system_charset_info, sql[0]) != 'S' || + my_toupper(system_charset_info, sql[1]) != 'E' || + my_toupper(system_charset_info,sql[2]) !='L') && + sql[0] != '/') { DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached")); goto err; @@ -3133,7 +3138,7 @@ void Query_cache::wreck(uint line, const char *message) DBUG_PRINT("warning", ("%5d QUERY CACHE WRECK => DISABLED",line)); DBUG_PRINT("warning", ("==================================")); if (thd) - thd->killed = 1; + thd->killed= THD::KILL_CONNECTION; cache_dump(); /* check_integrity(0); */ /* Can't call it here because of locks */ bins_dump(); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 7aa3bbbdd7b..07c894dc8c1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -36,6 +36,9 @@ #endif #include <mysys_err.h> +#include "sp_rcontext.h" +#include "sp_cache.h" + /* The following is used to initialise Table_ident with a internal table name @@ -155,18 +158,20 @@ bool foreign_key_prefix(Key *a, Key *b) ** Thread specific functions ****************************************************************************/ -THD::THD():user_time(0), current_statement(0), is_fatal_error(0), +THD::THD():user_time(0), current_arena(0), is_fatal_error(0), last_insert_id_used(0), insert_id_used(0), rand_used(0), time_zone_used(0), - in_lock_tables(0), global_read_lock(0), bootstrap(0) + in_lock_tables(0), global_read_lock(0), bootstrap(0), + spcont(NULL) { - host= user= priv_user= db= ip=0; + host= user= priv_user= db= ip= 0; + catalog= (char*)"std"; // the only catalog we have for now host_or_ip= "connecting host"; locked=some_tables_deleted=no_errors=password= 0; - killed=0; query_start_used= 0; count_cuted_fields= CHECK_FIELD_IGNORE; - db_length= col_access= 0; + killed= NOT_KILLED; + db_length= col_access=0; query_error= tmp_table_used= 0; next_insert_id=last_insert_id=0; open_tables= temporary_tables= handler_tables= derived_tables= 0; @@ -222,9 +227,12 @@ THD::THD():user_time(0), current_statement(0), is_fatal_error(0), clear_alloc_root(&transaction.mem_root); init_alloc_root(&warn_root, WARN_ALLOC_BLOCK_SIZE, WARN_ALLOC_PREALLOC_SIZE); user_connect=(USER_CONN *)0; - hash_init(&user_vars, &my_charset_bin, USER_VARS_HASH_SIZE, 0, 0, + hash_init(&user_vars, system_charset_info, USER_VARS_HASH_SIZE, 0, 0, (hash_get_key) get_var_key, - (hash_free_key) free_user_var,0); + (hash_free_key) free_user_var, 0); + + sp_proc_cache= NULL; + sp_func_cache= NULL; /* For user vars replication*/ if (opt_bin_log) @@ -252,7 +260,7 @@ THD::THD():user_time(0), current_statement(0), is_fatal_error(0), if (open_cached_file(&transaction.trans_log, mysql_tmpdir, LOG_PREFIX, binlog_cache_size, MYF(MY_WME))) - killed=1; + killed= 
KILL_CONNECTION; transaction.trans_log.end_of_file= max_binlog_cache_size; } #endif @@ -325,9 +333,11 @@ void THD::change_user(void) cleanup(); cleanup_done= 0; init(); - hash_init(&user_vars, &my_charset_bin, USER_VARS_HASH_SIZE, 0, 0, + hash_init(&user_vars, system_charset_info, USER_VARS_HASH_SIZE, 0, 0, (hash_get_key) get_var_key, (hash_free_key) free_user_var, 0); + sp_cache_clear(&sp_proc_cache); + sp_cache_clear(&sp_func_cache); } @@ -353,6 +363,8 @@ void THD::cleanup(void) my_free((char*) variables.datetime_format, MYF(MY_ALLOW_ZERO_PTR)); delete_dynamic(&user_var_events); hash_free(&user_vars); + sp_cache_clear(&sp_proc_cache); + sp_cache_clear(&sp_func_cache); if (global_read_lock) unlock_global_read_lock(this); if (ull) @@ -362,6 +374,7 @@ void THD::cleanup(void) pthread_mutex_unlock(&LOCK_user_locks); ull= 0; } + cleanup_done=1; DBUG_VOID_RETURN; } @@ -393,6 +406,9 @@ THD::~THD() } #endif + sp_cache_clear(&sp_proc_cache); + sp_cache_clear(&sp_func_cache); + DBUG_PRINT("info", ("freeing host")); if (host != my_localhost) // If not pointer to constant safeFree(host); @@ -405,7 +421,7 @@ THD::~THD() mysys_var=0; // Safety (shouldn't be needed) pthread_mutex_destroy(&LOCK_delete); #ifndef DBUG_OFF - dbug_sentry = THD_SENTRY_GONE; + dbug_sentry= THD_SENTRY_GONE; #endif /* Reset stmt_backup.mem_root to not double-free memory from thd.mem_root */ clear_alloc_root(&stmt_backup.mem_root); @@ -413,14 +429,14 @@ THD::~THD() } -void THD::awake(bool prepare_to_die) +void THD::awake(THD::killed_state state_to_set) { THD_CHECK_SENTRY(this); safe_mutex_assert_owner(&LOCK_delete); - if (prepare_to_die) - killed = 1; - thr_alarm_kill(real_id); + killed= state_to_set; + if (state_to_set != THD::KILL_QUERY) + thr_alarm_kill(real_id); #ifdef SIGNAL_WITH_VIO_CLOSE close_active_vio(); #endif @@ -624,7 +640,7 @@ CHANGED_TABLE_LIST* THD::changed_table_dup(const char *key, long key_length) { my_error(EE_OUTOFMEMORY, MYF(ME_BELL), ALIGN_SIZE(sizeof(TABLE_LIST)) + key_length + 1); - killed= 1; + killed= KILL_CONNECTION; return 0; } @@ -649,8 +665,8 @@ int THD::send_explain_fields(select_result *result) item->maybe_null=1; field_list.push_back(item=new Item_empty_string("key",NAME_LEN)); item->maybe_null=1; - field_list.push_back(item=new Item_return_int("key_len",3, - MYSQL_TYPE_LONGLONG)); + field_list.push_back(item=new Item_empty_string("key_len", + NAME_LEN*MAX_KEY)); item->maybe_null=1; field_list.push_back(item=new Item_empty_string("ref", NAME_LEN*MAX_REF_PARTS)); @@ -1263,49 +1279,82 @@ bool select_exists_subselect::send_data(List<Item> &items) int select_dumpvar::prepare(List<Item> &list, SELECT_LEX_UNIT *u) { List_iterator_fast<Item> li(list); - List_iterator_fast<LEX_STRING> gl(var_list); + List_iterator_fast<my_var> gl(var_list); Item *item; - LEX_STRING *ls; + + local_vars.empty(); // Clear list if SP + unit= u; + row_count= 0; + if (var_list.elements != list.elements) { my_error(ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT, MYF(0)); return 1; } - unit=u; while ((item=li++)) { - ls= gl++; - Item_func_set_user_var *xx = new Item_func_set_user_var(*ls,item); - /* - Item_func_set_user_var can't substitute something else on its place => - 0 can be passed as last argument (reference on item) - */ - xx->fix_fields(thd,(TABLE_LIST*) thd->lex->select_lex.table_list.first, - 0); - xx->fix_length_and_dec(); - vars.push_back(xx); + my_var *mv= gl++; + if (mv->local) + (void)local_vars.push_back(new Item_splocal(mv->s, mv->offset)); + else + { + Item_func_set_user_var *xx = new Item_func_set_user_var(mv->s, 
item); + /* + Item_func_set_user_var can't substitute something else on its place => + 0 can be passed as last argument (reference on item) + */ + xx->fix_fields(thd, (TABLE_LIST*) thd->lex->select_lex.table_list.first, + 0); + xx->fix_length_and_dec(); + vars.push_back(xx); + } } return 0; } +Item_arena::Item_arena(THD* thd) + :free_list(0) +{ + init_sql_alloc(&mem_root, + thd->variables.query_alloc_block_size, + thd->variables.query_prealloc_size); +} + + +Item_arena::Item_arena() + :free_list(0) +{ + bzero((char *) &mem_root, sizeof(mem_root)); +} + + +Item_arena::Item_arena(bool init_mem_root) + :free_list(0) +{ + if (init_mem_root) + bzero((char *) &mem_root, sizeof(mem_root)); +} + + +Item_arena::~Item_arena() +{} + + /* Statement functions */ Statement::Statement(THD *thd) - :id(++thd->statement_id_counter), + :Item_arena(thd), + id(++thd->statement_id_counter), set_query_id(1), allow_sum_func(0), lex(&main_lex), query(0), - query_length(0), - free_list(0) + query_length(0) { name.str= NULL; - init_sql_alloc(&mem_root, - thd->variables.query_alloc_block_size, - thd->variables.query_prealloc_size); } /* @@ -1315,15 +1364,14 @@ Statement::Statement(THD *thd) */ Statement::Statement() - :id(0), + :Item_arena(), + id(0), set_query_id(1), allow_sum_func(0), /* initialized later */ lex(&main_lex), query(0), /* these two are set */ - query_length(0), /* in alloc_query() */ - free_list(0) + query_length(0) /* in alloc_query() */ { - bzero((char *) &mem_root, sizeof(mem_root)); } @@ -1344,14 +1392,14 @@ void Statement::set_statement(Statement *stmt) } -void Statement::set_n_backup_item_arena(Statement *set, Statement *backup) +void Item_arena::set_n_backup_item_arena(Item_arena *set, Item_arena *backup) { backup->set_item_arena(this); set_item_arena(set); } -void Statement::restore_backup_item_arena(Statement *set, Statement *backup) +void Item_arena::restore_backup_item_arena(Item_arena *set, Item_arena *backup) { set->set_item_arena(this); set_item_arena(backup); @@ -1359,7 +1407,7 @@ void Statement::restore_backup_item_arena(Statement *set, Statement *backup) init_alloc_root(&backup->mem_root, 0, 0); } -void Statement::set_item_arena(Statement *set) +void Item_arena::set_item_arena(Item_arena *set) { mem_root= set->mem_root; free_list= set->free_list; @@ -1435,8 +1483,19 @@ int Statement_map::insert(Statement *statement) bool select_dumpvar::send_data(List<Item> &items) { List_iterator_fast<Item_func_set_user_var> li(vars); + List_iterator_fast<Item_splocal> var_li(local_vars); + List_iterator_fast<my_var> my_li(var_list); + List_iterator_fast<Item> it(items); Item_func_set_user_var *xx; + Item_splocal *yy; + Item *item; + my_var *zz; DBUG_ENTER("send_data"); + if (unit->offset_limit_cnt) + { // using limit offset,count + unit->offset_limit_cnt--; + DBUG_RETURN(0); + } if (unit->offset_limit_cnt) { // Using limit offset,count @@ -1448,26 +1507,33 @@ bool select_dumpvar::send_data(List<Item> &items) my_error(ER_TOO_MANY_ROWS, MYF(0)); DBUG_RETURN(1); } - while ((xx=li++)) + while ((zz=my_li++) && (item=it++)) { - xx->check(); - xx->update(); + if (zz->local) + { + if ((yy=var_li++)) + { + thd->spcont->set_item_eval(yy->get_offset(), item, zz->type); + } + } + else + { + if ((xx=li++)) + { + xx->check(); + xx->update(); + } + } } DBUG_RETURN(0); } bool select_dumpvar::send_eof() { - if (row_count) - { - ::send_ok(thd,row_count); - return 0; - } - else - { - my_error(ER_EMPTY_QUERY,MYF(0)); - return 1; - } + if (! 
row_count) + send_warning(thd, ER_SP_FETCH_NO_DATA); + ::send_ok(thd,row_count); + return 0; } /**************************************************************************** diff --git a/sql/sql_class.h b/sql/sql_class.h index 64fed055c80..08f73309a85 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -26,6 +26,9 @@ class Query_log_event; class Load_log_event; class Slave_log_event; +class Format_description_log_event; +class sp_rcontext; +class sp_cache; enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE }; enum enum_ha_read_modes { RFIRST, RNEXT, RPREV, RLAST, RKEY, RNEXT_SAME }; @@ -97,7 +100,14 @@ class MYSQL_LOG enum cache_type io_cache_type; bool write_error, inited; bool need_start_event; - bool no_auto_events; // For relay binlog + /* + no_auto_events means we don't want any of these automatic events : + Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't want + a Rotate_log event to be written to the relay log. When we start a relay log + etc. So in 4.x this is 1 for relay logs, 0 for binlogs. + In 5.0 it's 0 for relay logs too! + */ + bool no_auto_events; /* The max size before rotation (usable only if log_type == LOG_BIN: binary logs and relay logs). @@ -114,6 +124,18 @@ class MYSQL_LOG public: MYSQL_LOG(); ~MYSQL_LOG(); + + /* + These describe the log's format. This is used only for relay logs. + _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's + necessary to have 2 distinct objects, because the I/O thread may be reading + events in a different format from what the SQL thread is reading (consider + the case of a master which has been upgraded from 5.0 to 5.1 without doing + RESET MASTER, or from 4.x to 5.0). + */ + Format_description_log_event *description_event_for_exec, + *description_event_for_queue; + void reset_bytes_written() { bytes_written = 0; @@ -142,7 +164,8 @@ public: bool open(const char *log_name,enum_log_type log_type, const char *new_name, const char *index_file_name_arg, enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size); + bool no_auto_events_arg, ulong max_size, + bool null_created); void new_file(bool need_lock= 1); bool write(THD *thd, enum enum_server_command command, const char *format,...); @@ -368,6 +391,8 @@ struct system_variables ulong net_retry_count; ulong net_wait_timeout; ulong net_write_timeout; + ulong optimizer_prune_level; + ulong optimizer_search_depth; ulong preload_buff_size; ulong query_cache_type; ulong read_buff_size; @@ -418,6 +443,48 @@ struct system_variables void free_tmp_table(THD *thd, TABLE *entry); +class Item_arena +{ +public: + /* + List of items created in the parser for this query. 
Every item puts + itself to the list on creation (see Item::Item() for details)) + */ + Item *free_list; + MEM_ROOT mem_root; + + Item_arena(THD *thd); + Item_arena(); + Item_arena(bool init_mem_root); + ~Item_arena(); + + inline gptr alloc(unsigned int size) { return alloc_root(&mem_root,size); } + inline gptr calloc(unsigned int size) + { + gptr ptr; + if ((ptr=alloc_root(&mem_root,size))) + bzero((char*) ptr,size); + return ptr; + } + inline char *strdup(const char *str) + { return strdup_root(&mem_root,str); } + inline char *strmake(const char *str, uint size) + { return strmake_root(&mem_root,str,size); } + inline char *memdup(const char *str, uint size) + { return memdup_root(&mem_root,str,size); } + inline char *memdup_w_gap(const char *str, uint size, uint gap) + { + gptr ptr; + if ((ptr=alloc_root(&mem_root,size+gap))) + memcpy(ptr,str,size); + return ptr; + } + + void set_n_backup_item_arena(Item_arena *set, Item_arena *backup); + void restore_backup_item_arena(Item_arena *set, Item_arena *backup); + void set_item_arena(Item_arena *set); +}; + /* State of a single command executed against this connection. One connection can contain a lot of simultaneously running statements, @@ -432,7 +499,7 @@ void free_tmp_table(THD *thd, TABLE *entry); be used explicitly. */ -class Statement +class Statement: public Item_arena { Statement(const Statement &rhs); /* not implemented: */ Statement &operator=(const Statement &rhs); /* non-copyable */ @@ -474,12 +541,6 @@ public: */ char *query; uint32 query_length; // current query length - /* - List of items created in the parser for this query. Every item puts - itself to the list on creation (see Item::Item() for details)) - */ - Item *free_list; - MEM_ROOT mem_root; public: /* We build without RTTI, so dynamic_cast can't be used. */ @@ -503,31 +564,6 @@ public: /* return class type */ virtual Type type() const; - inline gptr alloc(unsigned int size) { return alloc_root(&mem_root,size); } - inline gptr calloc(unsigned int size) - { - gptr ptr; - if ((ptr=alloc_root(&mem_root,size))) - bzero((char*) ptr,size); - return ptr; - } - inline char *strdup(const char *str) - { return strdup_root(&mem_root,str); } - inline char *strmake(const char *str, uint size) - { return strmake_root(&mem_root,str,size); } - inline char *memdup(const char *str, uint size) - { return memdup_root(&mem_root,str,size); } - inline char *memdup_w_gap(const char *str, uint size, uint gap) - { - gptr ptr; - if ((ptr=alloc_root(&mem_root,size+gap))) - memcpy(ptr,str,size); - return ptr; - } - - void set_n_backup_item_arena(Statement *set, Statement *backup); - void restore_backup_item_arena(Statement *set, Statement *backup); - void set_item_arena(Statement *set); }; @@ -660,9 +696,17 @@ public: the connection priv_user - The user privilege we are using. May be '' for anonymous user. db - currently selected database + catalog - currently selected catalog ip - client IP + WARNING: some members of THD (currently 'db', 'catalog' and 'query') are + set and alloced by the slave SQL thread (for the THD of that thread); that + thread is (and must remain, for now) the only responsible for freeing these + 3 members. If you add members here, and you add code to set them in + replication, don't forget to free_them_and_set_them_to_0 in replication + properly. For details see the 'err:' label of the pthread_handler_decl of + the slave SQL thread, in sql/slave.cc. 
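  Schematically, the owning thread's cleanup of those three members looks like
  this (a hypothetical sketch only, not the actual slave.cc code; the real
  version lives at the 'err:' label mentioned above):

      my_free(thd->query, MYF(MY_ALLOW_ZERO_PTR));
      thd->query= 0;                 /* likewise for thd->db and thd->catalog */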
*/ - char *host,*user,*priv_user,*db,*ip; + char *host,*user,*priv_user,*db,*catalog,*ip; char priv_host[MAX_HOSTNAME]; /* remote (peer) port */ uint16 peer_port; @@ -760,9 +804,9 @@ public: Vio* active_vio; #endif /* - Current prepared Statement if there one, or 0 + Current prepared Item_arena if there one, or 0 */ - Statement *current_statement; + Item_arena *current_arena; /* next_insert_id is set on SET INSERT_ID= #. This is used as the next generated auto_increment value in handler.cc @@ -833,10 +877,26 @@ public: bool time_zone_used; bool in_lock_tables,global_read_lock; bool query_error, bootstrap, cleanup_done; + + enum killed_state { NOT_KILLED=0, KILL_CONNECTION=ER_SERVER_SHUTDOWN, KILL_QUERY=ER_QUERY_INTERRUPTED }; + killed_state volatile killed; + inline int killed_errno() const + { + return killed; + } + inline void send_kill_message() const + { + my_error(killed_errno(), MYF(0)); + } + bool tmp_table_used; bool charset_is_system_charset, charset_is_collation_connection; bool slow_command; - my_bool volatile killed; + + longlong row_count_func; /* For the ROW_COUNT() function */ + sp_rcontext *spcont; // SP runtime context + sp_cache *sp_proc_cache; + sp_cache *sp_func_cache; /* If we do a purge of binary logs, log index info of the threads @@ -884,7 +944,7 @@ public: } void close_active_vio(); #endif - void awake(bool prepare_to_die); + void awake(THD::killed_state state_to_set); inline const char* enter_cond(pthread_cond_t *cond, pthread_mutex_t* mutex, const char* msg) { @@ -973,7 +1033,7 @@ public: inline void allocate_temporary_memory_pool_for_ps_preparing() { - DBUG_ASSERT(current_statement!=0); + DBUG_ASSERT(current_arena!=0); /* We do not want to have in PS memory all that junk, which will be created by preparation => substitute memory @@ -982,7 +1042,7 @@ public: We know that PS memory pool is now copied to THD, we move it back to allow some code use it. */ - current_statement->set_item_arena(this); + current_arena->set_item_arena(this); init_sql_alloc(&mem_root, variables.query_alloc_block_size, variables.query_prealloc_size); @@ -990,12 +1050,16 @@ public: } inline void free_temporary_memory_pool_for_ps_preparing() { - DBUG_ASSERT(current_statement!=0); - cleanup_items(current_statement->free_list); + DBUG_ASSERT(current_arena!=0); + cleanup_items(current_arena->free_list); free_items(free_list); close_thread_tables(this); // to close derived tables free_root(&mem_root, MYF(0)); - set_item_arena(current_statement); + set_item_arena(current_arena); + } + inline bool only_prepare() + { + return command == COM_PREPARE; } }; @@ -1326,8 +1390,13 @@ class user_var_entry DTCollation collation; }; - -/* Class for unique (removing of duplicates) */ +/* + Unique -- class for unique (removing of duplicates). + Puts all values to the TREE. If the tree becomes too big, + it's dumped to the file. User can request sorted values, or + just iterate through them. In the last case tree merging is performed in + memory simultaneously with iteration, so it should be ~2-3x faster. 
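  A rough usage sketch (hypothetical caller; the constructor, unique_add() and
  get() signatures are the ones declared below, and the comparator/argument
  pair is the one multi_delete passes in this patch; fetch_next_ref() is an
  invented stand-in for whatever produces row references):

      byte *ref;
      Unique deduped(refpos_order_cmp, (void*) table->file,
                     table->file->ref_length, MEM_STRIP_BUF_SIZE);
      while ((ref= fetch_next_ref()))     /* hypothetical producer           */
      {
        if (deduped.unique_add(ref))      /* may flush the tree to a tempfile */
          return 1;
      }
      if (deduped.get(table))             /* merge runs back, sorted, no dups */
        return 1;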
+ */ class Unique :public Sql_alloc { @@ -1341,10 +1410,10 @@ class Unique :public Sql_alloc public: ulong elements; - Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, + Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, uint size_arg, ulong max_in_memory_size_arg); ~Unique(); - inline bool unique_add(gptr ptr) + inline bool unique_add(void *ptr) { if (tree.elements_in_tree > max_elements && flush()) return 1; @@ -1352,6 +1421,18 @@ public: } bool get(TABLE *table); + static double get_use_cost(uint *buffer, uint nkeys, uint key_size, + ulong max_in_memory_size); + inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size, + ulong max_in_memory_size) + { + register ulong max_elems_in_tree= + (1 + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); + return sizeof(uint)*(1 + nkeys/max_elems_in_tree); + } + + void reset(); + bool walk(tree_walk_action action, void *walk_action_arg); friend int unique_write_to_file(gptr key, element_count count, Unique *unique); friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique); @@ -1408,13 +1489,25 @@ public: bool send_eof(); }; +class my_var : public Sql_alloc { +public: + LEX_STRING s; + bool local; + uint offset; + enum_field_types type; + my_var (LEX_STRING& j, bool i, uint o, enum_field_types t) + :s(j), local(i), offset(o), type(t) + {} + ~my_var() {} +}; class select_dumpvar :public select_result { ha_rows row_count; public: - List<LEX_STRING> var_list; + List<my_var> var_list; List<Item_func_set_user_var> vars; - select_dumpvar(void) { var_list.empty(); vars.empty(); row_count=0;} + List<Item_splocal> local_vars; + select_dumpvar(void) { var_list.empty(); local_vars.empty(); vars.empty(); row_count=0;} ~select_dumpvar() {} int prepare(List<Item> &list, SELECT_LEX_UNIT *u); bool send_fields(List<Item> &list, uint flag) {return 0;} diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 9db2198268a..fcb6134ce41 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -20,6 +20,7 @@ #include "mysql_priv.h" #include <mysys_err.h> #include "sql_acl.h" +#include "sp.h" #include <my_dir.h> #include <m_ctype.h> #ifdef __WIN__ @@ -248,7 +249,6 @@ int mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info, query= thd->query; query_length= thd->query_length; } - mysql_update_log.write(thd, query, query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, query, query_length, 0); @@ -298,7 +298,6 @@ int mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) thd->variables.collation_database= thd->db_charset; } - mysql_update_log.write(thd,thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -412,6 +411,7 @@ int mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) } exit: + (void)sp_drop_db_routines(thd, db); /* QQ Ignore errors for now */ start_waiting_global_read_lock(thd); /* If this database was the client's selected database, we silently change the diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index b76bad2805b..9e425f86579 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -88,6 +88,7 @@ int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, { delete select; free_underlaid_joins(thd, &thd->lex->select_lex); + thd->row_count_func= 0; send_ok(thd,0L); DBUG_RETURN(0); // Nothing to delete } @@ -142,6 +143,13 @@ int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, select= 0; } + /* If quick select is used, initialize it before 
retrieving rows. */ + if (select && select->quick && select->quick->reset()) + { + delete select; + free_underlaid_joins(thd, &thd->lex->select_lex); + DBUG_RETURN(-1); // This will force out message + } init_read_record(&info,thd,table,select,1,1); deleted=0L; init_ftfuncs(thd, &thd->lex->select_lex, 1); @@ -210,7 +218,6 @@ cleanup: */ if ((deleted || (error < 0)) && (error <= 0 || !transactional_table)) { - mysql_update_log.write(thd,thd->query, thd->query_length); if (mysql_bin_log.is_open()) { if (error <= 0) @@ -236,9 +243,10 @@ cleanup: } free_underlaid_joins(thd, &thd->lex->select_lex); if (error >= 0 || thd->net.report_error) - send_error(thd,thd->killed ? ER_SERVER_SHUTDOWN: 0); + send_error(thd,thd->killed_errno()); else { + thd->row_count_func= deleted; send_ok(thd,deleted); DBUG_PRINT("info",("%d records deleted",deleted)); } @@ -264,10 +272,11 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) { TABLE_LIST *delete_table_list= ((TABLE_LIST*) thd->lex-> select_lex.table_list.first); + SELECT_LEX *select_lex= &thd->lex->select_lex; DBUG_ENTER("mysql_prepare_delete"); if (setup_conds(thd, delete_table_list, conds) || - setup_ftfuncs(&thd->lex->select_lex)) + setup_ftfuncs(select_lex)) DBUG_RETURN(-1); if (find_real_table_in_list(table_list->next, table_list->db, table_list->real_name)) @@ -275,6 +284,11 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->real_name); DBUG_RETURN(-1); } + if (thd->current_arena && select_lex->first_execution) + { + select_lex->prep_where= select_lex->where; + select_lex->first_execution= 0; + } DBUG_RETURN(0); } @@ -285,10 +299,10 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) #define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size -extern "C" int refposcmp2(void* arg, const void *a,const void *b) +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) { - /* arg is a pointer to file->ref_length */ - return memcmp(a,b, *(int*) arg); + handler *file= (handler*)arg; + return file->cmp_ref((const byte*)a, (const byte*)b); } multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, @@ -352,8 +366,8 @@ multi_delete::initialize_tables(JOIN *join) for (walk=walk->next ; walk ; walk=walk->next) { TABLE *table=walk->table; - *tempfiles_ptr++= new Unique (refposcmp2, - (void *) &table->file->ref_length, + *tempfiles_ptr++= new Unique (refpos_order_cmp, + (void *) table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); } @@ -545,7 +559,9 @@ bool multi_delete::send_eof() ha_autocommit_... 
*/ if (deleted) + { query_cache_invalidate3(thd, delete_tables, 1); + } /* Write the SQL statement to the binlog if we deleted @@ -557,7 +573,6 @@ bool multi_delete::send_eof() */ if (deleted && (error <= 0 || normal_tables)) { - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { if (error <= 0) @@ -578,7 +593,10 @@ bool multi_delete::send_eof() if (local_error) ::send_error(thd); else + { + thd->row_count_func= deleted; ::send_ok(thd, deleted); + } return 0; } @@ -670,7 +688,6 @@ end: { if (!error) { - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 01459d3fc7a..48bc6f4c5a2 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -151,7 +151,7 @@ static int mysql_derived(THD *thd, LEX *lex, SELECT_LEX_UNIT *unit, if it is preparation PS only then we do not need real data and we can skip execution (and parameters is not defined, too) */ - if (!thd->current_statement) + if (!thd->only_prepare()) { if (is_union) { diff --git a/sql/sql_error.cc b/sql/sql_error.cc index c94d5e52bcf..8aa7bdf9a7f 100644 --- a/sql/sql_error.cc +++ b/sql/sql_error.cc @@ -43,6 +43,7 @@ This file contains the implementation of error and warnings related ***********************************************************************/ #include "mysql_priv.h" +#include "sp_rcontext.h" /* Store a new message in an error object @@ -108,6 +109,9 @@ MYSQL_ERROR *push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level, MYSQL_ERROR *err= NULL; + if (thd->spcont && thd->spcont->find_handler(code)) + DBUG_RETURN(NULL); + if (thd->warn_list.elements < thd->variables.max_error_count) { /* diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index b769178565b..3acebf5cadc 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -24,7 +24,7 @@ static int check_null_fields(THD *thd,TABLE *entry); #ifndef EMBEDDED_LIBRARY static TABLE *delayed_get_table(THD *thd,TABLE_LIST *table_list); static int write_delayed(THD *thd,TABLE *table, enum_duplicates dup, - char *query, uint query_length, int log_on); + char *query, uint query_length, bool log_on); static void end_delayed_insert(THD *thd); extern "C" pthread_handler_decl(handle_delayed_insert,arg); static void unlink_blobs(register TABLE *table); @@ -40,9 +40,6 @@ static void unlink_blobs(register TABLE *table); #define my_safe_afree(ptr, size, min_length) if (size > min_length) my_free(ptr,MYF(0)) #endif -#define DELAYED_LOG_UPDATE 1 -#define DELAYED_LOG_BIN 2 - /* Check if insert fields are correct. Sets table->timestamp_default_now/on_update_now to 0 o leaves it to point @@ -120,7 +117,7 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, By default, both logs are enabled (this won't cause problems if the server runs without --log-update or --log-bin). 
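  (Note on the replacement that follows: OPTION_BIN_LOG is the per-session
  binlogging flag that only a SUPER user can clear with SET SQL_LOG_BIN=0, so
  the new test

      bool log_on= (thd->options & OPTION_BIN_LOG) ||
                   (!(thd->master_access & SUPER_ACL));

  presumably means: log the delayed row unless a SUPER user has explicitly
  switched session binlogging off.)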
*/ - int log_on= DELAYED_LOG_UPDATE | DELAYED_LOG_BIN ; + bool log_on= (thd->options & OPTION_BIN_LOG) || (!(thd->master_access & SUPER_ACL)); bool transactional_table, log_delayed; uint value_count; ulong counter = 1; @@ -137,10 +134,6 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, thd->lex->select_lex.table_list.first; DBUG_ENTER("mysql_insert"); - if (!(thd->options & OPTION_UPDATE_LOG)) - log_on&= ~(int) DELAYED_LOG_UPDATE; - if (!(thd->options & OPTION_BIN_LOG)) - log_on&= ~(int) DELAYED_LOG_BIN; /* in safe mode or with skip-new change delayed insert to be regular if we are told to replace duplicates, the insert cannot be concurrent @@ -354,7 +347,9 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, before binlog writing and ha_autocommit_... */ if (info.copied || info.deleted || info.updated) + { query_cache_invalidate3(thd, table_list, 1); + } transactional_table= table->file->has_transactions(); @@ -362,7 +357,6 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, if ((info.copied || info.deleted || info.updated) && (error <= 0 || !transactional_table)) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { if (error <= 0) @@ -401,7 +395,10 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, goto abort; if (values_list.elements == 1 && (!(thd->options & OPTION_WARNINGS) || !thd->cuted_fields)) - send_ok(thd,info.copied+info.deleted+info.updated,id); + { + thd->row_count_func= info.copied+info.deleted+info.updated; + send_ok(thd, thd->row_count_func, id); + } else { char buff[160]; @@ -412,7 +409,8 @@ int mysql_insert(THD *thd,TABLE_LIST *table_list, else sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records, (ulong) (info.deleted+info.updated), (ulong) thd->cuted_fields); - ::send_ok(thd,info.copied+info.deleted+info.updated,(ulonglong)id,buff); + thd->row_count_func= info.copied+info.deleted+info.updated; + ::send_ok(thd, thd->row_count_func, (ulonglong)id,buff); } free_underlaid_joins(thd, &thd->lex->select_lex); table->insert_values=0; @@ -462,6 +460,7 @@ int mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->real_name); DBUG_RETURN(-1); } + thd->lex->select_lex.first_execution= 0; DBUG_RETURN(0); } @@ -642,14 +641,13 @@ public: char *record,*query; enum_duplicates dup; time_t start_time; - bool query_start_used,last_insert_id_used,insert_id_used; - int log_query; + bool query_start_used,last_insert_id_used,insert_id_used, log_query; ulonglong last_insert_id; ulong timestamp_default_now; ulong timestamp_on_update_now; uint query_length; - delayed_row(enum_duplicates dup_arg, int log_query_arg) + delayed_row(enum_duplicates dup_arg, bool log_query_arg) :record(0),query(0),dup(dup_arg),log_query(log_query_arg) {} ~delayed_row() { @@ -963,7 +961,7 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) /* Put a question in queue */ static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, - char *query, uint query_length, int log_on) + char *query, uint query_length, bool log_on) { delayed_row *row=0; delayed_insert *di=thd->di; @@ -1044,7 +1042,7 @@ void kill_delayed_threads(void) { /* Ensure that the thread doesn't kill itself while we are looking at it */ pthread_mutex_lock(&tmp->mutex); - tmp->thd.killed=1; + tmp->thd.killed= THD::KILL_CONNECTION; if (tmp->thd.mysys_var) { pthread_mutex_lock(&tmp->thd.mysys_var->mutex); @@ -1083,7 +1081,7 @@ extern "C" pthread_handler_decl(handle_delayed_insert,arg) thd->thread_id=thread_id++; thd->end_time(); 
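  /*
    Aside on the THD::killed tri-state used throughout this function: the enum
    values double as error numbers (KILL_CONNECTION == ER_SERVER_SHUTDOWN,
    KILL_QUERY == ER_QUERY_INTERRUPTED), so callers elsewhere in this patch
    (sql_load.cc, sql_delete.cc) converge on two schematic patterns:

      if (thd->killed)
      {
        thd->send_kill_message();           // my_error(thd->killed_errno(), MYF(0))
        DBUG_RETURN(1);
      }

      send_error(thd, thd->killed_errno()); // killed_errno() is 0 when not killed

    KILL_QUERY only aborts the running statement: awake() skips thr_alarm_kill
    for it, and do_command() resets it back to NOT_KILLED before the next
    command is read.
  */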
threads.append(thd); - thd->killed=abort_loop; + thd->killed=abort_loop ? THD::KILL_CONNECTION : THD::NOT_KILLED; pthread_mutex_unlock(&LOCK_thread_count); pthread_mutex_lock(&di->mutex); @@ -1136,7 +1134,7 @@ extern "C" pthread_handler_decl(handle_delayed_insert,arg) for (;;) { - if (thd->killed) + if (thd->killed == THD::KILL_CONNECTION) { uint lock_count; /* @@ -1184,7 +1182,7 @@ extern "C" pthread_handler_decl(handle_delayed_insert,arg) break; if (error == ETIME || error == ETIMEDOUT) { - thd->killed=1; + thd->killed= THD::KILL_CONNECTION; break; } } @@ -1203,8 +1201,9 @@ extern "C" pthread_handler_decl(handle_delayed_insert,arg) /* request for new delayed insert */ if (!(thd->lock=mysql_lock_tables(thd,&di->table,1))) { - di->dead= 1; // Some fatal error - thd->killed= 1; + /* Fatal error */ + di->dead= 1; + thd->killed= THD::KILL_CONNECTION; } pthread_cond_broadcast(&di->cond_client); } @@ -1212,8 +1211,9 @@ extern "C" pthread_handler_decl(handle_delayed_insert,arg) { if (di->handle_inserts()) { - di->dead= 1; // Some fatal error - thd->killed= 1; + /* Some fatal error */ + di->dead= 1; + thd->killed= THD::KILL_CONNECTION; } } di->status=0; @@ -1243,7 +1243,7 @@ end: close_thread_tables(thd); // Free the table di->table=0; di->dead= 1; // If error - thd->killed= 1; + thd->killed= THD::KILL_CONNECTION; // If error pthread_cond_broadcast(&di->cond_client); // Safety pthread_mutex_unlock(&di->mutex); @@ -1312,7 +1312,7 @@ bool delayed_insert::handle_inserts(void) max_rows=delayed_insert_limit; if (thd.killed || table->version != refresh_version) { - thd.killed=1; + thd.killed= THD::KILL_CONNECTION; max_rows= ~0; // Do as much as possible } @@ -1357,15 +1357,10 @@ bool delayed_insert::handle_inserts(void) using_ignore=0; table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); } - if (row->query) + if (row->query && row->log_query && using_bin_log) { - if (row->log_query & DELAYED_LOG_UPDATE) - mysql_update_log.write(&thd,row->query, row->query_length); - if (row->log_query & DELAYED_LOG_BIN && using_bin_log) - { - Query_log_event qinfo(&thd, row->query, row->query_length,0); - mysql_bin_log.write(&qinfo); - } + Query_log_event qinfo(&thd, row->query, row->query_length,0); + mysql_bin_log.write(&qinfo); } if (table->blob_fields) free_delayed_insert_blobs(table); @@ -1381,7 +1376,7 @@ bool delayed_insert::handle_inserts(void) on this table until all entries has been processed */ if (group_count++ >= max_rows && (row= rows.head()) && - (!(row->log_query & DELAYED_LOG_BIN && using_bin_log) || + (!(row->log_query & using_bin_log) || row->query)) { group_count=0; @@ -1527,7 +1522,6 @@ void select_insert::send_error(uint errcode,const char *err) { if (last_insert_id) thd->insert_id(last_insert_id); // For binary log - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, @@ -1538,7 +1532,9 @@ void select_insert::send_error(uint errcode,const char *err) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } if (info.copied || info.deleted || info.updated) + { query_cache_invalidate3(thd, table, 1); + } ha_rollback_stmt(thd); DBUG_VOID_RETURN; } @@ -1567,7 +1563,6 @@ bool select_insert::send_eof() if (last_insert_id) thd->insert_id(last_insert_id); // For binary log /* Write to binlog before commiting transaction */ - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { if (!error) @@ -1592,7 +1587,8 @@ bool select_insert::send_eof() else sprintf(buff, ER(ER_INSERT_INFO), (ulong) 
info.records, (ulong) (info.deleted+info.updated), (ulong) thd->cuted_fields); - ::send_ok(thd,info.copied+info.deleted+info.updated,last_insert_id,buff); + thd->row_count_func= info.copied+info.deleted+info.updated; + ::send_ok(thd, thd->row_count_func, last_insert_id, buff); DBUG_RETURN(0); } diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index d945eef1425..1ffe3d12139 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -21,6 +21,8 @@ #include "item_create.h" #include <m_ctype.h> #include <hash.h> +#include "sp.h" +#include "sp_head.h" /* Macros to look like lex */ @@ -109,6 +111,7 @@ LEX *lex_start(THD *thd, uchar *buf,uint length) LEX *lex= thd->lex; lex->thd= thd; lex->next_state=MY_LEX_START; + lex->buf= buf; lex->end_of_query=(lex->ptr=buf)+length; lex->yylineno = 1; lex->select_lex.parsing_place= SELECT_LEX_NODE::NO_MATTER; @@ -125,6 +128,14 @@ LEX *lex_start(THD *thd, uchar *buf,uint length) lex->ignore_space=test(thd->variables.sql_mode & MODE_IGNORE_SPACE); lex->sql_command=SQLCOM_END; lex->duplicates= DUP_ERROR; + lex->sphead= NULL; + lex->spcont= NULL; + + extern byte *sp_lex_spfuns_key(const byte *ptr, uint *plen, my_bool first); + hash_free(&lex->spfuns); + hash_init(&lex->spfuns, system_charset_info, 0, 0, 0, + sp_lex_spfuns_key, 0, 0); + return lex; } @@ -148,27 +159,6 @@ static int find_keyword(LEX *lex, uint len, bool function) lex->yylval->symbol.length=len; return symbol->tok; } -#ifdef HAVE_DLOPEN - udf_func *udf; - if (function && using_udf_functions && (udf=find_udf((char*) tok, len))) - { - lex->safe_to_cache_query=0; - lex->yylval->udf=udf; - switch (udf->returns) { - case STRING_RESULT: - return (udf->type == UDFTYPE_FUNCTION) ? UDF_CHAR_FUNC : UDA_CHAR_SUM; - case REAL_RESULT: - return (udf->type == UDFTYPE_FUNCTION) ? UDF_FLOAT_FUNC : UDA_FLOAT_SUM; - case INT_RESULT: - return (udf->type == UDFTYPE_FUNCTION) ? UDF_INT_FUNC : UDA_INT_SUM; - case ROW_RESULT: - default: - // This case should never be choosen - DBUG_ASSERT(0); - return 0; - } - } -#endif return 0; } @@ -992,12 +982,16 @@ void st_select_lex_unit::init_query() cleaned= 0; item_list.empty(); describe= 0; + found_rows_for_union= 0; } void st_select_lex::init_query() { st_select_lex_node::init_query(); table_list.empty(); + top_join_list.empty(); + join_list= &top_join_list; + embedding= 0; item_list.empty(); join= 0; where= 0; @@ -1005,10 +999,13 @@ void st_select_lex::init_query() having_fix_field= 0; resolve_mode= NOMATTER_MODE; cond_count= with_wild= 0; + conds_processed_with_permanent_arena= 0; ref_pointer_array= 0; select_n_having_items= 0; prep_where= 0; subquery_in_having= explicit_limit= 0; + first_execution= 1; + first_cond_optimization= 1; } void st_select_lex::init_select() @@ -1408,7 +1405,9 @@ bool st_select_lex::add_order_to_list(THD *thd, Item *item, bool asc) bool st_select_lex::add_item_to_list(THD *thd, Item *item) { - return item_list.push_back(item); + DBUG_ENTER("st_select_lex::add_item_to_list"); + DBUG_PRINT("info", ("Item: %p", item)); + DBUG_RETURN(item_list.push_back(item)); } @@ -1494,12 +1493,12 @@ bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num) We have to create array in prepared statement memory if it is prepared statement */ - Statement *stmt= thd->current_statement ? thd->current_statement : thd; + Item_arena *arena= thd->current_arena ? 
thd->current_arena : thd; return (ref_pointer_array= - (Item **)stmt->alloc(sizeof(Item*) * - (item_list.elements + - select_n_having_items + - order_group_num)* 5)) == 0; + (Item **)arena->alloc(sizeof(Item*) * + (item_list.elements + + select_n_having_items + + order_group_num)* 5)) == 0; } @@ -1619,6 +1618,25 @@ void st_select_lex::print_limit(THD *thd, String *str) } } +/* + initialize limit counters + + SYNOPSIS + st_select_lex_unit::set_limit() + values - SELECT_LEX with initial values for counters + sl - SELECT_LEX for options set +*/ +void st_select_lex_unit::set_limit(SELECT_LEX *values, + SELECT_LEX *sl) +{ + offset_limit_cnt= values->offset_limit; + select_limit_cnt= values->select_limit+values->offset_limit; + if (select_limit_cnt < values->select_limit) + select_limit_cnt= HA_POS_ERROR; // no limit + if (select_limit_cnt == HA_POS_ERROR) + sl->options&= ~OPTION_FOUND_ROWS; +} + /* Unlink first table from global table list and first table from outer select diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 328df2f6bb9..387685194f3 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -21,6 +21,10 @@ class Table_ident; class sql_exchange; class LEX_COLUMN; +class sp_head; +class sp_name; +class sp_instr; +class sp_pcontext; /* The following hack is needed because mysql_yacc.cc does not define @@ -75,6 +79,10 @@ enum enum_sql_command { SQLCOM_SHOW_WARNS, SQLCOM_EMPTY_QUERY, SQLCOM_SHOW_ERRORS, SQLCOM_SHOW_COLUMN_TYPES, SQLCOM_SHOW_STORAGE_ENGINES, SQLCOM_SHOW_PRIVILEGES, SQLCOM_HELP, SQLCOM_DROP_USER, SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM, + SQLCOM_CREATE_PROCEDURE, SQLCOM_CREATE_SPFUNCTION, SQLCOM_CALL, + SQLCOM_DROP_PROCEDURE, SQLCOM_ALTER_PROCEDURE,SQLCOM_ALTER_FUNCTION, + SQLCOM_SHOW_CREATE_PROC, SQLCOM_SHOW_CREATE_FUNC, + SQLCOM_SHOW_STATUS_PROC, SQLCOM_SHOW_STATUS_FUNC, SQLCOM_PREPARE, SQLCOM_EXECUTE, SQLCOM_DEALLOCATE_PREPARE, /* This should be the last !!! 
*/ @@ -85,6 +93,10 @@ enum enum_sql_command { #define DESCRIBE_NORMAL 1 #define DESCRIBE_EXTENDED 2 +enum suid_behaviour +{ + IS_DEFAULT_SUID= 0, IS_NOT_SUID, IS_SUID +}; typedef List<Item> List_item; @@ -374,8 +386,9 @@ public: ulong init_prepare_fake_select_lex(THD *thd); int change_result(select_subselect *result, select_subselect *old_result); + void set_limit(st_select_lex *values, st_select_lex *sl); - friend void mysql_init_query(THD *thd); + friend void mysql_init_query(THD *thd, bool lexonly); friend int subselect_union_engine::exec(); private: bool create_total_list_n_last_return(THD *thd, st_lex *lex, @@ -404,7 +417,10 @@ public: List<Item_func_match> *ftfunc_list; List<Item_func_match> ftfunc_list_alloc; JOIN *join; /* after JOIN::prepare it is pointer to corresponding JOIN */ - const char *type; /* type of select for EXPLAIN */ + List<TABLE_LIST> top_join_list; /* join list of the top level */ + List<TABLE_LIST> *join_list; /* list for the currently parsed join */ + TABLE_LIST *embedding; /* table embedding to the above list */ + const char *type; /* type of select for EXPLAIN */ SQL_LIST order_list; /* ORDER clause */ List<List_item> expr_list; @@ -423,6 +439,11 @@ public: uint cond_count; /* number of arguments of and/or/xor in where/having */ enum_parsing_place parsing_place; /* where we are parsing expression */ bool with_sum_func; /* sum function indicator */ + /* + PS or SP cond natural joins was alredy processed with permanent + arena and all additional items which we need alredy stored in it + */ + bool conds_processed_with_permanent_arena; ulong table_join_options; uint in_sum_expr; @@ -438,6 +459,8 @@ public: query processing end even if we use temporary table */ bool subquery_in_having; + bool first_execution; /* first execution in SP or PS */ + bool first_cond_optimization; /* SELECT for SELECT command st_select_lex. Used to privent scaning @@ -500,6 +523,12 @@ public: List<String> *ignore_index= 0, LEX_STRING *option= 0); TABLE_LIST* get_table_list(); + bool init_nested_join(THD *thd); + TABLE_LIST *end_nested_join(THD *thd); + TABLE_LIST *nest_last_join(THD *thd); + void save_names_for_using_list(TABLE_LIST *tab1, TABLE_LIST *tab2); + void add_joined_table(TABLE_LIST *table); + TABLE_LIST *convert_right_join(); List<Item>* get_item_list(); List<String>* get_use_index(); List<String>* get_ignore_index(); @@ -514,7 +543,7 @@ public: bool test_limit(); - friend void mysql_init_query(THD *thd); + friend void mysql_init_query(THD *thd, bool lexonly); st_select_lex() {} void make_empty_select() { @@ -552,6 +581,13 @@ typedef struct st_alter_info void reset(){drop_list.empty();alter_list.empty();clear();} } ALTER_INFO; +struct st_sp_chistics +{ + LEX_STRING comment; + enum suid_behaviour suid; + bool detistic; +}; + /* The state of the lex parsing. 
This is saved in the THD struct */ typedef struct st_lex @@ -564,6 +600,7 @@ typedef struct st_lex SELECT_LEX *current_select; /* list of all SELECT_LEX */ SELECT_LEX *all_selects_list; + uchar *buf; /* The beginning of string, used by SPs */ uchar *ptr,*tok_start,*tok_end,*end_of_query; char *length,*dec,*change,*name; char *help_arg; @@ -633,7 +670,24 @@ typedef struct st_lex bool prepared_stmt_code_is_varref; /* Names of user variables holding parameters (in EXECUTE) */ List<LEX_STRING> prepared_stmt_params; - st_lex() {} + sp_head *sphead; + sp_name *spname; + bool sp_lex_in_use; /* Keep track on lex usage in SPs for error handling */ + sp_pcontext *spcont; + HASH spfuns; /* Called functions */ + st_sp_chistics sp_chistics; + + st_lex() + { + bzero((char *)&spfuns, sizeof(spfuns)); + } + + ~st_lex() + { + if (spfuns.array.buffer) + hash_free(&spfuns); + } + inline void uncacheable(uint8 cause) { safe_to_cache_query= 0; diff --git a/sql/sql_list.h b/sql/sql_list.h index 22e9ed37386..6dee38e9192 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -141,6 +141,12 @@ public: last= &first; return tmp->info; } + inline void concat(base_list *list) + { + *last= list->first; + last= list->last; + elements+= list->elements; + } inline list_node* last_node() { return *last; } inline list_node* first_node() { return first;} inline void *head() { return first->info; } @@ -261,6 +267,7 @@ public: } empty(); } + inline void concat(List<T> *list) { base_list::concat(list); } }; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 167fb2daf8b..c7a64ea8699 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -349,9 +349,6 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted, (ulong) (info.records - info.copied), (ulong) thd->cuted_fields); send_ok(thd,info.copied+info.deleted,0L,name); - // on the slave thd->query is never initialized - if (!thd->slave_thread) - mysql_update_log.write(thd,thd->query,thd->query_length); if (!log_delayed) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -405,7 +402,7 @@ read_fixed_length(THD *thd,COPY_INFO &info,TABLE *table,List<Item> &fields, { if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); DBUG_RETURN(1); } if (skip_lines) @@ -504,7 +501,7 @@ read_sep_field(THD *thd,COPY_INFO &info,TABLE *table, { if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); DBUG_RETURN(1); } while ((sql_field=(Item_field*) it++)) diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 0cc25c4fe6e..6870721a085 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -26,6 +26,9 @@ #include "ha_innodb.h" #endif +#include "sp_head.h" +#include "sp.h" + #ifdef HAVE_OPENSSL /* Without SSL the handshake consists of one packet. This packet @@ -44,6 +47,15 @@ #define MIN_HANDSHAKE_SIZE 6 #endif /* HAVE_OPENSSL */ +/* Used in error handling only */ +#define SP_TYPE_STRING(LP) \ + ((LP)->sphead->m_type == TYPE_ENUM_FUNCTION ? "FUNCTION" : "PROCEDURE") +#define SP_COM_STRING(LP) \ + ((LP)->sql_command == SQLCOM_CREATE_SPFUNCTION || \ + (LP)->sql_command == SQLCOM_ALTER_FUNCTION || \ + (LP)->sql_command == SQLCOM_DROP_FUNCTION ? 
\ + "FUNCTION" : "PROCEDURE") + #ifdef SOLARIS extern "C" int gethostname(char *name, int namelen); #endif @@ -283,9 +295,10 @@ int check_user(THD *thd, enum enum_server_command command, thd->user, thd->host_or_ip); DBUG_RETURN(-1); } + /* We have to read very specific packet size */ if (send_old_password_request(thd) || - my_net_read(net) != SCRAMBLE_LENGTH_323 + 1) // We have to read very - { // specific packet size + my_net_read(net) != SCRAMBLE_LENGTH_323 + 1) + { inc_host_errors(&thd->remote.sin_addr); DBUG_RETURN(ER_HANDSHAKE_ERROR); } @@ -614,8 +627,9 @@ static void reset_mqh(THD *thd, LEX_USER *lu, bool get_them= 0) uc->conn_per_hour=0; } } - else // for FLUSH PRIVILEGES and FLUSH USER_RESOURCES + else { + /* for FLUSH PRIVILEGES and FLUSH USER_RESOURCES */ for (uint idx=0;idx < hash_user_connections.records; idx++) { USER_CONN *uc=(struct user_conn *) hash_element(&hash_user_connections, @@ -939,7 +953,7 @@ pthread_handler_decl(handle_one_connection,arg) pthread_detach_this_thread(); #if !defined( __WIN__) && !defined(OS2) // Win32 calls this in pthread_create - // The following calls needs to be done before we call DBUG_ macros + /* The following calls needs to be done before we call DBUG_ macros */ if (!(test_flags & TEST_NO_THREADS) & my_thread_init()) { close_connection(thd, ER_OUT_OF_RESOURCES, 1); @@ -958,7 +972,7 @@ pthread_handler_decl(handle_one_connection,arg) */ DBUG_PRINT("info", ("handle_one_connection called by thread %d\n", thd->thread_id)); - // now that we've called my_thread_init(), it is safe to call DBUG_* + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ #if defined(__WIN__) init_signals(); // IRENA; testing ? @@ -1005,20 +1019,19 @@ pthread_handler_decl(handle_one_connection,arg) { execute_init_command(thd, &sys_init_connect, &LOCK_sys_init_connect); if (thd->query_error) - thd->killed= 1; + thd->killed= THD::KILL_CONNECTION; } thd->proc_info=0; thd->set_time(); thd->init_for_queries(); - while (!net->error && net->vio != 0 && !thd->killed) + while (!net->error && net->vio != 0 && !(thd->killed == THD::KILL_CONNECTION)) { if (do_command(thd)) break; } if (thd->user_connect) decrease_user_connections(thd->user_connect); - free_root(&thd->mem_root,MYF(0)); if (net->error && net->vio != 0 && net->report_error) { if (!thd->killed && thd->variables.log_warnings > 1) @@ -1108,6 +1121,10 @@ extern "C" pthread_handler_decl(handle_bootstrap,arg) thd->query_length=length; thd->query= thd->memdup_w_gap(buff, length+1, thd->db_length+1); thd->query[length] = '\0'; + /* + We don't need to obtain LOCK_thread_count here because in bootstrap + mode we have only one thread. 
+ */ thd->query_id=query_id++; if (mqh_used && thd->user_connect && check_mqh(thd, SQLCOM_END)) { @@ -1229,7 +1246,7 @@ bool do_command(THD *thd) packet=0; old_timeout=net->read_timeout; - // Wait max for 8 hours + /* Wait max for 8 hours */ net->read_timeout=(uint) thd->variables.net_wait_timeout; thd->clear_error(); // Clear error message @@ -1251,6 +1268,9 @@ bool do_command(THD *thd) } else { + if (thd->killed == THD::KILL_QUERY) + thd->killed= THD::NOT_KILLED; + packet=(char*) net->read_pos; command = (enum enum_server_command) (uchar) packet[0]; if (command >= COM_END) @@ -1527,7 +1547,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, packet, (uint) (pend-packet), thd->charset()); table_list.alias= table_list.real_name= conv_name.str; packet= pend+1; - // command not cachable => no gap for data base name + /* command not cachable => no gap for data base name */ if (!(thd->query=fields=thd->memdup(packet,thd->query_length+1))) break; mysql_log.write(thd,command,"%s %s",table_list.real_name,fields); @@ -1574,7 +1594,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { statistic_increment(com_stat[SQLCOM_DROP_DB],&LOCK_status); char *db=thd->strdup(packet), *alias; - // null test to handle EOM + /* null test to handle EOM */ if (!db || !(alias= thd->strdup(db)) || check_db_name(db)) { net_printf(thd,ER_WRONG_DB_NAME, db ? db : "NULL"); @@ -1612,7 +1632,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->server_id = slave_server_id; mysql_binlog_send(thd, thd->strdup(packet + 10), (my_off_t) pos, flags); unregister_slave(thd,1,1); - // fake COM_QUIT -- if we get here, the thread needs to terminate + /* fake COM_QUIT -- if we get here, the thread needs to terminate */ error = TRUE; net->error = 0; break; @@ -1665,8 +1685,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, #endif close_connection(thd, 0, 1); close_thread_tables(thd); // Free before kill - free_root(&thd->mem_root,MYF(0)); - free_root(&thd->transaction.mem_root,MYF(0)); kill_mysql(); error=TRUE; break; @@ -1717,7 +1735,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { statistic_increment(com_stat[SQLCOM_KILL],&LOCK_status); ulong id=(ulong) uint4korr(packet); - kill_one_thread(thd,id); + kill_one_thread(thd,id,false); break; } case COM_SET_OPTION: @@ -1792,6 +1810,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thread_running--; VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory + free_root(&thd->mem_root,MYF(MY_KEEP_PREALLOC)); DBUG_RETURN(error); } @@ -1851,7 +1870,7 @@ bool alloc_query(THD *thd, char *packet, ulong packet_length) ** Execute command saved in thd and current_lex->sql_command ****************************************************************************/ -void +int mysql_execute_command(THD *thd) { int res= 0; @@ -1861,6 +1880,13 @@ mysql_execute_command(THD *thd) SELECT_LEX_UNIT *unit= &lex->unit; DBUG_ENTER("mysql_execute_command"); + if (lex->sql_command != SQLCOM_CREATE_PROCEDURE && + lex->sql_command != SQLCOM_CREATE_SPFUNCTION) + { + if (sp_cache_functions(thd, lex)) + DBUG_RETURN(-1); + } + /* Reset warning count for each query that uses tables A better approach would be to reset this for any commands @@ -1881,7 +1907,7 @@ mysql_execute_command(THD *thd) { /* we warn the slave SQL thread */ my_error(ER_SLAVE_IGNORED_TABLE, MYF(0)); - DBUG_VOID_RETURN; + DBUG_RETURN(0); } #ifndef TO_BE_DELETED /* @@ 
-1899,7 +1925,7 @@ mysql_execute_command(THD *thd) #endif /* !HAVE_REPLICATION */ if (&lex->select_lex != lex->all_selects_list && lex->unit.create_total_list(thd, lex, &tables)) - DBUG_VOID_RETURN; + DBUG_RETURN(0); /* When option readonly is set deny operations which change tables. @@ -1910,7 +1936,7 @@ mysql_execute_command(THD *thd) (uc_update_queries[lex->sql_command] > 0)) { net_printf(thd, ER_OPTION_PREVENTS_STATEMENT, "--read-only"); - DBUG_VOID_RETURN; + DBUG_RETURN(-1); } statistic_increment(com_stat[lex->sql_command],&LOCK_status); @@ -1940,18 +1966,6 @@ mysql_execute_command(THD *thd) res=0; break; // Error message is given } - /* - In case of single SELECT unit->global_parameters points on first SELECT - TODO: move counters to SELECT_LEX - */ - unit->offset_limit_cnt= (ha_rows) unit->global_parameters->offset_limit; - unit->select_limit_cnt= (ha_rows) (unit->global_parameters->select_limit+ - unit->global_parameters->offset_limit); - if (unit->select_limit_cnt < - (ha_rows) unit->global_parameters->select_limit) - unit->select_limit_cnt= HA_POS_ERROR; // no limit - if (unit->select_limit_cnt == HA_POS_ERROR && !select_lex->next_select()) - select_lex->options&= ~OPTION_FOUND_ROWS; if (!(res=open_and_lock_tables(thd,tables))) { @@ -1960,7 +1974,7 @@ mysql_execute_command(THD *thd) if (!(result= new select_send())) { send_error(thd, ER_OUT_OF_RESOURCES); - DBUG_VOID_RETURN; + goto error; } else thd->send_explain_fields(result); @@ -2111,7 +2125,7 @@ mysql_execute_command(THD *thd) { if (check_global_access(thd, SUPER_ACL)) goto error; - // PURGE MASTER LOGS TO 'file' + /* PURGE MASTER LOGS TO 'file' */ res = purge_master_logs(thd, lex->to_log); break; } @@ -2119,7 +2133,7 @@ mysql_execute_command(THD *thd) { if (check_global_access(thd, SUPER_ACL)) goto error; - // PURGE MASTER LOGS BEFORE 'data' + /* PURGE MASTER LOGS BEFORE 'data' */ res = purge_master_logs_before_date(thd, lex->purge_time); break; } @@ -2339,11 +2353,7 @@ mysql_execute_command(THD *thd) if (tables && check_table_access(thd, SELECT_ACL, tables,0)) goto create_error; // Error message is given select_lex->options|= SELECT_NO_UNLOCK; - unit->offset_limit_cnt= select_lex->offset_limit; - unit->select_limit_cnt= select_lex->select_limit+ - select_lex->offset_limit; - if (unit->select_limit_cnt < select_lex->select_limit) - unit->select_limit_cnt= HA_POS_ERROR; // No limit + unit->set_limit(select_lex, select_lex); if (!(res=open_and_lock_tables(thd,tables))) { @@ -2363,13 +2373,14 @@ mysql_execute_command(THD *thd) res=handle_select(thd, lex, result); select_lex->resolve_mode= SELECT_LEX::NOMATTER_MODE; } - //reset for PS + /* reset for PS */ lex->create_list.empty(); lex->key_list.empty(); } } - else // regular create + else { + /* regular create */ if (lex->name) res= mysql_create_like_table(thd, create_table, &lex->create_info, (Table_ident *)lex->name); @@ -2384,15 +2395,15 @@ mysql_execute_command(THD *thd) send_ok(thd); } - // put tables back for PS rexecuting + /* put tables back for PS rexecuting */ tables= lex->link_first_table_back(tables, create_table, create_table_local); break; create_error: - res= 1; //error reported + res= 1; //error reported unsent_create_error: - // put tables back for PS rexecuting + /* put tables back for PS rexecuting */ tables= lex->link_first_table_back(tables, create_table, create_table_local); break; @@ -2432,7 +2443,7 @@ unsent_create_error: if (thd->locked_tables || thd->active_transaction()) { send_error(thd,ER_LOCK_OR_ACTIVE_TRANSACTION); - break; + goto error; } { 
pthread_mutex_lock(&LOCK_active_mi); @@ -2445,7 +2456,7 @@ unsent_create_error: case SQLCOM_ALTER_TABLE: #if defined(DONT_ALLOW_SHOW_COMMANDS) send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - break; + goto error; #else { ulong priv=0; @@ -2539,7 +2550,7 @@ unsent_create_error: case SQLCOM_SHOW_BINLOGS: #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { if (check_global_access(thd, SUPER_ACL)) @@ -2552,7 +2563,7 @@ unsent_create_error: case SQLCOM_SHOW_CREATE: #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { if (check_db_used(thd, tables) || @@ -2581,7 +2592,6 @@ unsent_create_error: /* ! we write after unlocking the table */ if (!res && !lex->no_write_to_binlog) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -2609,7 +2619,6 @@ unsent_create_error: /* ! we write after unlocking the table */ if (!res && !lex->no_write_to_binlog) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -2631,7 +2640,6 @@ unsent_create_error: /* ! we write after unlocking the table */ if (!res && !lex->no_write_to_binlog) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -2694,10 +2702,7 @@ unsent_create_error: select_lex->options|= SELECT_NO_UNLOCK; select_result *result; - unit->offset_limit_cnt= select_lex->offset_limit; - unit->select_limit_cnt= select_lex->select_limit+select_lex->offset_limit; - if (unit->select_limit_cnt < select_lex->select_limit) - unit->select_limit_cnt= HA_POS_ERROR; // No limit + unit->set_limit(select_lex, select_lex); if (find_real_table_in_list(tables->next, tables->db, tables->real_name)) { @@ -2863,7 +2868,7 @@ unsent_create_error: case SQLCOM_SHOW_DATABASES: #if defined(DONT_ALLOW_SHOW_COMMANDS) send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else if ((specialflag & SPECIAL_SKIP_SHOW_DB) && check_global_access(thd, SHOW_DB_ACL)) @@ -2899,7 +2904,7 @@ unsent_create_error: case SQLCOM_SHOW_LOGS: #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { if (grant_option && check_access(thd, FILE_ACL, any_db,0,0,0)) @@ -2912,13 +2917,13 @@ unsent_create_error: /* FALL THROUGH */ #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { char *db=select_lex->db ? 
select_lex->db : thd->db; if (!db) { - send_error(thd,ER_NO_DB_ERROR); /* purecov: inspected */ + send_error(thd,ER_NO_DB_ERROR); /* purecov: inspected */ goto error; /* purecov: inspected */ } remove_escape(db); // Fix escaped '_' @@ -2959,7 +2964,7 @@ unsent_create_error: case SQLCOM_SHOW_FIELDS: #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { char *db=tables->db; @@ -2979,7 +2984,7 @@ unsent_create_error: case SQLCOM_SHOW_KEYS: #ifdef DONT_ALLOW_SHOW_COMMANDS send_error(thd,ER_NOT_ALLOWED_COMMAND); /* purecov: inspected */ - DBUG_VOID_RETURN; + goto error; #else { char *db=tables->db; @@ -3199,26 +3204,24 @@ purposes internal to the MySQL server", MYF(0)); res=mysqld_show_create_db(thd,lex->name,&lex->create_info); break; } - case SQLCOM_CREATE_FUNCTION: + case SQLCOM_CREATE_FUNCTION: // UDF function + { + sp_head *sph; if (check_access(thd,INSERT_ACL,"mysql",0,1,0)) break; #ifdef HAVE_DLOPEN + if ((sph= sp_find_function(thd, lex->spname))) + { + net_printf(thd, ER_UDF_EXISTS, lex->spname->m_name.str); + goto error; + } if (!(res = mysql_create_function(thd,&lex->udf))) send_ok(thd); #else res= -1; #endif break; - case SQLCOM_DROP_FUNCTION: - if (check_access(thd,DELETE_ACL,"mysql",0,1,0)) - break; -#ifdef HAVE_DLOPEN - if (!(res = mysql_drop_function(thd,&lex->udf.name))) - send_ok(thd); -#else - res= -1; -#endif - break; + } #ifndef NO_EMBEDDED_ACCESS_CHECKS case SQLCOM_DROP_USER: { @@ -3226,7 +3229,6 @@ purposes internal to the MySQL server", MYF(0)); break; if (!(res= mysql_drop_user(thd, lex->users_list))) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -3242,7 +3244,6 @@ purposes internal to the MySQL server", MYF(0)); break; if (!(res = mysql_revoke_all(thd, lex->users_list))) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -3280,7 +3281,7 @@ purposes internal to the MySQL server", MYF(0)); { if (check_access(thd, UPDATE_ACL, "mysql",0,1,0)) goto error; - break; // We are allowed to do changes + break; // We are allowed to do changes } } } @@ -3306,15 +3307,12 @@ purposes internal to the MySQL server", MYF(0)); goto error; if (!(res = mysql_table_grant(thd,tables,lex->users_list, lex->columns, lex->grant, - lex->sql_command == SQLCOM_REVOKE))) + lex->sql_command == SQLCOM_REVOKE)) && + mysql_bin_log.is_open()) { - mysql_update_log.write(thd, thd->query, thd->query_length); - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0); - mysql_bin_log.write(&qinfo); - } + thd->clear_error(); + Query_log_event qinfo(thd, thd->query, thd->query_length, 0); + mysql_bin_log.write(&qinfo); } } else @@ -3329,7 +3327,6 @@ purposes internal to the MySQL server", MYF(0)); lex->sql_command == SQLCOM_REVOKE); if (!res) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); @@ -3373,7 +3370,6 @@ purposes internal to the MySQL server", MYF(0)); */ if (!lex->no_write_to_binlog && write_to_binlog) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -3385,7 +3381,7 @@ purposes internal to the MySQL server", MYF(0)); break; } case SQLCOM_KILL: - 
kill_one_thread(thd,lex->thread_id); + kill_one_thread(thd,lex->thread_id, lex->type & ONLY_KILL_QUERY); break; #ifndef NO_EMBEDDED_ACCESS_CHECKS case SQLCOM_SHOW_GRANTS: @@ -3497,40 +3493,336 @@ purposes internal to the MySQL server", MYF(0)); else res= -1; break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + { + if (!lex->sphead) + { + res= -1; // Shouldn't happen + break; + } + uint namelen; + char *name= lex->sphead->name(&namelen); +#ifdef HAVE_DLOPEN + if (lex->sphead->m_type == TYPE_ENUM_FUNCTION) + { + udf_func *udf = find_udf(name, namelen); + + if (udf) + { + net_printf(thd, ER_UDF_EXISTS, name); + delete lex->sphead; + lex->sphead=0; + goto error; + } + } +#endif + if (lex->sphead->m_type == TYPE_ENUM_FUNCTION && + !lex->sphead->m_has_return) + { + net_printf(thd, ER_SP_NORETURN, name); + delete lex->sphead; + lex->sphead=0; + goto error; + } + + res= lex->sphead->create(thd); + switch (res) { + case SP_OK: + send_ok(thd); + delete lex->sphead; + lex->sphead= 0; + break; + case SP_WRITE_ROW_FAILED: + net_printf(thd, ER_SP_ALREADY_EXISTS, SP_TYPE_STRING(lex), name); + delete lex->sphead; + lex->sphead= 0; + goto error; + case SP_NO_DB_ERROR: + net_printf(thd, ER_BAD_DB_ERROR, lex->sphead->m_db); + delete lex->sphead; + lex->sphead= 0; + goto error; + default: + net_printf(thd, ER_SP_STORE_FAILED, SP_TYPE_STRING(lex), name); + delete lex->sphead; + lex->sphead= 0; + goto error; + } + break; + } + case SQLCOM_CALL: + { + sp_head *sp; + + if (!(sp= sp_find_procedure(thd, lex->spname))) + { + net_printf(thd, ER_SP_DOES_NOT_EXIST, "PROCEDURE", + lex->spname->m_qname.str); + goto error; + } + else + { +#ifndef NO_EMBEDDED_ACCESS_CHECKS + st_sp_security_context save_ctx; +#endif + ha_rows select_limit; + uint smrx; + LINT_INIT(smrx); + + /* In case the arguments are subselects... */ + if (tables && ((res= check_table_access(thd, SELECT_ACL, tables, 0)) || + (res= open_and_lock_tables(thd, tables)))) + { + break; + } + fix_tables_pointers(lex->all_selects_list); + +#ifndef EMBEDDED_LIBRARY + /* + When executing substatements, they're assumed to send_error when + it happens, but not to send_ok. + */ + my_bool nsok= thd->net.no_send_ok; + thd->net.no_send_ok= TRUE; +#endif + if (sp->m_multi_results) + { + if (! (thd->client_capabilities & CLIENT_MULTI_RESULTS)) + { + send_error(thd, ER_SP_BADSELECT); +#ifndef EMBEDDED_LIBRARY + thd->net.no_send_ok= nsok; +#endif + goto error; + } + smrx= thd->server_status & SERVER_MORE_RESULTS_EXISTS; + thd->server_status |= SERVER_MORE_RESULTS_EXISTS; + } + +#ifndef NO_EMBEDDED_ACCESS_CHECKS + sp_change_security_context(thd, sp, &save_ctx); +#endif + select_limit= thd->variables.select_limit; + thd->variables.select_limit= HA_POS_ERROR; + + res= sp->execute_procedure(thd, &lex->value_list); + + thd->variables.select_limit= select_limit; +#ifndef NO_EMBEDDED_ACCESS_CHECKS + sp_restore_security_context(thd, sp, &save_ctx); +#endif + +#ifndef EMBEDDED_LIBRARY + thd->net.no_send_ok= nsok; +#endif + if (sp->m_multi_results) + { + if (! smrx) + thd->server_status &= ~SERVER_MORE_RESULTS_EXISTS; + } + + if (res == 0) + send_ok(thd, (thd->row_count_func < 0 ? 
0 : thd->row_count_func)); + else + goto error; // Substatement should already have sent error + } + break; + } + case SQLCOM_ALTER_PROCEDURE: + case SQLCOM_ALTER_FUNCTION: + { + res= -1; + uint newname_len= 0; + if (lex->name) + newname_len= strlen(lex->name); + if (newname_len > NAME_LEN) + { + net_printf(thd, ER_TOO_LONG_IDENT, lex->name); + goto error; + } + if (lex->sql_command == SQLCOM_ALTER_PROCEDURE) + res= sp_update_procedure(thd, lex->spname, + lex->name, newname_len, &lex->sp_chistics); + else + res= sp_update_function(thd, lex->spname, + lex->name, newname_len, &lex->sp_chistics); + switch (res) + { + case SP_OK: + send_ok(thd); + break; + case SP_KEY_NOT_FOUND: + net_printf(thd, ER_SP_DOES_NOT_EXIST, SP_COM_STRING(lex), + lex->spname->m_qname.str); + goto error; + default: + net_printf(thd, ER_SP_CANT_ALTER, SP_COM_STRING(lex), + lex->spname->m_qname.str); + goto error; + } + break; + } + case SQLCOM_DROP_PROCEDURE: + case SQLCOM_DROP_FUNCTION: + { + if (lex->sql_command == SQLCOM_DROP_PROCEDURE) + res= sp_drop_procedure(thd, lex->spname); + else + { + res= sp_drop_function(thd, lex->spname); +#ifdef HAVE_DLOPEN + if (res == SP_KEY_NOT_FOUND) + { + udf_func *udf = find_udf(lex->spname->m_name.str, + lex->spname->m_name.length); + if (udf) + { + if (check_access(thd, DELETE_ACL, "mysql", 0, 1, 0)) + goto error; + if (!(res = mysql_drop_function(thd,&lex->spname->m_name))) + { + send_ok(thd); + break; + } + } + } +#endif + } + switch (res) + { + case SP_OK: + send_ok(thd); + break; + case SP_KEY_NOT_FOUND: + if (lex->drop_if_exists) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), + SP_COM_STRING(lex), lex->spname->m_name.str); + res= 0; + send_ok(thd); + break; + } + net_printf(thd, ER_SP_DOES_NOT_EXIST, SP_COM_STRING(lex), + lex->spname->m_qname.str); + goto error; + default: + net_printf(thd, ER_SP_DROP_FAILED, SP_COM_STRING(lex), + lex->spname->m_qname.str); + goto error; + } + break; + } + case SQLCOM_SHOW_CREATE_PROC: + { + res= -1; + if (lex->spname->m_name.length > NAME_LEN) + { + net_printf(thd, ER_TOO_LONG_IDENT, lex->spname->m_name.str); + goto error; + } + res= sp_show_create_procedure(thd, lex->spname); + if (res != SP_OK) + { /* We don't distinguish between errors for now */ + net_printf(thd, ER_SP_DOES_NOT_EXIST, + SP_COM_STRING(lex), lex->spname->m_name.str); + res= 0; + goto error; + } + break; + } + case SQLCOM_SHOW_CREATE_FUNC: + { + if (lex->spname->m_name.length > NAME_LEN) + { + net_printf(thd, ER_TOO_LONG_IDENT, lex->spname->m_name.str); + goto error; + } + res= sp_show_create_function(thd, lex->spname); + if (res != SP_OK) + { /* We don't distinguish between errors for now */ + net_printf(thd, ER_SP_DOES_NOT_EXIST, + SP_COM_STRING(lex), lex->spname->m_name.str); + res= 0; + goto error; + } + res= 0; + break; + } + case SQLCOM_SHOW_STATUS_PROC: + { + res= sp_show_status_procedure(thd, (lex->wild ? + lex->wild->ptr() : NullS)); + break; + } + case SQLCOM_SHOW_STATUS_FUNC: + { + res= sp_show_status_function(thd, (lex->wild ? + lex->wild->ptr() : NullS)); + break; + } default: /* Impossible */ send_ok(thd); break; } - thd->proc_info="query end"; // QQ + thd->proc_info="query end"; if (thd->one_shot_set) + { + /* + If this is a SET, do nothing. This is to allow mysqlbinlog to print + many SET commands (in this case we want the charset temp setting to + live until the real query). This is also needed so that SET + CHARACTER_SET_CLIENT... does not cancel itself immediately. 
+ */ + if (lex->sql_command != SQLCOM_SET_OPTION) { - /* - If this is a SET, do nothing. This is to allow mysqlbinlog to print - many SET commands (in this case we want the charset temp setting to - live until the real query). This is also needed so that SET - CHARACTER_SET_CLIENT... does not cancel itself immediately. - */ - if (lex->sql_command != SQLCOM_SET_OPTION) - { - thd->variables.character_set_client= - global_system_variables.character_set_client; - thd->variables.collation_connection= - global_system_variables.collation_connection; - thd->variables.collation_database= - global_system_variables.collation_database; - thd->variables.collation_server= - global_system_variables.collation_server; - thd->update_charset(); - thd->variables.time_zone= - global_system_variables.time_zone; - thd->one_shot_set= 0; - } + thd->variables.character_set_client= + global_system_variables.character_set_client; + thd->variables.collation_connection= + global_system_variables.collation_connection; + thd->variables.collation_database= + global_system_variables.collation_database; + thd->variables.collation_server= + global_system_variables.collation_server; + thd->update_charset(); + thd->variables.time_zone= + global_system_variables.time_zone; + thd->one_shot_set= 0; } + } + + /* + The return value for ROW_COUNT() is "implementation dependent" if + the statement is not DELETE, INSERT or UPDATE (or a CALL executing + such a statement), but -1 is what JDBC and ODBC wants. + */ + switch (lex->sql_command) { + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_REPLACE: + case SQLCOM_INSERT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_CALL: + break; + default: + thd->row_count_func= -1; + } + + /* + We end up here if res == 0 and send_ok() has been done, + or res != 0 and no send_error() has yet been done. + */ if (res < 0) - send_error(thd,thd->killed ? ER_SERVER_SHUTDOWN : 0); + send_error(thd,thd->killed_errno()); + DBUG_RETURN(res); error: - DBUG_VOID_RETURN; + /* We end up here if send_error() has already been done. 
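The switch from "thd->killed ? ER_SERVER_SHUTDOWN : 0" to thd->killed_errno(), together with the THD::KILL_QUERY / THD::KILL_CONNECTION values passed to awake() elsewhere in this patch, implies that the kill state maps directly onto an error number. A minimal standalone sketch of such a mapping; the numeric codes and the names killed_state_ex / killed_errno_ex are assumptions made purely for illustration, not the server's actual header:

#include <cstdio>

/* Error numbers treated as opaque constants; the values are illustrative. */
static const int ER_SERVER_SHUTDOWN_NO=   1053;
static const int ER_QUERY_INTERRUPTED_NO= 1317;

enum killed_state_ex
{
  NOT_KILLED=      0,
  KILL_CONNECTION= ER_SERVER_SHUTDOWN_NO,   // the whole connection must go away
  KILL_QUERY=      ER_QUERY_INTERRUPTED_NO  // only the running statement dies
};

static int killed_errno_ex(killed_state_ex killed)
{
  return (int) killed;                      // 0 means there is nothing to report
}

int main()
{
  printf("%d %d\n", killed_errno_ex(KILL_QUERY), killed_errno_ex(NOT_KILLED));
  return 0;
}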
*/ + DBUG_RETURN(-1); } @@ -3648,7 +3940,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv, thd->priv_user, db, test(want_access & GRANT_ACL)); else db_access=thd->db_access; - // Remove SHOW attribute and access rights we already have + /* Remove SHOW attribute and access rights we already have */ want_access &= ~(thd->master_access | EXTRA_ACL); db_access= ((*save_priv=(db_access | thd->master_access)) & want_access); @@ -3850,7 +4142,7 @@ bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, ulong *yystacksize) ****************************************************************************/ void -mysql_init_query(THD *thd) +mysql_init_query(THD *thd, bool lexonly) { DBUG_ENTER("mysql_init_query"); LEX *lex= thd->lex; @@ -3870,24 +4162,29 @@ mysql_init_query(THD *thd) lex->select_lex.link_next= lex->select_lex.slave= lex->select_lex.next= 0; lex->select_lex.link_prev= (st_select_lex_node**)&(lex->all_selects_list); lex->select_lex.options=0; + lex->select_lex.init_order(); + lex->select_lex.group_list.empty(); lex->describe= 0; lex->derived_tables= FALSE; lex->lock_option= TL_READ; lex->found_colon= 0; lex->safe_to_cache_query= 1; - thd->select_number= lex->select_lex.select_number= 1; - thd->free_list= 0; - thd->total_warn_count=0; // Warnings for this query - thd->last_insert_id_used= thd->query_start_used= thd->insert_id_used=0; - thd->sent_row_count= thd->examined_row_count= 0; - thd->is_fatal_error= thd->rand_used= thd->time_zone_used= 0; - thd->server_status&= ~ (SERVER_MORE_RESULTS_EXISTS | - SERVER_QUERY_NO_INDEX_USED | - SERVER_QUERY_NO_GOOD_INDEX_USED); - thd->tmp_table_used= 0; - if (opt_bin_log) - reset_dynamic(&thd->user_var_events); - thd->clear_error(); + if (! lexonly) + { + thd->select_number= lex->select_lex.select_number= 1; + thd->free_list= 0; + thd->total_warn_count=0; // Warnings for this query + thd->last_insert_id_used= thd->query_start_used= thd->insert_id_used=0; + thd->sent_row_count= thd->examined_row_count= 0; + thd->is_fatal_error= thd->rand_used= thd->time_zone_used= 0; + thd->server_status&= ~ (SERVER_MORE_RESULTS_EXISTS | + SERVER_QUERY_NO_INDEX_USED | + SERVER_QUERY_NO_GOOD_INDEX_USED); + thd->tmp_table_used= 0; + if (opt_bin_log) + reset_dynamic(&thd->user_var_events); + thd->clear_error(); + } DBUG_VOID_RETURN; } @@ -3930,7 +4227,7 @@ mysql_new_select(LEX *lex, bool move_down) unit->link_prev= 0; unit->return_to= lex->current_select; select_lex->include_down(unit); - // TODO: assign resolve_mode for fake subquery after merging with new tree + /* TODO: assign resolve_mode for fake subquery after merging with new tree */ } else { @@ -4028,19 +4325,38 @@ void mysql_parse(THD *thd, char *inBuf, uint length) #endif { if (thd->net.report_error) + { send_error(thd, 0, NullS); + if (thd->lex->sphead) + { + if (lex != thd->lex) + thd->lex->sphead->restore_lex(thd); + delete thd->lex->sphead; + thd->lex->sphead= NULL; + } + } else { mysql_execute_command(thd); query_cache_end_of_result(thd); } } + lex->unit.cleanup(); } else { DBUG_PRINT("info",("Command aborted. 
Fatal_error: %d", thd->is_fatal_error)); query_cache_abort(&thd->net); + lex->unit.cleanup(); + if (thd->lex->sphead) + { + /* Clean up after failed stored procedure/function */ + if (lex != thd->lex) + thd->lex->sphead->restore_lex(thd); + delete thd->lex->sphead; + thd->lex->sphead= NULL; + } } thd->proc_info="freeing items"; free_items(thd->free_list); /* Free strings used by items */ @@ -4641,6 +4957,237 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, /* + Initialize a new table list for a nested join + + SYNOPSIS + init_table_list() + thd current thread + + DESCRIPTION + The function initializes a structure of the TABLE_LIST type + for a nested join. It sets up its nested join list as empty. + The created structure is added to the front of the current + join list in the st_select_lex object. Then the function + changes the current nest level for joins to refer to the newly + created empty list after having saved the info on the old level + in the initialized structure. + + RETURN VALUE + 0, if success + 1, otherwise +*/ + +bool st_select_lex::init_nested_join(THD *thd) +{ + TABLE_LIST *ptr; + NESTED_JOIN *nested_join; + DBUG_ENTER("init_nested_join"); + + if (!(ptr = (TABLE_LIST *) thd->calloc(sizeof(TABLE_LIST))) || + !(nested_join= ptr->nested_join= + (NESTED_JOIN *) thd->calloc(sizeof(NESTED_JOIN)))) + DBUG_RETURN(1); + join_list->push_front(ptr); + ptr->embedding= embedding; + ptr->join_list= join_list; + embedding= ptr; + join_list= &nested_join->join_list; + join_list->empty(); + DBUG_RETURN(0); +} + + +/* + End a nested join table list + + SYNOPSIS + end_nested_join() + thd current thread + + DESCRIPTION + The function returns to the previous join nest level. + If the current level contains only one member, the function + moves it one level up, eliminating the nest. + + RETURN VALUE + Pointer to TABLE_LIST element added to the total table list, if success + 0, otherwise +*/ + +TABLE_LIST *st_select_lex::end_nested_join(THD *thd) +{ + TABLE_LIST *ptr; + DBUG_ENTER("end_nested_join"); + ptr= embedding; + join_list= ptr->join_list; + embedding= ptr->embedding; + NESTED_JOIN *nested_join= ptr->nested_join; + if (nested_join->join_list.elements == 1) + { + TABLE_LIST *embedded= nested_join->join_list.head(); + join_list->pop(); + embedded->join_list= join_list; + embedded->embedding= embedding; + join_list->push_front(embedded); + ptr= embedded; + } + DBUG_RETURN(ptr); +} + + +/* + Nest last join operation + + SYNOPSIS + nest_last_join() + thd current thread + + DESCRIPTION + The function nest last join operation as if it was enclosed in braces. 
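init_nested_join(), end_nested_join() and nest_last_join() all manipulate the same front-ordered join list. Below is a standalone model of the manipulation nest_last_join() performs, using a hypothetical Node type in place of TABLE_LIST/NESTED_JOIN so the pop/push choreography can be run in isolation:

#include <list>
#include <string>
#include <cstdio>

struct Node
{
  std::string name;
  std::list<Node*> *children;            // non-NULL only for a nest
  Node(const std::string &n) : name(n), children(NULL) {}
};

static Node *nest_last_two(std::list<Node*> &join_list)
{
  Node *nest= new Node("(nest)");
  nest->children= new std::list<Node*>();
  for (int i= 0; i < 2; i++)             // move the two most recent operands
  {
    Node *tab= join_list.front();
    join_list.pop_front();
    nest->children->push_back(tab);
  }
  join_list.push_front(nest);            // the nest takes their place up front
  return nest;
}

int main()
{
  std::list<Node*> join_list;
  // join lists are kept in reverse order: the table added last sits at the front
  join_list.push_front(new Node("t1"));
  join_list.push_front(new Node("t2"));
  join_list.push_front(new Node("t3"));
  nest_last_two(join_list);              // t3 and t2 end up nested, next to t1
  printf("%zu top-level elements\n", join_list.size());   // prints 2
  return 0;
}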
+ + RETURN VALUE + Pointer to TABLE_LIST element created for the new nested join, if success + 0, otherwise +*/ + +TABLE_LIST *st_select_lex::nest_last_join(THD *thd) +{ + TABLE_LIST *ptr; + NESTED_JOIN *nested_join; + DBUG_ENTER("nest_last_join"); + + if (!(ptr = (TABLE_LIST *) thd->calloc(sizeof(TABLE_LIST))) || + !(nested_join= ptr->nested_join= + (NESTED_JOIN *) thd->calloc(sizeof(NESTED_JOIN)))) + DBUG_RETURN(0); + ptr->embedding= embedding; + ptr->join_list= join_list; + List<TABLE_LIST> *embedded_list= &nested_join->join_list; + embedded_list->empty(); + for (int i=0; i < 2; i++) + { + TABLE_LIST *table= join_list->pop(); + table->join_list= embedded_list; + table->embedding= ptr; + embedded_list->push_back(table); + } + join_list->push_front(ptr); + nested_join->used_tables= nested_join->not_null_tables= (table_map) 0; + DBUG_RETURN(ptr); +} + + +/* + Save names for a join with using clase + + SYNOPSIS + save_names_for_using_list + tab1 left table in join + tab2 right table in join + + DESCRIPTION + The function saves the full names of the tables in st_select_lex + to be able to build later an on expression to replace the using clause. + + RETURN VALUE + None +*/ + +void st_select_lex::save_names_for_using_list(TABLE_LIST *tab1, + TABLE_LIST *tab2) +{ + while (tab1->nested_join) + { + tab1= tab1->nested_join->join_list.head(); + } + db1= tab1->db; + table1= tab1->alias; + while (tab2->nested_join) + { + TABLE_LIST *next; + List_iterator_fast<TABLE_LIST> it(tab2->nested_join->join_list); + tab2= it++; + while ((next= it++)) + tab2= next; + } + db2= tab2->db; + table2= tab2->alias; +} + + +/* + Add a table to the current join list + + SYNOPSIS + add_joined_table() + table the table to add + + DESCRIPTION + The function puts a table in front of the current join list + of st_select_lex object. + Thus, joined tables are put into this list in the reverse order + (the most outer join operation follows first). + + RETURN VALUE + None +*/ + +void st_select_lex::add_joined_table(TABLE_LIST *table) +{ + DBUG_ENTER("add_joined_table"); + join_list->push_front(table); + table->join_list= join_list; + table->embedding= embedding; + DBUG_VOID_RETURN; +} + + +/* + Convert a right join into equivalent left join + + SYNOPSIS + convert_right_join() + thd current thread + + DESCRIPTION + The function takes the current join list t[0],t[1] ... and + effectively converts it into the list t[1],t[0] ... + Although the outer_join flag for the new nested table contains + JOIN_TYPE_RIGHT, it will be handled as the inner table of a left join + operation. 
+ + EXAMPLES + SELECT * FROM t1 RIGHT JOIN t2 ON on_expr => + SELECT * FROM t2 LEFT JOIN t1 ON on_expr + + SELECT * FROM t1,t2 RIGHT JOIN t3 ON on_expr => + SELECT * FROM t1,t3 LEFT JOIN t2 ON on_expr + + SELECT * FROM t1,t2 RIGHT JOIN (t3,t4) ON on_expr => + SELECT * FROM t1,(t3,t4) LEFT JOIN t2 ON on_expr + + SELECT * FROM t1 LEFT JOIN t2 ON on_expr1 RIGHT JOIN t3 ON on_expr2 => + SELECT * FROM t3 LEFT JOIN (t1 LEFT JOIN t2 ON on_expr2) ON on_expr1 + + RETURN + Pointer to the table representing the inner table, if success + 0, otherwise +*/ + +TABLE_LIST *st_select_lex::convert_right_join() +{ + TABLE_LIST *tab2= join_list->pop(); + TABLE_LIST *tab1= join_list->pop(); + DBUG_ENTER("convert_right_join"); + + join_list->push_front(tab2); + join_list->push_front(tab1); + tab1->outer_join|= JOIN_TYPE_RIGHT; + + DBUG_RETURN(tab1); +} + +/* Set lock for all tables in current select level SYNOPSIS: @@ -4679,7 +5226,7 @@ void add_join_on(TABLE_LIST *b,Item *expr) b->on_expr=expr; else { - // This only happens if you have both a right and left join + /* This only happens if you have both a right and left join */ b->on_expr=new Item_cond_and(b->on_expr,expr); } b->on_expr->top_level_item(); @@ -4759,7 +5306,6 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, */ tmp_write_to_binlog= 0; mysql_log.new_file(1); - mysql_update_log.new_file(1); mysql_bin_log.new_file(1); mysql_slow_log.new_file(1); #ifdef HAVE_REPLICATION @@ -4782,7 +5328,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, if (options & REFRESH_QUERY_CACHE_FREE) { query_cache.pack(); // FLUSH QUERY CACHE - options &= ~REFRESH_QUERY_CACHE; //don't flush all cache, just free memory + options &= ~REFRESH_QUERY_CACHE; // Don't flush cache, just free memory } if (options & (REFRESH_TABLES | REFRESH_QUERY_CACHE)) { @@ -4857,7 +5403,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, This is written such that we have a short lock on LOCK_thread_count */ -void kill_one_thread(THD *thd, ulong id) +void kill_one_thread(THD *thd, ulong id, bool only_kill_query) { THD *tmp; uint error=ER_NO_SUCH_THREAD; @@ -4877,7 +5423,7 @@ void kill_one_thread(THD *thd, ulong id) if ((thd->master_access & SUPER_ACL) || !strcmp(thd->user,tmp->user)) { - tmp->awake(1 /*prepare to die*/); + tmp->awake(only_kill_query ? THD::KILL_QUERY : THD::KILL_CONNECTION); error=0; } else diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 91df364e531..d4f69e5dd4f 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -71,6 +71,7 @@ Long data handling: #include "sql_acl.h" #include "sql_select.h" // for JOIN #include <m_ctype.h> // for isspace() +#include "sp_head.h" #ifdef EMBEDDED_LIBRARY /* include MYSQL_BIND headers */ #include <mysql.h> @@ -1492,7 +1493,7 @@ static int send_prepare_results(Prepared_statement *stmt, bool text_protocol) DBUG_RETURN(text_protocol? 0 : send_prep_stmt(stmt, 0)); error: if (res < 0) - send_error(thd, thd->killed ? 
ER_SERVER_SHUTDOWN : 0); + send_error(thd,thd->killed_errno()); DBUG_RETURN(1); } @@ -1612,7 +1613,7 @@ int mysql_stmt_prepare(THD *thd, char *packet, uint packet_length, mysql_log.write(thd, COM_PREPARE, "%s", packet); - thd->current_statement= stmt; + thd->current_arena= stmt; lex= lex_start(thd, (uchar *) thd->query, thd->query_length); mysql_init_query(thd); lex->safe_to_cache_query= 0; @@ -1624,12 +1625,19 @@ int mysql_stmt_prepare(THD *thd, char *packet, uint packet_length, /* restore to WAIT_PRIOR: QUERY_PRIOR is set inside alloc_query */ if (!(specialflag & SPECIAL_NO_PRIOR)) my_pthread_setprio(pthread_self(),WAIT_PRIOR); + if (error && thd->lex->sphead) + { + if (lex != thd->lex) + thd->lex->sphead->restore_lex(thd); + delete thd->lex->sphead; + thd->lex->sphead= NULL; + } lex_end(lex); stmt->set_statement(thd); stmt->set_item_arena(thd); thd->set_statement(&thd->stmt_backup); thd->set_item_arena(&thd->stmt_backup); - thd->current_statement= 0; + thd->current_arena= 0; if (error) { @@ -1639,47 +1647,39 @@ int mysql_stmt_prepare(THD *thd, char *packet, uint packet_length, /* error is sent inside yyparse/send_prepare_results */ } else - { - stmt->setup_set_params(); - SELECT_LEX *sl= stmt->lex->all_selects_list; - /* - Save WHERE clause pointers, because they may be changed during query - optimisation. - */ - for (; sl; sl= sl->next_select_in_list()) - { - sl->prep_where= sl->where; - } - - } + stmt->setup_set_params(); DBUG_RETURN(!stmt); + } /* Reinit statement before execution */ -static void reset_stmt_for_execute(Prepared_statement *stmt) +void reset_stmt_for_execute(THD *thd, LEX *lex) { - THD *thd= stmt->thd; - SELECT_LEX *sl= stmt->lex->all_selects_list; + SELECT_LEX *sl= lex->all_selects_list; for (; sl; sl= sl->next_select_in_list()) { - /* remove option which was put by mysql_explain_union() */ - sl->options&= ~SELECT_DESCRIBE; - /* - Copy WHERE clause pointers to avoid damaging they by optimisation - */ - if (sl->prep_where) - sl->where= sl->prep_where->copy_andor_structure(thd); - DBUG_ASSERT(sl->join == 0); - ORDER *order; - /* Fix GROUP list */ - for (order= (ORDER *)sl->group_list.first; order; order= order->next) - order->item= &order->item_ptr; - /* Fix ORDER list */ - for (order= (ORDER *)sl->order_list.first; order; order= order->next) - order->item= &order->item_ptr; + if (!sl->first_execution) + { + /* remove option which was put by mysql_explain_union() */ + sl->options&= ~SELECT_DESCRIBE; + + /* + Copy WHERE clause pointers to avoid damaging they by optimisation + */ + if (sl->prep_where) + sl->where= sl->prep_where->copy_andor_structure(thd); + DBUG_ASSERT(sl->join == 0); + ORDER *order; + /* Fix GROUP list */ + for (order= (ORDER *)sl->group_list.first; order; order= order->next) + order->item= &order->item_ptr; + /* Fix ORDER list */ + for (order= (ORDER *)sl->order_list.first; order; order= order->next) + order->item= &order->item_ptr; + } /* TODO: When the new table structure is ready, then have a status bit @@ -1697,7 +1697,6 @@ static void reset_stmt_for_execute(Prepared_statement *stmt) tables->table= 0; tables->table_list= 0; } - { SELECT_LEX_UNIT *unit= sl->master_unit(); unit->unclean(); @@ -1706,6 +1705,7 @@ static void reset_stmt_for_execute(Prepared_statement *stmt) unit->reinit_exec_mechanism(); } } + lex->current_select= &lex->select_lex; } @@ -1784,9 +1784,11 @@ void mysql_stmt_execute(THD *thd, char *packet, uint packet_length) if (stmt->param_count && stmt->set_params_data(stmt, &expanded_query)) goto set_params_data_err; #endif + 
thd->current_arena= stmt; thd->protocol= &thd->protocol_prep; // Switch to binary protocol execute_stmt(thd, stmt, &expanded_query, true); thd->protocol= &thd->protocol_simple; // Use normal protocol + thd->current_arena= 0; DBUG_VOID_RETURN; set_params_data_err: diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index d2b575c0838..a1676aed78d 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -80,7 +80,6 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list) /* Lets hope this doesn't fail as the result will be messy */ if (!error) { - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 32c5f0bfdab..98bf4e86aba 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -48,16 +48,34 @@ int check_binlog_magic(IO_CACHE* log, const char** errmsg) return 0; } + /* + fake_rotate_event() builds a fake (=which does not exist physically in any + binlog) Rotate event, which contains the name of the binlog we are going to + send to the slave (because the slave may not know it if it just asked for + MASTER_LOG_FILE='', MASTER_LOG_POS=4). + < 4.0.14, fake_rotate_event() was called only if the requested pos was + 4. After this version we always call it, so that a 3.23.58 slave can rely on + it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has + zeros in the good positions which, by chance, make it possible for the 3.23 + slave to detect that this event is unexpected) (this is luck which happens + because the master and slave disagree on the size of the header of + Log_event). + + Relying on the event length of the Rotate event instead of these well-placed + zeros was not possible as Rotate events have a variable-length part. 
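A worked instance of the event_len computation done below. The constants are assumptions consistent with servers of this vintage: a 19-byte common event header and an 8-byte fixed Rotate body (the position), followed by the bare log name with no trailing '\0':

#include <cstdio>
#include <cstring>

static const unsigned LOG_EVENT_HEADER_LEN_EX= 19;  // assumed common header size
static const unsigned ROTATE_HEADER_LEN_EX=     8;  // assumed fixed Rotate body size

int main()
{
  const char *log_file_name= "mysql-bin.000042";        // hypothetical log name
  unsigned ident_len= (unsigned) strlen(log_file_name); // 16 characters
  unsigned event_len= ident_len + LOG_EVENT_HEADER_LEN_EX + ROTATE_HEADER_LEN_EX;
  printf("fake Rotate event length: %u bytes\n", event_len);   // prints 43
  return 0;
}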
+*/ + static int fake_rotate_event(NET* net, String* packet, char* log_file_name, - ulonglong position, const char**errmsg) + ulonglong position, const char** errmsg) { + DBUG_ENTER("fake_rotate_event"); char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN]; - memset(header, 0, 4); // when does not matter + memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0 header[EVENT_TYPE_OFFSET] = ROTATE_EVENT; char* p = log_file_name+dirname_length(log_file_name); uint ident_len = (uint) strlen(p); - ulong event_len = ident_len + ROTATE_EVENT_OVERHEAD; + ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN; int4store(header + SERVER_ID_OFFSET, server_id); int4store(header + EVENT_LEN_OFFSET, event_len); int2store(header + FLAGS_OFFSET, 0); @@ -72,9 +90,9 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name, if (my_net_write(net, (char*)packet->ptr(), packet->length())) { *errmsg = "failed on my_net_write()"; - return -1; + DBUG_RETURN(-1); } - return 0; + DBUG_RETURN(0); } static int send_file(THD *thd) @@ -310,6 +328,36 @@ int purge_master_logs_before_date(THD* thd, time_t purge_time) return purge_error_message(thd ,res); } +int test_for_non_eof_log_read_errors(int error, const char **errmsg) +{ + if (error == LOG_READ_EOF) + return 0; + my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; + switch (error) { + case LOG_READ_BOGUS: + *errmsg = "bogus data in log event"; + break; + case LOG_READ_TOO_LARGE: + *errmsg = "log event entry exceeded max_allowed_packet; \ +Increase max_allowed_packet on master"; + break; + case LOG_READ_IO: + *errmsg = "I/O error reading log event"; + break; + case LOG_READ_MEM: + *errmsg = "memory allocation failed reading log event"; + break; + case LOG_READ_TRUNC: + *errmsg = "binlog truncated in the middle of event"; + break; + default: + *errmsg = "unknown error reading log event on the master"; + break; + } + return error; +} + + /* TODO: Clean up loop to only have one call to send_file() */ @@ -326,6 +374,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, int error; const char *errmsg = "Unknown error"; NET* net = &thd->net; + pthread_mutex_t *log_lock; #ifndef DBUG_OFF int left_events = max_binlog_dump_events; #endif @@ -385,18 +434,25 @@ impossible position"; goto err; } - my_b_seek(&log, pos); // Seek will done on next read /* We need to start a packet with something other than 255 - to distiquish it from error + to distinguish it from error */ - packet->set("\0", 1, &my_charset_bin); + packet->set("\0", 1, &my_charset_bin); /* This is the start of a new packet */ /* + Tell the client about the log name with a fake Rotate event; + this is needed even if we also send a Format_description_log_event just + after, because that event does not contain the binlog's name. + Note that as this Rotate event is sent before Format_description_log_event, + the slave cannot have any info to understand this event's format, so the + header len of Rotate_log_event is FROZEN + (so in 5.0 it will have a header shorter than other events except + FORMAT_DESCRIPTION_EVENT). Before 4.0.14 we called fake_rotate_event below only if (pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave already knows the binlog's name. - Now we always call fake_rotate_event; if the slave already knew the log's + Since, we always call fake_rotate_event; if the slave already knew the log's name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does not harm much. 
It is nice for 3.23 (>=.58) slaves which test Rotate events to see if the master is 4.0 (then they choose to stop because they can't @@ -413,15 +469,72 @@ impossible position"; */ if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg)) { + /* + This error code is not perfect, as fake_rotate_event() does not read + anything from the binlog; if it fails it's because of an error in + my_net_write(), fortunately it will say it in errmsg. + */ my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; goto err; } packet->set("\0", 1, &my_charset_bin); + /* + We can set log_lock now, it does not move (it's a member of mysql_bin_log, + and it's already inited, and it will be destroyed only at shutdown). + */ + log_lock = mysql_bin_log.get_log_lock(); + if (pos > BIN_LOG_HEADER_SIZE) + { + /* Try to find a Format_description_log_event at the beginning of the binlog */ + if (!(error = Log_event::read_log_event(&log, packet, log_lock))) + { + /* + The packet has offsets equal to the normal offsets in a binlog event + +1 (the first character is \0). + */ + DBUG_PRINT("info", + ("Looked for a Format_description_log_event, found event type %d", + (*packet)[EVENT_TYPE_OFFSET+1])); + if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) + { + /* + mark that this event with "log_pos=0", so the slave + should not increment master's binlog position + (rli->group_master_log_pos) + */ + int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0); + /* send it */ + if (my_net_write(net, (char*)packet->ptr(), packet->length())) + { + errmsg = "Failed on my_net_write()"; + my_errno= ER_UNKNOWN_ERROR; + goto err; + } + /* + No need to save this event. We are only doing simple reads (no real + parsing of the events) so we don't need it. And so we don't need the + artificial Format_description_log_event of 3.23&4.x. + */ + } + } + else + if (test_for_non_eof_log_read_errors(error, &errmsg)) + goto err; + /* + else: it's EOF, nothing to do, go on reading next events, the + Format_description_log_event will be found naturally if it is written. + */ + /* reset the packet as we wrote to it in any case */ + packet->set("\0", 1, &my_charset_bin); + } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event + event will be found naturally. */ + + /* seek to the requested position, to start the requested dump */ + my_b_seek(&log, pos); // Seek will done on next read + while (!net->error && net->vio != 0 && !thd->killed) { - pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); - while (!(error = Log_event::read_log_event(&log, packet, log_lock))) { #ifndef DBUG_OFF @@ -433,7 +546,7 @@ impossible position"; goto err; } #endif - if (my_net_write(net, (char*)packet->ptr(), packet->length()) ) + if (my_net_write(net, (char*)packet->ptr(), packet->length())) { errmsg = "Failed on my_net_write()"; my_errno= ER_UNKNOWN_ERROR; @@ -454,34 +567,14 @@ impossible position"; } /* TODO: now that we are logging the offset, check to make sure - the recorded offset and the actual match + the recorded offset and the actual match. + Guilhem 2003-06: this is not true if this master is a slave <4.0.15 + running with --log-slave-updates, because then log_pos may be the offset + in the-master-of-this-master's binlog. 
*/ - if (error != LOG_READ_EOF) - { - my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; - switch (error) { - case LOG_READ_BOGUS: - errmsg = "bogus data in log event"; - break; - case LOG_READ_TOO_LARGE: - errmsg = "log event entry exceeded max_allowed_packet; \ -Increase max_allowed_packet on master"; - break; - case LOG_READ_IO: - errmsg = "I/O error reading log event"; - break; - case LOG_READ_MEM: - errmsg = "memory allocation failed reading log event"; - break; - case LOG_READ_TRUNC: - errmsg = "binlog truncated in the middle of event"; - break; - default: - errmsg = "unknown error reading log event on the master"; - break; - } + + if (test_for_non_eof_log_read_errors(error, &errmsg)) goto err; - } if (!(flags & BINLOG_DUMP_NON_BLOCK) && mysql_bin_log.is_active(log_file_name)) @@ -615,8 +708,13 @@ Increase max_allowed_packet on master"; (void) my_close(file, MYF(MY_WME)); /* - Even if the previous log contained a Rotate_log_event, we still fake - one. + Call fake_rotate_event() in case the previous log (the one which we have + just finished reading) did not contain a Rotate event (for example (I + don't know any other example) the previous log was the last one before + the master was shutdown & restarted). + This way we tell the slave about the new log's name and position. + If the binlog is 5.0, the next event we are going to read and send is + Format_description_log_event. */ if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 || fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg)) @@ -950,7 +1048,7 @@ void kill_zombie_dump_threads(uint32 slave_server_id) it will be slow because it will iterate through the list again. We just to do kill the thread ourselves. */ - tmp->awake(1/*prepare to die*/); + tmp->awake(THD::KILL_QUERY); pthread_mutex_unlock(&tmp->LOCK_delete); } } @@ -1110,7 +1208,7 @@ int change_master(THD* thd, MASTER_INFO* mi) mi->rli.group_relay_log_name, mi->rli.group_relay_log_pos, 0 /*no data lock*/, - &msg)) + &msg, 0)) { net_printf(thd,0,"Failed initializing relay log position: %s",msg); unlock_slave_threads(mi); @@ -1195,6 +1293,8 @@ int show_binlog_events(THD* thd) const char *errmsg = 0; IO_CACHE log; File file = -1; + Format_description_log_event *description_event= new + Format_description_log_event(3); /* MySQL 4.0 by default */ Log_event::init_show_field_list(&field_list); if (protocol-> send_fields(&field_list, 1)) @@ -1233,10 +1333,38 @@ int show_binlog_events(THD* thd) goto err; pthread_mutex_lock(log_lock); + + /* + open_binlog() sought to position 4. + Read the first event in case it's a Format_description_log_event, to know the + format. If there's no such event, we are 3.23 or 4.x. This code, like + before, can't read 3.23 binlogs. + This code will fail on a mixed relay log (one which has Format_desc then + Rotate then Format_desc). 
+ */ + + ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event); + if (ev) + { + if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) + { + delete description_event; + description_event= (Format_description_log_event*) ev; + } + else + delete ev; + } + my_b_seek(&log, pos); + if (!description_event->is_valid()) + { + errmsg="Invalid Format_description event; could be out of memory"; + goto err; + } + for (event_count = 0; - (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,0)); ) + (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); ) { if (event_count >= limit_start && ev->net_send(protocol, linfo.log_file_name, pos)) @@ -1265,6 +1393,7 @@ int show_binlog_events(THD* thd) } err: + delete description_event; if (file >= 0) { end_io_cache(&log); diff --git a/sql/sql_repl.h b/sql/sql_repl.h index e7001c1fe1e..c39ef90114d 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -20,7 +20,7 @@ extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern int max_binlog_dump_events; extern my_bool opt_sporadic_binlog_dump_fail; -#define KICK_SLAVE(thd) { pthread_mutex_lock(&(thd)->LOCK_delete); (thd)->awake(0 /* do not prepare to die*/); pthread_mutex_unlock(&(thd)->LOCK_delete); } +#define KICK_SLAVE(thd) { pthread_mutex_lock(&(thd)->LOCK_delete); (thd)->awake(THD::NOT_KILLED); pthread_mutex_unlock(&(thd)->LOCK_delete); } File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 2f8ede4b4cb..05676f9058a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -30,7 +30,8 @@ const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", - "ref_or_null","unique_subquery","index_subquery" + "ref_or_null","unique_subquery","index_subquery", + "index_merge" }; const key_map key_map_empty(0); @@ -47,7 +48,25 @@ static int sort_keyuse(KEYUSE *a,KEYUSE *b); static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key); static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, table_map used_tables); -static void find_best_combination(JOIN *join,table_map rest_tables); +static void choose_plan(JOIN *join,table_map join_tables); + +static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd, + table_map remaining_tables, uint idx, + double record_count, double read_time); +static void optimize_straight_join(JOIN *join, table_map join_tables); +static void greedy_search(JOIN *join, table_map remaining_tables, + uint depth, uint prune_level); +static void best_extension_by_limited_search(JOIN *join, + table_map remaining_tables, + uint idx, double record_count, + double read_time, uint depth, + uint prune_level); +static uint determine_search_depth(JOIN* join); +static int join_tab_cmp(const void* ptr1, const void* ptr2); +/* + TODO: 'find_best' is here only temporarily until 'greedy_search' is + tested and approved. 
+*/ static void find_best(JOIN *join,table_map rest_tables,uint index, double record_count,double read_time); static uint cache_record_length(JOIN *join,uint index); @@ -58,6 +77,7 @@ static store_key *get_store_key(THD *thd, KEY_PART_INFO *key_part, char *key_buff, uint maybe_null); static bool make_simple_join(JOIN *join,TABLE *tmp_table); +static void make_outerjoin_info(JOIN *join); static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); static void make_join_readinfo(JOIN *join,uint options); static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); @@ -70,8 +90,11 @@ static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables, uint select_options, const char *info, Item *having, Procedure *proc, SELECT_LEX_UNIT *unit); -static COND *optimize_cond(THD *thd, COND *conds, +static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, + COND *conds, bool top); +static COND *optimize_cond(JOIN *join, COND *conds, Item::cond_result *cond_value); +static bool resolve_nested_join (TABLE_LIST *table); static COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value); static bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); @@ -116,7 +139,7 @@ static int join_read_next_same_or_null(READ_RECORD *info); static COND *make_cond_for_table(COND *cond,table_map table, table_map used_table); static Item* part_of_refkey(TABLE *form,Field *field); -static uint find_shortest_key(TABLE *table, const key_map *usable_keys); +uint find_shortest_key(TABLE *table, const key_map *usable_keys); static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order, ha_rows select_limit, bool no_changes); static int create_sort_index(THD *thd, JOIN *join, ORDER *order, @@ -127,6 +150,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field, ulong offset,Item *having); static int remove_dup_with_hash_index(THD *thd,TABLE *table, uint field_count, Field **first_field, + ulong key_length,Item *having); static int join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count); static ulong used_blob_length(CACHE_FIELD **ptr); @@ -177,8 +201,11 @@ int handle_select(THD *thd, LEX *lex, select_result *result) DBUG_ENTER("handle_select"); if (select_lex->next_select()) - res=mysql_union(thd, lex, result, &lex->unit); + res= mysql_union(thd, lex, result, &lex->unit); else + { + SELECT_LEX_UNIT *unit= &lex->unit; + unit->set_limit(unit->global_parameters, select_lex); res= mysql_select(thd, &select_lex->ref_pointer_array, (TABLE_LIST*) select_lex->table_list.first, select_lex->with_wild, select_lex->item_list, @@ -190,7 +217,8 @@ int handle_select(THD *thd, LEX *lex, select_result *result) select_lex->having, (ORDER*) lex->proc_list.first, select_lex->options | thd->options, - result, &(lex->unit), &(lex->select_lex)); + result, unit, select_lex); + } /* Don't set res if it's -1 as we may want this later */ DBUG_PRINT("info",("res: %d report_error: %d", res, @@ -208,7 +236,8 @@ int handle_select(THD *thd, LEX *lex, select_result *result) send_error(thd, 0, NullS); res= 1; // Error sent to client } - delete result; + if (result != lex->result) + delete result; DBUG_RETURN(res); } @@ -305,6 +334,7 @@ JOIN::prepare(Item ***rref_pointer_array, tables_list= tables_init; select_lex= select_lex_arg; select_lex->join= this; + join_list= &select_lex->top_join_list; union_part= (unit_arg->first_select()->next_select() != 0); /* Check that all tables, fields, conds and order are ok */ @@ -344,7 +374,14 @@ JOIN::prepare(Item 
***rref_pointer_array, Item_subselect::trans_res res; if ((res= subselect->select_transformer(this)) != Item_subselect::RES_OK) + { + if (thd->current_arena && select_lex->first_execution) + { + select_lex->prep_where= select_lex->where; + select_lex->first_execution= 0; + } DBUG_RETURN((res == Item_subselect::RES_ERROR)); + } } } @@ -447,6 +484,11 @@ JOIN::prepare(Item ***rref_pointer_array, if (alloc_func_list()) goto err; + if (thd->current_arena && select_lex->first_execution) + { + select_lex->prep_where= select_lex->where; + select_lex->first_execution= 0; + } DBUG_RETURN(0); // All OK err: @@ -492,7 +534,6 @@ bool JOIN::test_in_subselect(Item **where) return 0; } - /* global select optimisation. return 0 - success @@ -530,7 +571,7 @@ JOIN::optimize() } #endif - conds= optimize_cond(thd, conds, &cond_value); + conds= optimize_cond(this, conds,&cond_value); if (thd->net.report_error) { error= 1; @@ -631,6 +672,9 @@ JOIN::optimize() DBUG_PRINT("error",("Error: make_select() failed")); DBUG_RETURN(1); } + + make_outerjoin_info(this); + if (make_join_select(this, select, conds)) { zero_result_cause= @@ -1004,12 +1048,7 @@ JOIN::reinit() { DBUG_ENTER("JOIN::reinit"); /* TODO move to unit reinit */ - unit->offset_limit_cnt =select_lex->offset_limit; - unit->select_limit_cnt =select_lex->select_limit+select_lex->offset_limit; - if (unit->select_limit_cnt < select_lex->select_limit) - unit->select_limit_cnt= HA_POS_ERROR; // no limit - if (unit->select_limit_cnt == HA_POS_ERROR) - select_lex->options&= ~OPTION_FOUND_ROWS; + unit->set_limit(select_lex, select_lex); if (setup_tables(tables_list)) DBUG_RETURN(1); @@ -1468,7 +1507,7 @@ JOIN::exec() at least one row generated from the table. */ if (curr_table->select_cond || - (curr_table->keyuse && !curr_table->on_expr)) + (curr_table->keyuse && !curr_table->first_inner)) { /* We have to sort all rows */ curr_join->select_limit= HA_POS_ERROR; @@ -1684,6 +1723,7 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, DYNAMIC_ARRAY *keyuse_array) { int error; + TABLE *table; uint i,table_count,const_count,key; table_map found_const_table_map, all_table_map, found_ref, refs; key_map const_ref, eq_part; @@ -1709,13 +1749,15 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, for (s=stat,i=0 ; tables ; s++,tables=tables->next,i++) { - TABLE *table; + table_map dep_tables; + TABLE_LIST *embedding= tables->embedding; stat_vector[i]=s; s->keys.init(); s->const_keys.init(); s->checked_keys.init(); s->needed_reg.init(); table_vector[i]=s->table=table=tables->table; + table->pos_in_table_list= tables; table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);// record count table->quick_keys.clear_all(); table->reginfo.join_tab=s; @@ -1724,29 +1766,36 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, all_table_map|= table->map; s->join=join; s->info=0; // For describe + + s->dependent= tables->dep_tables; + s->key_dependent= 0; + if ((s->on_expr=tables->on_expr)) { - /* Left join */ + /* s is the only inner table of an outer join */ if (!table->file->records) { // Empty table - s->key_dependent=s->dependent=0; // Ignore LEFT JOIN depend. + s->dependent= 0; // Ignore LEFT JOIN depend. 
set_position(join,const_count++,s,(KEYUSE*) 0); continue; } - s->key_dependent=s->dependent= - s->on_expr->used_tables() & ~(table->map); - if (table->outer_join & JOIN_TYPE_LEFT) - s->dependent|=stat_vector[i-1]->dependent | table_vector[i-1]->map; - if (tables->outer_join & JOIN_TYPE_RIGHT) - s->dependent|=tables->next->table->map; - outer_join|=table->map; + outer_join|= table->map; continue; } - if (tables->straight) // We don't have to move this - s->dependent= table_vector[i-1]->map | stat_vector[i-1]->dependent; - else - s->dependent=(table_map) 0; - s->key_dependent=(table_map) 0; + if (embedding) + { + /* s belongs to a nested join, maybe to several embedded joins */ + do + { + NESTED_JOIN *nested_join= embedding->nested_join; + s->dependent|= embedding->dep_tables; + embedding= embedding->embedding; + outer_join|= nested_join->used_tables; + } + while (embedding); + continue; + } + if ((table->system || table->file->records <= 1) && ! s->dependent && !(table->file->table_flags() & HA_NOT_EXACT_COUNT) && !table->fulltext_searched) @@ -1757,39 +1806,35 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, stat_vector[i]=0; join->outer_join=outer_join; - /* - If outer join: Re-arrange tables in stat_vector so that outer join - tables are after all tables it is dependent of. - For example: SELECT * from A LEFT JOIN B ON B.c=C.c, C WHERE A.C=C.C - Will shift table B after table C. - */ - if (outer_join) + if (join->outer_join) { - table_map used_tables=0L; - for (i=0 ; i < join->tables-1 ; i++) + /* + Build transitive closure for relation 'to be dependent on'. + This will speed up the plan search for many cases with outer joins, + as well as allow us to catch illegal cross references/ + Warshall's algorithm is used to build the transitive closure. + As we use bitmaps to represent the relation the complexity + of the algorithm is O((number of tables)^2). + */ + for (i= 0, s= stat ; i < table_count ; i++, s++) { - if (stat_vector[i]->dependent & ~used_tables) + for (uint j= 0 ; j < table_count ; j++) { - JOIN_TAB *save= stat_vector[i]; - uint j; - for (j=i+1; - j < join->tables && stat_vector[j]->dependent & ~used_tables; - j++) - { - JOIN_TAB *tmp=stat_vector[j]; // Move element up - stat_vector[j]=save; - save=tmp; - } - if (j == join->tables) - { - join->tables=0; // Don't use join->table - my_error(ER_WRONG_OUTER_JOIN,MYF(0)); - DBUG_RETURN(1); - } - stat_vector[i]=stat_vector[j]; - stat_vector[j]=save; + table= stat[j].table; + if (s->dependent & table->map) + s->dependent |= table->reginfo.join_tab->dependent; + } + } + /* Catch illegal cross references for outer joins */ + for (i= 0, s= stat ; i < table_count ; i++, s++) + { + if (s->dependent & s->table->map) + { + join->tables=0; // Don't use join->table + my_error(ER_WRONG_OUTER_JOIN,MYF(0)); + DBUG_RETURN(1); } - used_tables|= stat_vector[i]->table->map; + s->key_dependent= s->dependent; } } @@ -1832,7 +1877,7 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) { - TABLE *table=s->table; + table=s->table; if (s->dependent) // If dependent on some table { // All dep. 
must be constants @@ -1990,7 +2035,7 @@ make_join_statistics(JOIN *join,TABLE_LIST *tables,COND *conds, if (join->const_tables != join->tables) { optimize_keyuse(join, keyuse_array); - find_best_combination(join,all_table_map & ~join->const_table_map); + choose_plan(join, all_table_map & ~join->const_table_map); } else { @@ -2501,11 +2546,32 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, } for (i=0 ; i < tables ; i++) { + /* + Block the creation of keys for inner tables of outer joins. + Here only the outer joins that can not be converted to + inner joins are left and all nests that can be eliminated + are flattened. + In the future when we introduce conditional accesses + for inner tables in outer joins these keys will be taken + into account as well. + */ if (join_tab[i].on_expr) { add_key_fields(join_tab,&end,&and_level,join_tab[i].on_expr, join_tab[i].table->map); } + else + { + TABLE_LIST *tab= join_tab[i].table->pos_in_table_list; + TABLE_LIST *embedding= tab->embedding; + if (embedding) + { + NESTED_JOIN *nested_join= embedding->nested_join; + if (nested_join->join_list.head() == tab) + add_key_fields(join_tab, &end, &and_level, embedding->on_expr, + nested_join->used_tables); + } + } } /* fill keyuse with found key parts */ for ( ; field != end ; field++) @@ -2568,7 +2634,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, } /* - Update some values in keyuse for faster find_best_combination() loop + Update some values in keyuse for faster choose_plan() loop */ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) @@ -2636,16 +2702,956 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) } +/* + Find the best access path for an extension of a partial execution plan and + add this path to the plan. + + SYNOPSIS + best_access_path() + join pointer to the structure providing all context info + for the query + s the table to be joined by the function + thd thread for the connection that submitted the query + remaining_tables set of tables not included into the partial plan yet + idx the length of the partial plan + record_count estimate for the number of records returned by the partial + plan + read_time the cost of the partial plan + + DESCRIPTION + The function finds the best access path to table 's' from the passed + partial plan where an access path is the general term for any means to + access the data in 's'. An access path may use either an index or a scan, + whichever is cheaper. The input partial plan is passed via the array + 'join->positions' of length 'idx'. The chosen access method for 's' and its + cost are stored in 'join->positions[idx]'. 
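To make the "index or scan, whichever is cheaper" decision concrete, here is the final comparison the function performs, run on hypothetical numbers; TIME_FOR_COMPARE is assumed to be 5, i.e. evaluating a condition on a row is rated at one fifth of the cost of reading the row:

#include <cstdio>

static const double TIME_FOR_COMPARE_EX= 5.0;   // assumed weighting constant

int main()
{
  double record_count= 100.0;  // rows produced by the partial plan so far
  // candidate 1: ref access, 200 estimated reads, about 2 matching rows per outer row
  double key_cost= 200.0, key_records= 2.0;
  // candidate 2: table scan, 500 estimated reads, about 750 rows left to post-filter
  double scan_cost= 500.0, scan_records= 750.0;

  double key_total=  key_cost  + record_count * key_records  / TIME_FOR_COMPARE_EX;
  double scan_total= scan_cost + record_count * scan_records / TIME_FOR_COMPARE_EX;

  printf("ref: %.0f   scan: %.0f -> keep %s\n",
         key_total, scan_total, scan_total < key_total ? "scan" : "ref");
  // prints "ref: 240   scan: 15500 -> keep ref"
  return 0;
}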
+ + RETURN + None +*/ + +static void +best_access_path(JOIN *join, + JOIN_TAB *s, + THD *thd, + table_map remaining_tables, + uint idx, + double record_count, + double read_time) +{ + KEYUSE *best_key= 0; + uint best_max_key_part= 0; + my_bool found_constraint= 0; + double best= DBL_MAX; + double best_time= DBL_MAX; + double records= DBL_MAX; + double tmp; + ha_rows rec; + + DBUG_ENTER("best_access_path"); + + if (s->keyuse) + { /* Use key if possible */ + TABLE *table= s->table; + KEYUSE *keyuse,*start_key=0; + double best_records= DBL_MAX; + uint max_key_part=0; + + /* Test how we can use keys */ + rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key + for (keyuse=s->keyuse ; keyuse->table == table ;) + { + key_part_map found_part= 0; + table_map found_ref= 0; + uint found_ref_or_null= 0; + uint key= keyuse->key; + KEY *keyinfo= table->key_info+key; + bool ft_key= (keyuse->keypart == FT_KEYPART); + + /* Calculate how many key segments of the current key we can use */ + start_key= keyuse; + do + { /* for each keypart */ + uint keypart= keyuse->keypart; + uint found_part_ref_or_null= KEY_OPTIMIZE_REF_OR_NULL; + do + { + if (!(remaining_tables & keyuse->used_tables) && + !(found_ref_or_null & keyuse->optimize)) + { + found_part|= keyuse->keypart_map; + found_ref|= keyuse->used_tables; + if (rec > keyuse->ref_table_rows) + rec= keyuse->ref_table_rows; + found_part_ref_or_null&= keyuse->optimize; + } + keyuse++; + found_ref_or_null|= found_part_ref_or_null; + } while (keyuse->table == table && keyuse->key == key && + keyuse->keypart == keypart); + } while (keyuse->table == table && keyuse->key == key); + + /* + Assume that that each key matches a proportional part of table. + */ + if (!found_part && !ft_key) + continue; // Nothing usable found + + if (rec < MATCHING_ROWS_IN_OTHER_TABLE) + rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables + + /* + ft-keys require special treatment + */ + if (ft_key) + { + /* + Really, there should be records=0.0 (yes!) + but 1.0 would be probably safer + */ + tmp= prev_record_reads(join, found_ref); + records= 1.0; + } + else + { + found_constraint= 1; + /* + Check if we found full key + */ + if (found_part == PREV_BITS(uint,keyinfo->key_parts) && + !found_ref_or_null) + { /* use eq key */ + max_key_part= (uint) ~0; + if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) + { + tmp = prev_record_reads(join, found_ref); + records=1.0; + } + else + { + if (!found_ref) + { /* We found a const key */ + if (table->quick_keys.is_set(key)) + records= (double) table->quick_rows[key]; + else + { + /* quick_range couldn't use key! 
*/ + records= (double) s->records/rec; + } + } + else + { + if (!(records=keyinfo->rec_per_key[keyinfo->key_parts-1])) + { /* Prefer longer keys */ + records= + ((double) s->records / (double) rec * + (1.0 + + ((double) (table->max_key_length-keyinfo->key_length) / + (double) table->max_key_length))); + if (records < 2.0) + records=2.0; /* Can't be as good as a unique */ + } + } + /* Limit the number of matched rows */ + tmp = records; + set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); + if (table->used_keys.is_set(key)) + { + /* we can use only index tree */ + uint keys_per_block= table->file->block_size/2/ + (keyinfo->key_length+table->file->ref_length)+1; + tmp = record_count*(tmp+keys_per_block-1)/keys_per_block; + } + else + tmp = record_count*min(tmp,s->worst_seeks); + } + } + else + { + /* + Use as much key-parts as possible and a uniq key is better + than a not unique key + Set tmp to (previous record count) * (records / combination) + */ + if ((found_part & 1) && + (!(table->file->index_flags(key) & HA_ONLY_WHOLE_INDEX) || + found_part == PREV_BITS(uint,keyinfo->key_parts))) + { + max_key_part=max_part_bit(found_part); + /* + Check if quick_range could determinate how many rows we + will match + */ + if (table->quick_keys.is_set(key) && + table->quick_key_parts[key] == max_key_part) + tmp= records= (double) table->quick_rows[key]; + else + { + /* Check if we have statistic about the distribution */ + if ((records = keyinfo->rec_per_key[max_key_part-1])) + tmp = records; + else + { + /* + Assume that the first key part matches 1% of the file + and that the hole key matches 10 (duplicates) or 1 + (unique) records. + Assume also that more key matches proportionally more + records + This gives the formula: + records = (x * (b-a) + a*c-b)/(c-1) + + b = records matched by whole key + a = records matched by first key part (10% of all records?) + c = number of key parts in key + x = used key parts (1 <= x <= c) + */ + double rec_per_key; + if (!(rec_per_key=(double) + keyinfo->rec_per_key[keyinfo->key_parts-1])) + rec_per_key=(double) s->records/rec+1; + + if (!s->records) + tmp = 0; + else if (rec_per_key/(double) s->records >= 0.01) + tmp = rec_per_key; + else + { + double a=s->records*0.01; + tmp = (max_key_part * (rec_per_key - a) + + a*keyinfo->key_parts - rec_per_key)/ + (keyinfo->key_parts-1); + set_if_bigger(tmp,1.0); + } + records = (ulong) tmp; + } + /* + If quick_select was used on a part of this key, we know + the maximum number of rows that the key can match. 
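A worked instance of the interpolation formula records = (x*(b-a) + a*c - b)/(c-1) described above, with hypothetical figures: a 1000-row table (so a = 10, the assumed 1% match for the first key part), a three-part key (c = 3) and a measured full-key rec_per_key of b = 4:

#include <cstdio>

int main()
{
  double a= 10.0, b= 4.0;
  int    c= 3;
  for (int x= 1; x <= c; x++)
  {
    double records= (x * (b - a) + a * c - b) / (c - 1);
    printf("key parts used x=%d -> estimated %.0f matching rows\n", x, records);
  }
  // Output: 10, 7, 4 - a straight line from the 1%-of-table guess for one
  // key part down to the measured rec_per_key for the whole key.
  return 0;
}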
+ */ + if (table->quick_keys.is_set(key) && + table->quick_key_parts[key] <= max_key_part && + records > (double) table->quick_rows[key]) + tmp= records= (double) table->quick_rows[key]; + else if (found_ref_or_null) + { + /* We need to do two key searches to find key */ + tmp *= 2.0; + records *= 2.0; + } + } + /* Limit the number of matched rows */ + set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); + if (table->used_keys.is_set(key)) + { + /* we can use only index tree */ + uint keys_per_block= table->file->block_size/2/ + (keyinfo->key_length+table->file->ref_length)+1; + tmp = record_count*(tmp+keys_per_block-1)/keys_per_block; + } + else + tmp = record_count*min(tmp,s->worst_seeks); + } + else + tmp = best_time; // Do nothing + } + } /* not ft_key */ + if (tmp < best_time - records/(double) TIME_FOR_COMPARE) + { + best_time= tmp + records/(double) TIME_FOR_COMPARE; + best= tmp; + best_records= records; + best_key= start_key; + best_max_key_part= max_key_part; + } + } + records= best_records; + } + + /* + Don't test table scan if it can't be better. + Prefer key lookup if we would use the same key for scanning. + + Don't do a table scan on InnoDB tables, if we can read the used + parts of the row from any of the used index. + This is because table scans uses index and we would not win + anything by using a table scan. + */ + if ((records >= s->found_records || best > s->read_time) && + !(s->quick && best_key && s->quick->index == best_key->key && + best_max_key_part >= s->table->quick_key_parts[best_key->key]) && + !((s->table->file->table_flags() & HA_TABLE_SCAN_ON_INDEX) && + ! s->table->used_keys.is_clear_all() && best_key) && + !(s->table->force_index && best_key)) + { // Check full join + ha_rows rnd_records= s->found_records; + /* + If there is a restriction on the table, assume that 25% of the + rows can be skipped on next part. + This is to force tables that this table depends on before this + table + */ + if (found_constraint) + rnd_records-= rnd_records/4; + + /* + Range optimizer never proposes a RANGE if it isn't better + than FULL: so if RANGE is present, it's always preferred to FULL. + Here we estimate its cost. + */ + if (s->quick) + { + /* + For each record we: + - read record range through 'quick' + - skip rows which does not satisfy WHERE constraints + */ + tmp= record_count * + (s->quick->read_time + + (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE); + } + else + { + /* Estimate cost of reading table. */ + tmp= s->table->file->scan_time(); + if (s->table->map & join->outer_join) // Can't use join cache + { + /* + For each record we have to: + - read the whole table record + - skip rows which does not satisfy join condition + */ + tmp= record_count * + (tmp + + (s->records - rnd_records)/(double) TIME_FOR_COMPARE); + } + else + { + /* We read the table as many times as join buffer becomes full. */ + tmp*= (1.0 + floor((double) cache_record_length(join,idx) * + record_count / + (double) thd->variables.join_buff_size)); + /* + We don't make full cartesian product between rows in the scanned + table and existing records because we skip all rows from the + scanned table, which does not satisfy join condition when + we read the table (see flush_cached_records for details). Here we + take into account cost to read and skip these records. + */ + tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + } + } + + /* + We estimate the cost of evaluating WHERE clause for found records + as record_count * rnd_records / TIME_FOR_COMPARE. 
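A worked instance of the join-buffer refill factor used just above: the scanned table is re-read once every time the join buffer fills up with rows from the preceding tables. All figures are hypothetical, and a flat 100 bytes stands in for cache_record_length():

#include <cstdio>
#include <cmath>

int main()
{
  double scan_time=      80.0;      // cost of one full scan of the inner table
  double record_count=   5000.0;    // rows buffered from the preceding tables
  double record_length=  100.0;     // bytes each buffered row occupies
  double join_buff_size= 131072.0;  // 128K join buffer

  double refills= 1.0 + floor(record_length * record_count / join_buff_size);
  printf("table scanned %.0f times, scan cost %.0f\n",
         refills, scan_time * refills);
  // prints "table scanned 4 times, scan cost 320"
  return 0;
}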
This cost plus + tmp give us total cost of using TABLE SCAN + */ + if (best == DBL_MAX || + (tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records < + best + record_count/(double) TIME_FOR_COMPARE*records)) + { + /* + If the table has a range (s->quick is set) make_join_select() + will ensure that this will be used + */ + best= tmp; + records= rows2double(rnd_records); + best_key= 0; + } + } + + /* Update the cost information for the current partial plan */ + join->positions[idx].records_read= records; + join->positions[idx].read_time= best; + join->positions[idx].key= best_key; + join->positions[idx].table= s; + + if (!best_key && + idx == join->const_tables && + s->table == join->sort_by_table && + join->unit->select_limit_cnt >= records) + join->sort_by_table= (TABLE*) 1; // Must use temporary table + + DBUG_VOID_RETURN; +} + + +/* + Selects and invokes a search strategy for an optimal query plan. + + SYNOPSIS + choose_plan() + join pointer to the structure providing all context info for + the query + join_tables set of the tables in the query + + DESCRIPTION + The function checks user-configurable parameters that control the search + strategy for an optimal plan, selects the search method and then invokes + it. Each specific optimization procedure stores the final optimal plan in + the array 'join->best_positions', and the cost of the plan in + 'join->best_read'. + + RETURN + None +*/ + static void -find_best_combination(JOIN *join, table_map rest_tables) +choose_plan(JOIN *join, table_map join_tables) { - DBUG_ENTER("find_best_combination"); - join->best_read=DBL_MAX; - find_best(join,rest_tables, join->const_tables,1.0,0.0); + uint search_depth= join->thd->variables.optimizer_search_depth; + uint prune_level= join->thd->variables.optimizer_prune_level; + + DBUG_ENTER("choose_plan"); + + if (join->select_options & SELECT_STRAIGHT_JOIN) + { + optimize_straight_join(join, join_tables); + } + else + { + /* + Heuristic: pre-sort all access plans with respect to the number of + records accessed. + */ + qsort(join->best_ref + join->const_tables, join->tables - join->const_tables, + sizeof(JOIN_TAB*), join_tab_cmp); + + if (search_depth == MAX_TABLES+2) + { /* + TODO: 'MAX_TABLES+2' denotes the old implementation of find_best before + the greedy version. Will be removed when greedy_search is approved. + */ + join->best_read= DBL_MAX; + find_best(join, join_tables, join->const_tables, 1.0, 0.0); + } + else + { + if (search_depth == 0) + /* Automatically determine a reasonable value for 'search_depth' */ + search_depth= determine_search_depth(join); + greedy_search(join, join_tables, search_depth, prune_level); + } + } + + /* Store the cost of this query into a user variable */ + last_query_cost= join->best_read; + DBUG_VOID_RETURN; } +/* + Compare two JOIN_TAB objects based on the number of accessed records. + + SYNOPSIS + join_tab_cmp() + ptr1 pointer to first JOIN_TAB object + ptr2 pointer to second JOIN_TAB object + + RETURN + 1 if first is bigger + -1 if second is bigger + 0 if equal +*/ + +static int +join_tab_cmp(const void* ptr1, const void* ptr2) +{ + JOIN_TAB *jt1= *(JOIN_TAB**) ptr1; + JOIN_TAB *jt2= *(JOIN_TAB**) ptr2; + + if (jt1->dependent & jt2->table->map) + return 1; + if (jt2->dependent & jt1->table->map) + return -1; + if (jt1->found_records > jt2->found_records) + return 1; + if (jt1->found_records < jt2->found_records) + return -1; + return jt1 > jt2 ? 1 : (jt1 < jt2 ? 
-1 : 0); +} + + +/* + Heuristic procedure to automatically guess a reasonable degree of + exhaustiveness for the greedy search procedure. + + SYNOPSIS + determine_search_depth() + join pointer to the structure providing all context info for the query + + DESCRIPTION + The procedure estimates the optimization time and selects a search depth + big enough to result in a near-optimal QEP, that doesn't take too long to + find. If the number of tables in the query exceeds some constant, then + search_depth is set to this constant. + + NOTES + This is an extremely simplistic implementation that serves as a stub for a + more advanced analysis of the join. Ideally the search depth should be + determined by learning from previous query optimizations, because it will + depend on the CPU power (and other factors). + + RETURN + A positive integer that specifies the search depth (and thus the + exhaustiveness) of the depth-first search algorithm used by + 'greedy_search'. +*/ + +static uint +determine_search_depth(JOIN *join) +{ + uint table_count= join->tables - join->const_tables; + uint search_depth; + /* TODO: this value should be determined dynamically, based on statistics: */ + uint max_tables_for_exhaustive_opt= 7; + + if (table_count <= max_tables_for_exhaustive_opt) + search_depth= table_count+1; // use exhaustive for small number of tables + else + /* + TODO: this value could be determined by some mapping of the form: + depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE] + */ + search_depth= max_tables_for_exhaustive_opt; // use greedy search + + return search_depth; +} + + +/* + Select the best ways to access the tables in a query without reordering them. + + SYNOPSIS + optimize_straight_join() + join pointer to the structure providing all context info for + the query + join_tables set of the tables in the query + + DESCRIPTION + Find the best access paths for each query table and compute their costs + according to their order in the array 'join->best_ref' (thus without + reordering the join tables). The function calls sequentially + 'best_access_path' for each table in the query to select the best table + access method. The final optimal plan is stored in the array + 'join->best_positions', and the corresponding cost in 'join->best_read'. + + NOTES + This function can be applied to: + - queries with STRAIGHT_JOIN + - internally to compute the cost of an arbitrary QEP + Thus 'optimize_straight_join' can be used at any stage of the query + optimization process to finalize a QEP as it is. 
+ + RETURN + None +*/ + +static void +optimize_straight_join(JOIN *join, table_map join_tables) +{ + JOIN_TAB *s; + uint idx= join->const_tables; + double record_count= 1.0; + double read_time= 0.0; + + for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++) + { + /* Find the best access method from 's' to the current partial plan */ + best_access_path(join, s, join->thd, join_tables, idx, record_count, read_time); + /* compute the cost of the new plan extended with 's' */ + record_count*= join->positions[idx].records_read; + read_time+= join->positions[idx].read_time; + join_tables&= ~(s->table->map); + ++idx; + } + + read_time+= record_count / (double) TIME_FOR_COMPARE; + if (join->sort_by_table && + join->sort_by_table != join->positions[join->const_tables].table->table) + read_time+= record_count; // We have to make a temp table + memcpy((gptr) join->best_positions, (gptr) join->positions, + sizeof(POSITION)*idx); + join->best_read= read_time; +} + + +/* + Find a good, possibly optimal, query execution plan (QEP) by a greedy search. + + SYNOPSIS + greedy_search() + join pointer to the structure providing all context info + for the query + remaining_tables set of tables not included into the partial plan yet + search_depth controls the exhaustiveness of the search + prune_level the pruning heuristics that should be applied during + search + + DESCRIPTION + The search procedure uses a hybrid greedy/exhaustive search with controlled + exhaustiveness. The search is performed in N = card(remaining_tables) + steps. Each step evaluates how promising each of the unoptimized tables is, + selects the most promising table, and extends the current partial QEP with + that table. Currently the most 'promising' table is the one with the least + expensive extension. + There are two extreme cases: + 1. When (card(remaining_tables) < search_depth), the estimate finds the best + complete continuation of the partial QEP. This continuation can be + used directly as a result of the search. + 2. When (search_depth == 1) the 'best_extension_by_limited_search' + considers the extension of the current QEP with each of the remaining + unoptimized tables. + All other cases are in-between these two extremes. Thus the parameter + 'search_depth' controls the exhaustiveness of the search. The higher the + value, the longer the optimization time and possibly the better the + resulting plan. The lower the value, the fewer alternative plans are + estimated, but the more likely it is that a bad QEP will be chosen. + + All intermediate and final results of the procedure are stored in 'join': + join->positions modified for every partial QEP that is explored + join->best_positions modified for the current best complete QEP + join->best_read modified for the current best complete QEP + join->best_ref might be partially reordered + The final optimal plan is stored in 'join->best_positions', and its + corresponding cost in 'join->best_read'. + + NOTES + The following pseudocode describes the algorithm of 'greedy_search': + + procedure greedy_search + input: remaining_tables + output: pplan; + { + pplan = <>; + do { + (t, a) = best_extension(pplan, remaining_tables); + pplan = concat(pplan, (t, a)); + remaining_tables = remaining_tables - t; + } while (remaining_tables != {}) + return pplan; + } + + where 'best_extension' is a placeholder for a procedure that selects the + most "promising" of all tables in 'remaining_tables'. 
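A toy rendering of that greedy loop may make the pseudocode concrete. Everything below is illustrative only: the table names and row counts are invented, and a plain row count stands in for the cost that the real code obtains from its look-ahead procedure.

#include <cstdio>

int main()
{
  const char *name[3]= {"t1", "t2", "t3"};     /* hypothetical tables */
  double      rows[3]= {1000.0, 10.0, 200.0};  /* stand-in for the extension cost */
  bool        used[3]= {false, false, false};

  printf("greedy join order:");
  for (int step= 0; step < 3; step++)          /* one table appended per step */
  {
    int best= -1;
    for (int i= 0; i < 3; i++)                 /* "best_extension": cheapest unused table */
      if (!used[i] && (best < 0 || rows[i] < rows[best]))
        best= i;
    used[best]= true;                          /* remaining_tables= remaining_tables - t */
    printf(" %s", name[best]);                 /* pplan= concat(pplan, t) */
  }
  printf("\n");                                /* prints: greedy join order: t2 t3 t1 */
  return 0;
}

The real greedy_search() differs in that each step runs a depth-limited exhaustive look-ahead rather than a single-table comparison.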
+ Currently this estimate is performed by calling + 'best_extension_by_limited_search' to evaluate all extensions of the + current QEP of size 'search_depth', thus the complexity of 'greedy_search' + mainly depends on that of 'best_extension_by_limited_search'. + + If 'best_extension()' == 'best_extension_by_limited_search()', then the + worst-case complexity of this algorithm is <= + O(N*N^search_depth/search_depth). When serch_depth >= N, then the + complexity of greedy_search is O(N!). + + In the future, 'greedy_search' might be extended to support other + implementations of 'best_extension', e.g. some simpler quadratic procedure. + + RETURN + None +*/ + +static void +greedy_search(JOIN *join, + table_map remaining_tables, + uint search_depth, + uint prune_level) +{ + double record_count= 1.0; + double read_time= 0.0; + uint idx= join->const_tables; // index into 'join->best_ref' + uint best_idx; + uint rem_size; // cardinality of remaining_tables + POSITION best_pos; + JOIN_TAB *best_table; // the next plan node to be added to the curr QEP + + DBUG_ENTER("greedy_search"); + + /* number of tables that remain to be optimized */ + rem_size= my_count_bits(remaining_tables); + + do { + /* Find the extension of the current QEP with the lowest cost */ + join->best_read= DBL_MAX; + best_extension_by_limited_search(join, remaining_tables, idx, record_count, + read_time, search_depth, prune_level); + + if (rem_size <= search_depth) + { + /* + 'join->best_positions' contains a complete optimal extension of the + current partial QEP. + */ + DBUG_EXECUTE("opt", print_plan(join, read_time, record_count, + join->tables, "optimal");); + DBUG_VOID_RETURN; + } + + /* select the first table in the optimal extension as most promising */ + best_pos= join->best_positions[idx]; + best_table= best_pos.table; + /* + Each subsequent loop of 'best_extension_by_limited_search' uses + 'join->positions' for cost estimates, therefore we have to update its + value. + */ + join->positions[idx]= best_pos; + + /* find the position of 'best_table' in 'join->best_ref' */ + best_idx= idx; + JOIN_TAB *pos= join->best_ref[best_idx]; + while (pos && best_table != pos) + pos= join->best_ref[++best_idx]; + DBUG_ASSERT((pos != NULL)); // should always find 'best_table' + /* move 'best_table' at the first free position in the array of joins */ + swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]); + + /* compute the cost of the new plan extended with 'best_table' */ + record_count*= join->positions[idx].records_read; + read_time+= join->positions[idx].read_time; + + remaining_tables&= ~(best_table->table->map); + --rem_size; + ++idx; + + DBUG_EXECUTE("opt", + print_plan(join, read_time, record_count, idx, "extended");); + } while (TRUE); +} + + +/* + Find a good, possibly optimal, query execution plan (QEP) by a possibly + exhaustive search. + + SYNOPSIS + best_extension_by_limited_search() + join pointer to the structure providing all context info for + the query + remaining_tables set of tables not included into the partial plan yet + idx length of the partial QEP in 'join->positions'; + since a depth-first search is used, also corresponds to + the current depth of the search tree; + also an index in the array 'join->best_ref'; + record_count estimate for the number of records returned by the best + partial plan + read_time the cost of the best partial plan + search_depth maximum depth of the recursion and thus size of the found + optimal plan (0 < search_depth <= join->tables+1). 
+ prune_level pruning heuristics that should be applied during optimization + (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS) + + DESCRIPTION + The procedure searches for the optimal ordering of the query tables in set + 'remaining_tables' of size N, and the corresponding optimal access paths to each + table. The choice of a table order and an access path for each table + constitutes a query execution plan (QEP) that fully specifies how to + execute the query. + + The maximal size of the found plan is controlled by the parameter + 'search_depth'. When search_depth == N, the resulting plan is complete and + can be used directly as a QEP. If search_depth < N, the found plan consists + of only some of the query tables. Such "partial" optimal plans are useful + only as input to query optimization procedures, and cannot be used directly + to execute a query. + + The algorithm begins with an empty partial plan stored in 'join->positions' + and a set of N tables - 'remaining_tables'. Each step of the algorithm + evaluates the cost of the partial plan extended by all access plans for + each of the relations in 'remaining_tables', expands the current partial + plan with the access plan that results in lowest cost of the expanded + partial plan, and removes the corresponding relation from + 'remaining_tables'. The algorithm continues until it either constructs a + complete optimal plan, or constructs an optimal plartial plan with size = + search_depth. + + The final optimal plan is stored in 'join->best_positions'. The + corresponding cost of the optimal plan is in 'join->best_read'. + + NOTES + The procedure uses a recursive depth-first search where the depth of the + recursion (and thus the exhaustiveness of the search) is controlled by the + parameter 'search_depth'. + + The pseudocode below describes the algorithm of + 'best_extension_by_limited_search'. The worst-case complexity of this + algorithm is O(N*N^search_depth/search_depth). When serch_depth >= N, then + the complexity of greedy_search is O(N!). + + procedure best_extension_by_limited_search( + pplan in, // in, partial plan of tables-joined-so-far + pplan_cost, // in, cost of pplan + remaining_tables, // in, set of tables not referenced in pplan + best_plan_so_far, // in/out, best plan found so far + best_plan_so_far_cost,// in/out, cost of best_plan_so_far + search_depth) // in, maximum size of the plans being considered + { + for each table T from remaining_tables + { + // Calculate the cost of using table T as above + cost = complex-series-of-calculations; + + // Add the cost to the cost so far. + pplan_cost+= cost; + + if (pplan_cost >= best_plan_so_far_cost) + // pplan_cost already too great, stop search + continue; + + pplan= expand pplan by best_access_method; + remaining_tables= remaining_tables - table T; + if (remaining_tables is not an empty set + and + search_depth > 1) + { + best_extension_by_limited_search(pplan, pplan_cost, + remaining_tables, + best_plan_so_far, + best_plan_so_far_cost, + search_depth - 1); + } + else + { + best_plan_so_far_cost= pplan_cost; + best_plan_so_far= pplan; + } + } + } + + IMPLEMENTATION + When 'best_extension_by_limited_search' is called for the first time, + 'join->best_read' must be set to the largest possible value (e.g. DBL_MAX). + The actual implementation provides a way to optionally use pruning + heuristic (controlled by the parameter 'prune_level') to reduce the search + space by skipping some partial plans. 
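The recursion and the cost-based pruning can also be modelled in isolation. The sketch below is not the server's code: the tables, row counts and cost model (cost of a prefix = sum of the sizes of its intermediate results) are invented purely to show the shape of the algorithm.

#include <cstdio>

static const int N= 4;
static const double rows[N]= {100.0, 5.0, 50.0, 20.0}; /* hypothetical tables t0..t3 */

static double best_cost= 1e300;   /* plays the role of join->best_read */
static int    best_order[N];
static int    best_len= 0;
static int    cur_order[N];

static void best_extension(unsigned remaining, int idx,
                           double record_count, double cost, int depth)
{
  for (int i= 0; i < N; i++)
  {
    if (!(remaining & (1u << i)))
      continue;                                  /* table already in the prefix */
    double cur_records= record_count * rows[i];  /* rows produced by the new prefix */
    double cur_cost= cost + cur_records;         /* pay for producing them */
    if (cur_cost >= best_cost)
      continue;                                  /* prune: cannot beat the best plan */
    cur_order[idx]= i;
    unsigned rest= remaining & ~(1u << i);
    if (rest && depth > 1)
      best_extension(rest, idx + 1, cur_records, cur_cost, depth - 1);
    else
    {                                            /* complete plan or depth limit reached */
      best_cost= cur_cost;
      best_len= idx + 1;
      for (int j= 0; j < best_len; j++)
        best_order[j]= cur_order[j];
    }
  }
}

int main()
{
  best_extension((1u << N) - 1, 0, 1.0, 0.0, N); /* depth == N: exhaustive search */
  printf("best cost %.0f, order:", best_cost);
  for (int j= 0; j < best_len; j++)
    printf(" t%d", best_order[j]);
  printf("\n");                                  /* expected order: t1 t3 t2 t0 */
  return 0;
}

With a depth smaller than N the same routine records the best partial prefix instead of a complete plan, which is how the greedy driver described above uses it.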
+ The parameter 'search_depth' provides control over the recursion + depth, and thus the size of the resulting optimal plan. + + RETURN + None +*/ + +static void +best_extension_by_limited_search(JOIN *join, + table_map remaining_tables, + uint idx, + double record_count, + double read_time, + uint search_depth, + uint prune_level) +{ + THD *thd= join->thd; + if (thd->killed) // Abort + return; + + DBUG_ENTER("best_extension_by_limited_search"); + + /* + 'join' is a partial plan with lower cost than the best plan so far, + so continue expanding it further with the tables in 'remaining_tables'. + */ + JOIN_TAB *s; + double best_record_count= DBL_MAX; + double best_read_time= DBL_MAX; + + DBUG_EXECUTE("opt", + print_plan(join, read_time, record_count, idx, "part_plan");); + + for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++) + { + table_map real_table_bit= s->table->map; + if ((remaining_tables & real_table_bit) && !(remaining_tables & s->dependent)) + { + double current_record_count, current_read_time; + + /* Find the best access method from 's' to the current partial plan */ + best_access_path(join, s, thd, remaining_tables, idx, record_count, read_time); + /* Compute the cost of extending the plan with 's' */ + current_record_count= record_count * join->positions[idx].records_read; + current_read_time= read_time + join->positions[idx].read_time; + + /* Expand only partial plans with lower cost than the best QEP so far */ + if ((current_read_time + + current_record_count / (double) TIME_FOR_COMPARE) >= join->best_read) + { + DBUG_EXECUTE("opt", print_plan(join, read_time, record_count, idx, + "prune_by_cost");); + continue; + } + + /* + Prune some less promising partial plans. This heuristic may miss + the optimal QEPs, thus it results in a non-exhaustive search. + */ + if (prune_level == 1) + { + if (best_record_count > current_record_count || + best_read_time > current_read_time || + idx == join->const_tables && // 's' is the first table in the QEP + s->table == join->sort_by_table) + { + if (best_record_count >= current_record_count && + best_read_time >= current_read_time && + /* TODO: What is the reasoning behind this condition? */ + (!(s->key_dependent & remaining_tables) || + join->positions[idx].records_read < 2.0)) + { + best_record_count= current_record_count; + best_read_time= current_read_time; + } + } + else + { + DBUG_EXECUTE("opt", print_plan(join, read_time, record_count, idx, + "pruned_by_heuristic");); + continue; + } + } + + if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) ) + { /* Recursively expand the current partial plan */ + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); + best_extension_by_limited_search(join, + remaining_tables & ~real_table_bit, + idx + 1, + current_record_count, + current_read_time, + search_depth - 1, + prune_level); + if (thd->killed) + DBUG_VOID_RETURN; + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); + } + else + { /* + 'join' is either the best partial QEP with 'search_depth' relations, + or the best complete QEP so far, whichever is smaller. 
+ */ + current_read_time+= current_record_count / (double) TIME_FOR_COMPARE; + if (join->sort_by_table && + join->sort_by_table != join->positions[join->const_tables].table->table) + /* We have to make a temp table */ + current_read_time+= current_record_count; + if ((search_depth == 1) || (current_read_time < join->best_read)) + { + memcpy((gptr) join->best_positions, (gptr) join->positions, + sizeof(POSITION) * (idx + 1)); + join->best_read= current_read_time; + } + DBUG_EXECUTE("opt", + print_plan(join, current_read_time, current_record_count, idx, "full_plan");); + } + } + } + DBUG_VOID_RETURN; +} + + +/* + TODO: this function is here only temporarily until 'greedy_search' is + tested and accepted. +*/ static void find_best(JOIN *join,table_map rest_tables,uint idx,double record_count, double read_time) @@ -2965,7 +3971,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count, { /* Estimate cost of reading table. */ tmp= s->table->file->scan_time(); - if (s->on_expr) // Can't use join cache + if (s->table->map & join->outer_join) // Can't use join cache { /* For each record we have to: @@ -3409,6 +4415,8 @@ make_simple_join(JOIN *join,TABLE *tmp_table) join_tab->keys.init(~0); /* test everything in quick */ join_tab->info=0; join_tab->on_expr=0; + join_tab->last_inner= 0; + join_tab->first_unmatched= 0; join_tab->ref.key = -1; join_tab->not_used_in_distinct=0; join_tab->read_first_record= join_init_read_record; @@ -3420,6 +4428,132 @@ make_simple_join(JOIN *join,TABLE *tmp_table) } + +/* + Build a predicate guarded by match variables for embedding outer joins + + SYNOPSIS + add_found_match_trig_cond() + tab the first inner table of the most nested outer join + cond the predicate to be guarded + root_tab the first inner table at which to stop + + DESCRIPTION + The function recursively adds guards for predicate cond, + ascending from tab to the first inner table of the next embedding + nested outer join, and so on until it reaches root_tab + (root_tab can be 0). + + RETURN VALUE + pointer to the guarded predicate, if success + 0, otherwise +*/ + +static COND* +add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab) +{ + COND *tmp; + if (tab == root_tab || !cond) + return cond; + if ((tmp= add_found_match_trig_cond(tab->first_upper, cond, root_tab))) + { + tmp= new Item_func_trig_cond(tmp, &tab->found); + } + if (tmp) + tmp->quick_fix_field(); + return tmp; +} + + +/* + Fill in outer join related info for the execution plan structure + + SYNOPSIS + make_outerjoin_info() + join - reference to the info fully describing the query + + DESCRIPTION + For each outer join operation left after simplification of the + original query the function sets up the following pointers in the linear + structure join->join_tab representing the selected execution plan. + The first inner table t0 for the operation is set to refer to the last + inner table tk through the field t0->last_inner. + Any inner table ti for the operation is set to refer to the first + inner table through the field ti->first_inner. + The first inner table t0 for the operation is set to refer to the + first inner table of the embedding outer join operation, if there is any, + through the field t0->first_upper. + The on expression for the outer join operation is attached to the + corresponding first inner table through the field t0->on_expr. + Here ti are structures of the JOIN_TAB type. 
+ + EXAMPLE + For the query: + SELECT * FROM t1 + LEFT JOIN + (t2, t3 LEFT JOIN t4 ON t3.a=t4.a) + ON (t1.a=t2.a AND t1.b=t3.b) + WHERE t1.c > 5, + given the execution plan with the table order t1,t2,t3,t4 + is selected, the following references will be set; + t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2] + t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2], + on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to t2->on_expr, + while t3.a=t4.a will be attached to t4->on_expr. + + NOTES + The function assumes that the simplification procedure has been + already applied to the join query (see simplify_joins). + This function can be called only after the execution plan + has been chosen. +*/ + +static void +make_outerjoin_info(JOIN *join) +{ + for (uint i=join->const_tables ; i < join->tables ; i++) + { + JOIN_TAB *tab=join->join_tab+i; + TABLE *table=tab->table; + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join) + { + /* + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) + */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr= tbl->on_expr; + if (embedding) + tab->first_upper= embedding->nested_join->first_nested; + } + for ( ; embedding ; embedding= embedding->embedding) + { + NESTED_JOIN *nested_join= embedding->nested_join; + if (!nested_join->counter) + { + /* + Table tab is the first inner table for nested_join. + Save reference to it in the nested join structure. + */ + nested_join->first_nested= tab; + tab->on_expr= embedding->on_expr; + if (embedding->embedding) + tab->first_upper= embedding->embedding->nested_join->first_nested; + } + if (!tab->first_inner) + tab->first_inner= nested_join->first_nested; + if (++nested_join->counter < nested_join->join_list.elements) + break; + /* Table tab is the last inner table for nested join. */ + nested_join->first_nested->last_inner= tab; + } + } +} + + static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) { @@ -3448,6 +4582,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) for (uint i=join->const_tables ; i < join->tables ; i++) { JOIN_TAB *tab=join->join_tab+i; + JOIN_TAB *first_inner_tab= tab->first_inner; table_map current_map= tab->table->map; /* Following force including random expression in last table condition. @@ -3487,7 +4622,16 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->thd->memdup((gptr) select, sizeof(SQL_SELECT)); if (!sel) DBUG_RETURN(1); // End of memory + /* + If tab is an inner table of an outer join operation, + add a match guard to the pushed down predicate. + The guard will turn the predicate on only after + the first match for outer tables is encountered. + */ + if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0))) + DBUG_RETURN(1); tab->select_cond=sel->cond=tmp; + sel->head=tab->table; if (tab->quick) { @@ -3495,7 +4639,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) with key reading */ if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF && tab->type != JT_FT && (tab->type != JT_REF || - (uint) tab->ref.key == tab->quick->index)) + (uint) tab->ref.key == tab->quick->index)) { sel->quick=tab->quick; // Use value from get_quick_... sel->quick_keys.clear_all(); @@ -3597,13 +4741,65 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } } } + } + + /* + Push down all predicates from on expressions. 
+ Each of these predicated are guarded by a variable + that turns if off just before null complemented row for + outer joins is formed. Thus, the predicates from an + 'on expression' are guaranteed not to be checked for + the null complemented row. + */ + JOIN_TAB *last_tab= tab; + while (first_inner_tab && first_inner_tab->last_inner == last_tab) + { + /* + Table tab is the last inner table of an outer join. + An on expression is always attached to it. + */ + COND *on_expr= first_inner_tab->on_expr; + + table_map used_tables= join->const_table_map | + OUTER_REF_TABLE_BIT | RAND_TABLE_BIT; + for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + { + current_map= tab->table->map; + used_tables|= current_map; + COND *tmp= make_cond_for_table(on_expr, used_tables, current_map); + if (tmp) + { + JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab; + /* + First add the guards for match variables of + all embedding outer join operations. + */ + if (!(tmp= add_found_match_trig_cond(cond_tab->first_inner, + tmp, first_inner_tab))) + DBUG_RETURN(1); + /* + Now add the guard turning the predicate off for + the null complemented row. + */ + tmp= new Item_func_trig_cond(tmp, + &first_inner_tab->not_null_compl); + if (tmp) + tmp->quick_fix_field(); + /* Add the predicate to other pushed down predicates */ + cond_tab->select_cond= !cond_tab->select_cond ? tmp : + new Item_cond_and(cond_tab->select_cond,tmp); + if (!cond_tab->select_cond) + DBUG_RETURN(1); + cond_tab->select_cond->quick_fix_field(); + } + } + first_inner_tab= first_inner_tab->first_upper; } } } DBUG_RETURN(0); } - static void make_join_readinfo(JOIN *join, uint options) { @@ -3691,7 +4887,7 @@ make_join_readinfo(JOIN *join, uint options) */ table->status=STATUS_NO_RECORD; if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) && - tab->use_quick != 2 && !tab->on_expr) + tab->use_quick != 2 && !tab->first_inner) { if ((options & SELECT_DESCRIBE) || !join_init_cache(join->thd,join->join_tab+join->const_tables, @@ -4409,18 +5605,267 @@ COND *eliminate_not_funcs(THD *thd, COND *cond) } +/* + Simplify joins replacing outer joins by inner joins whenever it's possible + + SYNOPSIS + simplify_joins() + join reference to the query info + join_list list representation of the join to be converted + conds conditions to add on expressions for converted joins + top true <=> conds is the where condition + + DESCRIPTION + The function, during a retrieval of join_list, eliminates those + outer joins that can be converted into inner join, possibly nested. + It also moves the on expressions for the converted outer joins + and from inner joins to conds. + The function also calculates some attributes for nested joins: + - used_tables + - not_null_tables + - dep_tables. + - on_expr_dep_tables + The first two attributes are used to test whether an outer join can + be substituted for an inner join. The third attribute represents the + relation 'to be dependent on' for tables. If table t2 is dependent + on table t1, then in any evaluated execution plan table access to + table t2 must precede access to table t2. This relation is used also + to check whether the query contains invalid cross-references. + The forth attribute is an auxiliary one and is used to calculate + dep_tables. + As the attribute dep_tables qualifies possibles orders of tables in the + execution plan, the dependencies required by the straight join + modifiers are reflected in this attribute as well. 
+ The function also removes all braces that can be removed from the join + expression without changing its meaning. + + NOTES + An outer join can be replaced by an inner join if the where condition + or the on expression for an embedding nested join contains a conjunctive + predicate rejecting null values for some attribute of the inner tables. + + E.g. in the query: + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + the predicate t2.b < 5 rejects nulls. + The query is converted first to: + SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5 + then to the equivalent form: + SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a. + + Similarly the following query: + SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b + WHERE t2.c < 5 + is converted to: + SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b + + One conversion might trigger another: + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a + LEFT JOIN t3 ON t3.b=t2.b + WHERE t3 IS NOT NULL => + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b => + SELECT * FROM t1, t2, t3 + WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a + + The function removes all unnecessary braces from the expression + produced by the conversions. + E.g. SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b + finally is converted to: + SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b + + It also will remove braces from the following queries: + SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b + SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b. + + The benefit of this simplification procedure is that it might return + a query for which the optimizer can evaluate execution plan with more + join orders. With a left join operation the optimizer does not + consider any plan where one of the inner tables is before some of outer + tables. + + IMPLEMENTATION. + The function is implemented by a recursive procedure. On the recursive + ascent all attributes are calculated, all outer joins that can be + converted are replaced and then all unnecessary braces are removed. + As join list contains join tables in the reverse order sequential + elimination of outer joins does not requite extra recursive calls. + + EXAMPLES + Here is an example of a join query with invalid cross references: + SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN ON t3.b=t1.b + + RETURN VALUE + The new condition, if success + 0, otherwise +*/ + static COND * -optimize_cond(THD *thd, COND *conds, Item::cond_result *cond_value) +simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + TABLE_LIST *prev_table= 0; + List_iterator<TABLE_LIST> li(*join_list); + + /* + Try to simplify join operations from join_list. + The most outer join operation is checked for conversion first. + */ + while ((table= li++)) + { + table_map used_tables; + table_map not_null_tables= (table_map) 0; + + if ((nested_join= table->nested_join)) + { + /* + If the element of join_list is a nested join apply + the procedure to its nested join list first. + */ + if (table->on_expr) + { + /* + If an on expression E is attached to the table, + check all null rejected predicates in this expression. + If such a predicate over an attribute belonging to + an inner table of an embedded outer join is found, + the outer join is converted to an inner join and + the corresponding on expression is added to E. 
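The conversion test itself boils down to a bitmask intersection. A minimal stand-alone model follows (table_map reduced to a plain integer, and the not_null_tables() result for "t2.b < 5" filled in by hand, so both values are assumptions for the example):

#include <cstdio>

typedef unsigned long long table_map;             /* simplified stand-in */

int main()
{
  const table_map T2= 1ULL << 1;  /* bit for table t2 (t1 would be bit 0) */
  table_map inner_tables=    T2;  /* inner side of t1 LEFT JOIN t2 ON t2.a=t1.a */
  table_map not_null_tables= T2;  /* what conds->not_null_tables() would report
                                     for WHERE t2.b < 5 (assumed here) */
  bool convert= (inner_tables & not_null_tables) != 0;
  printf("convert LEFT JOIN to inner join: %s\n", convert ? "yes" : "no");
  return 0;
}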
+ */ + table->on_expr= simplify_joins(join, &nested_join->join_list, + table->on_expr, FALSE); + } + nested_join->used_tables= (table_map) 0; + nested_join->not_null_tables=(table_map) 0; + conds= simplify_joins(join, &nested_join->join_list, conds, top); + used_tables= nested_join->used_tables; + not_null_tables= nested_join->not_null_tables; + } + else + { + used_tables= table->table->map; + if (conds) + not_null_tables= conds->not_null_tables(); + } + + if (table->embedding) + { + table->embedding->nested_join->used_tables|= used_tables; + table->embedding->nested_join->not_null_tables|= not_null_tables; + } + + if (!table->outer_join || (used_tables & not_null_tables)) + { + /* + For some of the inner tables there are conjunctive predicates + that reject nulls => the outer join can be replaced by an inner join. + */ + table->outer_join= 0; + if (table->on_expr) + { + /* Add on expression to the where condition. */ + if (conds) + { + conds= and_conds(conds, table->on_expr); + conds->fix_fields(join->thd, 0, &conds); + } + else + conds= table->on_expr; + table->on_expr= 0; + } + } + + if (!top) + continue; + + /* + Only inner tables of non-convertible outer joins + remain with on_expr. + */ + if (table->on_expr) + { + table->dep_tables|= table->on_expr->used_tables(); + if (table->embedding) + { + table->dep_tables&= ~table->embedding->nested_join->used_tables; + /* + Embedding table depends on tables used + in embedded on expressions. + */ + table->embedding->on_expr_dep_tables|= table->on_expr->used_tables(); + } + else + table->dep_tables&= ~table->table->map; + } + + if (prev_table) + { + /* The order of tables is reverse: prev_table follows table */ + if (prev_table->straight) + prev_table->dep_tables|= used_tables; + if (prev_table->on_expr) + prev_table->dep_tables|= table->on_expr_dep_tables; + } + prev_table= table; + } + + /* Flatten nested joins that can be flattened. */ + li.rewind(); + while((table= li++)) + { + nested_join= table->nested_join; + if (nested_join && !table->on_expr) + { + TABLE_LIST *tbl; + List_iterator<TABLE_LIST> it(nested_join->join_list); + while ((tbl= it++)) + { + tbl->embedding= table->embedding; + tbl->join_list= table->join_list; + } + li.replace(nested_join->join_list); + } + } + return conds; +} + +static COND * +optimize_cond(JOIN *join, COND *conds, Item::cond_result *cond_value) { DBUG_ENTER("optimize_cond"); + + THD *thd= join->thd; + SELECT_LEX *select= thd->lex->current_select; + if (select->first_cond_optimization) + { + Item_arena *arena= select->first_cond_optimization ? + thd->current_arena : 0; + Item_arena backup; + if (arena) + thd->set_n_backup_item_arena(arena, &backup); + + if (conds) + { + DBUG_EXECUTE("where",print_where(conds,"original");); + /* eliminate NOT operators */ + conds= eliminate_not_funcs(thd, conds); + } + + /* Convert all outer joins to inner joins if possible */ + conds= simplify_joins(join, join->join_list, conds, TRUE); + + select->prep_where= conds ? 
conds->copy_andor_structure(thd) : 0; + select->first_cond_optimization= 0; + if (arena) + thd->restore_backup_item_arena(arena, &backup); + } + if (!conds) { *cond_value= Item::COND_TRUE; DBUG_RETURN(conds); } - DBUG_EXECUTE("where",print_where(conds,"original");); - /* eliminate NOT operators */ - conds= eliminate_not_funcs(thd, conds); + DBUG_EXECUTE("where", print_where(conds, "after negation elimination");); /* change field = field to field = const for each found field = const */ propagate_cond_constants((I_List<COND_CMP> *) 0,conds,conds); @@ -4633,7 +6078,6 @@ const_expression_in_where(COND *cond, Item *comp_item, Item **const_item) return 0; } - /**************************************************************************** Create internal temporary table ****************************************************************************/ @@ -5675,7 +7119,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) if (join->tables == join->const_tables) { /* - HAVING will be chcked after processing aggregate functions, + HAVING will be checked after processing aggregate functions, But WHERE should checkd here (we alredy have read tables) */ if (!join->conds || join->conds->val_int()) @@ -5751,7 +7195,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) } if (join->thd->killed) // If aborted by user { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); return -2; /* purecov: inspected */ } if (join_tab->use_quick != 2 || test_if_quick_select(join_tab) <= 0) @@ -5765,20 +7209,148 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) return sub_select(join,join_tab,end_of_records); /* Use ordinary select */ } +/* + Retrieve records that complete a given partial join record (beginning) from the result of a join + + SYNOPSIS + sub_select() + join pointer to the structure providing all context info for the query + join_tab the next table of the execution plan to be retrieved + end_of_records true when we need to perform the final steps of retrieval + + DESCRIPTION + For a given partial join record consisting of records from the tables + preceding the table join_tab in the execution plan, the function + retrieves all matching full records from the result set and + sends them to the result set stream. + + NOTES + The function effectively implements the final (n-k) nested loops + of the nested loops join algorithm, where k is the ordinal number of + the join_tab table and n is the total number of tables in the join query. + It performs nested loops joins with all conjunctive predicates from + the where condition pushed down to the tables as low as possible. + E.g. for the query + SELECT * FROM t1,t2,t3 + WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9 + the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1, + given the selected plan prescribes to nest retrievals of the + joined tables in the following order: t1,t2,t3. + A pushed down predicate is attached to the table it is pushed to, + in the field select_cond. + When executing a nested loop of level k the function runs through + the rows of 'join_tab' and for each row checks the pushed condition + attached to the table. + If it is false the function moves to the next row of the + table. If the condition is true the function recursively executes (n-k-1) + remaining embedded nested loops. + The situation becomes more complicated if outer joins are involved in + the execution plan. In this case the pushed down predicates can be + checked only under certain conditions. 
+ Suppose for the query + SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a + WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL) + the optimizer has chosen a plan with the table order t1,t2,t3. + The predicate P1=t1>2 will be pushed down to the table t1, while the + predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table + t2. But the second predicate can not be unconditionally tested right + after a row from t2 has been read. This can be done only after the + first row with t3.a=t1.a has been encountered. + Thus, the second predicate P2 is supplied with a guard variable that is + stored in the field 'found' of the first inner table for the outer join + (table t2). When the first row with t3.a=t1.a for the current row + of table t1 appears, the value becomes true. From now on the predicate + is evaluated immediately after the row of table t2 has been read. + When the first row with t3.a=t1.a has been encountered all + conditions attached to the inner tables t2,t3 must be evaluated. + Only when all of them are true is the row sent to the output stream. + If not, the function returns to the lowest nest level that has a false + attached condition. + The predicates from on expressions are also pushed down. If in the + above example the on expression were (t3.a=t1.a AND t2.a=t1.a), + then t1.a=t2.a would be pushed down to table t2, and without any + guard. + If after the run through all rows of table t2, the first inner table + for the outer join operation, it turns out that no matches are + found for the current row of t1, then the current row from table t1 + is complemented by nulls for t2 and t3. Then the pushed down predicates + are checked for the composed row almost in the same way as was + done for the first row with a match. The only difference is that + the predicates from on expressions are not checked. + + IMPLEMENTATION + The function forms output rows for a current partial join of k + tables recursively. + For each partial join record ending with a certain row from + join_tab it calls sub_select that builds all possible matching + tails from the result set. + To be able to check predicates conditionally, items of the class + Item_func_trig_cond are employed. + An object of this class is constructed from an item of class COND + and a pointer to a guarding boolean variable. + When the value of the guard variable is true the value of the object + is the same as the value of the predicate, otherwise it just returns + true. + To carry out a return to a nested loop level of join table t the pointer + to t is remembered in the field 'return_tab' of the join structure. + Consider the following query: + SELECT * FROM t1 + LEFT JOIN + (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a) + ON t4.a=t2.a + WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL) + Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5 + and suppose that for a given joined row from tables t1,t2,t3 there are + no rows in the result set yet. + When the first row from t5 that satisfies the on condition + t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL + becomes 'activated', as well as the predicate t4.a=t2.a. But + the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until + t4.a=t2.a becomes true. + In order not to re-evaluate the predicates that were already evaluated + as attached pushed down predicates, a pointer to the first (most inner) + unmatched table is maintained in join_tab->first_unmatched. 
+ Thus, when the first row from t5 with t5.a=t3.a is found + this pointer for t5 is changed from t4 to t2. + + STRUCTURE NOTES + join_tab->first_unmatched points always backwards to the first inner + table of the embedding nested join, if any. + + RETURN + 0, if success + # of the error, otherwise +*/ static int sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) { - join_tab->table->null_row=0; if (end_of_records) return (*join_tab->next_select)(join,join_tab+1,end_of_records); - /* Cache variables for faster loop */ int error; - bool found=0; - COND *on_expr=join_tab->on_expr, *select_cond=join_tab->select_cond; + JOIN_TAB *first_unmatched; + JOIN_TAB *tab; + bool found= 0; + /* Cache variables for faster loop */ + COND *select_cond= join_tab->select_cond; + JOIN_TAB *first_inner_tab= join_tab->first_inner; + my_bool *report_error= &(join->thd->net.report_error); + join->return_tab= join_tab; + + if (join_tab->last_inner) + { + /* join_tab is the first inner table for an outer join operation. */ + + /* Set initial state of guard variables for this table.*/ + join_tab->found=0; + join_tab->not_null_compl= 1; + + /* Set first_unmatched for the last inner table of this group */ + join_tab->last_inner->first_unmatched= join_tab; + } if (!(error=(*join_tab->read_first_record)(join_tab))) { @@ -5792,20 +7364,80 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) { if (join->thd->killed) // Aborted by user { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); return -2; /* purecov: inspected */ } - join->examined_rows++; - join->thd->row_count++; - if (!on_expr || on_expr->val_int()) + if (!select_cond || select_cond->val_int()) { - found=1; - if (not_exists_optimize) - break; // Searching after not null columns - if (!select_cond || select_cond->val_int()) - { - if ((error=(*join_tab->next_select)(join,join_tab+1,0)) < 0) + /* + There is no select condition or the attached pushed down + condition is true => a match is found. + */ + bool found= 1; + while (join_tab->first_unmatched && found) + { + /* + The while condition is always false if join_tab is not + the last inner join table of an outer join operation. + */ + first_unmatched= join_tab->first_unmatched; + /* + Mark that a match for current outer table is found. + This activates push down conditional predicates attached + to the all inner tables of the outer join. + */ + first_unmatched->found= 1; + for (tab= first_unmatched; tab <= join_tab; tab++) + { + /* Check all predicates that has just been activated. */ + /* + Actually all predicates non-guarded by first_unmatched->found + will be re-evaluated again. It could be fixed, but, probably, + it's not worth doing now. + */ + if (tab->select_cond && !tab->select_cond->val_int()) + { + /* The condition attached to table tab is false */ + if (tab == join_tab) + found= 0; + else + { + /* + Set a return point if rejected predicate is attached + not to the last table of the current nest level. + */ + join->return_tab= tab; + return 0; + } + } + } + /* + Check whether join_tab is not the last inner table + for another embedding outer join. + */ + if ((first_unmatched= first_unmatched->first_upper) && + first_unmatched->last_inner != join_tab) + first_unmatched= 0; + join_tab->first_unmatched= first_unmatched; + } + + /* + It was not just a return to lower loop level when one + of the newly activated predicates is evaluated as false + (See above join->return_tab= tab). 
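For readers unfamiliar with the guard objects used above, here is the behaviour attributed to Item_func_trig_cond, reduced to a stand-alone sketch (the real class is an Item with the usual evaluation machinery; only the guard logic is modelled here):

struct Cond                          /* stand-in for the server's COND/Item */
{
  virtual bool val() const= 0;
  virtual ~Cond() {}
};

struct TrigCond : public Cond        /* guarded predicate */
{
  const Cond *inner;
  const bool *guard;                 /* e.g. &first_inner_tab->found or
                                        &first_inner_tab->not_null_compl */
  TrigCond(const Cond *c, const bool *g) : inner(c), guard(g) {}
  bool val() const { return *guard ? inner->val() : true; } /* guard off => filters nothing */
};

Flipping the guard from sub_select() is what 'activates' or 'deactivates' a pushed-down predicate without rebuilding the condition tree.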
+ */ + join->examined_rows++; + join->thd->row_count++; + + if (found) + { + if (not_exists_optimize) + break; + /* A match from join_tab is found for the current partial join. */ + if ((error=(*join_tab->next_select)(join, join_tab+1, 0)) < 0) return error; + if (join->return_tab < join_tab) + return 0; /* Test if this was a SELECT DISTINCT query on a table that was not in the field list; In this case we can abort if @@ -5815,30 +7447,77 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) return 0; } else - { - /* - This row failed selection, release lock on it. - XXX: There is no table handler in MySQL which makes use of this - call. It's kept from Gemini times. A lot of new code was added - recently (i. e. subselects) without having it in mind. - */ - info->file->unlock_row(); - } + info->file->unlock_row(); } + else + { + /* + The condition pushed down to the table join_tab rejects all rows + with the beginning coinciding with the current partial join. + */ + join->examined_rows++; + join->thd->row_count++; + } + } while (!(error=info->read_record(info)) && !(*report_error)); } if (error > 0 || (*report_error)) // Fatal error return -1; - if (!found && on_expr) - { // OUTER JOIN - restore_record(join_tab->table,default_values); // Make empty record - mark_as_null_row(join_tab->table); // For group by without error - if (!select_cond || select_cond->val_int()) - { - if ((error=(*join_tab->next_select)(join,join_tab+1,0)) < 0) - return error; /* purecov: inspected */ + if (join_tab->last_inner && !join_tab->found) + { + /* + The table join_tab is the first inner table of a outer join operation + and no matches has been found for the current outer row. + */ + JOIN_TAB *last_inner_tab= join_tab->last_inner; + for ( ; join_tab <= last_inner_tab ; join_tab++) + { + /* Change the the values of guard predicate variables. */ + join_tab->found= 1; + join_tab->not_null_compl= 0; + /* The outer row is complemented by nulls for each inner tables */ + restore_record(join_tab->table,default_values); // Make empty record + mark_as_null_row(join_tab->table); // For group by without error + select_cond= join_tab->select_cond; + /* Check all attached conditions for inner table rows. */ + if (select_cond && !select_cond->val_int()) + return 0; + } + join_tab--; + /* + The row complemented by nulls might be the first row + of embedding outer joins. + If so, perform the same actions as in the code + for the first regular outer join row above. + */ + for ( ; ; ) + { + first_unmatched= join_tab->first_unmatched; + if ((first_unmatched= first_unmatched->first_upper) && + first_unmatched->last_inner != join_tab) + first_unmatched= 0; + join_tab->first_unmatched= first_unmatched; + if (!first_unmatched) + break; + first_unmatched->found= 1; + for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++) + { + if (tab->select_cond && !tab->select_cond->val_int()) + { + join->return_tab= tab; + return 0; + } + } } + /* + The row complemented by nulls satisfies all conditions + attached to inner tables. + Send the row complemented by nulls to be joined with the + remaining tables. 
+ */ + if ((error=(*join_tab->next_select)(join, join_tab+1 ,0)) < 0) + return error; } return 0; } @@ -5880,7 +7559,7 @@ flush_cached_records(JOIN *join,JOIN_TAB *join_tab,bool skip_last) { if (join->thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); return -2; // Aborted by user /* purecov: inspected */ } SQL_SELECT *select=join_tab->select; @@ -6211,8 +7890,8 @@ test_if_quick_select(JOIN_TAB *tab) static int join_init_read_record(JOIN_TAB *tab) { - if (tab->select && tab->select->quick) - tab->select->quick->reset(); + if (tab->select && tab->select->quick && tab->select->quick->reset()) + return 1; init_read_record(&tab->read_record, tab->join->thd, tab->table, tab->select,1,1); return (*tab->read_record.read_record)(&tab->read_record); @@ -6532,7 +8211,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (join->thd->killed) // Aborted by user { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); DBUG_RETURN(-2); /* purecov: inspected */ } if (!end_of_records) @@ -6600,7 +8279,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(0); if (join->thd->killed) // Aborted by user { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); DBUG_RETURN(-2); /* purecov: inspected */ } @@ -6669,7 +8348,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(0); if (join->thd->killed) // Aborted by user { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); DBUG_RETURN(-2); /* purecov: inspected */ } @@ -6716,7 +8395,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (join->thd->killed) { // Aborted by user - my_error(ER_SERVER_SHUTDOWN,MYF(0)); /* purecov: inspected */ + join->thd->send_kill_message(); DBUG_RETURN(-2); /* purecov: inspected */ } if (!join->first_record || end_of_records || @@ -6979,7 +8658,7 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx, } -static uint find_shortest_key(TABLE *table, const key_map *usable_keys) +uint find_shortest_key(TABLE *table, const key_map *usable_keys) { uint min_length= (uint) ~0; uint best= MAX_KEY; @@ -7116,6 +8795,17 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } else if (select && select->quick) // Range found by opt_range { + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) + DBUG_RETURN(0); ref_key= select->quick->index; ref_key_parts= select->quick->used_key_parts; } @@ -7150,6 +8840,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, else { select->quick->file->ha_index_end(); + /* + We have verified above that select->quick is not + index_merge quick select. 
+ */ select->quick->index= new_ref_key; select->quick->init(); } @@ -7171,10 +8865,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if (!select->quick->reverse_sorted()) { - if (!(table->file->index_flags(ref_key) & HA_READ_PREV)) + int quick_type= select->quick->get_type(); + if (!(table->file->index_flags(ref_key) & HA_READ_PREV) || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION) DBUG_RETURN(0); // Use filesort - // ORDER BY range_key DESC - QUICK_SELECT_DESC *tmp=new QUICK_SELECT_DESC(select->quick, + + // ORDER BY range_key DESC + QUICK_SELECT_DESC *tmp=new QUICK_SELECT_DESC((QUICK_RANGE_SELECT*)(select->quick), used_key_parts); if (!tmp || tmp->error) { @@ -7324,8 +9023,11 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, { select->quick=tab->quick; tab->quick=0; - /* We can only use 'Only index' if quick key is same as ref_key */ - if (table->key_read && (uint) tab->ref.key != select->quick->index) + /* + We can only use 'Only index' if quick key is same as ref_key + and in index_merge 'Only index' cannot be used + */ + if (table->key_read && ((uint) tab->ref.key != select->quick->index)) { table->key_read=0; table->file->extra(HA_EXTRA_NO_KEYREAD); @@ -7356,6 +9058,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, tab->select= 0; } tab->select_cond=0; + tab->last_inner= 0; + tab->first_unmatched= 0; tab->type=JT_ALL; // Read with normal read_record tab->read_first_record= join_init_read_record; tab->join->examined_rows+=examined_rows; @@ -7513,7 +9217,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, { if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); error=0; goto err; } @@ -7625,7 +9329,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table, { if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); error=0; goto err; } @@ -9152,6 +10856,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, select_result *result=join->result; Item *item_null= new Item_null(); CHARSET_INFO *cs= system_charset_info; + int quick_type; DBUG_ENTER("select_describe"); DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s", (ulong)join->select_lex, join->select_lex->type, @@ -9243,14 +10948,20 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; - char buff[512],*buff_ptr=buff; - char buff1[512], buff2[512]; + char buff[512]; + char buff1[512], buff2[512], buff3[512]; + char keylen_str_buf[64]; + String extra(buff, sizeof(buff),cs); char table_name_buffer[NAME_LEN]; String tmp1(buff1,sizeof(buff1),cs); String tmp2(buff2,sizeof(buff2),cs); + String tmp3(buff3,sizeof(buff3),cs); + extra.length(0); tmp1.length(0); tmp2.length(0); + tmp3.length(0); + quick_type= -1; item_list.empty(); /* id */ item_list.push_back(new Item_uint((uint32) @@ -9260,7 +10971,15 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join->select_lex->type), cs)); if (tab->type == JT_ALL && tab->select && tab->select->quick) - tab->type= JT_RANGE; + { + quick_type= tab->select->quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) + tab->type = JT_INDEX_MERGE; + else + tab->type 
= JT_RANGE; + } /* table */ if (table->derived_select_number) { @@ -9279,7 +10998,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join_type_str[tab->type]), cs)); uint j; - /* possible_keys */ + /* Build "possible_keys" value and add it to item_list */ if (!tab->keys.is_clear_all()) { for (j=0 ; j < table->keys ; j++) @@ -9298,14 +11017,19 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs)); else item_list.push_back(item_null); - /* key key_len ref */ + + /* Build "key", "key_len", and "ref" values and add them to item_list */ if (tab->ref.key_parts) { KEY *key_info=table->key_info+ tab->ref.key; + register uint length; item_list.push_back(new Item_string(key_info->name, strlen(key_info->name), system_charset_info)); - item_list.push_back(new Item_int((int32) tab->ref.key_length)); + length= longlong2str(tab->ref.key_length, keylen_str_buf, 10) - + keylen_str_buf; + item_list.push_back(new Item_string(keylen_str_buf, length, + system_charset_info)); for (store_key **ref=tab->ref.key_copy ; *ref ; ref++) { if (tmp2.length()) @@ -9318,18 +11042,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, else if (tab->type == JT_NEXT) { KEY *key_info=table->key_info+ tab->index; + register uint length; item_list.push_back(new Item_string(key_info->name, strlen(key_info->name),cs)); - item_list.push_back(new Item_int((int32) key_info->key_length)); + length= longlong2str(key_info->key_length, keylen_str_buf, 10) - + keylen_str_buf; + item_list.push_back(new Item_string(keylen_str_buf, + length, + system_charset_info)); item_list.push_back(item_null); } else if (tab->select && tab->select->quick) { - KEY *key_info=table->key_info+ tab->select->quick->index; - item_list.push_back(new Item_string(key_info->name, - strlen(key_info->name),cs)); - item_list.push_back(new Item_int((int32) tab->select->quick-> - max_used_key_length)); + tab->select->quick->add_keys_and_lengths(&tmp2, &tmp3); + item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs)); + item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); item_list.push_back(item_null); } else @@ -9338,52 +11065,68 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(item_null); item_list.push_back(item_null); } - /* rows */ + /* Add "rows" field to item_list. */ item_list.push_back(new Item_int((longlong) (ulonglong) join->best_positions[i]. records_read, 21)); - /* extra */ + /* Build "Extra" field and add it to item_list. 
*/ my_bool key_read=table->key_read; if ((tab->type == JT_NEXT || tab->type == JT_CONST) && table->used_keys.is_set(tab->index)) key_read=1; - + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row) + key_read=1; + if (tab->info) item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs)); else { + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + { + extra.append("; Using "); + tab->select->quick->add_info_string(&extra); + } if (tab->select) { if (tab->use_quick == 2) { char buf[MAX_KEY/8+1]; - sprintf(buff_ptr,"; Range checked for each record (index map: 0x%s)", - tab->keys.print(buf)); - buff_ptr=strend(buff_ptr); + extra.append("; Range checked for each record (index map: 0x"); + extra.append(tab->keys.print(buf)); + extra.append(')'); } else - buff_ptr=strmov(buff_ptr,"; Using where"); + extra.append("; Using where"); } if (key_read) - buff_ptr= strmov(buff_ptr,"; Using index"); + extra.append("; Using index"); if (table->reginfo.not_exists_optimize) - buff_ptr= strmov(buff_ptr,"; Not exists"); + extra.append("; Not exists"); if (need_tmp_table) { need_tmp_table=0; - buff_ptr= strmov(buff_ptr,"; Using temporary"); + extra.append("; Using temporary"); } if (need_order) { need_order=0; - buff_ptr= strmov(buff_ptr,"; Using filesort"); + extra.append("; Using filesort"); } if (distinct & test_all_bits(used_tables,thd->used_tables)) - buff_ptr= strmov(buff_ptr,"; Distinct"); - if (buff_ptr == buff) - buff_ptr+= 2; // Skip inital "; " - item_list.push_back(new Item_string(buff+2,(uint) (buff_ptr - buff)-2, - cs)); + extra.append("; Distinct"); + + /* Skip initial "; "*/ + const char *str= extra.ptr(); + uint32 len= extra.length(); + if (len) + { + str += 2; + len -= 2; + } + item_list.push_back(new Item_string(str, len, cs)); } // For next iteration used_tables|=table->map; @@ -9442,6 +11185,7 @@ int mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) else { thd->lex->current_select= first; + unit->set_limit(unit->global_parameters, first); res= mysql_select(thd, &first->ref_pointer_array, (TABLE_LIST*) first->table_list.first, first->with_wild, first->item_list, @@ -9509,11 +11253,35 @@ void st_select_lex::print(THD *thd, String *str) if (table_list.elements) { str->append(" from ", 6); - Item *next_on= 0; for (TABLE_LIST *table= (TABLE_LIST *) table_list.first; table; table= table->next) { + TABLE_LIST *embedded=table; + TABLE_LIST *embedding= table->embedding; + while (embedding) + { + TABLE_LIST *next; + NESTED_JOIN *nested_join= table->embedding->nested_join; + List_iterator_fast<TABLE_LIST> it(nested_join->join_list); + TABLE_LIST *tab= it++; + while ((next= it++)) + tab= next; + if (tab != embedded) + break; + str->append('('); + if (embedded->outer_join & JOIN_TYPE_RIGHT) + str->append(" right join ", 12); + else if (embedded->outer_join & JOIN_TYPE_LEFT) + str->append(" left join ", 11); + else if (embedded->straight) + str->append(" straight_join ", 15); + else + str->append(" join ", 6); + embedded= embedding; + embedding= embedding->embedding; + } + if (table->derived) { str->append('('); @@ -9533,35 +11301,43 @@ void st_select_lex::print(THD *thd, String *str) } } - if (table->on_expr && ((table->outer_join & JOIN_TYPE_LEFT) || - !(table->outer_join & JOIN_TYPE_RIGHT))) - next_on= table->on_expr; - - if (next_on) + if (table->on_expr) { str->append(" 
on(", 4); - next_on->print(str); + table->on_expr->print(str); str->append(')'); - next_on= 0; } TABLE_LIST *next_table; if ((next_table= table->next)) { - if (table->outer_join & JOIN_TYPE_RIGHT) - { + if (next_table->outer_join & JOIN_TYPE_RIGHT) str->append(" right join ", 12); - if (!(table->outer_join & JOIN_TYPE_LEFT) && - table->on_expr) - next_on= table->on_expr; - } - else if (next_table->straight) - str->append(" straight_join ", 15); else if (next_table->outer_join & JOIN_TYPE_LEFT) str->append(" left join ", 11); + else if (next_table->straight) + str->append(" straight_join ", 15); else str->append(" join ", 6); } + + embedded=table; + embedding= table->embedding; + while (embedding) + { + NESTED_JOIN *nested_join= table->embedding->nested_join; + if (nested_join->join_list.head() != embedded) + break; + str->append(')'); + if (embedding->on_expr) + { + str->append(" on(", 4); + embedding->on_expr->print(str); + str->append(')'); + } + embedded= embedding; + embedding= embedding->embedding; + } } } diff --git a/sql/sql_select.h b/sql/sql_select.h index 8aca43484d2..8ffe50e6db2 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -75,10 +75,9 @@ typedef struct st_join_cache { /* ** The structs which holds the join connections and join states */ - enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF, JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL, - JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY}; + JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE}; class JOIN; @@ -87,8 +86,14 @@ typedef struct st_join_table { KEYUSE *keyuse; /* pointer to first used key */ SQL_SELECT *select; COND *select_cond; - QUICK_SELECT *quick; - Item *on_expr; + QUICK_SELECT_I *quick; + Item *on_expr; /* associated on expression */ + st_join_table *first_inner; /* first inner table for including outerjoin */ + bool found; /* true after all matches or null complement */ + bool not_null_compl;/* true before null complement is added */ + st_join_table *last_inner; /* last table table for embedding outer join */ + st_join_table *first_upper; /* first inner table for embedding outer join */ + st_join_table *first_unmatched; /* used for optimization purposes only */ const char *info; int (*read_first_record)(struct st_join_table *tab); int (*next_select)(JOIN *,struct st_join_table *,bool); @@ -116,6 +121,7 @@ typedef struct st_join_table { typedef struct st_position /* Used in find_best */ { double records_read; + double read_time; JOIN_TAB *table; KEYUSE *key; } POSITION; @@ -133,8 +139,9 @@ typedef struct st_rollup class JOIN :public Sql_alloc { public: - JOIN_TAB *join_tab,**best_ref,**map2table; - JOIN_TAB *join_tab_save; //saved join_tab for subquery reexecution + JOIN_TAB *join_tab,**best_ref; + JOIN_TAB **map2table; // mapping between table indexes and JOIN_TABs + JOIN_TAB *join_tab_save; // saved join_tab for subquery reexecution TABLE **table,**all_tables,*sort_by_table; uint tables,const_tables; uint send_group_parts; @@ -199,8 +206,10 @@ class JOIN :public Sql_alloc ORDER *order, *group_list, *proc_param; //hold parameters of mysql_select COND *conds; // ---"--- Item *conds_history; // store WHERE for explain - TABLE_LIST *tables_list; //hold 'tables' parameter of mysql_selec + TABLE_LIST *tables_list; //hold 'tables' parameter of mysql_select + List<TABLE_LIST> *join_list; // list of joined tables in reverse order SQL_SELECT *select; //created in optimisation phase + JOIN_TAB *return_tab; //used only for outer joins Item **ref_pointer_array; //used pointer reference for 
this select // Copy of above to be used with different lists Item **items0, **items1, **items2, **items3, **current_ref_pointer_array; @@ -224,6 +233,7 @@ class JOIN :public Sql_alloc table= 0; tables= 0; const_tables= 0; + join_list= 0; sort_and_group= 0; first_record= 0; do_send_rows= 1; @@ -254,6 +264,7 @@ class JOIN :public Sql_alloc fields_list= fields_arg; error= 0; select= 0; + return_tab= 0; ref_pointer_array= items0= items1= items2= items3= 0; ref_pointer_array_size= 0; zero_result_cause= 0; @@ -332,10 +343,14 @@ void copy_fields(TMP_TABLE_PARAM *param); void copy_funcs(Item **func_ptr); bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, int error, bool ignore_last_dupp_error); +uint find_shortest_key(TABLE *table, const key_map *usable_keys); /* functions from opt_sum.cc */ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds); +/* from sql_delete.cc, used by opt_range.cc */ +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); + /* class to copying an field/item to a key struct */ class store_key :public Sql_alloc diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 823552be3a4..3e99704e80c 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1561,7 +1561,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose) thd_info->command=(int) tmp->command; if ((mysys_var= tmp->mysys_var)) pthread_mutex_lock(&mysys_var->mutex); - thd_info->proc_info= (char*) (tmp->killed ? "Killed" : 0); + thd_info->proc_info= (char*) (tmp->killed == THD::KILL_CONNECTION? "Killed" : 0); #ifndef EMBEDDED_LIBRARY thd_info->state_info= (char*) (tmp->locked ? "Locked" : tmp->net.reading_or_writing ? @@ -1842,6 +1842,11 @@ int mysqld_show(THD *thd, const char *wild, show_var_st *variables, end= strend(pos); break; } + case SHOW_DOUBLE: + { + end= buff + sprintf(buff, "%f", *(double*) value); + break; + } #ifdef HAVE_OPENSSL /* First group - functions relying on CTX */ case SHOW_SSL_CTX_SESS_ACCEPT: diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 9f95ffa4884..1831c4a2f3d 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -78,3 +78,4 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file, IO_CACHE *to_file, uchar *sort_buffer, BUFFPEK *lastbuff,BUFFPEK *Fb, BUFFPEK *Tb,int flag); +void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length); diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 1ec0faafa8f..ba9431f27c4 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -671,6 +671,19 @@ void String::qs_append(double *d) qs_append(ld); } +void String::qs_append(int i) +{ + char *buff = Ptr + str_length; + sprintf(buff,"%d", i); + str_length += strlen(buff); +} + +void String::qs_append(uint i) +{ + char *buff = Ptr + str_length; + sprintf(buff,"%u", i); + str_length += strlen(buff); +} /* Compare strings according to collation, without end space. 
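The String::qs_append(int) and qs_append(uint) overloads added just above format the number with sprintf() directly at Ptr + str_length and then advance str_length, with no bounds check and no reallocation; the caller is expected to have reserved enough room already, which is what makes the call "quick" compared with the regular append() path. A minimal standalone sketch of that contract, using a hypothetical QuickBuf type in place of the real String class from sql_string.h:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for sql_string.h's String: a fixed buffer sized by the caller. */
    struct QuickBuf
    {
      char *ptr;            /* start of the caller-allocated buffer */
      unsigned int length;  /* bytes currently used */

      /*
        "Quick" append: format directly at the tail and bump the length.
        No bounds check and no realloc, mirroring the qs_append() code shown
        in the patch, so the caller must have reserved enough space up front.
      */
      void qs_append(unsigned int i)
      {
        char *tail= ptr + length;
        sprintf(tail, "%u", i);
        length+= (unsigned int) strlen(tail);
      }
    };

    int main()
    {
      char room[64];                  /* caller reserves the space in advance */
      QuickBuf buf= { room, 0 };
      buf.qs_append(4);
      buf.qs_append(1024);
      printf("%.*s\n", (int) buf.length, buf.ptr);   /* prints "41024" */
      return 0;
    }

A caller that violates the precondition overruns the buffer, so helpers like these only make sense where the maximum formatted length is known up front.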
diff --git a/sql/sql_string.h b/sql/sql_string.h index 01329c45a98..79365b7481b 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -273,6 +273,8 @@ public: Ptr[str_length]= c; str_length++; } + void qs_append(int i); + void qs_append(uint i); /* Inline (general) functions used by the protocol functions */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index a560bd40028..48bd9e572b0 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -267,17 +267,13 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, if (some_tables_deleted || tmp_table_deleted || !error) { query_cache_invalidate3(thd, tables, 0); - if (!dont_log_query) + if (!dont_log_query && mysql_bin_log.is_open()) { - mysql_update_log.write(thd, thd->query,thd->query_length); - if (mysql_bin_log.is_open()) - { - if (!error) - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - tmp_table_deleted && !some_tables_deleted); - mysql_bin_log.write(&qinfo); - } + if (!error) + thd->clear_error(); + Query_log_event qinfo(thd, thd->query, thd->query_length, + tmp_table_deleted && !some_tables_deleted); + mysql_bin_log.write(&qinfo); } } @@ -1260,18 +1256,13 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name, } thd->tmp_table_used= 1; } - if (!tmp_table && !no_log) + if (!tmp_table && !no_log && mysql_bin_log.is_open()) { - // Must be written before unlock - mysql_update_log.write(thd,thd->query, thd->query_length); - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - test(create_info->options & - HA_LEX_CREATE_TMP_TABLE)); - mysql_bin_log.write(&qinfo); - } + thd->clear_error(); + Query_log_event qinfo(thd, thd->query, thd->query_length, + test(create_info->options & + HA_LEX_CREATE_TMP_TABLE)); + mysql_bin_log.write(&qinfo); } error=0; end: @@ -2168,7 +2159,6 @@ int mysql_create_like_table(THD* thd, TABLE_LIST* table, } // Must be written before unlock - mysql_update_log.write(thd,thd->query, thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); @@ -2279,7 +2269,6 @@ int mysql_discard_or_import_tablespace(THD *thd, error=1; if (error) goto err; - mysql_update_log.write(thd, thd->query,thd->query_length); if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, thd->query, thd->query_length, 0); @@ -2664,7 +2653,6 @@ int mysql_alter_table(THD *thd,char *new_db, char *new_name, } if (!error) { - mysql_update_log.write(thd, thd->query, thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); @@ -3060,7 +3048,6 @@ int mysql_alter_table(THD *thd,char *new_db, char *new_name, my_free((gptr) new_table,MYF(0)); goto err; } - mysql_update_log.write(thd, thd->query,thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); @@ -3195,7 +3182,6 @@ int mysql_alter_table(THD *thd,char *new_db, char *new_name, goto err; } thd->proc_info="end"; - mysql_update_log.write(thd, thd->query,thd->query_length); if (mysql_bin_log.is_open()) { thd->clear_error(); @@ -3332,7 +3318,7 @@ copy_data_between_tables(TABLE *from,TABLE *to, { if (thd->killed) { - my_error(ER_SERVER_SHUTDOWN,MYF(0)); + thd->send_kill_message(); error= 1; break; } diff --git a/sql/sql_test.cc b/sql/sql_test.cc index d992c93f8fc..5c467402497 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -181,9 +181,10 @@ TEST_join(JOIN *join) " quick select checked for each record (keys: %s)\n", tab->select->quick_keys.print(buf)); else if (tab->select->quick) - fprintf(DBUG_FILE," quick select used on key %s, 
length: %d\n", - form->key_info[tab->select->quick->index].name, - tab->select->quick->max_used_key_length); + { + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, false); + } else VOID(fputs(" select used\n",DBUG_FILE)); } @@ -202,6 +203,104 @@ TEST_join(JOIN *join) DBUG_VOID_RETURN; } + +/* + Print the current state during query optimization. + + SYNOPSIS + print_plan() + join pointer to the structure providing all context info for + the query + read_time the cost of the best partial plan + record_count estimate for the number of records returned by the best + partial plan + idx length of the partial QEP in 'join->positions'; + also an index in the array 'join->best_ref'; + info comment string to appear above the printout + + DESCRIPTION + This function prints to the log file DBUG_FILE the members of 'join' that + are used during query optimization (join->positions, join->best_positions, + and join->best_ref) and few other related variables (read_time, + record_count). + Useful to trace query optimizer functions. + + RETURN + None +*/ + +void +print_plan(JOIN* join, double read_time, double record_count, + uint idx, const char *info) +{ + uint i; + POSITION pos; + JOIN_TAB *join_table; + JOIN_TAB **plan_nodes; + TABLE* table; + + if (info == 0) + info= ""; + + DBUG_LOCK_FILE; + if (join->best_read == DBL_MAX) + { + fprintf(DBUG_FILE,"%s; idx:%u, best: DBL_MAX, current:%g\n", + info, idx, read_time); + } + else + { + fprintf(DBUG_FILE,"%s; idx: %u, best: %g, current: %g\n", + info, idx, join->best_read, read_time); + } + + /* Print the tables in JOIN->positions */ + fputs(" POSITIONS: ", DBUG_FILE); + for (i= 0; i < idx ; i++) + { + pos = join->positions[i]; + table= pos.table->table; + if (table) + fputs(table->real_name, DBUG_FILE); + fputc(' ', DBUG_FILE); + } + fputc('\n', DBUG_FILE); + + /* + Print the tables in JOIN->best_positions only if at least one complete plan + has been found. An indicator for this is the value of 'join->best_read'. 
+ */ + fputs("BEST_POSITIONS: ", DBUG_FILE); + if (join->best_read < DBL_MAX) + { + for (i= 0; i < idx ; i++) + { + pos= join->best_positions[i]; + table= pos.table->table; + if (table) + fputs(table->real_name, DBUG_FILE); + fputc(' ', DBUG_FILE); + } + } + fputc('\n', DBUG_FILE); + + /* Print the tables in JOIN->best_ref */ + fputs(" BEST_REF: ", DBUG_FILE); + for (plan_nodes= join->best_ref ; *plan_nodes ; plan_nodes++) + { + join_table= (*plan_nodes); + fputs(join_table->table->real_name, DBUG_FILE); + fprintf(DBUG_FILE, "(%lu,%lu,%lu)", + (ulong) join_table->found_records, + (ulong) join_table->records, + (ulong) join_table->read_time); + fputc(' ', DBUG_FILE); + } + fputc('\n', DBUG_FILE); + + DBUG_UNLOCK_FILE; +} + #endif typedef struct st_debug_lock diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 70c05489f82..2b7c0d8f0c0 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -211,11 +211,8 @@ int st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result, goto err; thd_arg->lex->current_select= sl; - offset_limit_cnt= sl->offset_limit; - select_limit_cnt= sl->select_limit+sl->offset_limit; - if (select_limit_cnt < sl->select_limit) - select_limit_cnt= HA_POS_ERROR; // no limit - if (select_limit_cnt == HA_POS_ERROR || sl->braces) + set_limit(sl, sl); + if (sl->braces) sl->options&= ~OPTION_FOUND_ROWS; res= join->prepare(&sl->ref_pointer_array, @@ -287,27 +284,32 @@ int st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result, thd_arg->lex->current_select= lex_select_save; if (!item_list.elements) { - Statement *stmt= thd->current_statement; - Statement backup; - if (stmt) - thd->set_n_backup_item_arena(stmt, &backup); + Item_arena *arena= thd->current_arena; + Item_arena backup; + if (arena) + thd->set_n_backup_item_arena(arena, &backup); Field **field; for (field= table->field; *field; field++) { Item_field *item= new Item_field(*field); if (!item || item_list.push_back(item)) { - if (stmt) - thd->restore_backup_item_arena(stmt, &backup); + if (arena) + thd->restore_backup_item_arena(arena, &backup); DBUG_RETURN(-1); } } - if (stmt) + if (arena) { - thd->restore_backup_item_arena(stmt, &backup); + thd->restore_backup_item_arena(arena, &backup); /* prepare fake select to initialize it correctly */ ulong options_tmp= init_prepare_fake_select_lex(thd); + /* + it should be done only once (because item_list builds only onece + per statement) + */ + DBUG_ASSERT(fake_select_lex->join == 0); if (!(fake_select_lex->join= new JOIN(thd, item_list, thd->options, result))) { @@ -469,8 +471,8 @@ int st_select_lex_unit::exec() allocate JOIN for fake select only once (prevent mysql_select automatic allocation) */ - if (!(fake_select_lex->join= new JOIN(thd, item_list, thd->options, - result))) + if (!(fake_select_lex->join= new JOIN(thd, item_list, + fake_select_lex->options, result))) { fake_select_lex->table_list.empty(); DBUG_RETURN(-1); @@ -485,12 +487,14 @@ int st_select_lex_unit::exec() else { JOIN_TAB *tab,*end; - for (tab=join->join_tab,end=tab+join->tables ; tab != end ; tab++) + for (tab=join->join_tab, end=tab+join->tables ; + tab && tab != end ; + tab++) { delete tab->select; delete tab->quick; } - join->init(thd, item_list, thd->options, result); + join->init(thd, item_list, fake_select_lex->options, result); } res= mysql_select(thd, &fake_select_lex->ref_pointer_array, &result_table_list, @@ -498,7 +502,7 @@ int st_select_lex_unit::exec() global_parameters->order_list.elements, (ORDER*)global_parameters->order_list.first, (ORDER*) NULL, NULL, (ORDER*) 
NULL, - options_tmp | SELECT_NO_UNLOCK, + fake_select_lex->options | SELECT_NO_UNLOCK, result, this, fake_select_lex); fake_select_lex->table_list.empty(); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 18394d007ed..e0281c813f1 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -156,11 +156,13 @@ int mysql_update(THD *thd, } init_ftfuncs(thd, &thd->lex->select_lex, 1); /* Check if we are modifying a key that we are used to search with */ + if (select && select->quick) + { + used_index= select->quick->index; used_key_is_modified= (!select->quick->unique_key_range() && - check_if_key_used(table, - (used_index=select->quick->index), - fields)); + select->quick->check_if_keys_used(&fields)); + } else if ((used_index=table->file->key_used_on_scan) < MAX_KEY) used_key_is_modified=check_if_key_used(table, used_index, fields); else @@ -172,7 +174,7 @@ int mysql_update(THD *thd, matching rows before updating the table! */ table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); - if (old_used_keys.is_set(used_index)) + if ( (used_index != MAX_KEY) && old_used_keys.is_set(used_index)) { table->key_read=1; table->file->extra(HA_EXTRA_KEYREAD); @@ -218,8 +220,12 @@ int mysql_update(THD *thd, if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME))) goto err; - + + /* If quick select is used, initialize it before retrieving rows. */ + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); + thd->proc_info="Searching rows for update"; uint tmp_limit= limit; @@ -274,6 +280,9 @@ int mysql_update(THD *thd, if (handle_duplicates == DUP_IGNORE) table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -328,13 +337,14 @@ int mysql_update(THD *thd, This must be before binlog writing and ha_autocommit_... */ if (updated) + { query_cache_invalidate3(thd, table_list, 1); + } transactional_table= table->file->has_transactions(); log_delayed= (transactional_table || table->tmp_table); if ((updated || (error < 0)) && (error <= 0 || !transactional_table)) { - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { if (error <= 0) @@ -361,14 +371,15 @@ int mysql_update(THD *thd, free_underlaid_joins(thd, &thd->lex->select_lex); if (error >= 0) - send_error(thd,thd->killed ? ER_SERVER_SHUTDOWN : 0); /* purecov: inspected */ + send_error(thd,thd->killed_errno()); /* purecov: inspected */ else { char buff[80]; sprintf(buff, ER(ER_UPDATE_INFO), (ulong) found, (ulong) updated, (ulong) thd->cuted_fields); - send_ok(thd, - (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated, + thd->row_count_func= + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + send_ok(thd, thd->row_count_func, thd->insert_id_used ? 
thd->insert_id() : 0L,buff); DBUG_PRINT("info",("%d records updated",updated)); } @@ -411,6 +422,7 @@ int mysql_prepare_update(THD *thd, TABLE_LIST *table_list, TABLE *table= table_list->table; TABLE_LIST tables; List<Item> all_fields; + SELECT_LEX *select_lex= &thd->lex->select_lex; DBUG_ENTER("mysql_prepare_update"); #ifndef NO_EMBEDDED_ACCESS_CHECKS @@ -423,10 +435,10 @@ int mysql_prepare_update(THD *thd, TABLE_LIST *table_list, if (setup_tables(update_table_list) || setup_conds(thd, update_table_list, conds) || - thd->lex->select_lex.setup_ref_array(thd, order_num) || - setup_order(thd, thd->lex->select_lex.ref_pointer_array, + select_lex->setup_ref_array(thd, order_num) || + setup_order(thd, select_lex->ref_pointer_array, update_table_list, all_fields, all_fields, order) || - setup_ftfuncs(&thd->lex->select_lex)) + setup_ftfuncs(select_lex)) DBUG_RETURN(-1); /* Check that we are not using table that we are updating in a sub select */ @@ -436,6 +448,11 @@ int mysql_prepare_update(THD *thd, TABLE_LIST *table_list, my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->real_name); DBUG_RETURN(-1); } + if (thd->current_arena && select_lex->first_execution) + { + select_lex->prep_where= select_lex->where; + select_lex->first_execution= 0; + } DBUG_RETURN(0); } @@ -813,8 +830,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields) case JT_ALL: /* If range search on index */ if (join_tab->quick) - return !check_if_key_used(table, join_tab->quick->index, - *fields); + return !join_tab->quick->check_if_keys_used(fields); /* If scanning in clustered key */ if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && table->primary_key < MAX_KEY) @@ -1102,7 +1118,6 @@ bool multi_update::send_eof() if (updated && (local_error <= 0 || !trans_safe)) { - mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) { if (local_error <= 0) @@ -1134,8 +1149,9 @@ bool multi_update::send_eof() sprintf(buff, ER(ER_UPDATE_INFO), (ulong) found, (ulong) updated, (ulong) thd->cuted_fields); - ::send_ok(thd, - (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated, + thd->row_count_func= + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + ::send_ok(thd, thd->row_count_func, thd->insert_id_used ? 
thd->insert_id() : 0L,buff); return 0; } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 6fa69f050b4..cf17d38566d 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -35,6 +35,10 @@ #include "sql_acl.h" #include "lex_symbol.h" #include "item_create.h" +#include "sp_head.h" +#include "sp_pcontext.h" +#include "sp_rcontext.h" +#include "sp.h" #include <myisam.h> #include <myisammrg.h> @@ -83,10 +87,14 @@ inline Item *or_or_concat(THD *thd, Item* A, Item* B) enum Item_udftype udf_type; CHARSET_INFO *charset; thr_lock_type lock_type; - interval_type interval; + interval_type interval, interval_time_st; timestamp_type date_time_type; st_select_lex *select_lex; chooser_compare_func_creator boolfunc2creator; + struct sp_cond_type *spcondtype; + struct { int vars, conds, hndlrs, curs; } spblock; + sp_name *spname; + struct st_lex *lex; } %{ @@ -127,6 +135,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token AVG_SYM %token BEGIN_SYM %token BINLOG_SYM +%token CALL_SYM %token CHANGE %token CLIENT_SYM %token COMMENT_SYM @@ -135,7 +144,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CREATE %token CROSS %token CUBE_SYM +%token DEFINER_SYM %token DELETE_SYM +%token DETERMINISTIC_SYM %token DUAL_SYM %token DO_SYM %token DROP @@ -165,6 +176,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SELECT_SYM %token SHOW %token SLAVE +%token SQL_SYM %token SQL_THREAD %token START_SYM %token STD_SYM @@ -208,11 +220,15 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token COLUMNS %token COLUMN_SYM %token CONCURRENT +%token CONDITION_SYM +%token CONNECTION_SYM %token CONSTRAINT +%token CONTINUE_SYM %token CONVERT_SYM %token CURRENT_USER %token DATABASES %token DATA_SYM +%token DECLARE_SYM %token DEFAULT %token DELAYED_SYM %token DELAY_KEY_WRITE_SYM @@ -230,14 +246,17 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token DIRECTORY_SYM %token ESCAPE_SYM %token EXISTS +%token EXIT_SYM %token EXTENDED_SYM %token FALSE_SYM +%token FETCH_SYM %token FILE_SYM %token FIRST_SYM %token FIXED_SYM %token FLOAT_NUM %token FORCE_SYM %token FOREIGN +%token FOUND_SYM %token FROM %token FULL %token FULLTEXT_SYM @@ -260,8 +279,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token INFILE %token INNER_SYM %token INNOBASE_SYM +%token INOUT_SYM %token INTO %token IN_SYM +%token INVOKER_SYM %token ISOLATION %token JOIN_SYM %token KEYS @@ -271,9 +292,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token LEAVES %token LEVEL_SYM %token LEX_HOSTNAME +%token LANGUAGE_SYM %token LIKE %token LINES %token LOCAL_SYM +%token LOCATOR_SYM %token LOG_SYM %token LOGS_SYM %token LONG_NUM @@ -303,6 +326,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MEDIUM_SYM %token MIN_ROWS %token NAMES_SYM +%token NAME_SYM %token NATIONAL_SYM %token NATURAL %token NDBCLUSTER_SYM @@ -323,6 +347,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OR_SYM %token OR_OR_CONCAT %token ORDER_SYM +%token OUT_SYM %token OUTER %token OUTFILE %token DUMPFILE @@ -354,6 +379,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ROW_FORMAT_SYM %token ROW_SYM %token RTREE_SYM +%token SECURITY_SYM %token SET %token SEPARATOR_SYM %token SERIAL_SYM @@ -362,6 +388,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SIMPLE_SYM %token SHUTDOWN %token SPATIAL_SYM +%token SPECIFIC_SYM +%token SQLEXCEPTION_SYM +%token SQLSTATE_SYM 
+%token SQLWARNING_SYM %token SSL_SYM %token STARTING %token STATUS_SYM @@ -384,11 +414,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FUNC_ARG1 %token FUNC_ARG2 %token FUNC_ARG3 -%token UDF_RETURNS_SYM +%token RETURN_SYM +%token RETURNS_SYM %token UDF_SONAME_SYM -%token UDF_SYM +%token FUNCTION_SYM %token UNCOMMITTED_SYM %token UNDERSCORE_CHARSET +%token UNDO_SYM %token UNICODE_SYM %token UNION_SYM %token UNIQUE_SYM @@ -406,6 +438,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token X509_SYM %token XOR %token COMPRESSED_SYM +%token ROW_COUNT_SYM %token ERRORS %token WARNINGS @@ -442,6 +475,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token STRING_SYM %token TEXT_SYM %token TIMESTAMP +%token TIMESTAMP_ADD +%token TIMESTAMP_DIFF %token TIME_SYM %token TINYBLOB %token TINYINT @@ -487,6 +522,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FIELD_FUNC %token FORMAT_SYM %token FOR_SYM +%token FRAC_SECOND_SYM %token FROM_UNIXTIME %token GEOMCOLLFROMTEXT %token GEOMFROMTEXT @@ -532,6 +568,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token POLYGON %token POSITION_SYM %token PROCEDURE +%token QUARTER_SYM %token RAND %token REPLACE %token RIGHT @@ -543,12 +580,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SUBSTRING %token SUBSTRING_INDEX %token TRIM -%token UDA_CHAR_SUM -%token UDA_FLOAT_SUM -%token UDA_INT_SUM -%token UDF_CHAR_FUNC -%token UDF_FLOAT_FUNC -%token UDF_INT_FUNC %token UNIQUE_USERS %token UNIX_TIMESTAMP %token USER @@ -572,6 +603,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SQL_SMALL_RESULT %token SQL_BUFFER_RESULT +%token CURSOR_SYM +%token ELSEIF_SYM +%token ITERATE_SYM +%token LEAVE_SYM +%token LOOP_SYM +%token REPEAT_SYM +%token UNTIL_SYM +%token WHILE_SYM +%token ASENSITIVE_SYM +%token INSENSITIVE_SYM +%token SENSITIVE_SYM + %token ISSUER_SYM %token SUBJECT_SYM %token CIPHER_SYM @@ -597,6 +640,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text UNDERSCORE_CHARSET IDENT_sys TEXT_STRING_sys TEXT_STRING_literal NCHAR_STRING opt_component key_cache_name + sp_opt_label %type <lex_str_ptr> opt_table_alias @@ -630,17 +674,20 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %type <item> literal text_literal insert_ident order_ident simple_ident select_item2 expr opt_expr opt_else sum_expr in_sum_expr - table_wild no_in_expr expr_expr simple_expr no_and_expr + table_wild no_in_expr expr_expr simple_expr no_and_expr udf_expr using_list expr_or_default set_expr_or_default interval_expr param_marker singlerow_subselect singlerow_subselect_init exists_subselect exists_subselect_init geometry_function signed_literal now_or_signed_literal opt_escape + sp_opt_default + simple_ident_nospvar simple_ident_q %type <item_num> NUM_literal %type <item_list> - expr_list udf_expr_list when_list ident_list ident_list_arg + expr_list udf_expr_list udf_expr_list2 when_list + ident_list ident_list_arg %type <key_type> key_type opt_unique_or_fulltext constraint_key_type @@ -656,14 +703,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %type <table_list> join_table_list join_table - -%type <udf> - UDF_CHAR_FUNC UDF_FLOAT_FUNC UDF_INT_FUNC - UDA_CHAR_SUM UDA_FLOAT_SUM UDA_INT_SUM + table_factor table_ref %type <date_time_type> date_time_type; %type <interval> interval +%type <interval_time_st> 
interval_time_st + %type <db_type> storage_engines %type <row_type> row_types @@ -707,7 +753,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); select_item_list select_item values_list no_braces opt_limit_clause delete_limit_clause fields opt_values values procedure_list procedure_list2 procedure_item - when_list2 expr_list2 handler + when_list2 expr_list2 udf_expr_list3 handler opt_precision opt_ignore opt_column opt_restrict grant revoke set lock unlock string_list field_options field_option field_opt_list opt_binary table_lock_list table_lock @@ -728,8 +774,17 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); subselect_end select_var_list select_var_list_init help opt_len opt_extended_describe prepare prepare_src execute deallocate + statement sp_suid + sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic sp_a_chistic END_OF_INPUT +%type <NONE> call sp_proc_stmts sp_proc_stmt +%type <num> sp_decl_idents sp_opt_inout sp_handler_type sp_hcond_list +%type <spcondtype> sp_cond sp_hcond +%type <spblock> sp_decls sp_decl +%type <lex> sp_cursor_stmt +%type <spname> sp_name + %type <NONE> '-' '+' '*' '/' '%' '(' ')' ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_CONCAT BETWEEN_SYM CASE_SYM @@ -755,10 +810,16 @@ query: | verb_clause END_OF_INPUT {}; verb_clause: + statement + | begin + ; + +/* Verb clauses, except begin */ +statement: alter | analyze | backup - | begin + | call | change | check | checksum @@ -1056,20 +1117,1026 @@ create: lex->name=$4.str; lex->create_info.options=$3; } - | CREATE udf_func_type UDF_SYM IDENT_sys + | CREATE udf_func_type FUNCTION_SYM sp_name { LEX *lex=Lex; - lex->sql_command = SQLCOM_CREATE_FUNCTION; - lex->udf.name = $4; + lex->spname= $4; lex->udf.type= $2; } - UDF_RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys + create_function_tail + {} + | CREATE PROCEDURE sp_name + { + LEX *lex= Lex; + sp_head *sp; + + if (lex->sphead) + { + net_printf(YYTHD, ER_SP_NO_RECURSIVE_CREATE, "PROCEDURE"); + YYABORT; + } + /* Order is important here: new - reset - init */ + sp= new sp_head(); + sp->reset_thd_mem_root(YYTHD); + sp->init(lex); + + sp->m_type= TYPE_ENUM_PROCEDURE; + lex->sphead= sp; + /* + * We have to turn of CLIENT_MULTI_QUERIES while parsing a + * stored procedure, otherwise yylex will chop it into pieces + * at each ';'. + */ + sp->m_old_cmq= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES; + YYTHD->client_capabilities &= (~CLIENT_MULTI_QUERIES); + } + '(' + { + LEX *lex= Lex; + + lex->sphead->m_param_begin= lex->tok_start+1; + } + sp_pdparam_list + ')' + { + LEX *lex= Lex; + + lex->sphead->m_param_end= lex->tok_start; + lex->spcont->set_params(); + bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + } + sp_c_chistics + { + LEX *lex= Lex; + + lex->sphead->m_chistics= &lex->sp_chistics; + lex->sphead->m_body_begin= lex->tok_start; + } + sp_proc_stmt + { + LEX *lex= Lex; + + lex->sphead->init_strings(YYTHD, lex, $3); + lex->sql_command= SQLCOM_CREATE_PROCEDURE; + /* Restore flag if it was cleared above */ + if (lex->sphead->m_old_cmq) + YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES; + lex->sphead->restore_thd_mem_root(YYTHD); + } + ; + +sp_name: + IDENT_sys '.' 
IDENT_sys + { + $$= new sp_name($1, $3); + $$->init_qname(YYTHD); + } + | IDENT_sys + { + $$= sp_name_current_db_new(YYTHD, $1); + } + ; + +create_function_tail: + RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys { LEX *lex=Lex; - lex->udf.returns=(Item_result) $7; - lex->udf.dl=$9.str; + lex->sql_command = SQLCOM_CREATE_FUNCTION; + lex->udf.name = lex->spname->m_name; + lex->udf.returns=(Item_result) $2; + lex->udf.dl=$4.str; } - ; + | '(' + { + LEX *lex= Lex; + sp_head *sp; + + if (lex->sphead) + { + net_printf(YYTHD, ER_SP_NO_RECURSIVE_CREATE, "FUNCTION"); + YYABORT; + } + /* Order is important here: new - reset - init */ + sp= new sp_head(); + sp->reset_thd_mem_root(YYTHD); + sp->init(lex); + + sp->m_type= TYPE_ENUM_FUNCTION; + lex->sphead= sp; + /* + * We have to turn of CLIENT_MULTI_QUERIES while parsing a + * stored procedure, otherwise yylex will chop it into pieces + * at each ';'. + */ + sp->m_old_cmq= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES; + YYTHD->client_capabilities &= ~CLIENT_MULTI_QUERIES; + lex->sphead->m_param_begin= lex->tok_start+1; + } + sp_fdparam_list ')' + { + LEX *lex= Lex; + + lex->spcont->set_params(); + lex->sphead->m_param_end= lex->tok_start; + } + RETURNS_SYM + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + + sp->m_returns_begin= lex->tok_start; + sp->m_returns_cs= lex->charset= NULL; + } + type + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + + sp->m_returns_end= lex->tok_start; + sp->m_returns= (enum enum_field_types)$8; + sp->m_returns_cs= lex->charset; + bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + } + sp_c_chistics + { + LEX *lex= Lex; + + lex->sphead->m_chistics= &lex->sp_chistics; + lex->sphead->m_body_begin= lex->tok_start; + } + sp_proc_stmt + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + + lex->sql_command= SQLCOM_CREATE_SPFUNCTION; + sp->init_strings(YYTHD, lex, lex->spname); + /* Restore flag if it was cleared above */ + if (sp->m_old_cmq) + YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES; + sp->restore_thd_mem_root(YYTHD); + } + ; + +sp_a_chistics: + /* Empty */ {} + | sp_a_chistics sp_a_chistic {} + ; + +sp_c_chistics: + /* Empty */ {} + | sp_c_chistics sp_c_chistic {} + ; + +/* Characteristics for both create and alter */ +sp_chistic: + COMMENT_SYM TEXT_STRING_sys { Lex->sp_chistics.comment= $2; } + | sp_suid { } + ; + +/* Alter characteristics */ +sp_a_chistic: + sp_chistic { } + | NAME_SYM ident { Lex->name= $2.str; } + ; + +/* Create characteristics */ +sp_c_chistic: + sp_chistic { } + | LANGUAGE_SYM SQL_SYM { } + | DETERMINISTIC_SYM { Lex->sp_chistics.detistic= TRUE; } + | NOT DETERMINISTIC_SYM { Lex->sp_chistics.detistic= FALSE; } + ; + +sp_suid: + SQL_SYM SECURITY_SYM DEFINER_SYM + { + Lex->sp_chistics.suid= IS_SUID; + } + | SQL_SYM SECURITY_SYM INVOKER_SYM + { + Lex->sp_chistics.suid= IS_NOT_SUID; + } + ; + +call: + CALL_SYM sp_name + { + LEX *lex = Lex; + + lex->sql_command= SQLCOM_CALL; + lex->spname= $2; + lex->value_list.empty(); + } + '(' sp_cparam_list ')' {} + ; + +/* CALL parameters */ +sp_cparam_list: + /* Empty */ + | sp_cparams + ; + +sp_cparams: + sp_cparams ',' expr + { + Lex->value_list.push_back($3); + } + | expr + { + Lex->value_list.push_back($1); + } + ; + +/* Stored FUNCTION parameter declaration list */ +sp_fdparam_list: + /* Empty */ + | sp_fdparams + ; + +sp_fdparams: + sp_fdparams ',' sp_fdparam + | sp_fdparam + ; + +sp_fdparam: + ident type + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (spc->find_pvar(&$1, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_PARAM, 
$1.str); + YYABORT; + } + spc->push_pvar(&$1, (enum enum_field_types)$2, sp_param_in); + } + ; + +/* Stored PROCEDURE parameter declaration list */ +sp_pdparam_list: + /* Empty */ + | sp_pdparams + ; + +sp_pdparams: + sp_pdparams ',' sp_pdparam + | sp_pdparam + ; + +sp_pdparam: + sp_opt_inout ident type + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (spc->find_pvar(&$2, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_PARAM, $2.str); + YYABORT; + } + spc->push_pvar(&$2, (enum enum_field_types)$3, + (sp_param_mode_t)$1); + } + ; + +sp_opt_inout: + /* Empty */ { $$= sp_param_in; } + | IN_SYM { $$= sp_param_in; } + | OUT_SYM { $$= sp_param_out; } + | INOUT_SYM { $$= sp_param_inout; } + ; + +sp_proc_stmts: + /* Empty */ {} + | sp_proc_stmts sp_proc_stmt ';' + ; + +sp_decls: + /* Empty */ + { + $$.vars= $$.conds= $$.hndlrs= $$.curs= 0; + } + | sp_decls sp_decl ';' + { + /* We check for declarations out of (standard) order this way + because letting the grammar rules reflect it caused tricky + shift/reduce conflicts with the wrong result. (And we get + better error handling this way.) */ + if (($2.vars || $2.conds) && ($1.curs || $1.hndlrs)) + { /* Variable or condition following cursor or handler */ + send_error(YYTHD, ER_SP_VARCOND_AFTER_CURSHNDLR); + YYABORT; + } + if ($2.curs && $1.hndlrs) + { /* Cursor following handler */ + send_error(YYTHD, ER_SP_CURSOR_AFTER_HANDLER); + YYABORT; + } + $$.vars= $1.vars + $2.vars; + $$.conds= $1.conds + $2.conds; + $$.hndlrs= $1.hndlrs + $2.hndlrs; + $$.curs= $1.curs + $2.curs; + } + ; + +sp_decl: + DECLARE_SYM sp_decl_idents type sp_opt_default + { + LEX *lex= Lex; + uint max= lex->spcont->current_framesize(); + enum enum_field_types type= (enum enum_field_types)$3; + Item *it= $4; + + for (uint i = max-$2 ; i < max ; i++) + { + lex->spcont->set_type(i, type); + if (! 
it) + lex->spcont->set_isset(i, FALSE); + else + { + sp_instr_set *in= new sp_instr_set(lex->sphead->instructions(), + i, it, type); + + lex->sphead->add_instr(in); + lex->spcont->set_isset(i, TRUE); + lex->spcont->set_default(i, it); + } + } + $$.vars= $2; + $$.conds= $$.hndlrs= $$.curs= 0; + } + | DECLARE_SYM ident CONDITION_SYM FOR_SYM sp_cond + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (spc->find_cond(&$2, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_COND, $2.str); + YYABORT; + } + YYTHD->lex->spcont->push_cond(&$2, $5); + $$.vars= $$.hndlrs= $$.curs= 0; + $$.conds= 1; + } + | DECLARE_SYM sp_handler_type HANDLER_SYM FOR_SYM + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *ctx= lex->spcont; + sp_instr_hpush_jump *i= + new sp_instr_hpush_jump(sp->instructions(), $2, + ctx->current_framesize()); + + sp->add_instr(i); + sp->push_backpatch(i, ctx->push_label((char *)"", 0)); + ctx->add_handler(); + } + sp_hcond_list sp_proc_stmt + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_label_t *hlab= lex->spcont->pop_label(); /* After this hdlr */ + + if ($2 == SP_HANDLER_CONTINUE) + sp->add_instr(new sp_instr_hreturn(sp->instructions(), + lex->spcont->current_framesize())); + else + { /* EXIT or UNDO handler, just jump to the end of the block */ + sp_instr_jump *i= new sp_instr_jump(sp->instructions()); + + sp->add_instr(i); + sp->push_backpatch(i, lex->spcont->last_label()); /* Block end */ + } + lex->sphead->backpatch(hlab); + $$.vars= $$.conds= $$.curs= 0; + $$.hndlrs= $6; + } + | DECLARE_SYM ident CURSOR_SYM FOR_SYM sp_cursor_stmt + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *spc= lex->spcont; + uint offp; + sp_instr_cpush *i; + + if (spc->find_cursor(&$2, &offp, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_CURS, $2.str); + delete $5; + YYABORT; + } + i= new sp_instr_cpush(sp->instructions(), $5); + sp->add_instr(i); + lex->spcont->push_cursor(&$2); + $$.vars= $$.conds= $$.hndlrs= 0; + $$.curs= 1; + } + ; + +sp_cursor_stmt: + { + Lex->sphead->reset_lex(YYTHD); + + /* We use statement here just be able to get a better + error message. Using 'select' works too, but will then + result in a generic "syntax error" if a non-select + statement is given. */ + } + statement + { + LEX *lex= Lex; + + if (lex->sql_command != SQLCOM_SELECT) + { + send_error(YYTHD, ER_SP_BAD_CURSOR_QUERY); + YYABORT; + } + if (lex->result) + { + send_error(YYTHD, ER_SP_BAD_CURSOR_SELECT); + YYABORT; + } + lex->sp_lex_in_use= TRUE; + $$= lex; + lex->sphead->restore_lex(YYTHD); + } + ; + +sp_handler_type: + EXIT_SYM { $$= SP_HANDLER_EXIT; } + | CONTINUE_SYM { $$= SP_HANDLER_CONTINUE; } +/* | UNDO_SYM { QQ No yet } */ + ; + +sp_hcond_list: + sp_hcond + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_instr_hpush_jump *i= (sp_instr_hpush_jump *)sp->last_instruction(); + + i->add_condition($1); + $$= 1; + } + | sp_hcond_list ',' sp_hcond + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_instr_hpush_jump *i= (sp_instr_hpush_jump *)sp->last_instruction(); + + i->add_condition($3); + $$= $1 + 1; + } + ; + +sp_cond: + ULONG_NUM + { /* mysql errno */ + $$= (sp_cond_type_t *)YYTHD->alloc(sizeof(sp_cond_type_t)); + $$->type= sp_cond_type_t::number; + $$->mysqlerr= $1; + } + | SQLSTATE_SYM opt_value TEXT_STRING_literal + { /* SQLSTATE */ + uint len= ($3.length < sizeof($$->sqlstate)-1 ? 
+ $3.length : sizeof($$->sqlstate)-1); + + $$= (sp_cond_type_t *)YYTHD->alloc(sizeof(sp_cond_type_t)); + $$->type= sp_cond_type_t::state; + memcpy($$->sqlstate, $3.str, len); + $$->sqlstate[len]= '\0'; + } + ; + +opt_value: + /* Empty */ {} + | VALUE_SYM {} + ; + +sp_hcond: + sp_cond + { + $$= $1; + } + | ident /* CONDITION name */ + { + $$= Lex->spcont->find_cond(&$1); + if ($$ == NULL) + { + net_printf(YYTHD, ER_SP_COND_MISMATCH, $1.str); + YYABORT; + } + } + | SQLWARNING_SYM /* SQLSTATEs 01??? */ + { + $$= (sp_cond_type_t *)YYTHD->alloc(sizeof(sp_cond_type_t)); + $$->type= sp_cond_type_t::warning; + } + | NOT FOUND_SYM /* SQLSTATEs 02??? */ + { + $$= (sp_cond_type_t *)YYTHD->alloc(sizeof(sp_cond_type_t)); + $$->type= sp_cond_type_t::notfound; + } + | SQLEXCEPTION_SYM /* All other SQLSTATEs */ + { + $$= (sp_cond_type_t *)YYTHD->alloc(sizeof(sp_cond_type_t)); + $$->type= sp_cond_type_t::exception; + } + ; + +sp_decl_idents: + ident + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (spc->find_pvar(&$1, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_VAR, $1.str); + YYABORT; + } + spc->push_pvar(&$1, (enum_field_types)0, sp_param_in); + $$= 1; + } + | sp_decl_idents ',' ident + { + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + + if (spc->find_pvar(&$3, TRUE)) + { + net_printf(YYTHD, ER_SP_DUP_VAR, $3.str); + YYABORT; + } + spc->push_pvar(&$3, (enum_field_types)0, sp_param_in); + $$= $1 + 1; + } + ; + +sp_opt_default: + /* Empty */ { $$ = NULL; } + | DEFAULT expr { $$ = $2; } + ; + +sp_proc_stmt: + { + Lex->sphead->reset_lex(YYTHD); + } + statement + { + LEX *lex= Lex; + + /* QQ What else? This doesn't seem to be a practical way, + but at the moment we can't think of anything better... */ + if ((lex->sql_command == SQLCOM_SELECT && !lex->result) || + lex->sql_command == SQLCOM_SHOW_CREATE_PROC || + lex->sql_command == SQLCOM_SHOW_CREATE_FUNC || + lex->sql_command == SQLCOM_SHOW_STATUS_PROC || + lex->sql_command == SQLCOM_SHOW_STATUS_FUNC || + lex->sql_command == SQLCOM_ANALYZE) + { + /* We maybe have one or more SELECT without INTO */ + lex->sphead->m_multi_results= TRUE; + } + if (lex->sql_command == SQLCOM_CHANGE_DB) + { /* "USE db" doesn't work in a procedure */ + send_error(YYTHD, ER_SP_NO_USE); + YYABORT; + } + /* Don't add an instruction for empty SET statements. + ** (This happens if the SET only contained local variables, + ** which get their set instructions generated separately.) + */ + if (lex->sql_command != SQLCOM_SET_OPTION || + ! lex->var_list.is_empty()) + { + /* Currently we can't handle queries inside a FUNCTION, + ** because of the way table locking works. + ** This is unfortunate, and limits the usefulness of functions + ** a great deal, but it's nothing we can do about this at the + ** moment. 
+ */ + if (lex->sphead->m_type == TYPE_ENUM_FUNCTION && + lex->sql_command != SQLCOM_SET_OPTION) + { + send_error(YYTHD, ER_SP_BADSTATEMENT); + YYABORT; + } + else + { + sp_instr_stmt *i=new sp_instr_stmt(lex->sphead->instructions()); + + i->set_lex(lex); + lex->sphead->add_instr(i); + lex->sp_lex_in_use= TRUE; + } + } + lex->sphead->restore_lex(YYTHD); + } + | RETURN_SYM expr + { + LEX *lex= Lex; + + if (lex->sphead->m_type == TYPE_ENUM_PROCEDURE) + { + send_error(YYTHD, ER_SP_BADRETURN); + YYABORT; + } + else + { + sp_instr_freturn *i; + + if ($2->type() == Item::SUBSELECT_ITEM) + { /* QQ For now, just disallow subselects as values */ + send_error(lex->thd, ER_SP_BADSTATEMENT); + YYABORT; + } + i= new sp_instr_freturn(lex->sphead->instructions(), + $2, lex->sphead->m_returns); + lex->sphead->add_instr(i); + lex->sphead->m_has_return= TRUE; + } + } + | IF sp_if END IF {} + | CASE_SYM WHEN_SYM + { + Lex->sphead->m_simple_case= FALSE; + } + sp_case END CASE_SYM {} + | CASE_SYM expr WHEN_SYM + { + /* We "fake" this by using an anonymous variable which we + set to the expression. Note that all WHENs are evaluate + at the same frame level, so we then know that it's the + top-most variable in the frame. */ + LEX *lex= Lex; + uint offset= lex->spcont->current_framesize(); + sp_instr_set *i = new sp_instr_set(lex->sphead->instructions(), + offset, $2, MYSQL_TYPE_STRING); + LEX_STRING dummy; + + dummy.str= (char *)""; + dummy.length= 0; + lex->spcont->push_pvar(&dummy, MYSQL_TYPE_STRING, sp_param_in); + lex->sphead->add_instr(i); + lex->sphead->m_simple_case= TRUE; + } + sp_case END CASE_SYM + { + Lex->spcont->pop_pvar(); + } + | sp_labeled_control + {} + | { /* Unlabeled controls get a secret label. */ + LEX *lex= Lex; + + lex->spcont->push_label((char *)"", lex->sphead->instructions()); + } + sp_unlabeled_control + { + LEX *lex= Lex; + + lex->sphead->backpatch(lex->spcont->pop_label()); + } + | LEAVE_SYM IDENT + { + LEX *lex= Lex; + sp_head *sp = lex->sphead; + sp_label_t *lab= lex->spcont->find_label($2.str); + + if (! lab) + { + net_printf(YYTHD, ER_SP_LILABEL_MISMATCH, "LEAVE", $2.str); + YYABORT; + } + else + { + sp_instr_jump *i= new sp_instr_jump(sp->instructions()); + + sp->push_backpatch(i, lab); /* Jumping forward */ + sp->add_instr(i); + } + } + | ITERATE_SYM IDENT + { + LEX *lex= Lex; + sp_label_t *lab= lex->spcont->find_label($2.str); + + if (! lab || lab->isbegin) + { + net_printf(YYTHD, ER_SP_LILABEL_MISMATCH, "ITERATE", $2.str); + YYABORT; + } + else + { + uint ip= lex->sphead->instructions(); + sp_instr_jump *i= new sp_instr_jump(ip, lab->ip); /* Jump back */ + + lex->sphead->add_instr(i); + } + } + | OPEN_SYM ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint offset; + sp_instr_copen *i; + + if (! lex->spcont->find_cursor(&$2, &offset)) + { + net_printf(YYTHD, ER_SP_CURSOR_MISMATCH, $2.str); + YYABORT; + } + i= new sp_instr_copen(sp->instructions(), offset); + sp->add_instr(i); + } + | FETCH_SYM ident INTO + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint offset; + sp_instr_cfetch *i; + + if (! lex->spcont->find_cursor(&$2, &offset)) + { + net_printf(YYTHD, ER_SP_CURSOR_MISMATCH, $2.str); + YYABORT; + } + i= new sp_instr_cfetch(sp->instructions(), offset); + sp->add_instr(i); + } + sp_fetch_list + { } + | CLOSE_SYM ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint offset; + sp_instr_cclose *i; + + if (! 
lex->spcont->find_cursor(&$2, &offset)) + { + net_printf(YYTHD, ER_SP_CURSOR_MISMATCH, $2.str); + YYABORT; + } + i= new sp_instr_cclose(sp->instructions(), offset); + sp->add_instr(i); + } + ; + +sp_fetch_list: + ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *spc= lex->spcont; + sp_pvar_t *spv; + + if (!spc || !(spv = spc->find_pvar(&$1))) + { + net_printf(YYTHD, ER_SP_UNDECLARED_VAR, $1.str); + YYABORT; + } + else + { /* An SP local variable */ + sp_instr_cfetch *i= (sp_instr_cfetch *)sp->last_instruction(); + + i->add_to_varlist(spv); + spv->isset= TRUE; + } + } + | + sp_fetch_list ',' ident + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *spc= lex->spcont; + sp_pvar_t *spv; + + if (!spc || !(spv = spc->find_pvar(&$3))) + { + net_printf(YYTHD, ER_SP_UNDECLARED_VAR, $3.str); + YYABORT; + } + else + { /* An SP local variable */ + sp_instr_cfetch *i= (sp_instr_cfetch *)sp->last_instruction(); + + i->add_to_varlist(spv); + spv->isset= TRUE; + } + } + ; + +sp_if: + expr THEN_SYM + { + sp_head *sp= Lex->sphead; + sp_pcontext *ctx= Lex->spcont; + uint ip= sp->instructions(); + sp_instr_jump_if_not *i = new sp_instr_jump_if_not(ip, $1); + + sp->push_backpatch(i, ctx->push_label((char *)"", 0)); + sp->add_instr(i); + } + sp_proc_stmts + { + sp_head *sp= Lex->sphead; + sp_pcontext *ctx= Lex->spcont; + uint ip= sp->instructions(); + sp_instr_jump *i = new sp_instr_jump(ip); + + sp->add_instr(i); + sp->backpatch(ctx->pop_label()); + sp->push_backpatch(i, ctx->push_label((char *)"", 0)); + } + sp_elseifs + { + LEX *lex= Lex; + + lex->sphead->backpatch(lex->spcont->pop_label()); + } + ; + +sp_elseifs: + /* Empty */ + | ELSEIF_SYM sp_if + | ELSE sp_proc_stmts + ; + +sp_case: + expr THEN_SYM + { + sp_head *sp= Lex->sphead; + sp_pcontext *ctx= Lex->spcont; + uint ip= sp->instructions(); + sp_instr_jump_if_not *i; + + if (! 
sp->m_simple_case) + i= new sp_instr_jump_if_not(ip, $1); + else + { /* Simple case: <caseval> = <whenval> */ + LEX_STRING ivar; + + ivar.str= (char *)"_tmp_"; + ivar.length= 5; + Item *var= (Item*) new Item_splocal(ivar, + ctx->current_framesize()-1); + Item *expr= new Item_func_eq(var, $1); + + i= new sp_instr_jump_if_not(ip, expr); + } + sp->push_backpatch(i, ctx->push_label((char *)"", 0)); + sp->add_instr(i); + } + sp_proc_stmts + { + sp_head *sp= Lex->sphead; + sp_pcontext *ctx= Lex->spcont; + uint ip= sp->instructions(); + sp_instr_jump *i = new sp_instr_jump(ip); + + sp->add_instr(i); + sp->backpatch(ctx->pop_label()); + sp->push_backpatch(i, ctx->push_label((char *)"", 0)); + } + sp_whens + { + LEX *lex= Lex; + + lex->sphead->backpatch(lex->spcont->pop_label()); + } + ; + +sp_whens: + /* Empty */ + { + sp_head *sp= Lex->sphead; + uint ip= sp->instructions(); + sp_instr_error *i= new sp_instr_error(ip, ER_SP_CASE_NOT_FOUND); + + sp->add_instr(i); + } + | ELSE sp_proc_stmts {} + | WHEN_SYM sp_case {} + ; + +sp_labeled_control: + IDENT ':' + { + LEX *lex= Lex; + sp_label_t *lab= lex->spcont->find_label($1.str); + + if (lab) + { + net_printf(YYTHD, ER_SP_LABEL_REDEFINE, $1.str); + YYABORT; + } + else + { + lex->spcont->push_label($1.str, + lex->sphead->instructions()); + } + } + sp_unlabeled_control sp_opt_label + { + LEX *lex= Lex; + + if ($5.str) + { + sp_label_t *lab= lex->spcont->find_label($5.str); + + if (!lab || + my_strcasecmp(system_charset_info, $5.str, lab->name) != 0) + { + net_printf(YYTHD, ER_SP_LABEL_MISMATCH, $5.str); + YYABORT; + } + } + lex->sphead->backpatch(lex->spcont->pop_label()); + } + ; + +sp_opt_label: + /* Empty */ + { $$.str= NULL; $$.length= 0; } + | IDENT + { $$= $1; } + ; + +sp_unlabeled_control: + BEGIN_SYM + { /* QQ This is just a dummy for grouping declarations and statements + together. No [[NOT] ATOMIC] yet, and we need to figure out how + make it coexist with the existing BEGIN COMMIT/ROLLBACK. 
*/ + LEX *lex= Lex; + sp_label_t *lab= lex->spcont->last_label(); + + lab->isbegin= TRUE; + /* Scope duplicate checking */ + lex->spcont->push_scope(); + } + sp_decls + sp_proc_stmts + END + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + sp_pcontext *ctx= lex->spcont; + + sp->backpatch(ctx->last_label()); /* We always has a label */ + ctx->pop_pvar($3.vars); + ctx->pop_cond($3.conds); + ctx->pop_cursor($3.curs); + if ($3.hndlrs) + sp->add_instr(new sp_instr_hpop(sp->instructions(),$3.hndlrs)); + if ($3.curs) + sp->add_instr(new sp_instr_cpop(sp->instructions(), $3.curs)); + ctx->pop_scope(); + } + | LOOP_SYM + sp_proc_stmts END LOOP_SYM + { + LEX *lex= Lex; + uint ip= lex->sphead->instructions(); + sp_label_t *lab= lex->spcont->last_label(); /* Jumping back */ + sp_instr_jump *i = new sp_instr_jump(ip, lab->ip); + + lex->sphead->add_instr(i); + } + | WHILE_SYM expr DO_SYM + { + LEX *lex= Lex; + sp_head *sp= lex->sphead; + uint ip= sp->instructions(); + sp_instr_jump_if_not *i = new sp_instr_jump_if_not(ip, $2); + + /* Jumping forward */ + sp->push_backpatch(i, lex->spcont->last_label()); + sp->add_instr(i); + } + sp_proc_stmts END WHILE_SYM + { + LEX *lex= Lex; + uint ip= lex->sphead->instructions(); + sp_label_t *lab= lex->spcont->last_label(); /* Jumping back */ + sp_instr_jump *i = new sp_instr_jump(ip, lab->ip); + + lex->sphead->add_instr(i); + } + | REPEAT_SYM sp_proc_stmts UNTIL_SYM expr END REPEAT_SYM + { + LEX *lex= Lex; + uint ip= lex->sphead->instructions(); + sp_label_t *lab= lex->spcont->last_label(); /* Jumping back */ + sp_instr_jump_if_not *i = new sp_instr_jump_if_not(ip, $4, lab->ip); + + lex->sphead->add_instr(i); + } + ; create2: '(' create2a {} @@ -1828,8 +2895,38 @@ alter: LEX *lex=Lex; lex->sql_command=SQLCOM_ALTER_DB; lex->name=$3.str; - }; + } + | ALTER PROCEDURE sp_name + { + LEX *lex= Lex; + bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + lex->name= 0; + } + sp_a_chistics + { + THD *thd= YYTHD; + LEX *lex=Lex; + + lex->sql_command= SQLCOM_ALTER_PROCEDURE; + lex->spname= $3; + } + | ALTER FUNCTION_SYM sp_name + { + LEX *lex= Lex; + + bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + lex->name= 0; + } + sp_a_chistics + { + THD *thd= YYTHD; + LEX *lex=Lex; + + lex->sql_command= SQLCOM_ALTER_FUNCTION; + lex->spname= $3; + } + ; alter_list: | DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; } @@ -2809,6 +3906,8 @@ simple_expr: { $$= new Item_date_add_interval($3, $5, INTERVAL_DAY, 0);} | ADDDATE_SYM '(' expr ',' INTERVAL_SYM expr interval ')' { $$= new Item_date_add_interval($3, $6, $7, 0); } + | REPEAT_SYM '(' expr ',' expr ')' + { $$= new Item_func_repeat($3,$5); } | ATAN '(' expr ')' { $$= new Item_func_atan($3); } | ATAN '(' expr ',' expr ')' @@ -2975,6 +4074,8 @@ simple_expr: { $$= new Item_func_old_password($3); } | POSITION_SYM '(' no_in_expr IN_SYM expr ')' { $$ = new Item_func_locate($5,$3); } + | QUARTER_SYM '(' expr ')' + { $$ = new Item_func_quarter($3); } | RAND '(' expr ')' { $$= new Item_func_rand($3); Lex->uncacheable(UNCACHEABLE_RAND);} | RAND '(' ')' @@ -2986,6 +4087,11 @@ simple_expr: | ROUND '(' expr ')' { $$= new Item_func_round($3, new Item_int((char*)"0",0,1),0); } | ROUND '(' expr ',' expr ')' { $$= new Item_func_round($3,$5,0); } + | ROW_COUNT_SYM '(' ')' + { + $$= new Item_func_row_count(); + Lex->safe_to_cache_query= 0; + } | SUBDATE_SYM '(' expr ',' expr ')' { $$= new Item_date_add_interval($3, $5, INTERVAL_DAY, 1);} | SUBDATE_SYM '(' expr ',' INTERVAL_SYM expr interval ')' @@ -3008,6 +4114,10 
@@ simple_expr: { $$= new Item_datetime_typecast($3); } | TIMESTAMP '(' expr ',' expr ')' { $$= new Item_func_add_time($3, $5, 1, 0); } + | TIMESTAMP_ADD '(' interval_time_st ',' expr ',' expr ')' + { $$= new Item_date_add_interval($7,$5,$3,0); } + | TIMESTAMP_DIFF '(' interval_time_st ',' expr ',' expr ')' + { $$= new Item_func_timestamp_diff($5,$7,$3); } | TRIM '(' expr ')' { $$= new Item_func_trim($3); } | TRIM '(' LEADING expr FROM expr ')' @@ -3028,48 +4138,90 @@ simple_expr: { $$= new Item_func_round($3,$5,1); } | TRUE_SYM { $$= new Item_int((char*) "TRUE",1,1); } - | UDA_CHAR_SUM '(' udf_expr_list ')' + | ident '.' ident '(' udf_expr_list ')' { - if ($3 != NULL) - $$ = new Item_sum_udf_str($1, *$3); - else - $$ = new Item_sum_udf_str($1); - } - | UDA_FLOAT_SUM '(' udf_expr_list ')' - { - if ($3 != NULL) - $$ = new Item_sum_udf_float($1, *$3); - else - $$ = new Item_sum_udf_float($1); - } - | UDA_INT_SUM '(' udf_expr_list ')' - { - if ($3 != NULL) - $$ = new Item_sum_udf_int($1, *$3); - else - $$ = new Item_sum_udf_int($1); - } - | UDF_CHAR_FUNC '(' udf_expr_list ')' - { - if ($3 != NULL) - $$ = new Item_func_udf_str($1, *$3); - else - $$ = new Item_func_udf_str($1); - } - | UDF_FLOAT_FUNC '(' udf_expr_list ')' - { - if ($3 != NULL) - $$ = new Item_func_udf_float($1, *$3); - else - $$ = new Item_func_udf_float($1); - } - | UDF_INT_FUNC '(' udf_expr_list ')' - { - if ($3 != NULL) - $$ = new Item_func_udf_int($1, *$3); + LEX *lex= Lex; + sp_name *name= new sp_name($1, $3); + + name->init_qname(YYTHD); + sp_add_fun_to_lex(Lex, name); + if ($5) + $$= new Item_func_sp(name, *$5); else - $$ = new Item_func_udf_int($1); + $$= new Item_func_sp(name); } + | IDENT_sys '(' udf_expr_list ')' + { +#ifdef HAVE_DLOPEN + udf_func *udf; + + if (using_udf_functions && (udf=find_udf($1.str, $1.length))) + { + switch (udf->returns) { + case STRING_RESULT: + if (udf->type == UDFTYPE_FUNCTION) + { + if ($3 != NULL) + $$ = new Item_func_udf_str(udf, *$3); + else + $$ = new Item_func_udf_str(udf); + } + else + { + if ($3 != NULL) + $$ = new Item_sum_udf_str(udf, *$3); + else + $$ = new Item_sum_udf_str(udf); + } + break; + case REAL_RESULT: + if (udf->type == UDFTYPE_FUNCTION) + { + if ($3 != NULL) + $$ = new Item_func_udf_float(udf, *$3); + else + $$ = new Item_func_udf_float(udf); + } + else + { + if ($3 != NULL) + $$ = new Item_sum_udf_float(udf, *$3); + else + $$ = new Item_sum_udf_float(udf); + } + break; + case INT_RESULT: + if (udf->type == UDFTYPE_FUNCTION) + { + if ($3 != NULL) + $$ = new Item_func_udf_int(udf, *$3); + else + $$ = new Item_func_udf_int(udf); + } + else + { + if ($3 != NULL) + $$ = new Item_sum_udf_int(udf, *$3); + else + $$ = new Item_sum_udf_int(udf); + } + break; + default: + YYABORT; + } + } + else +#endif /* HAVE_DLOPEN */ + { + sp_name *name= sp_name_current_db_new(YYTHD, $1); + + sp_add_fun_to_lex(Lex, name); + if ($3) + $$= new Item_func_sp(name, *$3); + else + $$= new Item_func_sp(name); + } + } | UNIQUE_USERS '(' text_literal ',' NUM ',' NUM ',' expr_list ')' { $$= new Item_func_unique_users($3,atoi($5.str),atoi($7.str), * $9); @@ -3177,8 +4329,37 @@ fulltext_options: ; udf_expr_list: - /* empty */ { $$= NULL; } - | expr_list { $$= $1;}; + /* empty */ { $$= NULL; } + | udf_expr_list2 { $$= $1;} + ; + +udf_expr_list2: + { Select->expr_list.push_front(new List<Item>); } + udf_expr_list3 + { $$= Select->expr_list.pop(); } + ; + +udf_expr_list3: + udf_expr + { + Select->expr_list.head()->push_back($1); + } + | udf_expr_list3 ',' udf_expr + { + 
Select->expr_list.head()->push_back($3); + } + ; + +udf_expr: + remember_name expr remember_end select_alias + { + if ($4.str) + $2->set_name($4.str,$4.length,system_charset_info); + else + $2->set_name($1,(uint) ($3 - $1), YYTHD->charset()); + $$= $2; + } + ; sum_expr: AVG_SYM '(' in_sum_expr ')' @@ -3203,14 +4384,25 @@ sum_expr: { $$= new Item_sum_unique_users($3,atoi($5.str),atoi($7.str),$9); } | MIN_SYM '(' in_sum_expr ')' { $$=new Item_sum_min($3); } +/* + According to ANSI SQL, DISTINCT is allowed and has + no sence inside MIN and MAX grouping functions; so MIN|MAX(DISTINCT ...) + is processed like an ordinary MIN | MAX() + */ + | MIN_SYM '(' DISTINCT in_sum_expr ')' + { $$=new Item_sum_min($4); } | MAX_SYM '(' in_sum_expr ')' { $$=new Item_sum_max($3); } + | MAX_SYM '(' DISTINCT in_sum_expr ')' + { $$=new Item_sum_max($4); } | STD_SYM '(' in_sum_expr ')' { $$=new Item_sum_std($3); } | VARIANCE_SYM '(' in_sum_expr ')' { $$=new Item_sum_variance($3); } | SUM_SYM '(' in_sum_expr ')' { $$=new Item_sum_sum($3); } + | SUM_SYM '(' DISTINCT in_sum_expr ')' + { $$=new Item_sum_sum_distinct($4); } | GROUP_CONCAT_SYM '(' opt_distinct { Select->in_sum_expr++; } expr_list opt_gorder_clause @@ -3324,59 +4516,80 @@ when_list2: sel->when_list.head()->push_back($5); }; +table_ref: + table_factor { $$=$1; } + | join_table { $$=$1; } + { + LEX *lex= Lex; + if (!($$= lex->current_select->nest_last_join(lex->thd))) + YYABORT; + } + ; + join_table_list: - '(' join_table_list ')' { $$=$2; } - | join_table { $$=$1; } - | join_table_list ',' join_table_list { $$=$3; } - | join_table_list normal_join join_table_list { $$=$3; } - | join_table_list STRAIGHT_JOIN join_table_list - { $$=$3 ; $1->next->straight=1; } - | join_table_list normal_join join_table_list ON expr + table_ref { $$=$1; } + | join_table_list ',' table_ref { $$=$3; } + ; + +join_table: + table_ref normal_join table_ref { $$=$3; } + | table_ref STRAIGHT_JOIN table_factor + { $3->straight=1; $$=$3 ; } + | table_ref normal_join table_ref ON expr { add_join_on($3,$5); $$=$3; } - | join_table_list normal_join join_table_list + | table_ref normal_join table_ref USING { SELECT_LEX *sel= Select; - sel->db1=$1->db; sel->table1=$1->alias; - sel->db2=$3->db; sel->table2=$3->alias; + sel->save_names_for_using_list($1, $3); } '(' using_list ')' { add_join_on($3,$7); $$=$3; } - | join_table_list LEFT opt_outer JOIN_SYM join_table_list ON expr + | table_ref LEFT opt_outer JOIN_SYM table_factor ON expr { add_join_on($5,$7); $5->outer_join|=JOIN_TYPE_LEFT; $$=$5; } - | join_table_list LEFT opt_outer JOIN_SYM join_table_list + | table_ref LEFT opt_outer JOIN_SYM table_factor { SELECT_LEX *sel= Select; - sel->db1=$1->db; sel->table1=$1->alias; - sel->db2=$5->db; sel->table2=$5->alias; + sel->save_names_for_using_list($1, $5); } USING '(' using_list ')' { add_join_on($5,$9); $5->outer_join|=JOIN_TYPE_LEFT; $$=$5; } - | join_table_list NATURAL LEFT opt_outer JOIN_SYM join_table_list + | table_ref NATURAL LEFT opt_outer JOIN_SYM table_factor { - add_join_natural($1,$1->next); - $1->next->outer_join|=JOIN_TYPE_LEFT; + add_join_natural($1,$6); + $6->outer_join|=JOIN_TYPE_LEFT; $$=$6; } - | join_table_list RIGHT opt_outer JOIN_SYM join_table_list ON expr - { add_join_on($1,$7); $1->outer_join|=JOIN_TYPE_RIGHT; $$=$5; } - | join_table_list RIGHT opt_outer JOIN_SYM join_table_list + | table_ref RIGHT opt_outer JOIN_SYM table_factor ON expr + { + LEX *lex= Lex; + if (!($$= lex->current_select->convert_right_join())) + YYABORT; + add_join_on($$, $7); + } + | table_ref 
RIGHT opt_outer JOIN_SYM table_factor { SELECT_LEX *sel= Select; - sel->db1=$1->db; sel->table1=$1->alias; - sel->db2=$5->db; sel->table2=$5->alias; + sel->save_names_for_using_list($1, $5); } USING '(' using_list ')' - { add_join_on($1,$9); $1->outer_join|=JOIN_TYPE_RIGHT; $$=$5; } - | join_table_list NATURAL RIGHT opt_outer JOIN_SYM join_table_list + { + LEX *lex= Lex; + if (!($$= lex->current_select->convert_right_join())) + YYABORT; + add_join_on($$, $9); + } + | table_ref NATURAL RIGHT opt_outer JOIN_SYM table_factor { - add_join_natural($1->next,$1); - $1->outer_join|=JOIN_TYPE_RIGHT; - $$=$6; + add_join_natural($6,$1); + LEX *lex= Lex; + if (!($$= lex->current_select->convert_right_join())) + YYABORT; } - | join_table_list NATURAL JOIN_SYM join_table_list - { add_join_natural($1,$1->next); $$=$4; }; + | table_ref NATURAL JOIN_SYM table_factor + { add_join_natural($1,$4); $$=$4; }; + normal_join: JOIN_SYM {} @@ -3384,7 +4597,7 @@ normal_join: | CROSS JOIN_SYM {} ; -join_table: +table_factor: { SELECT_LEX *sel= Select; sel->use_index_ptr=sel->ignore_index_ptr=0; @@ -3400,8 +4613,21 @@ join_table: sel->get_use_index(), sel->get_ignore_index()))) YYABORT; + sel->add_joined_table($$); } - | '{' ident join_table LEFT OUTER JOIN_SYM join_table ON expr '}' + | '(' + { + LEX *lex= Lex; + if (lex->current_select->init_nested_join(lex->thd)) + YYABORT; + } + join_table_list ')' + { + LEX *lex= Lex; + if (!($$= lex->current_select->end_nested_join(lex->thd))) + YYABORT; + } + | '{' ident table_ref LEFT OUTER JOIN_SYM table_ref ON expr '}' { add_join_on($7,$9); $7->outer_join|=JOIN_TYPE_LEFT; $$=$7; } | '(' SELECT_SYM select_derived ')' opt_table_alias { @@ -3414,6 +4640,7 @@ join_table: (List<String> *)0))) YYABORT; + lex->current_select->add_joined_table($$); }; select_derived: @@ -3510,23 +4737,29 @@ using_list: }; interval: - DAY_HOUR_SYM { $$=INTERVAL_DAY_HOUR; } + interval_time_st {} + | DAY_HOUR_SYM { $$=INTERVAL_DAY_HOUR; } | DAY_MICROSECOND_SYM { $$=INTERVAL_DAY_MICROSECOND; } | DAY_MINUTE_SYM { $$=INTERVAL_DAY_MINUTE; } | DAY_SECOND_SYM { $$=INTERVAL_DAY_SECOND; } - | DAY_SYM { $$=INTERVAL_DAY; } | HOUR_MICROSECOND_SYM { $$=INTERVAL_HOUR_MICROSECOND; } | HOUR_MINUTE_SYM { $$=INTERVAL_HOUR_MINUTE; } | HOUR_SECOND_SYM { $$=INTERVAL_HOUR_SECOND; } - | HOUR_SYM { $$=INTERVAL_HOUR; } | MICROSECOND_SYM { $$=INTERVAL_MICROSECOND; } | MINUTE_MICROSECOND_SYM { $$=INTERVAL_MINUTE_MICROSECOND; } | MINUTE_SECOND_SYM { $$=INTERVAL_MINUTE_SECOND; } + | SECOND_MICROSECOND_SYM { $$=INTERVAL_SECOND_MICROSECOND; } + | YEAR_MONTH_SYM { $$=INTERVAL_YEAR_MONTH; }; + +interval_time_st: + DAY_SYM { $$=INTERVAL_DAY; } + | WEEK_SYM { $$=INTERVAL_WEEK; } + | HOUR_SYM { $$=INTERVAL_HOUR; } + | FRAC_SECOND_SYM { $$=INTERVAL_MICROSECOND; } | MINUTE_SYM { $$=INTERVAL_MINUTE; } | MONTH_SYM { $$=INTERVAL_MONTH; } - | SECOND_MICROSECOND_SYM { $$=INTERVAL_SECOND_MICROSECOND; } + | QUARTER_SYM { $$=INTERVAL_QUARTER; } | SECOND_SYM { $$=INTERVAL_SECOND; } - | YEAR_MONTH_SYM { $$=INTERVAL_YEAR_MONTH; } | YEAR_SYM { $$=INTERVAL_YEAR; }; date_time_type: @@ -3792,11 +5025,32 @@ select_var_list: | select_var_ident {} ; -select_var_ident: '@' ident_or_text +select_var_ident: + '@' ident_or_text { LEX *lex=Lex; - if (lex->result && ((select_dumpvar *)lex->result)->var_list.push_back((LEX_STRING*) sql_memdup(&$2,sizeof(LEX_STRING)))) + if (lex->result) + ((select_dumpvar *)lex->result)->var_list.push_back( new my_var($2,0,0,(enum_field_types)0)); + else + YYABORT; + } + | ident_or_text + { + LEX *lex=Lex; + sp_pvar_t *t; + + if 
(!lex->spcont || !(t=lex->spcont->find_pvar(&$1))) + { + net_printf(YYTHD, ER_SP_UNDECLARED_VAR, $1.str); + YYABORT; + } + if (! lex->result) YYABORT; + else + { + ((select_dumpvar *)lex->result)->var_list.push_back( new my_var($1,1,t->offset,t->type)); + t->isset= TRUE; + } } ; @@ -3877,11 +5131,19 @@ drop: lex->drop_if_exists=$3; lex->name=$4.str; } - | DROP UDF_SYM IDENT_sys + | DROP FUNCTION_SYM if_exists sp_name opt_restrict { LEX *lex=Lex; lex->sql_command = SQLCOM_DROP_FUNCTION; - lex->udf.name = $3; + lex->drop_if_exists= $3; + lex->spname= $4; + } + | DROP PROCEDURE if_exists sp_name opt_restrict + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_DROP_PROCEDURE; + lex->drop_if_exists= $3; + lex->spname= $4; } | DROP USER { @@ -3893,7 +5155,6 @@ drop: {} ; - table_list: table_name | table_list ',' table_name; @@ -3952,7 +5213,6 @@ replace: } insert_field_spec {} - {} ; insert_lock_option: @@ -4018,7 +5278,7 @@ ident_eq_list: ident_eq_value; ident_eq_value: - simple_ident equal expr_or_default + simple_ident_nospvar equal expr_or_default { LEX *lex=Lex; if (lex->field_list.push_back($1) || @@ -4113,12 +5373,12 @@ update: ; update_list: - update_list ',' simple_ident equal expr_or_default + update_list ',' simple_ident_nospvar equal expr_or_default { if (add_item_to_list(YYTHD, $3) || add_value_to_list(YYTHD, $5)) YYABORT; } - | simple_ident equal expr_or_default + | simple_ident_nospvar equal expr_or_default { if (add_item_to_list(YYTHD, $1) || add_value_to_list(YYTHD, $3)) YYABORT; @@ -4382,7 +5642,29 @@ show_param: | SLAVE STATUS_SYM { Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; - }; + } + | CREATE PROCEDURE sp_name + { + LEX *lex= Lex; + + lex->sql_command = SQLCOM_SHOW_CREATE_PROC; + lex->spname= $3; + } + | CREATE FUNCTION_SYM sp_name + { + LEX *lex= Lex; + + lex->sql_command = SQLCOM_SHOW_CREATE_FUNC; + lex->spname= $3; + } + | PROCEDURE STATUS_SYM wild + { + Lex->sql_command = SQLCOM_SHOW_STATUS_PROC; + } + | FUNCTION_SYM STATUS_SYM wild + { + Lex->sql_command = SQLCOM_SHOW_STATUS_FUNC; + }; show_engine_param: STATUS_SYM @@ -4570,18 +5852,23 @@ purge_option: /* kill threads */ kill: - KILL_SYM expr + KILL_SYM kill_option expr { LEX *lex=Lex; - if ($2->fix_fields(lex->thd, 0, &$2) || $2->check_cols(1)) + if ($3->fix_fields(lex->thd, 0, &$3) || $3->check_cols(1)) { send_error(lex->thd, ER_SET_CONSTANTS_ONLY); YYABORT; } lex->sql_command=SQLCOM_KILL; - lex->thread_id= (ulong) $2->val_int(); + lex->thread_id= (ulong) $3->val_int(); }; +kill_option: + /* empty */ { Lex->type= 0; } + | CONNECTION_SYM { Lex->type= 0; } + | QUERY_SYM { Lex->type= ONLY_KILL_QUERY; }; + /* change database */ use: USE_SYM ident @@ -4777,7 +6064,7 @@ NUM_literal: **********************************************************************/ insert_ident: - simple_ident { $$=$1; } + simple_ident_nospvar { $$=$1; } | table_wild { $$=$1; }; table_wild: @@ -4801,13 +6088,46 @@ order_ident: simple_ident: ident { + sp_pvar_t *spv; + LEX *lex = Lex; + sp_pcontext *spc = lex->spcont; + + if (spc && (spv = spc->find_pvar(&$1))) + { /* We're compiling a stored procedure and found a variable */ + if (lex->sql_command != SQLCOM_CALL && ! spv->isset) + { + push_warning_printf(YYTHD, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_SP_UNINIT_VAR, ER(ER_SP_UNINIT_VAR), + $1.str); + } + $$ = (Item*) new Item_splocal($1, spv->offset); + } + else + { + SELECT_LEX *sel=Select; + $$= (sel->parsing_place != SELECT_LEX_NODE::IN_HAVING || + sel->get_in_sum_expr() > 0) ? 
+ (Item*) new Item_field(NullS,NullS,$1.str) : + (Item*) new Item_ref(0,0, NullS,NullS,$1.str); + } + } + | simple_ident_q { $$= $1; } + ; + +simple_ident_nospvar: + ident + { SELECT_LEX *sel=Select; $$= (sel->parsing_place != SELECT_LEX_NODE::IN_HAVING || sel->get_in_sum_expr() > 0) ? (Item*) new Item_field(NullS,NullS,$1.str) : - (Item*) new Item_ref(0,0, NullS,NullS,$1.str); + (Item*) new Item_ref(0,0,NullS,NullS,$1.str); } - | ident '.' ident + | simple_ident_q { $$= $1; } + ; + +simple_ident_q: + ident '.' ident { THD *thd= YYTHD; LEX *lex= thd->lex; @@ -5006,6 +6326,7 @@ keyword: | DATE_SYM {} | DAY_SYM {} | DEALLOCATE_SYM {} + | DEFINER_SYM {} | DELAY_KEY_WRITE_SYM {} | DES_KEY_FILE {} | DIRECTORY_SYM {} @@ -5043,6 +6364,7 @@ keyword: | HOSTS_SYM {} | HOUR_SYM {} | IDENTIFIED_SYM {} + | INVOKER_SYM {} | IMPORT {} | INDEXES {} | ISOLATION {} @@ -5050,6 +6372,7 @@ keyword: | INNOBASE_SYM {} | INSERT_METHOD {} | RELAY_THREAD {} + | LANGUAGE_SYM {} | LAST_SYM {} | LEAVES {} | LEVEL_SYM {} @@ -5086,6 +6409,7 @@ keyword: | MULTILINESTRING {} | MULTIPOINT {} | MULTIPOLYGON {} + | NAME_SYM {} | NAMES_SYM {} | NATIONAL_SYM {} | NCHAR_SYM {} @@ -5108,6 +6432,7 @@ keyword: | PREV_SYM {} | PROCESS {} | PROCESSLIST_SYM {} + | QUARTER_SYM {} | QUERY_SYM {} | QUICK {} | RAID_0_SYM {} @@ -5124,6 +6449,7 @@ keyword: | RESET_SYM {} | RESOURCES {} | RESTORE_SYM {} + | RETURNS_SYM {} | ROLLBACK_SYM {} | ROLLUP_SYM {} | ROWS_SYM {} @@ -5132,6 +6458,7 @@ keyword: | RTREE_SYM {} | SAVEPOINT_SYM {} | SECOND_SYM {} + | SECURITY_SYM {} | SERIAL_SYM {} | SERIALIZABLE_SYM {} | SESSION_SYM {} @@ -5159,10 +6486,12 @@ keyword: | TRANSACTION_SYM {} | TRUNCATE_SYM {} | TIMESTAMP {} + | TIMESTAMP_ADD {} + | TIMESTAMP_DIFF {} | TIME_SYM {} | TYPE_SYM {} | TYPES_SYM {} - | UDF_SYM {} + | FUNCTION_SYM {} | UNCOMMITTED_SYM {} | UNICODE_SYM {} | UNTIL_SYM {} @@ -5171,6 +6500,7 @@ keyword: | VARIABLES {} | VALUE_SYM {} | WARNINGS {} + | WEEK_SYM {} | WORK_SYM {} | X509_SYM {} | YEAR_SYM {} @@ -5222,15 +6552,43 @@ opt_var_ident_type: ; option_value: - '@' ident_or_text equal expr - { - Lex->var_list.push_back(new set_var_user(new Item_func_set_user_var($2,$4))); - } + '@' ident_or_text equal expr + { + Lex->var_list.push_back(new set_var_user(new Item_func_set_user_var($2,$4))); + } | internal_variable_name equal set_expr_or_default { LEX *lex=Lex; - lex->var_list.push_back(new set_var(lex->option_type, $1.var, - &$1.base_name, $3)); + + if ($1.var) + { /* System variable */ + lex->var_list.push_back(new set_var(lex->option_type, $1.var, + &$1.base_name, $3)); + } + else + { /* An SP local variable */ + sp_pvar_t *spv; + sp_instr_set *i; + Item *it; + + if ($3 && $3->type() == Item::SUBSELECT_ITEM) + { /* QQ For now, just disallow subselects as values */ + send_error(lex->thd, ER_SP_SUBSELECT_NYI); + YYABORT; + } + spv= lex->spcont->find_pvar(&$1.base_name); + + if ($3) + it= $3; + else if (spv->dflt) + it= spv->dflt; + else + it= new Item_null(); + i= new sp_instr_set(lex->sphead->instructions(), + spv->offset, it, spv->type); + lex->sphead->add_instr(i); + spv->isset= TRUE; + } } | '@' '@' opt_var_ident_type internal_variable_name equal set_expr_or_default { @@ -5288,12 +6646,25 @@ option_value: internal_variable_name: ident { - sys_var *tmp=find_sys_var($1.str, $1.length); - if (!tmp) - YYABORT; - $$.var= tmp; - $$.base_name.str=0; - $$.base_name.length=0; + LEX *lex= Lex; + sp_pcontext *spc= lex->spcont; + sp_pvar_t *spv; + + /* We have to lookup here since local vars can shadow sysvars */ + if (!spc || !(spv = 
spc->find_pvar(&$1))) + { /* Not an SP local variable */ + sys_var *tmp=find_sys_var($1.str, $1.length); + if (!tmp) + YYABORT; + $$.var= tmp; + $$.base_name.str=0; + $$.base_name.length=0; + } + else + { /* An SP local variable */ + $$.var= NULL; + $$.base_name= $1; + } } | ident '.' ident { diff --git a/sql/structs.h b/sql/structs.h index c30d85f59cb..9f13ef54ce0 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -158,8 +158,8 @@ typedef struct st_known_date_time_format { enum SHOW_TYPE { SHOW_UNDEF, - SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, SHOW_BOOL, - SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUESTION, + SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, SHOW_DOUBLE, + SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUESTION, SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS, #ifdef HAVE_OPENSSL SHOW_SSL_CTX_SESS_ACCEPT, SHOW_SSL_CTX_SESS_ACCEPT_GOOD, diff --git a/sql/table.h b/sql/table.h index f111377bc85..eaad8bedf90 100644 --- a/sql/table.h +++ b/sql/table.h @@ -27,6 +27,7 @@ typedef struct st_order { struct st_order *next; Item **item; /* Point at item in select fields */ Item *item_ptr; /* Storage for initial item */ + Item **item_copy; /* For SPs; the original item ptr */ bool asc; /* true if ascending */ bool free_me; /* true if item isn't shared */ bool in_field_list; /* true if in select field list */ @@ -157,12 +158,8 @@ struct st_table { key_part_map const_key_parts[MAX_KEY]; ulong query_id; uchar frm_version; - - union /* Temporary variables */ - { - uint temp_pool_slot; /* Used by intern temp tables */ - struct st_table_list *pos_in_table_list; - }; + uint temp_pool_slot; /* Used by intern temp tables */ + struct st_table_list *pos_in_table_list;/* Element referring to this table */ /* number of select if it is derived table */ uint derived_select_number; THD *in_use; /* Which thread uses this */ @@ -192,13 +189,27 @@ typedef struct st_table_list uint32 db_length, real_name_length; bool straight; /* optimize with prev table */ bool updating; /* for replicate-do/ignore table */ - bool force_index; /* Prefer index over table scan */ - bool ignore_leaves; /* Preload only non-leaf nodes */ + bool force_index; /* prefer index over table scan */ + bool ignore_leaves; /* preload only non-leaf nodes */ + table_map dep_tables; /* tables the table depends on */ + table_map on_expr_dep_tables; /* tables on expression depends on */ + struct st_nested_join *nested_join; /* if the element is a nested join */ + st_table_list *embedding; /* nested join containing the table */ + List<struct st_table_list> *join_list;/* join list the table belongs to */ bool cacheable_table; /* stop PS caching */ /* used in multi-upd privelege check */ bool table_in_update_from_clause; } TABLE_LIST; +typedef struct st_nested_join +{ + List<TABLE_LIST> join_list; /* list of elements in the nested join */ + table_map used_tables; /* bitmap of tables in the nested join */ + table_map not_null_tables; /* tables that rejects nulls */ + struct st_join_table *first_nested;/* the first nested table in the plan */ + uint counter; /* to count tables in the nested join */ +} NESTED_JOIN; + typedef struct st_changed_table_list { struct st_changed_table_list *next; @@ -206,8 +217,7 @@ typedef struct st_changed_table_list uint32 key_length; } CHANGED_TABLE_LIST; -typedef struct st_open_table_list -{ +typedef struct st_open_table_list{ struct st_open_table_list *next; char *db,*table; uint32 in_use,locked; diff --git a/sql/udf_example.cc 
b/sql/udf_example.cc index 7e2ee9113b2..f0f33ed6fd7 100644 --- a/sql/udf_example.cc +++ b/sql/udf_example.cc @@ -56,7 +56,9 @@ ** ** Function 'myfunc_int' returns summary length of all its arguments. ** -** Function 'sequence' returns an sequence starting from a certain number +** Function 'sequence' returns an sequence starting from a certain number. +** +** Function 'myfunc_argument_name' returns name of argument. ** ** On the end is a couple of functions that converts hostnames to ip and ** vice versa. @@ -82,6 +84,7 @@ ** CREATE FUNCTION lookup RETURNS STRING SONAME "udf_example.so"; ** CREATE FUNCTION reverse_lookup RETURNS STRING SONAME "udf_example.so"; ** CREATE AGGREGATE FUNCTION avgcost RETURNS REAL SONAME "udf_example.so"; +** CREATE FUNCTION myfunc_argument_name RETURNS STRING SONAME "udf_example.so"; ** ** After this the functions will work exactly like native MySQL functions. ** Functions should be created only once. @@ -94,6 +97,7 @@ ** DROP FUNCTION lookup; ** DROP FUNCTION reverse_lookup; ** DROP FUNCTION avgcost; +** DROP FUNCTION myfunc_argument_name; ** ** The CREATE FUNCTION and DROP FUNCTION update the func@mysql table. All ** Active function will be reloaded on every restart of server @@ -984,4 +988,43 @@ avgcost( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error ) return data->totalprice/double(data->totalquantity); } +extern "C" { +my_bool myfunc_argument_name_init(UDF_INIT *initid, UDF_ARGS *args, + char *message); +char *myfunc_argument_name(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, + char *error); +} + +my_bool myfunc_argument_name_init(UDF_INIT *initid, UDF_ARGS *args, + char *message) +{ + if (args->arg_count != 1) + { + strmov(message,"myfunc_argument_name_init accepts only one argument"); + return 1; + } + initid->max_length= args->attribute_lengths[0]; + initid->maybe_null= 1; + initid->const_item= 1; + return 0; +} + +char *myfunc_argument_name(UDF_INIT *initid, UDF_ARGS *args, char *result, + unsigned long *length, char *null_value, + char *error) +{ + if (!args->attributes[0]) + { + null_value= 0; + return 0; + } + (*length)--; // space for ending \0 (for debugging purposes) + if (*length > args->attribute_lengths[0]) + *length= args->attribute_lengths[0]; + memcpy(result, args->attributes[0], *length); + result[*length]= 0; + return result; +} + #endif /* HAVE_DLOPEN */ diff --git a/sql/uniques.cc b/sql/uniques.cc index d060965aa66..b08727705e4 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -63,12 +63,255 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, comp_func_fixed_arg); /* If the following fail's the next add will also fail */ my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16); + /* + If you change the following, change it in get_max_elements function, too. + */ max_elements= max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size); open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME)); } +/* + Calculate log2(n!) + + NOTES + Stirling's approximate formula is used: + + n! ~= sqrt(2*M_PI*n) * (n/M_E)^n + + Derivation of formula used for calculations is as follows: + + log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) = + + = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2). 
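As a quick standalone sanity check of the approximation above (everything in this snippet is illustrative and not part of the patch; only the formula is taken from the comment), the Stirling estimate can be compared against an exact running sum of log2(k):

#include <math.h>
#include <stdio.h>

/* log2(n!) via Stirling's formula, same expression as log2_n_fact() below */
static double log2_n_fact_approx(double x)
{
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
}

int main()
{
  static const int test_n[]= { 10, 100, 1000, 100000 };
  for (unsigned i= 0; i < sizeof(test_n)/sizeof(test_n[0]); i++)
  {
    int n= test_n[i];
    double exact= 0.0;
    for (int k= 2; k <= n; k++)
      exact+= log((double) k) / M_LN2;         /* exact log2(n!) */
    printf("n=%6d  exact=%12.3f  stirling=%12.3f\n",
           n, exact, log2_n_fact_approx((double) n));
  }
  return 0;
}

For n=10 this prints roughly 21.79 (exact) against 21.78 (Stirling); the relative error shrinks further as n grows.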
+*/ + +inline double log2_n_fact(double x) +{ + return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2; +} + + +/* + Calculate cost of merge_buffers function call for given sequence of + input stream lengths and store the number of rows in result stream in *last. + + SYNOPSIS + get_merge_buffers_cost() + buff_elems Array of #s of elements in buffers + elem_size Size of element stored in buffer + first Pointer to first merged element size + last Pointer to last merged element size + + RETURN + Cost of merge_buffers operation in disk seeks. + + NOTES + It is assumed that no rows are eliminated during merge. + The cost is calculated as + + cost(read_and_write) + cost(merge_comparisons). + + All bytes in the sequences is read and written back during merge so cost + of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write) + + For comparisons cost calculations we assume that all merged sequences have + the same length, so each of total_buf_size elements will be added to a sort + heap with (n_buffers-1) elements. This gives the comparison cost: + + total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID; +*/ + +static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, + uint *first, uint *last) +{ + uint total_buf_elems= 0; + for (uint *pbuf= first; pbuf <= last; pbuf++) + total_buf_elems+= *pbuf; + *last= total_buf_elems; + + int n_buffers= last - first + 1; + + /* Using log2(n)=log(n)/log(2) formula */ + return 2*((double)total_buf_elems*elem_size) / IO_SIZE + + total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2); +} + + +/* + Calculate cost of merging buffers into one in Unique::get, i.e. calculate + how long (in terms of disk seeks) the two calls + merge_many_buffs(...); + merge_buffers(...); + will take. + + SYNOPSIS + get_merge_many_buffs_cost() + buffer buffer space for temporary data, at least + Unique::get_cost_calc_buff_size bytes + maxbuffer # of full buffers + max_n_elems # of elements in first maxbuffer buffers + last_n_elems # of elements in last buffer + elem_size size of buffer element + + NOTES + maxbuffer+1 buffers are merged, where first maxbuffer buffers contain + max_n_elems elements each and last buffer contains last_n_elems elements. + + The current implementation does a dumb simulation of merge_many_buffs + function actions. + + RETURN + Cost of merge in disk seeks. +*/ + +static double get_merge_many_buffs_cost(uint *buffer, + uint maxbuffer, uint max_n_elems, + uint last_n_elems, int elem_size) +{ + register int i; + double total_cost= 0.0; + uint *buff_elems= buffer; /* #s of elements in each of merged sequences */ + + /* + Set initial state: first maxbuffer sequences contain max_n_elems elements + each, last sequence contains last_n_elems elements. + */ + for(i = 0; i < (int)maxbuffer; i++) + buff_elems[i]= max_n_elems; + buff_elems[maxbuffer]= last_n_elems; + + /* + Do it exactly as merge_many_buff function does, calling + get_merge_buffers_cost to get cost of merge_buffers. + */ + if (maxbuffer >= MERGEBUFF2) + { + while (maxbuffer >= MERGEBUFF2) + { + uint lastbuff= 0; + for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF) + { + total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + buff_elems + i, + buff_elems + i + MERGEBUFF-1); + lastbuff++; + } + total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + buff_elems + i, + buff_elems + maxbuffer); + maxbuffer= lastbuff; + } + } + + /* Simulate final merge_buff call. 
*/ + total_cost += get_merge_buffers_cost(buff_elems, elem_size, + buff_elems, buff_elems + maxbuffer); + return total_cost; +} + + +/* + Calculate cost of using Unique for processing nkeys elements of size + key_size using max_in_memory_size memory. + + SYNOPSIS + Unique::get_use_cost() + buffer space for temporary data, use Unique::get_cost_calc_buff_size + to get # bytes needed. + nkeys # of elements in Unique + key_size size of each element in bytes + max_in_memory_size amount of memory Unique will be allowed to use + + RETURN + Cost in disk seeks. + + NOTES + cost(using_unique) = + cost(create_trees) + (see #1) + cost(merge) + (see #2) + cost(read_result) (see #3) + + 1. Cost of tree creation + For each Unique::put operation there will be 2*log2(n+1) element + comparisons, where n runs from 1 to tree_size (we assume that all added + elements are different). Together this gives: + + n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) + + then cost(tree_creation) = n_compares*ROWID_COMPARE_COST; + + Total cost of creating trees: + (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost. + + Approximate value of log2(N!) is calculated by the log2_n_fact function. + + 2. Cost of merging. + If only one tree is created by Unique no merging will be necessary. + Otherwise, we model execution of the merge_many_buff function and count + # of merges. (The reason behind this is that the number of buffers is small, + while the size of buffers is big and we don't want to lose precision with + an O(x)-style formula) + + 3. If only one tree is created by Unique no disk I/O will happen. + Otherwise, ceil(key_len*n_keys/IO_SIZE) disk seeks are necessary. We assume + these will be random seeks. +*/ + +double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, + ulong max_in_memory_size) +{ + ulong max_elements_in_tree; + ulong last_tree_elems; + int n_full_trees; /* number of trees in unique - 1 */ + double result; + + max_elements_in_tree= + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); + + n_full_trees= nkeys / max_elements_in_tree; + last_tree_elems= nkeys % max_elements_in_tree; + + /* Calculate cost of creating trees */ + result= 2*log2_n_fact(last_tree_elems + 1.0); + if (n_full_trees) + result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); + result /= TIME_FOR_COMPARE_ROWID; + + DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys, + n_full_trees, n_full_trees?max_elements_in_tree:0, + last_tree_elems)); + + if (!n_full_trees) + return result; + + /* + There is more than one tree and merging is necessary. + First, add cost of writing all trees to disk, assuming that all disk + writes are sequential. + */ + result += DISK_SEEK_BASE_COST * n_full_trees * + ceil(((double) key_size)*max_elements_in_tree / IO_SIZE); + result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE); + + /* Cost of merge */ + double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees, + max_elements_in_tree, + last_tree_elems, key_size); + if (merge_cost < 0.0) + return merge_cost; + + result += merge_cost; + /* + Add cost of reading the resulting sequence, assuming there were no + duplicate elements.
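Read alongside the function body above, the model condenses into a short standalone sketch; the constants below are illustrative stand-ins for the server's IO_SIZE, DISK_SEEK_BASE_COST and TIME_FOR_COMPARE_ROWID, and the merge phase is taken as a ready-made number instead of being simulated through merge_many_buff:

#include <math.h>

/* Illustrative stand-ins; the real values live in the server headers. */
static const double IO_SIZE_ASSUMED= 4096.0;
static const double DISK_SEEK_BASE_COST_ASSUMED= 1.0;
static const double TIME_FOR_COMPARE_ROWID_ASSUMED= 10.0;

/* log2(n!) via Stirling, as in log2_n_fact() above */
static double log2_n_fact_sketch(double x)
{
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
}

/*
  Condensed cost model: tree building + writing the trees out + merging +
  reading the merged result. merge_cost is a parameter here, whereas the
  real function obtains it from get_merge_many_buffs_cost().
*/
static double unique_use_cost_sketch(double nkeys, double key_size,
                                     double max_elems_in_tree,
                                     double merge_cost)
{
  double n_full_trees= floor(nkeys / max_elems_in_tree);
  double last_tree_elems= nkeys - n_full_trees * max_elems_in_tree;

  /* 1. comparisons spent inserting elements into the trees */
  double cost= 2*log2_n_fact_sketch(last_tree_elems + 1.0);
  if (n_full_trees)
    cost+= n_full_trees * log2_n_fact_sketch(max_elems_in_tree + 1.0);
  cost/= TIME_FOR_COMPARE_ROWID_ASSUMED;

  if (n_full_trees == 0.0)
    return cost;                     /* one in-memory tree: no disk I/O */

  /* 2. sequential writes of every flushed tree, then the merge itself */
  cost+= DISK_SEEK_BASE_COST_ASSUMED *
         (n_full_trees * ceil(key_size*max_elems_in_tree / IO_SIZE_ASSUMED) +
          ceil(key_size*last_tree_elems / IO_SIZE_ASSUMED));
  cost+= merge_cost;

  /* 3. reading the merged result back, assuming no duplicates */
  cost+= ceil(key_size*nkeys / IO_SIZE_ASSUMED);
  return cost;
}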
+ */ + result += ceil((double)key_size*nkeys/IO_SIZE); + + return result; +} + Unique::~Unique() { close_cached_file(&file); @@ -84,6 +327,7 @@ bool Unique::flush() elements+= tree.elements_in_tree; file_ptr.count=tree.elements_in_tree; file_ptr.file_pos=my_b_tell(&file); + if (tree_walk(&tree, (tree_walk_action) unique_write_to_file, (void*) this, left_root_right) || insert_dynamic(&file_ptrs, (gptr) &file_ptr)) @@ -94,6 +338,237 @@ bool Unique::flush() /* + Clear the tree and the file. + You must call reset() if you want to reuse Unique after walk(). +*/ + +void +Unique::reset() +{ + reset_tree(&tree); + /* + If elements != 0, some trees were stored in the file (see how + flush() works). Note, that we can not count on my_b_tell(&file) == 0 + here, because it can return 0 right after walk(), and walk() does not + reset any Unique member. + */ + if (elements) + { + reset_dynamic(&file_ptrs); + reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); + } + elements= 0; +} + +/* + The comparison function, passed to queue_init() in merge_walk() must + use comparison function of Uniques::tree, but compare members of struct + BUFFPEK. +*/ + +struct BUFFPEK_COMPARE_CONTEXT +{ + qsort_cmp2 key_compare; + void *key_compare_arg; +}; + +C_MODE_START + +static int buffpek_compare(void *arg, byte *key_ptr1, byte *key_ptr2) +{ + BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg; + return ctx->key_compare(ctx->key_compare_arg, + *((byte **) key_ptr1), *((byte **)key_ptr2)); +} + +C_MODE_END + + +/* + DESCRIPTION + Function is very similar to merge_buffers, but instead of writing sorted + unique keys to the output file, it invokes walk_action for each key. + This saves I/O if you need to pass through all unique keys only once. + SYNOPSIS + merge_walk() + All params are 'IN' (but see comment for begin, end): + merge_buffer buffer to perform cached piece-by-piece loading + of trees; initially the buffer is empty + merge_buffer_size size of merge_buffer. Must be aligned with + key_length + key_length size of tree element; key_length * (end - begin) + must be less or equal than merge_buffer_size. + begin pointer to BUFFPEK struct for the first tree. + end pointer to BUFFPEK struct for the last tree; + end > begin and [begin, end) form a consecutive + range. BUFFPEKs structs in that range are used and + overwritten in merge_walk(). + walk_action element visitor. Action is called for each unique + key. + walk_action_arg argument to walk action. Passed to it on each call. + compare elements comparison function + compare_arg comparison function argument + file file with all trees dumped. Trees in the file + must contain sorted unique values. Cache must be + initialized in read mode. 
+ RETURN VALUE + 0 ok + <> 0 error +*/ + +static bool merge_walk(uchar *merge_buffer, uint merge_buffer_size, + uint key_length, BUFFPEK *begin, BUFFPEK *end, + tree_walk_action walk_action, void *walk_action_arg, + qsort_cmp2 compare, void *compare_arg, + IO_CACHE *file) +{ + BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; + QUEUE queue; + if (end <= begin || + merge_buffer_size < key_length * (end - begin + 1) || + init_queue(&queue, end - begin, offsetof(BUFFPEK, key), 0, + buffpek_compare, &compare_context)) + return 1; + /* we need space for one key when a piece of merge buffer is re-read */ + merge_buffer_size-= key_length; + uchar *save_key_buff= merge_buffer + merge_buffer_size; + uint max_key_count_per_piece= merge_buffer_size/(end-begin)/key_length; + /* if piece_size is aligned reuse_freed_buffer will always hit */ + uint piece_size= max_key_count_per_piece * key_length; + uint bytes_read; /* to hold return value of read_to_buffer */ + BUFFPEK *top; + int res= 1; + /* + Invariant: queue must contain top element from each tree, until a tree + is not completely walked through. + Here we're forcing the invariant, inserting one element from each tree + to the queue. + */ + for (top= begin; top != end; ++top) + { + top->base= merge_buffer + (top - begin) * piece_size; + top->max_keys= max_key_count_per_piece; + bytes_read= read_to_buffer(file, top, key_length); + if (bytes_read == (uint) (-1)) + goto end; + DBUG_ASSERT(bytes_read); + queue_insert(&queue, (byte *) top); + } + top= (BUFFPEK *) queue_top(&queue); + while (queue.elements > 1) + { + /* + Every iteration one element is removed from the queue, and one is + inserted by the rules of the invariant. If two adjacent elements on + the top of the queue are not equal, biggest one is unique, because all + elements in each tree are unique. Action is applied only to unique + elements. + */ + void *old_key= top->key; + /* + read next key from the cache or from the file and push it to the + queue; this gives new top. + */ + top->key+= key_length; + if (--top->mem_count) + queue_replaced(&queue); + else /* next piece should be read */ + { + /* save old_key not to overwrite it in read_to_buffer */ + memcpy(save_key_buff, old_key, key_length); + old_key= save_key_buff; + bytes_read= read_to_buffer(file, top, key_length); + if (bytes_read == (uint) (-1)) + goto end; + else if (bytes_read > 0) /* top->key, top->mem_count are reset */ + queue_replaced(&queue); /* in read_to_buffer */ + else + { + /* + Tree for old 'top' element is empty: remove it from the queue and + give all its memory to the nearest tree. + */ + queue_remove(&queue, 0); + reuse_freed_buff(&queue, top, key_length); + } + } + top= (BUFFPEK *) queue_top(&queue); + /* new top has been obtained; if old top is unique, apply the action */ + if (compare(compare_arg, old_key, top->key)) + { + if (walk_action(old_key, 1, walk_action_arg)) + goto end; + } + } + /* + Applying walk_action to the tail of the last tree: this is safe because + either we had only one tree in the beginning, either we work with the + last tree in the queue. 
+ */ + do + { + do + { + if (walk_action(top->key, 1, walk_action_arg)) + goto end; + top->key+= key_length; + } + while (--top->mem_count); + bytes_read= read_to_buffer(file, top, key_length); + if (bytes_read == (uint) (-1)) + goto end; + } + while (bytes_read); + res= 0; +end: + delete_queue(&queue); + return res; +} + + +/* + DESCRIPTION + Walks consecutively through all unique elements: + if all elements are in memory, then it simply invokes 'tree_walk', else + all flushed trees are loaded into memory piece-by-piece, pieces are + sorted, and action is called for each unique value. + Note: since merging resets file_ptrs state, this method can change + the internal Unique state to undefined: if you want to reuse Unique after + walk() you must call reset() first! + SYNOPSIS + Unique::walk() + All params are 'IN': + action function-visitor, typed in include/my_tree.h; + the function is called for each unique element + arg argument for visitor, which is passed to it on each call + RETURN VALUE + 0 OK + <> 0 error + */ + +bool Unique::walk(tree_walk_action action, void *walk_action_arg) +{ + if (elements == 0) /* the whole tree is in memory */ + return tree_walk(&tree, action, walk_action_arg, left_root_right); + + /* flush current tree to the file to have some memory for merge buffer */ + if (flush()) + return 1; + if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0)) + return 1; + uchar *merge_buffer= (uchar *) my_malloc(max_in_memory_size, MYF(0)); + if (merge_buffer == 0) + return 1; + int res= merge_walk(merge_buffer, max_in_memory_size, size, + (BUFFPEK *) file_ptrs.buffer, + (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements, + action, walk_action_arg, + tree.compare, tree.custom_arg, &file); + x_free(merge_buffer); + return res; +} + +/* Modify the TABLE element so that when one calls init_records() the rows will be read in priority order. */ @@ -114,7 +589,7 @@ bool Unique::get(TABLE *table) return 0; } } - /* Not enough memory; Save the result to file */ + /* Not enough memory; Save the result to file && free memory used by tree */ if (flush()) return 1;
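Stripped of the BUFFPEK/IO_CACHE plumbing, the merge_walk()/Unique::walk() pair above is a k-way merge over sorted runs with a duplicate-suppressing visitor. A self-contained sketch of the same control flow (the names and the in-memory runs are illustrative only):

#include <stdio.h>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

typedef void (*walk_action)(int key);

/* Merge sorted runs and invoke action once per distinct key, the same idea
   merge_walk() implements over trees flushed to a temporary file. */
static void merge_walk_sketch(const std::vector<std::vector<int> > &runs,
                              walk_action action)
{
  /* (value, (run index, position)), ordered as a min-heap by value */
  typedef std::pair<int, std::pair<size_t, size_t> > Elem;
  std::priority_queue<Elem, std::vector<Elem>, std::greater<Elem> > q;

  for (size_t r= 0; r < runs.size(); r++)
    if (!runs[r].empty())
      q.push(Elem(runs[r][0], std::make_pair(r, (size_t) 0)));

  bool have_prev= false;
  int prev= 0;
  while (!q.empty())
  {
    Elem top= q.top();
    q.pop();
    if (!have_prev || top.first != prev)    /* act only on distinct keys */
    {
      action(top.first);
      have_prev= true;
      prev= top.first;
    }
    /* keep the invariant: one element from every non-exhausted run */
    size_t r= top.second.first, pos= top.second.second + 1;
    if (pos < runs[r].size())
      q.push(Elem(runs[r][pos], std::make_pair(r, pos)));
  }
}

static void print_key(int key) { printf("%d\n", key); }

int main()
{
  std::vector<std::vector<int> > runs(3);
  int a[]= {1, 4, 7}, b[]= {2, 4, 9}, c[]= {4, 7, 10};
  runs[0].assign(a, a + 3);
  runs[1].assign(b, b + 3);
  runs[2].assign(c, c + 3);
  merge_walk_sketch(runs, print_key);       /* prints 1 2 4 7 9 10 */
  return 0;
}

The real code additionally refills each run from the IO_CACHE one piece at a time and hands freed buffer space to a neighbouring run, but the queue invariant and the distinct-key check are the same.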