1 files changed, 1217 insertions, 497 deletions
diff --git a/sql/handler.cc b/sql/handler.cc
index f14564b6629..965c3d26f49 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -25,10 +25,6 @@
 #include "ha_heap.h"
 #include "ha_myisam.h"
 #include "ha_myisammrg.h"
-#ifdef HAVE_ISAM
-#include "ha_isam.h"
-#include "ha_isammrg.h"
-#endif
 #ifdef HAVE_BERKELEY_DB
 #include "ha_berkeley.h"
 #endif
@@ -50,29 +46,34 @@
 #ifdef HAVE_NDBCLUSTER_DB
 #include "ha_ndbcluster.h"
 #endif
+#ifdef HAVE_FEDERATED_DB
+#include "ha_federated.h"
+#endif
 #include <myisampack.h>
 #include <errno.h>
 
 	/* static functions defined in this file */
 
-static int NEAR_F delete_file(const char *name,const char *ext,int extflag);
+static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES;
 
-ulong ha_read_count, ha_write_count, ha_delete_count, ha_update_count,
-      ha_read_key_count, ha_read_next_count, ha_read_prev_count,
-      ha_read_first_count, ha_read_last_count,
-      ha_commit_count, ha_rollback_count,
-      ha_read_rnd_count, ha_read_rnd_next_count, ha_discover_count;
+/* list of all available storage engines (of their handlertons) */
+handlerton *handlertons[MAX_HA]={0};
 
-static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES;
+/* number of entries in handlertons[] */
+ulong total_ha;
+/* number of storage engines (from handlertons[]) that support 2pc */
+ulong total_ha_2pc;
+/* size of savepoint storage area (see ha_init) */
+ulong savepoint_alloc_size;
 
 struct show_table_type_st sys_table_types[]=
 {
   {"MyISAM",	&have_yes,
    "Default engine as of MySQL 3.23 with great performance", DB_TYPE_MYISAM},
-  {"HEAP",	&have_yes,
-   "Alias for MEMORY", DB_TYPE_HEAP},
   {"MEMORY",	&have_yes,
    "Hash based, stored in memory, useful for temporary tables", DB_TYPE_HEAP},
+  {"HEAP",	&have_yes,
+   "Alias for MEMORY", DB_TYPE_HEAP},
   {"MERGE",	&have_yes,
    "Collection of identical MyISAM tables", DB_TYPE_MRG_MYISAM},
   {"MRG_MYISAM",&have_yes,
@@ -99,13 +100,16 @@ struct show_table_type_st sys_table_types[]=
    "Archive storage engine", DB_TYPE_ARCHIVE_DB},
   {"CSV",&have_csv_db,
    "CSV storage engine", DB_TYPE_CSV_DB},
+  {"FEDERATED",&have_federated_db,
+   "Federated MySQL storage engine", DB_TYPE_FEDERATED_DB},
   {"BLACKHOLE",&have_blackhole_db,
-   "Storage engine designed to act as null storage", DB_TYPE_BLACKHOLE_DB},
+   "/dev/null storage engine (anything you write to it disappears)",
+   DB_TYPE_BLACKHOLE_DB},
   {NullS, NULL, NullS, DB_TYPE_UNKNOWN}
 };
 
 const char *ha_row_type[] = {
-  "", "FIXED", "DYNAMIC", "COMPRESSED","?","?","?"
+  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "?","?","?"
 };
 
 const char *tx_isolation_names[] =
@@ -119,11 +123,11 @@ uint known_extensions_id= 0;
 
 enum db_type ha_resolve_by_name(const char *name, uint namelen)
 {
-  THD *thd=current_thd;
+  THD *thd= current_thd;
   if (thd && !my_strcasecmp(&my_charset_latin1, name, "DEFAULT")) {
     return (enum db_type) thd->variables.table_type;
   }
-  
+
   show_table_type_st *types;
   for (types= sys_table_types; types->type; types++)
   {
@@ -141,7 +145,7 @@ const char *ha_get_storage_engine(enum db_type db_type)
     if (db_type == types->db_type)
       return types->type;
   }
-  
+
   return "none";
 }
 
@@ -163,6 +167,7 @@ my_bool ha_storage_engine_is_enabled(enum db_type database_type)
 
 enum db_type ha_checktype(enum db_type database_type)
 {
+  THD *thd;
   if (ha_storage_engine_is_enabled(database_type))
     return database_type;
 
@@ -177,12 +182,13 @@ enum db_type ha_checktype(enum db_type database_type)
     break;
   }
   
-  return 
-    DB_TYPE_UNKNOWN != (enum db_type) current_thd->variables.table_type ?
-    (enum db_type) current_thd->variables.table_type :
-    DB_TYPE_UNKNOWN != (enum db_type) global_system_variables.table_type ?
-    (enum db_type) global_system_variables.table_type :
-    DB_TYPE_MYISAM;
+  thd= current_thd;
+  return ((enum db_type) thd->variables.table_type != DB_TYPE_UNKNOWN ?
+          (enum db_type) thd->variables.table_type :
+          ((enum db_type) global_system_variables.table_type !=
+           DB_TYPE_UNKNOWN ?
+           (enum db_type) global_system_variables.table_type : DB_TYPE_MYISAM)
+          );
 } /* ha_checktype */
 
 
@@ -222,6 +228,10 @@ handler *get_new_handler(TABLE *table, enum db_type db_type)
   case DB_TYPE_BLACKHOLE_DB:
     return new ha_blackhole(table);
 #endif
+#ifdef HAVE_FEDERATED_DB
+  case DB_TYPE_FEDERATED_DB:
+    return new ha_federated(table);
+#endif
 #ifdef HAVE_CSV_DB
   case DB_TYPE_CSV_DB:
     return new ha_tina(table);
@@ -247,65 +257,190 @@ handler *get_new_handler(TABLE *table, enum db_type db_type)
   }
 }
 
-bool ha_caching_allowed(THD* thd, char* table_key,
-                        uint key_length, uint8 cache_type)
+/*
+  Register handler error messages for use with my_error().
+
+  SYNOPSIS
+    ha_init_errors()
+
+  RETURN
+    0           OK
+    != 0        Error
+*/
+
+static int ha_init_errors(void)
 {
-#ifdef HAVE_INNOBASE_DB
-  if (cache_type == HA_CACHE_TBL_ASKTRANSACT)
-    return innobase_query_caching_of_table_permitted(thd, table_key, key_length);
-#endif
-  return 1;
+#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
+  const char    **errmsgs;
+
+  /* Allocate a pointer array for the error message strings. */
+  /* Zerofill it to avoid uninitialized gaps. */
+  if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
+                                           MYF(MY_WME | MY_ZEROFILL))))
+    return 1;
+
+  /* Set the dedicated error messages. */
+  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
+  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
+  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
+  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
+  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
+  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
+  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
+  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
+  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
+  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
+  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
+  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
+  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
+  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
+  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
+  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
+  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
+  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
+  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
+  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
+  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
+  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
+  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
+  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
+  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
+  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
+  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
+  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
+  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW));
+  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED));
+  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
+  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
+  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
+  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
+  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
+  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
+
+  /* Register the error messages for use with my_error(). */
+  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
+}
+
+
+/*
+  Unregister handler error messages and release their pointer array.
+
+  SYNOPSIS
+    ha_finish_errors()
+
+  RETURN
+    0           OK
+    != 0        Error
+*/
+
+static int ha_finish_errors(void)
+{
+  const char    **errmsgs;
+
+  /* Unregister the messages and free the pointer array handed back. */
+  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
+    return 1;
+  my_free((gptr) errmsgs, MYF(0));
+  return 0;
+}
+
+
+static inline void ha_was_inited_ok(handlerton **ht)
+{
+  uint tmp= (*ht)->savepoint_offset;
+  (*ht)->savepoint_offset= savepoint_alloc_size;
+  savepoint_alloc_size+= tmp;
+  (*ht)->slot= total_ha++;
+  if ((*ht)->prepare)
+    total_ha_2pc++;
 }
 
 int ha_init()
 {
   int error= 0;
+  handlerton **ht= handlertons;
+  total_ha= savepoint_alloc_size= 0;
+
+  if (ha_init_errors())
+    return 1;
+
+  if (opt_bin_log)
+  {
+    if (!(*ht= binlog_init()))                  // Always succeed
+    {
+      mysql_bin_log.close(LOG_CLOSE_INDEX);     // Never used
+      opt_bin_log= 0;                           // Never used
+      error= 1;                                 // Never used
+    }
+    else
+      ha_was_inited_ok(ht++);
+  }
 #ifdef HAVE_BERKELEY_DB
   if (have_berkeley_db == SHOW_OPTION_YES)
   {
-    if (berkeley_init())
+    if (!(*ht= berkeley_init()))
     {
       have_berkeley_db= SHOW_OPTION_DISABLED;	// If we couldn't use handler
       error= 1;
     }
     else
-      opt_using_transactions=1;
+      ha_was_inited_ok(ht++);
   }
 #endif
 #ifdef HAVE_INNOBASE_DB
   if (have_innodb == SHOW_OPTION_YES)
   {
-    if (innobase_init())
+    if (!(*ht= innobase_init()))
     {
       have_innodb= SHOW_OPTION_DISABLED;	// If we couldn't use handler
       error= 1;
     }
     else
-      opt_using_transactions=1;
+      ha_was_inited_ok(ht++);
   }
 #endif
 #ifdef HAVE_NDBCLUSTER_DB
   if (have_ndbcluster == SHOW_OPTION_YES)
   {
-    if (ndbcluster_init())
+    if (!(*ht= ndbcluster_init()))
     {
       have_ndbcluster= SHOW_OPTION_DISABLED;
       error= 1;
     }
     else
-      opt_using_transactions=1;
+      ha_was_inited_ok(ht++);
+  }
+#endif
+#ifdef HAVE_FEDERATED_DB
+  if (have_federated_db == SHOW_OPTION_YES)
+  {
+    if (federated_db_init())
+    {
+      have_federated_db= SHOW_OPTION_DISABLED;
+      error= 1;
+    }
   }
 #endif
 #ifdef HAVE_ARCHIVE_DB
   if (have_archive_db == SHOW_OPTION_YES)
   {
-    if (archive_db_init())
+    if (!(*ht= archive_db_init()))
     {
       have_archive_db= SHOW_OPTION_DISABLED;
       error= 1;
     }
+    else
+      ha_was_inited_ok(ht++);
   }
 #endif
+  DBUG_ASSERT(total_ha < MAX_HA);
+  /*
+    Check if there is a transaction-capable storage engine besides the
+    binary log (which is considered a transaction-capable storage engine in
+    counting total_ha)
+  */
+  opt_using_transactions= total_ha>(ulong)opt_bin_log;
+  savepoint_alloc_size+= sizeof(SAVEPOINT);
   return error;
 }
 
@@ -337,10 +472,16 @@ int ha_panic(enum ha_panic_function flag)
   if (have_ndbcluster == SHOW_OPTION_YES)
     error|=ndbcluster_end();
 #endif
+#ifdef HAVE_FEDERATED_DB
+  if (have_federated_db == SHOW_OPTION_YES)
+    error|= federated_db_end();
+#endif
 #ifdef HAVE_ARCHIVE_DB
   if (have_archive_db == SHOW_OPTION_YES)
     error|= archive_db_end();
 #endif
+  if (ha_finish_errors())
+    error= 1;
   return error;
 } /* ha_panic */
 
@@ -356,16 +497,238 @@ void ha_drop_database(char* path)
 #endif
 }
 
+/* don't bother to rollback here, it's done already */
 void ha_close_connection(THD* thd)
 {
-#ifdef HAVE_INNOBASE_DB
-  if (have_innodb == SHOW_OPTION_YES)
-    innobase_close_connection(thd);
-#endif
-#ifdef HAVE_NDBCLUSTER_DB
-  if (have_ndbcluster == SHOW_OPTION_YES)
-    ndbcluster_close_connection(thd);
+  for (uint i=0; i < total_ha; i++)
+    if (thd->ha_data[i])
+      (*handlertons[i]->close_connection)(thd);
+}
+
+/* ========================================================================
+ ======================= TRANSACTIONS ===================================*/
+
+/*
+  Register a storage engine for a transaction
+
+  DESCRIPTION
+    Every storage engine MUST call this function when it starts
+    a transaction or a statement (that is it must be called both for the
+    "beginning of transaction" and "beginning of statement").
+    Only storage engines registered for the transaction/statement
+    will know when to commit/rollback it.
+
+  NOTE
+    trans_register_ha is idempotent - storage engine may register many
+    times per transaction.
+
+*/
+void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
+{
+  THD_TRANS *trans;
+  handlerton **ht;
+  DBUG_ENTER("trans_register_ha");
+  DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
+
+  if (all)
+  {
+    trans= &thd->transaction.all;
+    thd->server_status|= SERVER_STATUS_IN_TRANS;
+  }
+  else
+    trans= &thd->transaction.stmt;
+
+  for (ht=trans->ht; *ht; ht++)
+    if (*ht == ht_arg)
+      DBUG_VOID_RETURN;  /* already registered, return */
+
+  trans->ht[trans->nht++]=ht_arg;
+  DBUG_ASSERT(*ht == ht_arg);
+  trans->no_2pc|=(ht_arg->prepare==0);
+  if (thd->transaction.xid.is_null())
+    thd->transaction.xid.set(thd->query_id);
+  DBUG_VOID_RETURN;
+}
+
+/*
+  RETURN
+      0  - ok
+      1  - error, transaction was rolled back
+*/
+int ha_prepare(THD *thd)
+{
+  int error=0, all=1;
+  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+  handlerton **ht=trans->ht;
+  DBUG_ENTER("ha_prepare");
+#ifdef USING_TRANSACTIONS
+  if (trans->nht)
+  {
+    for (; *ht; ht++)
+    {
+      int err;
+      statistic_increment(thd->status_var.ha_prepare_count,&LOCK_status);
+      if ((err= (*(*ht)->prepare)(thd, all)))
+      {
+        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+        ha_rollback_trans(thd, all);
+        error=1;
+        break;
+      }
+    }
+  }
+#endif /* USING_TRANSACTIONS */
+  DBUG_RETURN(error);
+}
+
+/*
+  RETURN
+      0  - ok
+      1  - transaction was rolled back
+      2  - error during commit, data may be inconsistent
+*/
+int ha_commit_trans(THD *thd, bool all)
+{
+  int error= 0, cookie= 0;
+  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
+  bool is_real_trans= all || thd->transaction.all.nht == 0;
+  handlerton **ht= trans->ht;
+  my_xid xid= thd->transaction.xid.get_my_xid();
+  DBUG_ENTER("ha_commit_trans");
+#ifdef USING_TRANSACTIONS
+  if (trans->nht)
+  {
+    if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
+    {
+      ha_rollback_trans(thd, all);
+      DBUG_RETURN(1);
+    }
+    DBUG_EXECUTE_IF("crash_commit_before", abort(););
+    if (!trans->no_2pc && trans->nht > 1)
+    {
+      for (; *ht && !error; ht++)
+      {
+        int err;
+        if ((err= (*(*ht)->prepare)(thd, all)))
+        {
+          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+          error= 1;
+        }
+        statistic_increment(thd->status_var.ha_prepare_count,&LOCK_status);
+      }
+      DBUG_EXECUTE_IF("crash_commit_after_prepare", abort(););
+      if (error || (is_real_trans && xid &&
+                    (error= !(cookie= tc_log->log(thd, xid)))))
+      {
+        ha_rollback_trans(thd, all);
+        error= 1;
+        goto end;
+      }
+      DBUG_EXECUTE_IF("crash_commit_after_log", abort(););
+    }
+    error=ha_commit_one_phase(thd, all) ? cookie ? 2 : 1 : 0;
+    DBUG_EXECUTE_IF("crash_commit_before_unlog", abort(););
+    if (cookie)
+      tc_log->unlog(cookie, xid);
+    DBUG_EXECUTE_IF("crash_commit_after", abort(););
+end:
+    if (is_real_trans)
+      start_waiting_global_read_lock(thd);
+  }
+#endif /* USING_TRANSACTIONS */
+  DBUG_RETURN(error);
+}
+
+/*
+  NOTE - this function does not care about global read lock.
+  A caller should.
+*/
+int ha_commit_one_phase(THD *thd, bool all)
+{
+  int error=0;
+  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+  bool is_real_trans=all || thd->transaction.all.nht == 0;
+  handlerton **ht=trans->ht;
+  DBUG_ENTER("ha_commit_one_phase");
+#ifdef USING_TRANSACTIONS
+  if (trans->nht)
+  {
+    for (ht=trans->ht; *ht; ht++)
+    {
+      int err;
+      if ((err= (*(*ht)->commit)(thd, all)))
+      {
+        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+        error=1;
+      }
+      statistic_increment(thd->status_var.ha_commit_count,&LOCK_status);
+      *ht= 0;
+    }
+    trans->nht=0;
+    trans->no_2pc=0;
+    if (is_real_trans)
+      thd->transaction.xid.null();
+    if (all)
+    {
+#ifdef HAVE_QUERY_CACHE
+      if (thd->transaction.changed_tables)
+        query_cache.invalidate(thd->transaction.changed_tables);
 #endif
+      thd->variables.tx_isolation=thd->session_tx_isolation;
+      thd->transaction.cleanup();
+    }
+  }
+#endif /* USING_TRANSACTIONS */
+  DBUG_RETURN(error);
+}
+
+
+int ha_rollback_trans(THD *thd, bool all)
+{
+  int error=0;
+  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+  bool is_real_trans=all || thd->transaction.all.nht == 0;
+  DBUG_ENTER("ha_rollback_trans");
+#ifdef USING_TRANSACTIONS
+  if (trans->nht)
+  {
+    for (handlerton **ht=trans->ht; *ht; ht++)
+    {
+      int err;
+      if ((err= (*(*ht)->rollback)(thd, all)))
+      { // cannot happen
+        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
+        error=1;
+      }
+      statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status);
+      *ht= 0;
+    }
+    trans->nht=0;
+    trans->no_2pc=0;
+    if (is_real_trans)
+      thd->transaction.xid.null();
+    if (all)
+    {
+      thd->variables.tx_isolation=thd->session_tx_isolation;
+      thd->transaction.cleanup();
+    }
+  }
+#endif /* USING_TRANSACTIONS */
+  /*
+    If a non-transactional table was updated, warn; don't warn if this is a
+    slave thread (because when a slave thread executes a ROLLBACK, it has
+    been read from the binary log, so it's 100% sure and normal to produce
+    error ER_WARNING_NOT_COMPLETE_ROLLBACK). If we sent the warning to the
+    slave SQL thread, it would not stop the thread but just be printed in
+    the error log; but we don't want users to wonder why they have this
+    message in the error log, so we don't send it.
+  */
+  if (is_real_trans && (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) &&
+      !thd->slave_thread)
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
+                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
+  DBUG_RETURN(error);
 }
 
 /*
@@ -381,7 +744,7 @@ int ha_autocommit_or_rollback(THD *thd, int error)
 {
   DBUG_ENTER("ha_autocommit_or_rollback");
 #ifdef USING_TRANSACTIONS
-  if (opt_using_transactions)
+  if (thd->transaction.stmt.nht)
   {
     if (!error)
     {
@@ -397,83 +760,264 @@ int ha_autocommit_or_rollback(THD *thd, int error)
   DBUG_RETURN(error);
 }
 
+int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
+{
+  handlerton **ht= handlertons, **end_ht=ht+total_ha;
+  int res= 1;
+
+  for ( ; ht < end_ht ; ht++)
+    if ((*ht)->recover)
+      res= res &&
+        (*(commit ? (*ht)->commit_by_xid : (*ht)->rollback_by_xid))(xid);
+  return res;
+}
+
+#ifndef DBUG_OFF
+/* Format xid into buf as 'gtrid' or 'gtrid'.'bqual' (octal-escaped) and
+   return buf; buf needs XIDDATASIZE*4+6 bytes. Not multi-byte safe. */
+static char* xid_to_str(char *buf, XID *xid)
+{
+  char *s=buf;
+  *s++='\'';
+  for (int i=0; i < xid->gtrid_length+xid->bqual_length; i++)
+  {
+    uchar c=(uchar)xid->data[i];
+    /* is_next_dig is set if next character is a number */
+    bool is_next_dig= FALSE;
+    if (i + 1 < XIDDATASIZE)            /* never read past xid->data[] */
+    {
+      char ch= xid->data[i+1];
+      is_next_dig= (ch >= '0' && ch <='9');
+    }
+    if (i == xid->gtrid_length)
+    {
+      *s++='\'';
+      if (xid->bqual_length)
+      {
+        *s++='.';
+        *s++='\'';
+      }
+    }
+    if (c < 32 || c > 126)
+    {
+      *s++='\\';
+      /*
+        If next character is a number, write current character with
+        3 octal numbers to ensure that the next number is not seen
+        as part of the octal number
+      */
+      if (c > 077 || is_next_dig)
+        *s++=_dig_vec_lower[c >> 6];
+      if (c > 007 || is_next_dig)
+        *s++=_dig_vec_lower[(c >> 3) & 7];
+      *s++=_dig_vec_lower[c & 7];
+    }
+    else
+    {
+      if (c == '\'' || c == '\\')
+        *s++='\\';
+      *s++=c;
+    }
+  }
+  *s++='\'';
+  *s=0;
+  return buf;
+}
+#endif
+
 /*
-  This function is called when MySQL writes the log segment of a
-  transaction to the binlog. It is called when the LOCK_log mutex is
-  reserved. Here we communicate to transactional table handlers what
-  binlog position corresponds to the current transaction. The handler
-  can store it and in recovery print to the user, so that the user
-  knows from what position in the binlog to start possible
-  roll-forward, for example, if the crashed server was a slave in
-  replication. This function also calls the commit of the table
-  handler, because the order of transactions in the log of the table
-  handler must be the same as in the binlog.
-  NOTE that to eliminate the bottleneck of the group commit, we do not
-  flush the handler log files here, but only later in a call of
-  ha_commit_complete().
+  recover() step of xa
 
-  arguments:
-  thd:           the thread handle of the current connection
-  log_file_name: latest binlog file name
-  end_offset:	 the offset in the binlog file up to which we wrote
-  return value:  0 if success, 1 if error
-*/
+  NOTE
+   there are three modes of operation:
 
-int ha_report_binlog_offset_and_commit(THD *thd,
-				       char *log_file_name,
-				       my_off_t end_offset)
+   - automatic recover after a crash
+     in this case commit_list != 0, tc_heuristic_recover==0
+     all xids from commit_list are committed, others are rolled back
+
+   - manual (heuristic) recover
+     in this case commit_list==0, tc_heuristic_recover != 0
+     DBA has explicitly specified that all prepared transactions should
+     be committed (or rolled back).
+
+   - no recovery (MySQL did not detect a crash)
+     in this case commit_list==0, tc_heuristic_recover == 0
+     there should be no prepared transactions in this case.
+*/
+int ha_recover(HASH *commit_list)
 {
-  int  error= 0;
-#ifdef HAVE_INNOBASE_DB
-  THD_TRANS *trans;
-  trans = &thd->transaction.all;
-  if (trans->innodb_active_trans)
+  int len, got, found_foreign_xids=0, found_my_xids=0;
+  handlerton **ht= handlertons, **end_ht=ht+total_ha;
+  XID *list=0;
+  bool dry_run=(commit_list==0 && tc_heuristic_recover==0);
+  DBUG_ENTER("ha_recover");
+
+  /* commit_list and tc_heuristic_recover cannot be set both */
+  DBUG_ASSERT(commit_list==0 || tc_heuristic_recover==0);
+  /* if either is set, total_ha_2pc must be set too */
+  DBUG_ASSERT(dry_run || total_ha_2pc>(ulong)opt_bin_log);
+
+  if (total_ha_2pc <= (ulong)opt_bin_log)
+    DBUG_RETURN(0);
+
+  if (commit_list)
+    sql_print_information("Starting crash recovery...");
+
+#ifndef WILL_BE_DELETED_LATER
+  /*
+    for now, only InnoDB supports 2pc. It means we can always safely
+    rollback all pending transactions, without risking inconsistent data
+  */
+  DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
+  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
+  dry_run=FALSE;
+#endif
+
+  for (len= MAX_XID_LIST_SIZE ; list==0 && len > MIN_XID_LIST_SIZE; len/=2)
   {
-    /*
-      If we updated some InnoDB tables (innodb_active_trans is true), the
-      binlog coords will be reported into InnoDB during the InnoDB commit
-      (innobase_report_binlog_offset_and_commit). But if we updated only
-      non-InnoDB tables, we need an explicit call to report it.
-    */
-    if ((error=innobase_report_binlog_offset_and_commit(thd,
-                                                        trans->innobase_tid,
-                                                        log_file_name,
-                                                        end_offset)))
+    list=(XID *)my_malloc(len*sizeof(XID), MYF(0));
+  }
+  if (!list)
+  {
+    sql_print_error(ER(ER_OUTOFMEMORY), len*sizeof(XID));
+    DBUG_RETURN(1);
+  }
+
+  for ( ; ht < end_ht ; ht++)
+  {
+    if (!(*ht)->recover)
+      continue;
+    while ((got=(*(*ht)->recover)(list, len)) > 0 )
     {
-      my_error(ER_ERROR_DURING_COMMIT, MYF(0), error);
-      error=1;
+      sql_print_information("Found %d prepared transaction(s) in %s",
+                            got, (*ht)->name);
+      for (int i=0; i < got; i ++)
+      {
+        my_xid x=list[i].get_my_xid();
+        if (!x) // not "mine" - that is generated by external TM
+        {
+#ifndef DBUG_OFF
+          char buf[XIDDATASIZE*4+6]; // see xid_to_str
+          sql_print_information("ignore xid %s", xid_to_str(buf, list+i));
+#endif
+          found_foreign_xids++;
+          continue;
+        }
+        if (dry_run)
+        {
+          found_my_xids++;
+          continue;
+        }
+        // recovery mode
+        if (commit_list ?
+            hash_search(commit_list, (byte *)&x, sizeof(x)) != 0 :
+            tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
+        {
+#ifndef DBUG_OFF
+          char buf[XIDDATASIZE*4+6]; // see xid_to_str
+          sql_print_information("commit xid %s", xid_to_str(buf, list+i));
+#endif
+          (*(*ht)->commit_by_xid)(list+i);
+        }
+        else
+        {
+#ifndef DBUG_OFF
+          char buf[XIDDATASIZE*4+6]; // see xid_to_str
+          sql_print_information("rollback xid %s", xid_to_str(buf, list+i));
+#endif
+          (*(*ht)->rollback_by_xid)(list+i);
+        }
+      }
+      if (got < len)
+        break;
     }
   }
-  else if (opt_innodb_safe_binlog) // Don't report if not useful
-    innobase_store_binlog_offset_and_flush_log(log_file_name, end_offset);
-#endif
-  return error;
+  my_free((gptr)list, MYF(0));
+  if (found_foreign_xids)
+    sql_print_warning("Found %d prepared XA transactions", found_foreign_xids);
+  if (dry_run && found_my_xids)
+  {
+    sql_print_error("Found %d prepared transactions! It means that mysqld was "
+                    "not shut down properly last time and critical recovery "
+                    "information (last binlog or %s file) was manually deleted "
+                    "after a crash. You have to start mysqld with "
+                    "--tc-heuristic-recover switch to commit or rollback "
+                    "pending transactions.",
+                    found_my_xids, opt_tc_log_file);
+    DBUG_RETURN(1);
+  }
+  if (commit_list)
+    sql_print_information("Crash recovery finished.");
+  DBUG_RETURN(0);
 }
 
 /*
-  Flushes the handler log files (if my.cnf settings do not free us from it)
-  after we have called ha_report_binlog_offset_and_commit(). To eliminate
-  the bottleneck from the group commit, this should be called when
-  LOCK_log has been released in log.cc.
+  return the list of XID's to a client, the same way SHOW commands do
 
-  arguments:
-  thd:           the thread handle of the current connection
-  return value:  always 0
+  NOTE
+    I didn't find in XA specs that an RM cannot return the same XID twice,
+    so mysql_xa_recover does not filter XID's to ensure uniqueness.
+    It can be easily fixed later, if necessary.
 */
-
-int ha_commit_complete(THD *thd)
+bool mysql_xa_recover(THD *thd)
 {
-#ifdef HAVE_INNOBASE_DB
-  THD_TRANS *trans;
-  trans = &thd->transaction.all;
-  if (trans->innobase_tid)
+  List<Item> field_list;
+  Protocol *protocol= thd->protocol;
+  handlerton **ht= handlertons, **end_ht=ht+total_ha;
+  bool error=TRUE;
+  int len, got;
+  XID *list=0;
+  DBUG_ENTER("mysql_xa_recover");
+
+  field_list.push_back(new Item_int("formatID",0,11));
+  field_list.push_back(new Item_int("gtrid_length",0,11));
+  field_list.push_back(new Item_int("bqual_length",0,11));
+  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
+
+  if (protocol->send_fields(&field_list,
+                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    DBUG_RETURN(TRUE);
+
+  for (len= MAX_XID_LIST_SIZE ; list==0 && len > MIN_XID_LIST_SIZE; len/=2)
+  {
+    list=(XID *)my_malloc(len*sizeof(XID), MYF(0));
+  }
+  if (!list)
   {
-    innobase_commit_complete(trans->innobase_tid);
+    my_error(ER_OUTOFMEMORY, MYF(0), len);
+    DBUG_RETURN(1);
+  }
 
-    trans->innodb_active_trans=0;
+  for ( ; ht < end_ht ; ht++)
+  {
+    if (!(*ht)->recover)
+      continue;
+    while ((got=(*(*ht)->recover)(list, len)) > 0 )
+    {
+      XID *xid, *end;
+      for (xid=list, end=list+got; xid < end; xid++)
+      {
+        if (xid->get_my_xid())
+          continue; // skip "our" xids
+        protocol->prepare_for_resend();
+        protocol->store_longlong((longlong)xid->formatID, FALSE);
+        protocol->store_longlong((longlong)xid->gtrid_length, FALSE);
+        protocol->store_longlong((longlong)xid->bqual_length, FALSE);
+        protocol->store(xid->data, xid->gtrid_length+xid->bqual_length,
+                        &my_charset_bin);
+        if (protocol->write())
+          goto err;
+      }
+      if (got < len)
+        break;
+    }
   }
-#endif
-  return 0;
+
+  error=FALSE;
+  send_eof(thd);
+err:
+  my_free((gptr)list, MYF(0));
+  DBUG_RETURN(error);
 }
 
 /*
@@ -496,313 +1040,126 @@ int ha_commit_complete(THD *thd)
 int ha_release_temporary_latches(THD *thd)
 {
 #ifdef HAVE_INNOBASE_DB
-  THD_TRANS *trans;
-  trans = &thd->transaction.all;
-  if (trans->innobase_tid)
-    innobase_release_temporary_latches(trans->innobase_tid);
+  if (opt_innodb)
+    innobase_release_temporary_latches(thd);
 #endif
   return 0;
 }
 
-int ha_commit_trans(THD *thd, THD_TRANS* trans)
-{
-  int error=0;
-  DBUG_ENTER("ha_commit_trans");
-#ifdef USING_TRANSACTIONS
-  if (opt_using_transactions)
-  {
-    bool transaction_commited= 0;
-    bool operation_done= 0, need_start_waiters= 0;
 
-    /* If transaction has done some updates to tables */
-    if (trans == &thd->transaction.all && mysql_bin_log.is_open() &&
-        my_b_tell(&thd->transaction.trans_log))
-    {
-      if ((error= wait_if_global_read_lock(thd, 0, 0)))
-      {
-        /*
-          Note that ROLLBACK [TO SAVEPOINT] does not have this test; it's
-          because ROLLBACK never updates data, so needn't wait on the lock.
-        */
-        my_error(ER_ERROR_DURING_COMMIT, MYF(0), error);
-        error= 1;
-      }
-      else
-        need_start_waiters= 1;
-      if (mysql_bin_log.is_open())
-      {
-        mysql_bin_log.write(thd, &thd->transaction.trans_log, 1);
-        statistic_increment(binlog_cache_use, &LOCK_status);
-        if (thd->transaction.trans_log.disk_writes != 0)
-        {
-          /* 
-            We have to do this after addition of trans_log to main binlog since
-            this operation can cause flushing of end of trans_log to disk. 
-          */
-          statistic_increment(binlog_cache_disk_use, &LOCK_status);
-          thd->transaction.trans_log.disk_writes= 0;
-        }
-        reinit_io_cache(&thd->transaction.trans_log,
-                        WRITE_CACHE, (my_off_t) 0, 0, 1);
-        thd->transaction.trans_log.end_of_file= max_binlog_cache_size;
-      }
-    }
-#ifdef HAVE_NDBCLUSTER_DB
-    if (trans->ndb_tid)
-    {
-      if ((error=ndbcluster_commit(thd,trans->ndb_tid)))
-      {
-	if (error == -1)
-	  my_error(ER_ERROR_DURING_COMMIT, MYF(0));
-        error=1;
-      }
-      if (trans == &thd->transaction.all)
-        operation_done= transaction_commited= 1;
-      trans->ndb_tid=0;
-    }
-#endif
-#ifdef HAVE_BERKELEY_DB
-    if (trans->bdb_tid)
-    {
-      if ((error=berkeley_commit(thd,trans->bdb_tid)))
-      {
-	my_error(ER_ERROR_DURING_COMMIT, MYF(0), error);
-	error=1;
-      }
-      else
-	if (!(thd->options & OPTION_BEGIN))
-	  transaction_commited= 1; 
-      trans->bdb_tid=0;
-    }
-#endif
+/* 
+  Export statistics for different engines. Currently we use it only for
+  InnoDB.
+*/
+
+int ha_update_statistics()
+{
 #ifdef HAVE_INNOBASE_DB
-    if (trans->innobase_tid)
-    {
-      if ((error=innobase_commit(thd,trans->innobase_tid)))
-      {
-	my_error(ER_ERROR_DURING_COMMIT, MYF(0), error);
-	error=1;
-      }
-      trans->innodb_active_trans=0;
-      if (trans == &thd->transaction.all)
-	operation_done= transaction_commited= 1;
-    }
+  if (opt_innodb)
+    innodb_export_status();
 #endif
-#ifdef HAVE_QUERY_CACHE
-    if (transaction_commited && thd->transaction.changed_tables)
-      query_cache.invalidate(thd->transaction.changed_tables);
-#endif /*HAVE_QUERY_CACHE*/
-    if (error && trans == &thd->transaction.all && mysql_bin_log.is_open())
-      sql_print_error("Got error during commit;  Binlog is not up to date!");
-    thd->variables.tx_isolation=thd->session_tx_isolation;
-    if (operation_done)
-    {
-      statistic_increment(ha_commit_count,&LOCK_status);
-      thd->transaction.cleanup();
-    }
-    if (need_start_waiters)
-      start_waiting_global_read_lock(thd);
-  }
-#endif // using transactions
-  DBUG_RETURN(error);
+  return 0;
 }
 
-
-int ha_rollback_trans(THD *thd, THD_TRANS *trans)
+int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
 {
   int error=0;
-  DBUG_ENTER("ha_rollback_trans");
-#ifdef USING_TRANSACTIONS
-  if (opt_using_transactions)
+  THD_TRANS *trans=&thd->transaction.all;
+  handlerton **ht=trans->ht, **end_ht;
+  DBUG_ENTER("ha_rollback_to_savepoint");
+  DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0);
+
+  trans->nht=sv->nht;
+  trans->no_2pc=0;
+  end_ht=ht+sv->nht;
+  /*
+    rolling back to savepoint in all storage engines that were part of the
+    transaction when the savepoint was set
+  */
+  for (; ht < end_ht; ht++)
   {
-    bool operation_done=0;
-    /*
-      As rollback can be 30 times slower than insert in InnoDB, and user may
-      not know there's rollback (if it's because of a dupl row), better warn.
-    */
-    const char *save_proc_info= thd->proc_info;
-    thd->proc_info= "Rolling back";
-#ifdef HAVE_NDBCLUSTER_DB
-    if (trans->ndb_tid)
-    {
-      if ((error=ndbcluster_rollback(thd, trans->ndb_tid)))
-      {
-	if (error == -1)
-	  my_error(ER_ERROR_DURING_ROLLBACK, MYF(0));
-        error=1;
-      }
-      trans->ndb_tid = 0;
-      operation_done=1;
-    }
-#endif
-#ifdef HAVE_BERKELEY_DB
-    if (trans->bdb_tid)
-    {
-      if ((error=berkeley_rollback(thd, trans->bdb_tid)))
-      {
-	my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error);
-	error=1;
-      }
-      trans->bdb_tid=0;
-      operation_done=1;
-    }
-#endif
-#ifdef HAVE_INNOBASE_DB
-    if (trans->innobase_tid)
-    {
-      if ((error=innobase_rollback(thd, trans->innobase_tid)))
-      {
-	my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error);
-	error=1;
-      }
-      trans->innodb_active_trans=0;
-      operation_done=1;
+    int err;
+    DBUG_ASSERT((*ht)->savepoint_set != 0);
+    if ((err= (*(*ht)->savepoint_rollback)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset)))
+    { // cannot happen
+      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
+      error=1;
     }
-#endif
-    if ((trans == &thd->transaction.all) && mysql_bin_log.is_open())
-    {
-      /* 
-         Update the binary log with a BEGIN/ROLLBACK block if we have
-         cached some queries and we updated some non-transactional
-         table. Such cases should be rare (updating a
-         non-transactional table inside a transaction...).  Count disk
-         writes to trans_log in any case.
-      */
-      if (my_b_tell(&thd->transaction.trans_log))
-      {
-        if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
-          mysql_bin_log.write(thd, &thd->transaction.trans_log, 0);
-        statistic_increment(binlog_cache_use, &LOCK_status);
-        if (thd->transaction.trans_log.disk_writes != 0)
-        {
-          /* 
-            We have to do this after addition of trans_log to main binlog since
-            this operation can cause flushing of end of trans_log to disk. 
-          */
-          statistic_increment(binlog_cache_disk_use, &LOCK_status);
-          thd->transaction.trans_log.disk_writes= 0;
-        }
-      }
-      /* Flushed or not, empty the binlog cache */
-      reinit_io_cache(&thd->transaction.trans_log,
-                      WRITE_CACHE, (my_off_t) 0, 0, 1);
-      thd->transaction.trans_log.end_of_file= max_binlog_cache_size;
-      if (operation_done)
-        thd->transaction.cleanup();
+    statistic_increment(thd->status_var.ha_savepoint_rollback_count,&LOCK_status);
+    trans->no_2pc|=(*ht)->prepare == 0;
+  }
+  /*
+    rolling back the transaction in all storage engines that were not part of
+    the transaction when the savepoint was set
+  */
+  for (; *ht ; ht++)
+  {
+    int err;
+    if ((err= (*(*ht)->rollback)(thd, 1)))
+    { // cannot happen
+      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
+      error=1;
     }
-    thd->variables.tx_isolation=thd->session_tx_isolation;
-    if (operation_done)
-      statistic_increment(ha_rollback_count,&LOCK_status);
-    thd->proc_info= save_proc_info;
+    statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status);
+    *ht=0; // keep it conveniently zero-filled
   }
-#endif /* USING_TRANSACTIONS */
   DBUG_RETURN(error);
 }
 
-
 /*
-  Rolls the current transaction back to a savepoint.
-  Return value: 0 if success, 1 if there was not a savepoint of the given
-  name.
-  NOTE: how do we handle this (unlikely but legal) case:
-  [transaction] + [update to non-trans table] + [rollback to savepoint] ?
-  The problem occurs when a savepoint is before the update to the
-  non-transactional table. Then when there's a rollback to the savepoint, if we
-  simply truncate the binlog cache, we lose the part of the binlog cache where
-  the update is. If we want to not lose it, we need to write the SAVEPOINT
-  command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
-  is easy: it's just write at the end of the binlog cache, but the former
-  should be *inserted* to the place where the user called SAVEPOINT. The
-  solution is that when the user calls SAVEPOINT, we write it to the binlog
-  cache (so no need to later insert it). As transactions are never intermixed
-  in the binary log (i.e. they are serialized), we won't have conflicts with
-  savepoint names when using mysqlbinlog or in the slave SQL thread.
-  Then when ROLLBACK TO SAVEPOINT is called, if we updated some
-  non-transactional table, we don't truncate the binlog cache but instead write
-  ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
-  will chop the SAVEPOINT command from the binlog cache, which is good as in
-  that case there is no need to have it in the binlog).
+  Note that according to the SQL standard (ISO/IEC 9075-2:2003),
+  section "4.33.4 SQL-statements and transaction states",
+  SAVEPOINT is *not* a transaction-initiating SQL-statement.
 */
 
-int ha_rollback_to_savepoint(THD *thd, char *savepoint_name)
+int ha_savepoint(THD *thd, SAVEPOINT *sv)
 {
-  my_off_t binlog_cache_pos=0;
-  bool operation_done=0;
   int error=0;
-  DBUG_ENTER("ha_rollback_to_savepoint");
+  THD_TRANS *trans=&thd->transaction.all;
+  handlerton **ht=trans->ht;
+  DBUG_ENTER("ha_savepoint");
+  DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0);
 #ifdef USING_TRANSACTIONS
-  if (opt_using_transactions)
+  for (; *ht; ht++)
   {
-#ifdef HAVE_INNOBASE_DB
-    /*
-    Retrieve the trans_log binlog cache position corresponding to the
-    savepoint, and if the rollback is successful inside InnoDB reset the write
-    position in the binlog cache to what it was at the savepoint.
-    */
-    if ((error=innobase_rollback_to_savepoint(thd, savepoint_name,
-						  &binlog_cache_pos)))
+    int err;
+    if (! (*ht)->savepoint_set)
     {
-      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error);
+      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
       error=1;
+      break;
     }
-    else if (mysql_bin_log.is_open())
-    {
-      /* 
-         Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
-         non-transactional table. Otherwise, truncate the binlog cache starting
-         from the SAVEPOINT command.
-      */
-      if (unlikely((thd->options & OPTION_STATUS_NO_TRANS_UPDATE) &&
-                   my_b_tell(&thd->transaction.trans_log)))
-      {
-        Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE);
-        if (mysql_bin_log.write(&qinfo))
-          error= 1;
-      }
-      else
-        reinit_io_cache(&thd->transaction.trans_log, WRITE_CACHE,
-                        binlog_cache_pos, 0, 0);
+    if ((err= (*(*ht)->savepoint_set)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset)))
+    { // cannot happen
+      my_error(ER_GET_ERRNO, MYF(0), err);
+      error=1;
     }
-    operation_done=1;
-#endif
-    if (operation_done)
-      statistic_increment(ha_rollback_count,&LOCK_status);
+    statistic_increment(thd->status_var.ha_savepoint_count,&LOCK_status);
   }
+  sv->nht=trans->nht;
 #endif /* USING_TRANSACTIONS */
-
   DBUG_RETURN(error);
 }
 
-
-/*
-Sets a transaction savepoint.
-Return value: always 0, that is, succeeds always
-*/
-
-int ha_savepoint(THD *thd, char *savepoint_name)
+int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
 {
   int error=0;
-  DBUG_ENTER("ha_savepoint");
-#ifdef USING_TRANSACTIONS
-  if (opt_using_transactions)
+  handlerton **ht=thd->transaction.all.ht, **end_ht;
+  DBUG_ENTER("ha_release_savepoint");
+  DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0);
+
+  end_ht=ht+sv->nht;
+  for (; ht < end_ht; ht++)
   {
-    /* Write it to the binary log (see comments of ha_rollback_to_savepoint) */
-    if (mysql_bin_log.is_open())
-    {
-#ifdef HAVE_INNOBASE_DB
-      innobase_savepoint(thd,savepoint_name,
-                         my_b_tell(&thd->transaction.trans_log));
-#endif
-      Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE);
-      if (mysql_bin_log.write(&qinfo))
-	error= 1;
+    int err;
+    if (!(*ht)->savepoint_release)
+      continue;
+    if ((err= (*(*ht)->savepoint_release)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset)))
+    { // cannot happen
+      my_error(ER_GET_ERRNO, MYF(0), err);
+      error=1;
     }
-#ifdef HAVE_INNOBASE_DB
-    else
-      innobase_savepoint(thd,savepoint_name,0);
-#endif
   }
-#endif /* USING_TRANSACTIONS */
   DBUG_RETURN(error);
 }
 
@@ -846,12 +1203,25 @@ bool ha_flush_logs()
   The .frm file will be deleted only if we return 0 or ENOENT
 */
 
-int ha_delete_table(enum db_type table_type, const char *path)
+int ha_delete_table(THD *thd, enum db_type table_type, const char *path,
+                    const char *alias, bool generate_warning)
 {
+  handler *file;
   char tmp_path[FN_REFLEN];
-  handler *file=get_new_handler((TABLE*) 0, table_type);
-  if (!file)
-    return ENOENT;
+  int error;
+  TABLE dummy_table;
+  TABLE_SHARE dummy_share;
+  DBUG_ENTER("ha_delete_table");
+
+  bzero((char*) &dummy_table, sizeof(dummy_table));
+  bzero((char*) &dummy_share, sizeof(dummy_share));
+  dummy_table.s= &dummy_share;
+
+  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
+  if (table_type == DB_TYPE_UNKNOWN ||
+      ! (file=get_new_handler(&dummy_table, table_type)))
+    DBUG_RETURN(ENOENT);
+
   if (lower_case_table_names == 2 && !(file->table_flags() & HA_FILE_BASED))
   {
     /* Ensure that table handler get path in lower case */
@@ -859,64 +1229,45 @@ int ha_delete_table(enum db_type table_type, const char *path)
     my_casedn_str(files_charset_info, tmp_path);
     path= tmp_path;
   }
-  int error=file->delete_table(path);
-  delete file;
-  return error;
-}
-
-
-void ha_store_ptr(byte *buff, uint pack_length, my_off_t pos)
-{
-  switch (pack_length) {
-#if SIZEOF_OFF_T > 4
-  case 8: mi_int8store(buff,pos); break;
-  case 7: mi_int7store(buff,pos); break;
-  case 6: mi_int6store(buff,pos); break;
-  case 5: mi_int5store(buff,pos); break;
-#endif
-  case 4: mi_int4store(buff,pos); break;
-  case 3: mi_int3store(buff,pos); break;
-  case 2: mi_int2store(buff,(uint) pos); break;
-  case 1: buff[0]= (uchar) pos; break;
-  }
-  return;
-}
-
-my_off_t ha_get_ptr(byte *ptr, uint pack_length)
-{
-  my_off_t pos;
-  switch (pack_length) {
-#if SIZEOF_OFF_T > 4
-  case 8:
-    pos= (my_off_t) mi_uint8korr(ptr);
-    break;
-  case 7:
-    pos= (my_off_t) mi_uint7korr(ptr);
-    break;
-  case 6:
-    pos= (my_off_t) mi_uint6korr(ptr);
-    break;
-  case 5:
-    pos= (my_off_t) mi_uint5korr(ptr);
-    break;
-#endif
-  case 4:
-    pos= (my_off_t) mi_uint4korr(ptr);
-    break;
-  case 3:
-    pos= (my_off_t) mi_uint3korr(ptr);
-    break;
-  case 2:
-    pos= (my_off_t) mi_uint2korr(ptr);
-    break;
-  case 1:
-    pos= (my_off_t) mi_uint2korr(ptr);
-    break;
-  default:
-    pos=0;					// Impossible
-    break;
+  if ((error= file->delete_table(path)) && generate_warning)
+  {
+    /*
+      Because file->print_error() uses my_error() to generate the error message
+      we must store the error state in thd, reset it and restore it to
+      be able to get hold of the error message.
+      (We should in the future either rewrite handler::print_error() or make
+      a nice method of this.)
+    */
+    bool query_error= thd->query_error;
+    sp_rcontext *spcont= thd->spcont;
+    SELECT_LEX *current_select= thd->lex->current_select;
+    char buff[sizeof(thd->net.last_error)];
+    char new_error[sizeof(thd->net.last_error)];
+    int last_errno= thd->net.last_errno;
+
+    strmake(buff, thd->net.last_error, sizeof(buff)-1);
+    thd->query_error= 0;
+    thd->spcont= 0;
+    thd->lex->current_select= 0;
+    thd->net.last_error[0]= 0;
+
+    /* Fill up structures that print_error may need */
+    dummy_table.s->path= path;
+    dummy_table.alias= alias;
+
+    file->print_error(error, 0);
+    strmake(new_error, thd->net.last_error, sizeof(buff)-1);
+
+    /* restore thd */
+    thd->query_error= query_error;
+    thd->spcont= spcont;
+    thd->lex->current_select= current_select;
+    thd->net.last_errno= last_errno;
+    strmake(thd->net.last_error, buff, sizeof(buff)-1);
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error, new_error);
   }
- return pos;
+  delete file;
+  DBUG_RETURN(error);
 }
 
 /****************************************************************************
@@ -931,8 +1282,8 @@ int handler::ha_open(const char *name, int mode, int test_if_locked)
   int error;
   DBUG_ENTER("handler::ha_open");
   DBUG_PRINT("enter",("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
-		      name, table->db_type, table->db_stat, mode,
-		      test_if_locked));
+                      name, table->s->db_type, table->db_stat, mode,
+                      test_if_locked));
 
   if ((error=open(name,mode,test_if_locked)))
   {
@@ -950,7 +1301,7 @@ int handler::ha_open(const char *name, int mode, int test_if_locked)
   }
   else
   {
-    if (table->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
+    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
       table->db_stat|=HA_READ_ONLY;
     (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
 
@@ -980,7 +1331,7 @@ int handler::read_first_row(byte * buf, uint primary_key)
   register int error;
   DBUG_ENTER("handler::read_first_row");
 
-  statistic_increment(ha_read_first_count,&LOCK_status);
+  statistic_increment(current_thd->status_var.ha_read_first_count,&LOCK_status);
 
   /*
     If there is very few deleted rows in the table, find the first row by
@@ -1004,71 +1355,218 @@ int handler::read_first_row(byte * buf, uint primary_key)
   DBUG_RETURN(error);
 }
 
+/*
+  Generate the next auto-increment number based on increment and offset
+
+  In most cases increment= offset= 1, in which case we get:
+  1,2,3,4,5,...
+  If increment=10 and offset=5 and previous number is 1, we get:
+  1,5,15,25,35,...
+*/
+
+inline ulonglong
+next_insert_id(ulonglong nr,struct system_variables *variables)
+{
+  nr= (((nr+ variables->auto_increment_increment -
+         variables->auto_increment_offset)) /
+       (ulonglong) variables->auto_increment_increment);
+  return (nr* (ulonglong) variables->auto_increment_increment +
+          variables->auto_increment_offset);
+}
+
 
 /*
-  Updates field with field_type NEXT_NUMBER according to following:
-  if field = 0 change field to the next free key in database.
+  Update the auto_increment field if necessary
+
+  SYNOPSIS
+     update_auto_increment()
+
+  RETURN
+    0	ok
+    1 	get_auto_increment() was called and returned ~(ulonglong) 0
+    
+
+  IMPLEMENTATION
+
+    Updates columns with type NEXT_NUMBER if:
+
+  - If column value is set to NULL (in which case
+    auto_increment_field_not_null is 0)
+  - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
+    set. In the future we will only set NEXT_NUMBER fields if one sets them
+    to NULL (or they are not included in the insert list).
+
+
+  There are two different cases when the above is true:
+
+  - thd->next_insert_id == 0  (This is the normal case)
+    In this case we set the column for the first row to the value
+    next_insert_id(get_auto_increment(column)) which is normally
+    max-used-column-value +1.
+
+    We call get_auto_increment() only for the first row in a multi-row
+    statement. For the following rows we generate new numbers based on the
+    last used number.
+
+  - thd->next_insert_id != 0.  This happens when we have read a statement
+    from the binary log or when one has used SET LAST_INSERT_ID=#.
+
+    In this case we will set the column to the value of next_insert_id.
+    The next row will be given the id
+    next_insert_id(next_insert_id)
+
+    The idea is that generated auto_increment values are predictable and
+    independent of the column values in the table.  This is needed to be
+    able to replicate into a table that already has rows with a higher
+    auto-increment value than the one that is inserted.
+
+    After we have already generated an auto-increment number and the user
+    inserts a column with a higher value than the last used one, we will
+    start counting from the inserted value.
+
+    thd->next_insert_id is cleared after it's been used for a statement.
 */
 
-void handler::update_auto_increment()
+bool handler::update_auto_increment()
 {
-  longlong nr;
-  THD *thd;
+  ulonglong nr;
+  THD *thd= table->in_use;
+  struct system_variables *variables= &thd->variables;
+  bool auto_increment_field_not_null;
+  bool result= 0;
   DBUG_ENTER("handler::update_auto_increment");
-  if (table->next_number_field->val_int() != 0 ||
-      table->auto_increment_field_not_null &&
-      current_thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)
+
+  /*
+    We must save the previous value to be able to restore it if the
+    row was not inserted
+  */
+  thd->prev_insert_id= thd->next_insert_id;
+  auto_increment_field_not_null= table->auto_increment_field_not_null;
+  table->auto_increment_field_not_null= FALSE;
+
+  if ((nr= table->next_number_field->val_int()) != 0 ||
+      auto_increment_field_not_null &&
+      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)
   {
-    table->auto_increment_field_not_null= FALSE;
+    /* Clear flag for next row */
+    /* Mark that we didn't generate a new value */
     auto_increment_column_changed=0;
-    DBUG_VOID_RETURN;
+
+    /* Update next_insert_id if we have already generated a value */
+    if (thd->clear_next_insert_id && nr >= thd->next_insert_id)
+    {
+      if (variables->auto_increment_increment != 1)
+        nr= next_insert_id(nr, variables);
+      else
+        nr++;
+      thd->next_insert_id= nr;
+      DBUG_PRINT("info",("next_insert_id: %lu", (ulong) nr));
+    }
+    DBUG_RETURN(0);
   }
-  table->auto_increment_field_not_null= FALSE;
-  thd=current_thd;
-  if ((nr=thd->next_insert_id))
-    thd->next_insert_id=0;			// Clear after use
-  else
-    nr=get_auto_increment();
-  if (!table->next_number_field->store(nr))
+  if (!(nr= thd->next_insert_id))
+  {
+    if ((nr= get_auto_increment()) == ~(ulonglong) 0)
+      result= 1;                                // Mark failure
+
+    if (variables->auto_increment_increment != 1)
+      nr= next_insert_id(nr-1, variables);
+    /*
+      Update next row based on the found value. This way we don't have to
+      call the handler for every generated auto-increment value on a
+      multi-row statement
+    */
+    thd->next_insert_id= nr;
+  }
+
+  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));
+
+  /* Mark that we should clear next_insert_id before next stmt */
+  thd->clear_next_insert_id= 1;
+
+  if (!table->next_number_field->store((longlong) nr))
     thd->insert_id((ulonglong) nr);
   else
     thd->insert_id(table->next_number_field->val_int());
+
+  /*
+    We can't set next_insert_id if the auto-increment key is not the
+    first key part, as there is no guarantee that the first parts will be in
+    sequence
+  */
+  if (!table->s->next_number_key_offset)
+  {
+    /*
+      Set next insert id to point to next auto-increment value to be able to
+      handle multi-row statements
+      This works even if auto_increment_increment > 1
+    */
+    thd->next_insert_id= next_insert_id(nr, variables);
+  }
+  else
+    thd->next_insert_id= 0;
+
+  /* Mark that we generated a new value */
   auto_increment_column_changed=1;
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(result);
+}
+
+/*
+  restore_auto_increment
+
+  In case of error on write, we restore the last used next_insert_id value
+  because the previous value was not used.
+*/
+
+void handler::restore_auto_increment()
+{
+  THD *thd= table->in_use;
+  if (thd->next_insert_id)
+    thd->next_insert_id= thd->prev_insert_id;
 }
 
 
-longlong handler::get_auto_increment()
+ulonglong handler::get_auto_increment()
 {
-  longlong nr;
+  ulonglong nr;
   int error;
 
   (void) extra(HA_EXTRA_KEYREAD);
-  index_init(table->next_number_index);
-  if (!table->next_number_key_offset)
+  index_init(table->s->next_number_index);
+  if (!table->s->next_number_key_offset)
   {						// Autoincrement at key-start
     error=index_last(table->record[1]);
   }
   else
   {
     byte key[MAX_KEY_LENGTH];
-    key_copy(key,table,table->next_number_index,
-             table->next_number_key_offset);
-    error=index_read(table->record[1], key, table->next_number_key_offset,
-                     HA_READ_PREFIX_LAST);
+    key_copy(key, table->record[0],
+             table->key_info + table->s->next_number_index,
+             table->s->next_number_key_offset);
+    error= index_read(table->record[1], key, table->s->next_number_key_offset,
+                      HA_READ_PREFIX_LAST);
   }
 
   if (error)
     nr=1;
   else
-    nr=(longlong) table->next_number_field->
-      val_int_offset(table->rec_buff_length)+1;
+    nr= ((ulonglong) table->next_number_field->
+         val_int_offset(table->s->rec_buff_length)+1);
   index_end();
   (void) extra(HA_EXTRA_NO_KEYREAD);
   return nr;
 }
 
-	/* Print error that we got from handler function */
+
+/*
+  Print error that we got from handler function
+
+  NOTE
+   In case of delete table it's only safe to use the following parts of
+   the 'table' structure:
+     table->s->path
+     table->alias
+*/
 
 void handler::print_error(int error, myf errflag)
 {
@@ -1109,7 +1607,7 @@ void handler::print_error(int error, myf errflag)
 	str.length(max_length-4);
 	str.append("...");
       }
-      my_error(ER_DUP_ENTRY,MYF(0),str.c_ptr(),key_nr+1);
+      my_error(ER_DUP_ENTRY, MYF(0), str.c_ptr(), key_nr+1);
       DBUG_VOID_RETURN;
     }
     textno=ER_DUP_KEY;
@@ -1127,14 +1625,20 @@ void handler::print_error(int error, myf errflag)
   case HA_ERR_CRASHED:
     textno=ER_NOT_KEYFILE;
     break;
+  case HA_ERR_WRONG_IN_RECORD:
+    textno= ER_CRASHED_ON_USAGE;
+    break;
   case HA_ERR_CRASHED_ON_USAGE:
     textno=ER_CRASHED_ON_USAGE;
     break;
+  case HA_ERR_NOT_A_TABLE:
+    textno= error;
+    break;
   case HA_ERR_CRASHED_ON_REPAIR:
     textno=ER_CRASHED_ON_REPAIR;
     break;
   case HA_ERR_OUT_OF_MEM:
-    my_error(ER_OUT_OF_RESOURCES,errflag);
+    my_message(ER_OUT_OF_RESOURCES, ER(ER_OUT_OF_RESOURCES), errflag);
     DBUG_VOID_RETURN;
   case HA_ERR_WRONG_COMMAND:
     textno=ER_ILLEGAL_HA;
@@ -1148,6 +1652,9 @@ void handler::print_error(int error, myf errflag)
   case HA_ERR_RECORD_FILE_FULL:
     textno=ER_RECORD_FILE_FULL;
     break;
+  case HA_ERR_INDEX_FILE_FULL:
+    textno= errno;
+    break;
   case HA_ERR_LOCK_WAIT_TIMEOUT:
     textno=ER_LOCK_WAIT_TIMEOUT;
     break;
@@ -1169,6 +1676,9 @@ void handler::print_error(int error, myf errflag)
   case HA_ERR_NO_REFERENCED_ROW:
     textno=ER_NO_REFERENCED_ROW;
     break;
+  case HA_ERR_TABLE_DEF_CHANGED:
+    textno=ER_TABLE_DEF_CHANGED;
+    break;
   case HA_ERR_NO_SUCH_TABLE:
   {
     /*
@@ -1178,10 +1688,10 @@ void handler::print_error(int error, myf errflag)
     */
     char *db;
     char buff[FN_REFLEN];
-    uint length=dirname_part(buff,table->path);
+    uint length= dirname_part(buff,table->s->path);
     buff[length-1]=0;
     db=buff+dirname_length(buff);
-    my_error(ER_NO_SUCH_TABLE,MYF(0),db,table->table_name);
+    my_error(ER_NO_SUCH_TABLE, MYF(0), db, table->alias);
     break;
   }
   default:
@@ -1195,27 +1705,27 @@ void handler::print_error(int error, myf errflag)
       {
 	const char* engine= table_type();
 	if (temporary)
-	  my_error(ER_GET_TEMPORARY_ERRMSG,MYF(0),error,str.ptr(),engine);
+	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
 	else
-	  my_error(ER_GET_ERRMSG,MYF(0),error,str.ptr(),engine);
+	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
       }
-      else       
+      else
 	my_error(ER_GET_ERRNO,errflag,error);
       DBUG_VOID_RETURN;
     }
   }
-  my_error(textno,errflag,table->table_name,error);
+  my_error(textno, errflag, table->alias, error);
   DBUG_VOID_RETURN;
 }
 
 
-/* 
+/*
    Return an error message specific to this handler
-   
+
    SYNOPSIS
    error        error code previously returned by handler
    buf          Pointer to String where to add error message
-   
+
    Returns true if this is a temporary error
  */
 
@@ -1238,16 +1748,40 @@ uint handler::get_dup_key(int error)
 }
 
 
+/*
+  Delete all files with extension from bas_ext()
+
+  SYNOPSIS
+    delete_table()
+    name		Base name of table
+
+  NOTES
+    We assume that the handler may return more extensions than
+    were actually used for the file.
+
+  RETURN
+    0   If we successfully deleted at least one file from bas_ext() and
+	didn't get any other errors than ENOENT
+    #   Error
+*/
+
 int handler::delete_table(const char *name)
 {
-  int error=0;
+  int error= 0;
+  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
+  char buff[FN_REFLEN];
+
   for (const char **ext=bas_ext(); *ext ; ext++)
   {
-    if (delete_file(name,*ext,2))
+    fn_format(buff, name, "", *ext, 2 | 4);
+    if (my_delete_with_symlink(buff, MYF(0)))
     {
-      if ((error=errno) != ENOENT)
+      if ((error= my_errno) != ENOENT)
 	break;
     }
+    else
+      enoent_or_zero= 0;                        // No error for ENOENT
+    error= enoent_or_zero;
   }
   return error;
 }
@@ -1269,7 +1803,12 @@ int handler::rename_table(const char * from, const char * to)
 }
 
 /*
-  Tell the handler to turn on or off transaction in the handler
+  Tell the storage engine that it is allowed to "disable transaction" in the
+  handler. It is a hint that ACID is not required - it is used in NDB for
+  ALTER TABLE, for example, when data are copied to temporary table.
+  A storage engine may treat this hint any way it likes. NDB for example
+  starts to commit every now and then automatically.
+  This hint can be safely ignored.
 */
 
 int ha_enable_transaction(THD *thd, bool on)
@@ -1278,6 +1817,16 @@ int ha_enable_transaction(THD *thd, bool on)
 
   DBUG_ENTER("ha_enable_transaction");
   thd->transaction.on= on;
+  if (on)
+  {
+    /*
+      Now all storage engines should have transaction handling enabled.
+      But some may have it enabled all the time - "disabling" transactions
+      is an optimization hint that storage engine is free to ignore.
+      So, let's commit an open transaction (if any) now.
+    */
+    error= end_trans(thd, COMMIT);
+  }
   DBUG_RETURN(error);
 }
 
@@ -1313,7 +1862,7 @@ int ha_create_table(const char *name, HA_CREATE_INFO *create_info,
   char name_buff[FN_REFLEN];
   DBUG_ENTER("ha_create_table");
 
-  if (openfrm(name,"",0,(uint) READ_ALL, 0, &table))
+  if (openfrm(current_thd, name,"",0,(uint) READ_ALL, 0, &table))
     DBUG_RETURN(1);
   if (update_create_info)
   {
@@ -1331,7 +1880,7 @@ int ha_create_table(const char *name, HA_CREATE_INFO *create_info,
   error=table.file->create(name,&table,create_info);
   VOID(closefrm(&table));
   if (error)
-    my_error(ER_CANT_CREATE_TABLE,MYF(ME_BELL+ME_WAITTANG),name,error);
+    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name,error);
   DBUG_RETURN(error != 0);
 }
 
@@ -1378,7 +1927,7 @@ int ha_create_table_from_engine(THD* thd,
     if ((error = writefrm(path, frmblob, frmlen)))
       goto err_end;
 
-    if (openfrm(path,"",0,(uint) READ_ALL, 0, &table))
+    if (openfrm(thd, path,"",0,(uint) READ_ALL, 0, &table))
       DBUG_RETURN(1);
 
     update_create_info_from_table(&create_info, &table);
@@ -1400,13 +1949,6 @@ err_end:
   DBUG_RETURN(error);  
 }
 
-static int NEAR_F delete_file(const char *name,const char *ext,int extflag)
-{
-  char buff[FN_REFLEN];
-  VOID(fn_format(buff,name,"",ext,extflag | 4));
-  return(my_delete_with_symlink(buff,MYF(MY_WME)));
-}
-
 void st_ha_check_opt::init()
 {
   flags= sql_flags= 0;
@@ -1522,7 +2064,7 @@ int ha_discover(THD *thd, const char *db, const char *name,
     error= ndbcluster_discover(thd, db, name, frmblob, frmlen);
 #endif
   if (!error)
-    statistic_increment(ha_discover_count,&LOCK_status);
+    statistic_increment(thd->status_var.ha_discover_count,&LOCK_status);
   DBUG_RETURN(error);
 }
 
@@ -1577,6 +2119,131 @@ int ha_table_exists(THD* thd, const char* db, const char* name)
 
 
 /*
+  Read the first row of a multi-range set.
+
+  SYNOPSIS
+    read_multi_range_first()
+    found_range_p       Returns a pointer to the element in 'ranges' that
+                        corresponds to the returned row.
+    ranges              An array of KEY_MULTI_RANGE range descriptions.
+    range_count         Number of ranges in 'ranges'.
+    sorted              If result should be sorted per key.
+    buffer              A HANDLER_BUFFER for internal handler usage.
+
+  NOTES
+    Record is read into table->record[0].
+    *found_range_p returns a valid value only if read_multi_range_first()
+    returns 0.
+    Sorting is done within each range. If you want an overall sort, enter
+    'ranges' with sorted ranges.
+
+  RETURN
+    0			OK, found a row
+    HA_ERR_END_OF_FILE	No rows in any of the ranges (or range_count is 0)
+    #			Error code
+*/
+
+int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+                                    KEY_MULTI_RANGE *ranges, uint range_count,
+                                    bool sorted, HANDLER_BUFFER *buffer)
+{
+  int result= HA_ERR_END_OF_FILE;       /* stays EOF if range_count is 0 */
+  DBUG_ENTER("handler::read_multi_range_first");
+  multi_range_sorted= sorted;           /* reused by read_multi_range_next() */
+  multi_range_buffer= buffer;
+
+  for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
+       multi_range_curr < multi_range_end;
+       multi_range_curr++)
+  {
+    result= read_range_first(multi_range_curr->start_key.length ?
+                             &multi_range_curr->start_key : 0,
+                             multi_range_curr->end_key.length ?
+                             &multi_range_curr->end_key : 0,
+                             test(multi_range_curr->range_flag & EQ_RANGE),
+                             multi_range_sorted);
+    if (result != HA_ERR_END_OF_FILE)   /* found a row, or a real error */
+      break;
+  }
+
+  *found_range_p= multi_range_curr;     /* multi_range_end on EOF */
+  DBUG_PRINT("exit",("result %d", result));
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Read the next row of a multi-range set.
+
+  SYNOPSIS
+    read_multi_range_next()
+    found_range_p       Returns a pointer to the element in 'ranges' that
+                        corresponds to the returned row.
+
+  NOTES
+    Record is read into table->record[0].
+    *found_range_p returns a valid value only if read_multi_range_next()
+    returns 0.
+
+  RETURN
+    0			OK, found a row
+    HA_ERR_END_OF_FILE	No more rows in the current or any later range
+    #			Error code
+*/
+
+int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
+{
+  int result;
+  DBUG_ENTER("handler::read_multi_range_next");
+
+  /* We should not be called after the last call returned EOF. */
+  DBUG_ASSERT(multi_range_curr < multi_range_end);
+
+  do
+  {
+    /* Save a call if there can be only one row in range. */
+    if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
+    {
+      result= read_range_next();
+
+      /* On success or non-EOF errors jump to the end. */
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+    else
+    {
+      /*
+        We need to set this for the last range only, but checking this
+        condition is more expensive than just setting the result code.
+      */
+      result= HA_ERR_END_OF_FILE;
+    }
+
+    /* Try the next range(s) until one matches a record. */
+    for (multi_range_curr++;
+         multi_range_curr < multi_range_end;
+         multi_range_curr++)
+    {
+      result= read_range_first(multi_range_curr->start_key.length ?
+                               &multi_range_curr->start_key : 0,
+                               multi_range_curr->end_key.length ?
+                               &multi_range_curr->end_key : 0,
+                               test(multi_range_curr->range_flag & EQ_RANGE),
+                               multi_range_sorted);
+      if (result != HA_ERR_END_OF_FILE) /* found a row, or a real error */
+        break;
+    }
+  }
+  while ((result == HA_ERR_END_OF_FILE) &&
+         (multi_range_curr < multi_range_end));
+
+  *found_range_p= multi_range_curr;     /* multi_range_end on EOF */
+  DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
+  DBUG_RETURN(result);
+}
+
+
+/*
   Read first row between two ranges.
   Store ranges for future calls to read_range_next
 
@@ -1730,9 +2397,9 @@ TYPELIB *ha_known_exts(void)
     const char **ext, *old_ext;
 
     known_extensions_id= mysys_usage_id;
-    found_exts.push_back((char*) ".db");
+    found_exts.push_back((char*) triggers_file_ext);
     for (types= sys_table_types; types->type; types++)
-    {      
+    {
       if (*types->value == SHOW_OPTION_YES)
       {
 	handler *file= get_new_handler(0,(enum db_type) types->db_type);
@@ -1755,7 +2422,7 @@ TYPELIB *ha_known_exts(void)
                                        (found_exts.elements+1),
                                        MYF(MY_WME | MY_FAE));
     
-    DBUG_ASSERT(ext);
+    DBUG_ASSERT(ext != 0);
     known_extensions.count= found_exts.elements;
     known_extensions.type_names= ext;
 
@@ -1765,3 +2432,56 @@ TYPELIB *ha_known_exts(void)
   }
   return &known_extensions;
 }
+
+
+#ifdef HAVE_REPLICATION
+/*
+  Reports to table handlers up to which position we have sent the binlog
+  to a slave in replication
+
+  SYNOPSIS
+    ha_repl_report_sent_binlog()
+    thd             thread doing the binlog communication to the slave
+    log_file_name   binlog file name
+    end_offset      the offset in the binlog file up to which we sent the
+                    contents to the slave
+
+  NOTES
+    Only works for InnoDB at the moment
+
+  RETURN VALUE
+    Value of innobase_repl_report_sent_binlog(), or 0 without HAVE_INNOBASE_DB
+*/
+
+int ha_repl_report_sent_binlog(THD *thd, char *log_file_name,
+                               my_off_t end_offset)
+{
+#ifdef HAVE_INNOBASE_DB
+  return innobase_repl_report_sent_binlog(thd,log_file_name,end_offset);
+#else
+  return 0;
+#endif
+}
+
+
+/*
+  Reports to table handlers that we stop replication to a specific slave
+
+  SYNOPSIS
+    ha_repl_report_replication_stop()
+    thd              thread doing the binlog communication to the slave
+
+  NOTES
+    Does nothing at the moment
+
+  RETURN VALUE
+    Always 0 (= success)
+
+  PARAMETERS are described under SYNOPSIS above; thd is currently unused.
+*/
+
+int ha_repl_report_replication_stop(THD *thd)
+{
+  return 0;
+}
+#endif /* HAVE_REPLICATION */