summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorAleksey Midenkov <midenok@gmail.com>2017-02-20 10:06:58 +0300
committerAleksey Midenkov <midenok@gmail.com>2017-05-05 20:36:28 +0300
commitfc7da4dd4f1e2b9b78b292f20d8fe61f1e9a1d11 (patch)
tree72d251bf8b315752eed47a093630c658c5b85282 /sql
parentfb801289f314bee6e5b1864f3ef58f8f38a59278 (diff)
downloadmariadb-git-fc7da4dd4f1e2b9b78b292f20d8fe61f1e9a1d11.tar.gz
IB, SQL: InnoDB partitioning [closes #118]
* native InnoDB partitioning for BY SYSTEM_TIME partitions.
Diffstat (limited to 'sql')
-rw-r--r--sql/CMakeLists.txt4
-rw-r--r--sql/ha_partition.cc23
-rw-r--r--sql/ha_partition.h117
-rw-r--r--sql/handler.cc6
-rw-r--r--sql/handler.h27
-rw-r--r--sql/partition_info.cc192
-rw-r--r--sql/partition_info.h49
-rw-r--r--sql/partitioning/partition_handler.cc3746
-rw-r--r--sql/partitioning/partition_handler.h1113
-rw-r--r--sql/share/errmsg-utf8.txt9
-rw-r--r--sql/sql_partition.cc142
-rw-r--r--sql/sql_partition.h34
-rw-r--r--sql/sql_table.cc5
-rw-r--r--sql/sql_tablespace.cc64
-rw-r--r--sql/sql_tablespace.h35
-rw-r--r--sql/table.cc14
16 files changed, 5533 insertions, 47 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 1dfa313a70c..08a39b1975d 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -121,7 +121,7 @@ SET (SQL_SOURCE
rpl_tblmap.cc sql_binlog.cc event_scheduler.cc event_data_objects.cc
event_queue.cc event_db_repository.cc
sql_tablespace.cc events.cc ../sql-common/my_user.c
- partition_info.cc rpl_utility.cc rpl_injector.cc sql_locale.cc
+ partition_info.cc partitioning/partition_handler.cc rpl_utility.cc rpl_injector.cc sql_locale.cc
rpl_rli.cc rpl_mi.cc sql_servers.cc sql_audit.cc
sql_connect.cc scheduler.cc sql_partition_admin.cc
sql_profile.cc event_parse_data.cc sql_alter.cc
@@ -165,7 +165,7 @@ IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR
ENDIF()
-MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY
+MYSQL_ADD_PLUGIN(partition ha_partition.cc partitioning/partition_handler.cc STORAGE_ENGINE DEFAULT STATIC_ONLY
RECOMPILE_FOR_EMBEDDED)
MYSQL_ADD_PLUGIN(sql_sequence ha_sequence.cc STORAGE_ENGINE MANDATORY STATIC_ONLY
RECOMPILE_FOR_EMBEDDED)
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 0fa461e1807..747b9a8871f 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -160,9 +160,6 @@ static int partition_initialize(void *p)
bool Partition_share::init(uint num_parts)
{
DBUG_ENTER("Partition_share::init");
- mysql_mutex_init(key_partition_auto_inc_mutex,
- &auto_inc_mutex,
- MY_MUTEX_INIT_FAST);
auto_inc_initialized= false;
partition_name_hash_initialized= false;
next_auto_inc_val= 0;
@@ -1246,12 +1243,12 @@ int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
(modelled after mi_check_print_msg)
TODO: move this into the handler, or rewrite mysql_admin_table.
*/
-static bool print_admin_msg(THD* thd, uint len,
+bool print_admin_msg(THD* thd, uint len,
const char* msg_type,
const char* db_name, String &table_name,
const char* op_name, const char *fmt, ...)
ATTRIBUTE_FORMAT(printf, 7, 8);
-static bool print_admin_msg(THD* thd, uint len,
+bool print_admin_msg(THD* thd, uint len,
const char* msg_type,
const char* db_name, String &table_name,
const char* op_name, const char *fmt, ...)
@@ -5731,6 +5728,22 @@ int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
}
+int ha_partition::index_read_last_map(uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map)
+{
+ DBUG_ENTER("ha_partition::index_read_last_map");
+
+ m_ordered= true; // Safety measure
+ end_range= NULL;
+ m_index_scan_type= partition_index_read_last;
+ m_start_key.key= key;
+ m_start_key.keypart_map= keypart_map;
+ m_start_key.flag= HA_READ_PREFIX_LAST;
+ DBUG_RETURN(common_index_read(buf, true));
+}
+
+
/*
Read next record when performing index scan backwards
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 2c7f4a0861f..861ba47b94e 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -77,43 +77,118 @@ public:
};
+extern PSI_mutex_key key_partition_auto_inc_mutex;
+
/**
Partition specific Handler_share.
*/
class Partition_share : public Handler_share
{
public:
- bool auto_inc_initialized;
- mysql_mutex_t auto_inc_mutex; /**< protecting auto_inc val */
- ulonglong next_auto_inc_val; /**< first non reserved value */
- /**
- Hash of partition names. Initialized in the first ha_partition::open()
- for the table_share. After that it is read-only, i.e. no locking required.
- */
- bool partition_name_hash_initialized;
- HASH partition_name_hash;
- /** Storage for each partitions Handler_share */
- Parts_share_refs *partitions_share_refs;
- Partition_share() {}
+ Partition_share()
+ : auto_inc_initialized(false),
+ next_auto_inc_val(0),
+ partition_name_hash_initialized(false),
+ partitions_share_refs(NULL),
+ partition_names(NULL)
+ {
+ mysql_mutex_init(key_partition_auto_inc_mutex,
+ &auto_inc_mutex,
+ MY_MUTEX_INIT_FAST);
+ }
+
~Partition_share()
{
- DBUG_ENTER("Partition_share::~Partition_share");
mysql_mutex_destroy(&auto_inc_mutex);
+ if (partition_names)
+ {
+ my_free(partition_names);
+ }
if (partition_name_hash_initialized)
+ {
my_hash_free(&partition_name_hash);
+ }
if (partitions_share_refs)
delete partitions_share_refs;
- DBUG_VOID_RETURN;
}
+
bool init(uint num_parts);
- void lock_auto_inc()
+
+  /** Set if auto increment is used and initialized. */
+ bool auto_inc_initialized;
+ /**
+ Mutex protecting next_auto_inc_val.
+ Initialized if table uses auto increment.
+ */
+ mysql_mutex_t auto_inc_mutex;
+ /** First non reserved auto increment value. */
+ ulonglong next_auto_inc_val;
+ /**
+ Hash of partition names. Initialized by the first handler instance of a
+ table_share calling populate_partition_name_hash().
+ After that it is read-only, i.e. no locking required for reading.
+ */
+ HASH partition_name_hash;
+  /** Flag noting that the name hash is initialized, so it is only done once. */
+ bool partition_name_hash_initialized;
+
+  /** Storage for each partition's Handler_share */
+ Parts_share_refs *partitions_share_refs;
+
+ /**
+ Release reserved auto increment values not used.
+ @param thd Thread.
+ @param table_share Table Share
+ @param next_insert_id Next insert id (first non used auto inc value).
+ @param max_reserved End of reserved auto inc range.
+ */
+ void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
+ const ulonglong next_insert_id,
+ const ulonglong max_reserved);
+
+ /** lock mutex protecting auto increment value next_auto_inc_val. */
+ inline void lock_auto_inc()
{
mysql_mutex_lock(&auto_inc_mutex);
}
- void unlock_auto_inc()
+ /** unlock mutex protecting auto increment value next_auto_inc_val. */
+ inline void unlock_auto_inc()
{
mysql_mutex_unlock(&auto_inc_mutex);
}
+ /**
+ Populate partition_name_hash with partition and subpartition names
+ from part_info.
+ @param part_info Partition info containing all partitions metadata.
+
+ @return Operation status.
+ @retval false Success.
+ @retval true Failure.
+ */
+ bool populate_partition_name_hash(partition_info *part_info);
+ /** Get partition name.
+
+  @param part_id Partition id (for a subpartitioned table, only subpartition
+  names will be returned).
+
+ @return partition name or NULL if error.
+ */
+ const char *get_partition_name(size_t part_id) const;
+private:
+ const uchar **partition_names;
+ /**
+ Insert [sub]partition name into partition_name_hash
+ @param name Partition name.
+ @param part_id Partition id.
+ @param is_subpart True if subpartition else partition.
+
+ @return Operation status.
+ @retval false Success.
+ @retval true Failure.
+ */
+ bool insert_partition_name_in_hash(const char *name,
+ uint part_id,
+ bool is_subpart);
};
@@ -605,6 +680,10 @@ public:
virtual int index_last(uchar * buf);
virtual int index_next_same(uchar * buf, const uchar * key, uint keylen);
+ int index_read_last_map(uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map);
+
/*
read_first_row is virtual method but is only implemented by
handler.cc, no storage engine has implemented it so neither
@@ -1086,7 +1165,6 @@ private:
ulonglong nr= (((Field_num*) field)->unsigned_flag ||
field->val_int() > 0) ? field->val_int() : 0;
lock_auto_increment();
- DBUG_ASSERT(part_share->auto_inc_initialized);
/* must check when the mutex is taken */
if (nr >= part_share->next_auto_inc_val)
part_share->next_auto_inc_val= nr + 1;
@@ -1310,4 +1388,9 @@ public:
friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
};
+bool print_admin_msg(THD* thd, uint len,
+ const char* msg_type,
+ const char* db_name, String &table_name,
+ const char* op_name, const char *fmt, ...);
+
#endif /* HA_PARTITION_INCLUDED */
diff --git a/sql/handler.cc b/sql/handler.cc
index c19d04236d7..ba947fd7a2d 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -2435,6 +2435,12 @@ LEX_STRING *handler::engine_name()
}
+void handler::ha_statistic_increment(ulong SSV::*offset) const
+{
+ (table->in_use->status_var.*offset)++;
+}
+
+
double handler::keyread_time(uint index, uint ranges, ha_rows rows)
{
/*
diff --git a/sql/handler.h b/sql/handler.h
index e20f95df1f3..f5e3d83d8d9 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1393,6 +1393,7 @@ struct handlerton
bool (*vers_query_trx_id)(THD* thd, void *out, ulonglong trx_id, vtq_field_t field);
bool (*vers_query_commit_ts)(THD* thd, void *out, const MYSQL_TIME &commit_ts, vtq_field_t field, bool backwards);
bool (*vers_trx_sees)(THD *thd, bool &result, ulonglong trx_id1, ulonglong trx_id0, ulonglong commit_id1, uchar iso_level1, ulonglong commit_id0);
+ handler *(*vers_upgrade_handler)(handler *hnd, MEM_ROOT *mem_root);
};
@@ -3271,6 +3272,18 @@ protected:
virtual int index_last(uchar * buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
+ /**
+ @brief
+ The following function works like index_read, but it finds the last
+ row with the current key value or prefix.
+ @returns @see index_read_map().
+ */
+ virtual int index_read_last_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map)
+ {
+ uint key_len= calculate_key_len(table, active_index, key, keypart_map);
+ return index_read_last(buf, key, key_len);
+ }
virtual int close(void)=0;
inline void update_rows_read()
{
@@ -3350,7 +3363,7 @@ public:
void ft_end() { ft_handler=NULL; }
virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
{ return NULL; }
-private:
+public:
virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; }
virtual int rnd_next(uchar *buf)=0;
virtual int rnd_pos(uchar * buf, uchar *pos)=0;
@@ -4057,6 +4070,7 @@ public:
TABLE_SHARE* get_table_share() { return table_share; }
protected:
/* Service methods for use by storage engines. */
+ void ha_statistic_increment(ulong SSV::*offset) const;
void **ha_data(THD *) const;
THD *ha_thd(void) const;
@@ -4082,7 +4096,7 @@ protected:
public:
bool check_table_binlog_row_based(bool binlog_row);
-private:
+
/* Cache result to avoid extra calls */
inline void mark_trx_read_write()
{
@@ -4092,6 +4106,8 @@ private:
mark_trx_read_write_internal();
}
}
+
+private:
void mark_trx_read_write_internal();
bool check_table_binlog_row_based_internal(bool binlog_row);
@@ -4210,6 +4226,11 @@ protected:
virtual int index_read(uchar * buf, const uchar * key, uint key_len,
enum ha_rkey_function find_flag)
{ return HA_ERR_WRONG_COMMAND; }
+ virtual int index_read_last(uchar * buf, const uchar * key, uint key_len)
+ {
+ my_errno= HA_ERR_WRONG_COMMAND;
+ return HA_ERR_WRONG_COMMAND;
+ }
friend class ha_partition;
friend class ha_sequence;
public:
@@ -4340,6 +4361,8 @@ public:
{ DBUG_ASSERT(0); return false; }
virtual handler* part_handler(uint32 part_id)
{ DBUG_ASSERT(0); return NULL; }
+ virtual void update_partition(uint part_id)
+ {}
protected:
Handler_share *get_ha_share_ptr();
void set_ha_share_ptr(Handler_share *arg_ha_share);
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index f45b45548b0..c1a792c87e0 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -215,6 +215,48 @@ bool partition_info::set_named_partition_bitmap(const char *part_name,
@param table_list Table list pointing to table to prune.
@return Operation status
+ @retval false Success
+ @retval true Failure
+*/
+bool partition_info::set_read_partitions(List<char> *partition_names)
+{
+ DBUG_ENTER("partition_info::set_read_partitions");
+ if (!partition_names || !partition_names->elements)
+ {
+ DBUG_RETURN(true);
+ }
+
+ uint num_names= partition_names->elements;
+ List_iterator<char> partition_names_it(*partition_names);
+ uint i= 0;
+ /*
+ TODO: When adding support for FK in partitioned tables, the referenced
+ table must probably lock all partitions for read, and also write depending
+ of ON DELETE/UPDATE.
+ */
+ bitmap_clear_all(&read_partitions);
+
+ /* No check for duplicate names or overlapping partitions/subpartitions. */
+
+ DBUG_PRINT("info", ("Searching through partition_name_hash"));
+ do
+ {
+ char *part_name= partition_names_it++;
+ if (add_named_partition(part_name, strlen(part_name)))
+ DBUG_RETURN(true);
+ } while (++i < num_names);
+ DBUG_RETURN(false);
+}
+
+
+
+/**
+ Prune away partitions not mentioned in the PARTITION () clause,
+ if used.
+
+ @param table_list Table list pointing to table to prune.
+
+ @return Operation status
@retval true Failure
@retval false Success
*/
@@ -989,13 +1031,22 @@ bool partition_info::vers_scan_min_max(THD *thd, partition_element *part)
uint32 part_id= part->id * sub_factor;
uint32 part_id_end= part_id + sub_factor;
DBUG_ASSERT(part->empty);
+ DBUG_ASSERT(part->type == partition_element::VERSIONING);
DBUG_ASSERT(table->s->stat_trx);
for (; part_id < part_id_end; ++part_id)
{
- handler *file= table->file->part_handler(part_id);
- int rc= file->ha_external_lock(thd, F_RDLCK);
+ handler *file= table->file->part_handler(part_id); // requires update_partition() for ha_innopart
+ int rc= file->ha_external_lock(thd, F_RDLCK); // requires ha_commit_trans() for ha_innobase
if (rc)
- goto error;
+ {
+ file->update_partition(part_id);
+ goto lock_fail;
+ }
+
+ table->default_column_bitmaps();
+ bitmap_set_bit(table->read_set, table->vers_end_field()->field_index);
+ file->column_bitmaps_signal();
+
rc= file->ha_rnd_init(true);
if (!rc)
{
@@ -1006,6 +1057,8 @@ bool partition_info::vers_scan_min_max(THD *thd, partition_element *part)
if (thd->killed)
{
file->ha_rnd_end();
+ file->update_partition(part_id);
+ ha_commit_trans(thd, false);
return true;
}
if (rc)
@@ -1014,18 +1067,44 @@ bool partition_info::vers_scan_min_max(THD *thd, partition_element *part)
continue;
break;
}
- vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field());
+ if (table->vers_end_field()->is_max())
+ {
+ rc= HA_ERR_INTERNAL_ERROR;
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ WARN_VERS_PART_NON_HISTORICAL,
+ ER_THD(thd, WARN_VERS_PART_NON_HISTORICAL),
+ part->partition_name);
+ break;
+ }
+ if (table->versioned_by_engine())
+ {
+ uchar buf[8];
+ Field_timestampf fld(buf, NULL, 0, Field::NONE, table->vers_end_field()->field_name, NULL, 6);
+ if (!vers_trx_id_to_ts(thd, table->vers_end_field(), fld))
+ {
+ vers_stat_trx(STAT_TRX_END, part).update_unguarded(&fld);
+ }
+ }
+ else
+ {
+ vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field());
+ }
}
file->ha_rnd_end();
}
file->ha_external_lock(thd, F_UNLCK);
+ file->update_partition(part_id);
if (rc != HA_ERR_END_OF_FILE)
{
- error:
- my_error(ER_INTERNAL_ERROR, MYF(0), "partition/subpartition scan failed in versioned partitions setup");
+ ha_commit_trans(thd, false);
+ lock_fail:
+ // TODO: print rc code
+ my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed in versioned partitions setup (see warnings)");
return true;
}
}
+ ha_commit_trans(thd, false);
return false;
}
@@ -1073,11 +1152,9 @@ bool partition_info::vers_setup_2(THD * thd, bool is_create_table_ind)
DBUG_ASSERT(part_type == VERSIONING_PARTITION);
DBUG_ASSERT(vers_info && vers_info->initialized(false));
DBUG_ASSERT(table && table->s);
- if (!table->versioned_by_sql())
- {
- my_error(ER_VERS_WRONG_PARAMS, MYF(0), table->s->table_name.str, "selected engine is not supported in `BY SYSTEM_TIME` partitioning");
- return true;
- }
+
+ bool error= false;
+
mysql_mutex_lock(&table->s->LOCK_rotation);
if (table->s->busy_rotation)
{
@@ -1124,8 +1201,19 @@ bool partition_info::vers_setup_2(THD * thd, bool is_create_table_ind)
if (!is_create_table_ind)
{
- if (vers_scan_min_max(thd, el))
- return true;
+ if (el->type == partition_element::AS_OF_NOW)
+ {
+ uchar buf[8];
+ Field_timestampf fld(buf, NULL, 0, Field::NONE, table->vers_end_field()->field_name, NULL, 6);
+ fld.set_max();
+ vers_stat_trx(STAT_TRX_END, el).update_unguarded(&fld);
+ el->empty= false;
+ }
+ else if (vers_scan_min_max(thd, el))
+ {
+ error= true;
+ break;
+ }
if (!el->empty)
{
vers_update_col_vals(thd, prev, el);
@@ -1151,7 +1239,7 @@ bool partition_info::vers_setup_2(THD * thd, bool is_create_table_ind)
}
} // while
- if (!dont_stat)
+ if (!error && !dont_stat)
{
if (col_val_updated)
table->s->stat_serial++;
@@ -1165,7 +1253,7 @@ bool partition_info::vers_setup_2(THD * thd, bool is_create_table_ind)
table->s->busy_rotation= false;
}
mysql_mutex_unlock(&table->s->LOCK_rotation);
- return false;
+ return error;
}
@@ -3262,6 +3350,80 @@ bool partition_info::has_same_partitioning(partition_info *new_part_info)
}
+static bool has_same_column_order(List<Create_field> *create_list,
+ Field** field_array)
+{
+ Field **f_ptr;
+ List_iterator_fast<Create_field> new_field_it;
+ Create_field *new_field= NULL;
+ new_field_it.init(*create_list);
+
+ for (f_ptr= field_array; *f_ptr; f_ptr++)
+ {
+ while ((new_field= new_field_it++))
+ {
+ if (new_field->field == *f_ptr)
+ break;
+ }
+ if (!new_field)
+ break;
+ }
+
+ if (!new_field)
+ {
+ /* Not same order!*/
+ return false;
+ }
+ return true;
+}
+
+bool partition_info::vers_trx_id_to_ts(THD* thd, Field* in_trx_id, Field_timestamp& out_ts)
+{
+ handlerton *hton= plugin_hton(table->s->db_plugin);
+ DBUG_ASSERT(hton);
+ ulonglong trx_id= in_trx_id->val_int();
+ MYSQL_TIME ts;
+ bool found= hton->vers_query_trx_id(thd, &ts, trx_id, VTQ_COMMIT_TS);
+ if (!found)
+ {
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ WARN_VERS_TRX_MISSING,
+ ER_THD(thd, WARN_VERS_TRX_MISSING),
+ trx_id);
+ return true;
+ }
+ out_ts.store_time_dec(&ts, 6);
+ return false;
+}
+
+
+/**
+ Check if the partitioning columns are in the same order as the given list.
+
+ Used to see if INPLACE alter can be allowed or not. If the order is
+ different then the rows must be redistributed for KEY [sub]partitioning.
+
+ @param[in] create_list Column list after ALTER TABLE.
+  @return true if same order as before ALTER TABLE, else false.
+*/
+bool partition_info::same_key_column_order(List<Create_field> *create_list)
+{
+ /* Only need to check for KEY [sub] partitioning. */
+ if (list_of_part_fields && !column_list)
+ {
+ if (!has_same_column_order(create_list, part_field_array))
+ return false;
+ }
+ if (list_of_subpart_fields)
+ {
+ if (!has_same_column_order(create_list, subpart_field_array))
+ return false;
+ }
+ return true;
+}
+
+
void partition_info::print_debug(const char *str, uint *value)
{
DBUG_ENTER("print_debug");
diff --git a/sql/partition_info.h b/sql/partition_info.h
index 5a671bfc50f..ef20564837c 100644
--- a/sql/partition_info.h
+++ b/sql/partition_info.h
@@ -22,6 +22,7 @@
#include "sql_class.h"
#include "partition_element.h"
+#include "sql_partition.h"
class partition_info;
struct TABLE_LIST;
@@ -382,6 +383,28 @@ public:
uint32 *part_id);
void report_part_expr_error(bool use_subpart_expr);
bool has_same_partitioning(partition_info *new_part_info);
+ inline bool is_partition_used(uint part_id) const
+ {
+ return bitmap_is_set(&read_partitions, part_id);
+ }
+ inline bool is_partition_locked(uint part_id) const
+ {
+ return bitmap_is_set(&lock_partitions, part_id);
+ }
+ inline uint num_partitions_used()
+ {
+ return bitmap_bits_set(&read_partitions);
+ }
+ inline uint get_first_used_partition() const
+ {
+ return bitmap_get_first_set(&read_partitions);
+ }
+ inline uint get_next_used_partition(uint part_id) const
+ {
+ return bitmap_get_next_set(&read_partitions, part_id);
+ }
+ bool same_key_column_order(List<Create_field> *create_list);
+
private:
static int list_part_cmp(const void* a, const void* b);
bool set_up_default_partitions(THD *thd, handler *file, HA_CREATE_INFO *info,
@@ -392,9 +415,11 @@ private:
uint start_no);
char *create_default_subpartition_name(THD *thd, uint subpart_no,
const char *part_name);
+  // FIXME: prune_partition_bitmaps() is a duplicate of set_read_partitions()
bool prune_partition_bitmaps(TABLE_LIST *table_list);
bool add_named_partition(const char *part_name, uint length);
public:
+ bool set_read_partitions(List<char> *partition_names);
bool has_unique_name(partition_element *element);
bool vers_init_info(THD *thd);
@@ -475,8 +500,8 @@ public:
DBUG_ASSERT(vers_info->initialized());
part= vers_hist_part();
}
- max_time-= vers_stat_trx(STAT_TRX_END, part).min_time();
- return max_time > vers_info->interval;
+ my_time_t min_time= vers_stat_trx(STAT_TRX_END, part).min_time();
+ return max_time - min_time > vers_info->interval;
}
bool vers_interval_exceed(partition_element *part)
{
@@ -486,15 +511,31 @@ public:
{
return vers_interval_exceed(vers_hist_part());
}
+ bool vers_trx_id_to_ts(THD *thd, Field *in_trx_id, Field_timestamp &out_ts);
void vers_update_stats(THD *thd, partition_element *el)
{
DBUG_ASSERT(vers_info && vers_info->initialized());
DBUG_ASSERT(table && table->s);
DBUG_ASSERT(el && el->type == partition_element::VERSIONING);
+ bool updated;
mysql_rwlock_wrlock(&table->s->LOCK_stat_serial);
el->empty= false;
- bool updated=
- vers_stat_trx(STAT_TRX_END, el->id).update(table->vers_end_field());
+ if (table->versioned_by_engine())
+ {
+ // transaction is not yet pushed to VTQ, so we use now-time
+ my_time_t end_ts= my_time(0);
+
+ uchar buf[8];
+ Field_timestampf fld(buf, NULL, 0, Field::NONE, table->vers_end_field()->field_name, NULL, 6);
+ fld.store_TIME(end_ts, 0);
+ updated=
+ vers_stat_trx(STAT_TRX_END, el->id).update(&fld);
+ }
+ else
+ {
+ updated=
+ vers_stat_trx(STAT_TRX_END, el->id).update(table->vers_end_field());
+ }
if (updated)
table->s->stat_serial++;
mysql_rwlock_unlock(&table->s->LOCK_stat_serial);
diff --git a/sql/partitioning/partition_handler.cc b/sql/partitioning/partition_handler.cc
new file mode 100644
index 00000000000..1e04439e100
--- /dev/null
+++ b/sql/partitioning/partition_handler.cc
@@ -0,0 +1,3746 @@
+/*
+ Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; version 2 of
+ the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "table.h" // TABLE_SHARE
+#include "sql_partition.h" // LIST_PART_ENTRY, part_id_range
+#include "partition_info.h" // NOT_A_PARTITION_ID
+#include "partition_handler.h"
+#include "log.h" // sql_print_error
+#include "key.h" // key_rec_cmp
+#include "sql_class.h" // THD
+#include <mysql/psi/psi_memory.h>
+
+#define MI_MAX_MSG_BUF 1024
+
+// In sql_class.cc:
+extern "C" int thd_binlog_format(const MYSQL_THD thd);
+
+/** operation names for the enum_part_operation. */
+static const char *opt_op_name[]= {"optimize", "analyze", "check", "repair",
+ "assign_to_keycache", "preload_keys"};
+
+// static PSI_memory_key key_memory_Partition_share;
+// static PSI_memory_key key_memory_partition_sort_buffer;
+// static PSI_memory_key key_memory_Partition_admin;
+#ifdef HAVE_PSI_INTERFACE
+extern PSI_mutex_key key_partition_auto_inc_mutex;
+// static PSI_memory_info all_partitioning_memory[]=
+// { { &key_memory_Partition_share, "Partition_share", 0},
+// { &key_memory_partition_sort_buffer, "partition_sort_buffer", 0},
+// { &key_memory_Partition_admin, "Partition_admin", 0} };
+static PSI_mutex_info all_partitioning_mutex[]=
+{ { &key_partition_auto_inc_mutex, "Partiton_share::auto_inc_mutex", 0} };
+#endif
+
+void partitioning_init()
+{
+#ifdef HAVE_PSI_INTERFACE
+ int count;
+// count= array_elements(all_partitioning_memory);
+// mysql_memory_register("sql", all_partitioning_memory, count);
+ count= array_elements(all_partitioning_mutex);
+ mysql_mutex_register("sql", all_partitioning_mutex, count);
+#endif
+}
+
+
+/**
+ Release reserved auto increment values not used.
+ @param thd Thread.
+ @param table_share Table Share
+ @param next_insert_id Next insert id (first non used auto inc value).
+ @param max_reserved End of reserved auto inc range.
+*/
+void
+Partition_share::release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
+ const ulonglong next_insert_id,
+ const ulonglong max_reserved)
+{
+#ifndef DBUG_OFF
+ if (table_share->tmp_table == NO_TMP_TABLE)
+ {
+ mysql_mutex_assert_owner(&auto_inc_mutex);
+ }
+#endif /* DBUG_OFF */
+
+ /*
+    If the current auto_increment value is lower than the reserved value (1)
+ and the reserved value was reserved by this thread (2), then we can
+ lower the reserved value.
+ However, we cannot lower the value if there are forced/non generated
+ values from 'SET INSERT_ID = forced_val' (3). */
+ if (next_insert_id < next_auto_inc_val && // (1)
+ max_reserved >= next_auto_inc_val && // (2)
+ thd->auto_inc_intervals_forced.maximum() < next_insert_id) // (3)
+ {
+ next_auto_inc_val= next_insert_id;
+ }
+}
+
+
+/**
+ Get the partition name.
+
+ @param part Struct containing name and length
+ @param[out] length Length of the name
+
+ @return Partition name
+*/
+
+static uchar *get_part_name_from_def(PART_NAME_DEF *part,
+ size_t *length,
+ my_bool not_used MY_ATTRIBUTE((unused)))
+{
+ *length= part->length;
+ return part->partition_name;
+}
+
+
+/**
+ Populate the partition_name_hash in part_share.
+*/
+
+bool Partition_share::populate_partition_name_hash(partition_info *part_info)
+{
+ uint tot_names;
+ uint num_subparts= part_info->num_subparts;
+ DBUG_ENTER("Partition_share::populate_partition_name_hash");
+ DBUG_ASSERT(!part_info->is_sub_partitioned() || num_subparts);
+
+ if (num_subparts == 0)
+ {
+ num_subparts= 1;
+ }
+
+ /*
+    TABLE_SHARE::LOCK_ha_data must have been locked before calling this function.
+ This ensures only one thread/table instance will execute this.
+ */
+
+#ifndef DBUG_OFF
+ if (part_info->table->s->tmp_table == NO_TMP_TABLE)
+ {
+ mysql_mutex_assert_owner(&part_info->table->s->LOCK_ha_data);
+ }
+#endif
+ if (partition_name_hash_initialized)
+ {
+ DBUG_RETURN(false);
+ }
+ tot_names= part_info->num_parts;
+ if (part_info->is_sub_partitioned())
+ {
+ tot_names+= part_info->num_parts * num_subparts;
+ }
+ partition_names= static_cast<const uchar**>(my_malloc(
+ part_info->get_tot_partitions() *
+ sizeof(*partition_names),
+ MYF(MY_WME)));
+ if (!partition_names)
+ {
+ DBUG_RETURN(true);
+ }
+ if (my_hash_init(&partition_name_hash,
+ system_charset_info, tot_names, 0, 0,
+ (my_hash_get_key) get_part_name_from_def,
+ my_free, HASH_UNIQUE))
+ {
+ my_free(partition_names);
+ partition_names= NULL;
+ DBUG_RETURN(true);
+ }
+
+ List_iterator<partition_element> part_it(part_info->partitions);
+ uint i= 0;
+ do
+ {
+ partition_element *part_elem= part_it++;
+ DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
+ if (part_elem->part_state == PART_NORMAL)
+ {
+ if (insert_partition_name_in_hash(part_elem->partition_name,
+ i * num_subparts,
+ false))
+ goto err;
+ if (part_info->is_sub_partitioned())
+ {
+ List_iterator<partition_element>
+ subpart_it(part_elem->subpartitions);
+ partition_element *sub_elem;
+ uint j= 0;
+ do
+ {
+ sub_elem= subpart_it++;
+ if (insert_partition_name_in_hash(sub_elem->partition_name,
+ i * num_subparts + j, true))
+ goto err;
+
+ } while (++j < num_subparts);
+ }
+ }
+ } while (++i < part_info->num_parts);
+
+ for (i= 0; i < tot_names; i++)
+ {
+ PART_NAME_DEF *part_def;
+ part_def= reinterpret_cast<PART_NAME_DEF*>(
+ my_hash_element(&partition_name_hash, i));
+ if (part_def->is_subpart == part_info->is_sub_partitioned())
+ {
+ partition_names[part_def->part_id]= part_def->partition_name;
+ }
+ }
+ partition_name_hash_initialized= true;
+
+ DBUG_RETURN(false);
+err:
+ my_hash_free(&partition_name_hash);
+ my_free(partition_names);
+ partition_names= NULL;
+
+ DBUG_RETURN(true);
+}
+
+
+/**
+ Insert a partition name in the partition_name_hash.
+
+ @param name Name of partition
+ @param part_id Partition id (number)
+ @param is_subpart Set if the name belongs to a subpartition
+
+ @return Operation status
+ @retval true Failure
+ @retval false Success
+*/
+
+bool Partition_share::insert_partition_name_in_hash(const char *name,
+ uint part_id,
+ bool is_subpart)
+{
+ PART_NAME_DEF *part_def;
+ uchar *part_name;
+ uint part_name_length;
+ DBUG_ENTER("Partition_share::insert_partition_name_in_hash");
+ /*
+ Calculate and store the length here, to avoid doing it when
+ searching the hash.
+ */
+ part_name_length= static_cast<uint>(strlen(name));
+ /*
+ Must use memory that lives as long as table_share.
+ Freed in the Partition_share destructor.
+ Since we use my_multi_malloc, then my_free(part_def) will also free
+ part_name, as a part of my_hash_free.
+ */
+ if (!my_multi_malloc(MY_WME,
+ &part_def, sizeof(PART_NAME_DEF),
+ &part_name, part_name_length + 1,
+ NULL))
+ {
+ DBUG_RETURN(true);
+ }
+ memcpy(part_name, name, part_name_length + 1);
+ part_def->partition_name= part_name;
+ part_def->length= part_name_length;
+ part_def->part_id= part_id;
+ part_def->is_subpart= is_subpart;
+ if (my_hash_insert(&partition_name_hash, (uchar *) part_def))
+ {
+ my_free(part_def);
+ DBUG_RETURN(true);
+ }
+ DBUG_RETURN(false);
+}
+
+
+const char *Partition_share::get_partition_name(size_t part_id) const
+{
+ if (partition_names == NULL)
+ {
+ return NULL;
+ }
+ return reinterpret_cast<const char*>(partition_names[part_id]);
+}
+/*
+ Implementation of Partition_helper class.
+*/
+Partition_helper::Partition_helper(handler *main_handler)
+ :
+ m_handler(main_handler),
+ m_part_info(),
+ m_tot_parts(),
+ m_last_part(),
+ m_err_rec(),
+ m_ordered(),
+ m_ordered_scan_ongoing(),
+ m_ordered_rec_buffer(),
+ m_queue()
+{}
+
+
+Partition_helper::~Partition_helper()
+{
+ DBUG_ASSERT(m_ordered_rec_buffer == NULL);
+ DBUG_ASSERT(m_key_not_found_partitions.bitmap == NULL);
+}
+
+
+/**
+ Set partition info.
+
+ To be called from Partition_handler.
+
+ @param part_info Partition info to use.
+  @param early  True if called when part_info is only created and parsed,
+                but not set up, checked or fixed.
+ */
+void Partition_helper::set_part_info_low(partition_info *part_info,
+ bool early)
+{
+ /*
+ ha_partition will set m_tot_parts from the .par file during creating
+ the new handler.
+ And this call can be earlier than the partition_default_handling(),
+ so get_tot_partitions() may return zero.
+ */
+ if (m_tot_parts == 0 &&
+ (m_part_info == NULL || !early))
+ {
+ m_tot_parts= part_info->get_tot_partitions();
+ }
+ m_part_info= part_info;
+ m_is_sub_partitioned= m_part_info->is_sub_partitioned();
+}
+
+/**
+ Initialize the partitioning helper for use after the table is opened.
+
+ @param part_share Partitioning share (used for auto increment).
+
+ @return Operation status.
+ @retval false for success otherwise true.
+*/
+
+bool Partition_helper::open_partitioning(Partition_share *part_share)
+{
+ m_table= get_table();
+ DBUG_ASSERT(m_part_info == m_table->part_info);
+ m_part_share= part_share;
+ m_tot_parts= m_part_info->get_tot_partitions();
+ if (bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, false))
+ {
+ return true;
+ }
+ bitmap_clear_all(&m_key_not_found_partitions);
+ m_key_not_found= false;
+ m_is_sub_partitioned= m_part_info->is_sub_partitioned();
+ m_auto_increment_lock= false;
+ m_auto_increment_safe_stmt_log_lock= false;
+ m_pkey_is_clustered= m_handler->primary_key_is_clustered();
+ m_part_spec.start_part= NOT_A_PARTITION_ID;
+ m_part_spec.end_part= NOT_A_PARTITION_ID;
+ m_index_scan_type= PARTITION_NO_INDEX_SCAN;
+ m_start_key.key= NULL;
+ m_start_key.length= 0;
+ m_scan_value= 3;
+ m_reverse_order= false;
+ m_curr_key_info[0]= NULL;
+ m_curr_key_info[1]= NULL;
+ m_curr_key_info[2]= NULL;
+ m_top_entry= NO_CURRENT_PART_ID;
+ m_ref_usage= REF_NOT_USED;
+ m_rec_length= m_table->s->reclength;
+ return false;
+}
+
+
/**
  Release the resources acquired in open_partitioning().

  Frees the key-not-found bitmap and destroys the ordered-scan record
  priority queue. The ordered record buffer is expected to have been
  freed already by the scan code (hence the assert).
*/
void Partition_helper::close_partitioning()
{
  bitmap_free(&m_key_not_found_partitions);
  DBUG_ASSERT(!m_ordered_rec_buffer);
  destroy_record_priority_queue();
}
+
+/****************************************************************************
+ MODULE change record
+****************************************************************************/
+
+/**
+ Insert a row to the partitioned table.
+
+ @param buf The row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_write_row(uchar *buf)
+{
+ uint32 part_id;
+ int error;
+ longlong func_value;
+ bool have_auto_increment= m_table->next_number_field &&
+ buf == m_table->record[0];
+ THD *thd= get_thd();
+ sql_mode_t saved_sql_mode= thd->variables.sql_mode;
+ bool saved_auto_inc_field_not_null= m_table->auto_increment_field_not_null;
+#ifndef DBUG_OFF
+ my_bitmap_map *old_map;
+#endif /* DBUG_OFF */
+ DBUG_ENTER("Partition_helper::ph_write_row");
+ DBUG_ASSERT(buf == m_table->record[0]);
+
+ /*
+ If we have an auto_increment column and we are writing a changed row
+ or a new row, then update the auto_increment value in the record.
+ */
+ if (have_auto_increment)
+ {
+ error= m_handler->update_auto_increment();
+
+ /*
+ If we have failed to set the auto-increment value for this row,
+ it is highly likely that we will not be able to insert it into
+ the correct partition. We must check and fail if neccessary.
+ */
+ if (error)
+ DBUG_RETURN(error);
+
+ /*
+ Don't allow generation of auto_increment value the partitions handler.
+ If a partitions handler would change the value, then it might not
+ match the partition any longer.
+ This can occur if 'SET INSERT_ID = 0; INSERT (NULL)',
+ So allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
+ The partitions handler::next_insert_id must always be 0. Otherwise
+ we need to forward release_auto_increment, or reset it for all
+ partitions.
+ */
+ if (m_table->next_number_field->val_int() == 0)
+ {
+ m_table->auto_increment_field_not_null= TRUE;
+ thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
+ }
+ }
+
+#ifndef DBUG_OFF
+ /* Temporary mark the partitioning fields as readable. */
+ old_map= dbug_tmp_use_all_columns(m_table, m_table->read_set);
+#endif /* DBUG_OFF */
+
+ error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
+
+#ifndef DBUG_OFF
+ dbug_tmp_restore_column_map(m_table->read_set, old_map);
+#endif /* DBUG_OFF */
+
+ if (unlikely(error))
+ {
+ m_part_info->err_value= func_value;
+ goto exit;
+ }
+ if (!m_part_info->is_partition_locked(part_id))
+ {
+ DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
+ part_id, (long) func_value));
+ error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
+ goto exit;
+ }
+ m_last_part= part_id;
+ DBUG_PRINT("info", ("Insert in partition %d", part_id));
+
+ error= write_row_in_part(part_id, buf);
+
+ if (have_auto_increment && !m_table->s->next_number_keypart)
+ {
+ set_auto_increment_if_higher();
+ }
+exit:
+ thd->variables.sql_mode= saved_sql_mode;
+ m_table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Update an existing row in the partitioned table.
+
+ Yes, update_row() does what you expect, it updates a row. old_data will
+ have the previous row record in it, while new_data will have the newest
+ data in it.
+ Keep in mind that the server can do updates based on ordering if an
+ ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+ If the new record belongs to a different partition than the old record
+ then it will be inserted into the new partition and deleted from the old.
+
+ new_data is always record[0]
+ old_data is always record[1]
+
+ @param old_data The old record in MySQL Row Format.
+ @param new_data The new record in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+// FIXME: duplicate of ha_partition::update_row()
+int Partition_helper::ph_update_row(const uchar *old_data, uchar *new_data)
+{
+ THD *thd= get_thd();
+ uint32 new_part_id, old_part_id;
+ int error= 0;
+ longlong func_value;
+ DBUG_ENTER("Partition_helper::ph_update_row");
+ m_err_rec= NULL;
+
+ // Need to read partition-related columns, to locate the row's partition:
+ DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
+ m_table->read_set));
+ if ((error= get_parts_for_update(old_data, new_data, m_table->record[0],
+ m_part_info, &old_part_id, &new_part_id,
+ &func_value)))
+ {
+ m_part_info->err_value= func_value;
+ goto exit;
+ }
+ DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
+ if (!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id))
+ {
+ error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
+ goto exit;
+ }
+
+ /*
+ The protocol for updating a row is:
+ 1) position the handler (cursor) on the row to be updated,
+ either through the last read row (rnd or index) or by rnd_pos.
+ 2) call update_row with both old and new full records as arguments.
+
+ This means that m_last_part should already be set to actual partition
+ where the row was read from. And if that is not the same as the
+ calculated part_id we found a misplaced row, we return an error to
+ notify the user that something is broken in the row distribution
+ between partitions! Since we don't check all rows on read, we return an
+ error instead of correcting m_last_part, to make the user aware of the
+ problem!
+
+ Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
+ so this is not supported for this engine.
+ */
+ if (old_part_id != m_last_part)
+ {
+ m_err_rec= old_data;
+ DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
+ }
+
+ m_last_part= new_part_id;
+ if (new_part_id == old_part_id)
+ {
+ DBUG_PRINT("info", ("Update in partition %d", new_part_id));
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error= update_row_in_part(new_part_id, old_data, new_data);
+ reenable_binlog(thd);
+ goto exit;
+ }
+ else
+ {
+ Field *saved_next_number_field= m_table->next_number_field;
+ /*
+ Don't allow generation of auto_increment value for update.
+ table->next_number_field is never set on UPDATE.
+ But is set for INSERT ... ON DUPLICATE KEY UPDATE,
+ and since update_row() does not generate or update an auto_inc value,
+ we cannot have next_number_field set when moving a row
+ to another partition with write_row(), since that could
+ generate/update the auto_inc value.
+ This gives the same behavior for partitioned vs non partitioned tables.
+ */
+ m_table->next_number_field= NULL;
+ DBUG_PRINT("info", ("Update from partition %d to partition %d",
+ old_part_id, new_part_id));
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error= write_row_in_part(new_part_id, new_data);
+ reenable_binlog(thd);
+ m_table->next_number_field= saved_next_number_field;
+ if (error)
+ goto exit;
+
+ if (m_part_info->part_type == VERSIONING_PARTITION)
+ {
+ uint sub_factor= m_part_info->num_subparts ? m_part_info->num_subparts : 1;
+ DBUG_ASSERT(m_tot_parts == m_part_info->num_parts * sub_factor);
+ uint lpart_id= new_part_id / sub_factor;
+ // lpart_id is VERSIONING partition because new_part_id != old_part_id
+ m_part_info->vers_update_stats(thd, lpart_id);
+ }
+
+ tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
+ error= delete_row_in_part(old_part_id, old_data);
+ reenable_binlog(thd);
+ if (error)
+ {
+ goto exit;
+ }
+ }
+
+exit:
+ /*
+ if updating an auto_increment column, update
+ m_part_share->next_auto_inc_val if needed.
+ (not to be used if auto_increment on secondary field in a multi-column
+ index)
+ mysql_update does not set table->next_number_field, so we use
+ table->found_next_number_field instead.
+ Also checking that the field is marked in the write set.
+ */
+ if (m_table->found_next_number_field &&
+ new_data == m_table->record[0] &&
+ !m_table->s->next_number_keypart &&
+ bitmap_is_set(m_table->write_set,
+ m_table->found_next_number_field->field_index))
+ {
+ set_auto_increment_if_higher();
+ }
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Delete an existing row in the partitioned table.
+
+ This will delete a row. buf will contain a copy of the row to be deleted.
+ The server will call this right after the current row has been read
+ (from either a previous rnd_xxx() or index_xxx() call).
+ If you keep a pointer to the last row or can access a primary key it will
+ make doing the deletion quite a bit easier.
+ Keep in mind that the server does no guarentee consecutive deletions.
+ ORDER BY clauses can be used.
+
+ buf is either record[0] or record[1]
+
+ @param buf The record in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_delete_row(const uchar *buf)
+{
+ int error;
+ uint part_id;
+ DBUG_ENTER("Partition_helper::ph_delete_row");
+ m_err_rec= NULL;
+
+ DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
+ m_table->read_set));
+ if ((error= get_part_for_delete(buf,
+ m_table->record[0],
+ m_part_info,
+ &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ if (!m_part_info->is_partition_locked(part_id))
+ {
+ DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);
+ }
+
+ /*
+ The protocol for deleting a row is:
+ 1) position the handler (cursor) on the row to be deleted,
+ either through the last read row (rnd or index) or by rnd_pos.
+ 2) call delete_row with the full record as argument.
+
+ This means that m_last_part should already be set to actual partition
+ where the row was read from. And if that is not the same as the
+ calculated part_id we found a misplaced row, we return an error to
+ notify the user that something is broken in the row distribution
+ between partitions! Since we don't check all rows on read, we return an
+ error instead of forwarding the delete to the correct (m_last_part)
+ partition!
+
+ Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
+ so this is not supported for this engine.
+
+ TODO: change the assert in InnoDB into an error instead and make this one
+ an assert instead and remove the get_part_for_delete()!
+ */
+ if (part_id != m_last_part)
+ {
+ m_err_rec= buf;
+ DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
+ }
+ /* Should never call delete_row on a partition which is not read */
+ DBUG_ASSERT(m_part_info->is_partition_used(part_id));
+
+ m_last_part= part_id;
+ error= delete_row_in_part(part_id, buf);
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Get a range of auto increment values.
+
+ Can only be used if the auto increment field is the first field in an index.
+
+ This method is called by update_auto_increment which in turn is called
+ by the individual handlers as part of write_row. We use the
+ part_share->next_auto_inc_val, or search all
+ partitions for the highest auto_increment_value if not initialized or
+ if auto_increment field is a secondary part of a key, we must search
+ every partition when holding a mutex to be sure of correctness.
+
+ @param[in] increment Increment value.
+ @param[in] nb_desired_values Number of desired values.
+ @param[out] first_value First auto inc value reserved
+ or MAX if failure.
+ @param[out] nb_reserved_values Number of values reserved.
+*/
+
+void Partition_helper
+::get_auto_increment_first_field(ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values)
+{
+ THD *thd= get_thd();
+ DBUG_ENTER("Partition_helper::get_auto_increment_first_field");
+ DBUG_PRINT("info", ("inc: %lu desired_values: %lu first_value: %lu",
+ (ulong) increment,
+ (ulong) nb_desired_values,
+ (ulong) *first_value));
+ DBUG_ASSERT(increment && nb_desired_values);
+ /*
+ next_number_keypart is != 0 if the auto_increment column is a secondary
+ column in the index (it is allowed in MyISAM)
+ */
+ DBUG_ASSERT(m_table->s->next_number_keypart == 0);
+ *first_value= 0;
+
+ /*
+ Get a lock for handling the auto_increment in part_share
+ for avoiding two concurrent statements getting the same number.
+ */
+ lock_auto_increment();
+
+ /* Initialize if not already done. */
+ if (!m_part_share->auto_inc_initialized)
+ {
+ initialize_auto_increment(false);
+ }
+
+ /*
+ In a multi-row insert statement like INSERT SELECT and LOAD DATA
+ where the number of candidate rows to insert is not known in advance
+ we must hold a lock/mutex for the whole statement if we have statement
+ based replication. Because the statement-based binary log contains
+ only the first generated value used by the statement, and slaves assumes
+ all other generated values used by this statement were consecutive to
+ this first one, we must exclusively lock the generator until the statement
+ is done.
+ */
+ int binlog_format= thd_binlog_format(thd);
+ if (!m_auto_increment_safe_stmt_log_lock &&
+ thd->lex->sql_command != SQLCOM_INSERT &&
+ binlog_format != BINLOG_FORMAT_UNSPEC &&
+ binlog_format != BINLOG_FORMAT_ROW)
+ {
+ DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
+ m_auto_increment_safe_stmt_log_lock= true;
+ }
+
+ /* this gets corrected (for offset/increment) in update_auto_increment */
+ *first_value= m_part_share->next_auto_inc_val;
+ m_part_share->next_auto_inc_val+= nb_desired_values * increment;
+ if (m_part_share->next_auto_inc_val < *first_value)
+ {
+ /* Overflow, set to max. */
+ m_part_share->next_auto_inc_val= ULLONG_MAX;
+ }
+
+ unlock_auto_increment();
+ DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
+ *nb_reserved_values= nb_desired_values;
+ DBUG_VOID_RETURN;
+}
+
+
+inline void Partition_helper::set_auto_increment_if_higher()
+{
+ Field_num *field= static_cast<Field_num*>(m_table->found_next_number_field);
+ ulonglong nr= (field->unsigned_flag || field->val_int() > 0)
+ ? field->val_int() : 0;
+ lock_auto_increment();
+ if (!m_part_share->auto_inc_initialized)
+ {
+ initialize_auto_increment(false);
+ }
+ /* must hold the mutex when looking/changing m_part_share. */
+ if (nr >= m_part_share->next_auto_inc_val)
+ {
+ m_part_share->next_auto_inc_val= nr + 1;
+ }
+ unlock_auto_increment();
+ save_auto_increment(nr);
+}
+
+
/**
  Release reserved but unused auto increment values.

  If the auto increment column is a secondary key part, the release is
  forwarded to all partitions. Otherwise the unused tail of the reserved
  interval is handed back to the shared counter, and the statement-level
  lock flag taken in get_auto_increment_first_field() is cleared.
*/
void Partition_helper::ph_release_auto_increment()
{
  DBUG_ENTER("Partition_helper::ph_release_auto_increment");

  if (m_table->s->next_number_keypart)
  {
    release_auto_increment_all_parts();
  }
  else if (m_handler->next_insert_id)
  {
    ulonglong max_reserved= m_handler->auto_inc_interval_for_cur_row.maximum();
    lock_auto_increment();
    m_part_share->release_auto_inc_if_possible(get_thd(), m_table->s,
                                               m_handler->next_insert_id,
                                               max_reserved);
    DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                        (ulong) m_part_share->next_auto_inc_val));

    /* Unlock the multi row statement lock taken in get_auto_increment */
    if (m_auto_increment_safe_stmt_log_lock)
    {
      m_auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
+
+
+/**
+ Calculate key hash value from an null terminated array of fields.
+ Support function for KEY partitioning.
+
+ @param field_array An array of the fields in KEY partitioning
+
+ @return hash_value calculated
+
+ @note Uses the hash function on the character set of the field.
+ Integer and floating point fields use the binary character set by default.
+*/
+
+uint32 Partition_helper::ph_calculate_key_hash_value(Field **field_array)
+{
+ ulong nr1= 1;
+ ulong nr2= 4;
+ bool use_51_hash;
+ use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm ==
+ partition_info::KEY_ALGORITHM_51);
+
+ do
+ {
+ Field *field= *field_array;
+ if (use_51_hash)
+ {
+ switch (field->real_type()) {
+ case MYSQL_TYPE_TINY:
+ case MYSQL_TYPE_SHORT:
+ case MYSQL_TYPE_LONG:
+ case MYSQL_TYPE_FLOAT:
+ case MYSQL_TYPE_DOUBLE:
+ case MYSQL_TYPE_NEWDECIMAL:
+ case MYSQL_TYPE_TIMESTAMP:
+ case MYSQL_TYPE_LONGLONG:
+ case MYSQL_TYPE_INT24:
+ case MYSQL_TYPE_TIME:
+ case MYSQL_TYPE_DATETIME:
+ case MYSQL_TYPE_YEAR:
+ case MYSQL_TYPE_NEWDATE:
+ {
+ if (field->is_null())
+ {
+ nr1^= (nr1 << 1) | 1;
+ continue;
+ }
+ /* Force this to my_hash_sort_bin, which was used in 5.1! */
+ uint len= field->pack_length();
+ my_charset_bin.coll->hash_sort(&my_charset_bin, field->ptr, len,
+ &nr1, &nr2);
+ /* Done with this field, continue with next one. */
+ continue;
+ }
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VARCHAR:
+ case MYSQL_TYPE_BIT:
+ /* Not affected, same in 5.1 and 5.5 */
+ break;
+ /*
+ ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
+ and my_hash_sort_bin in 5.5!
+ */
+ case MYSQL_TYPE_ENUM:
+ case MYSQL_TYPE_SET:
+ {
+ if (field->is_null())
+ {
+ nr1^= (nr1 << 1) | 1;
+ continue;
+ }
+ /* Force this to my_hash_sort_bin, which was used in 5.1! */
+ uint len= field->pack_length();
+ my_charset_latin1.coll->hash_sort(&my_charset_latin1, field->ptr,
+ len, &nr1, &nr2);
+ continue;
+ }
+ /* New types in mysql-5.6. */
+ case MYSQL_TYPE_DATETIME2:
+ case MYSQL_TYPE_TIME2:
+ case MYSQL_TYPE_TIMESTAMP2:
+ /* Not affected, 5.6+ only! */
+ break;
+
+ /* These types should not be allowed for partitioning! */
+ case MYSQL_TYPE_NULL:
+ case MYSQL_TYPE_DECIMAL:
+ case MYSQL_TYPE_DATE:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_GEOMETRY:
+ /* fall through. */
+ default:
+ DBUG_ASSERT(0); // New type?
+ /* Fall through for default hashing (5.5). */
+ }
+ /* fall through, use collation based hashing. */
+ }
+ field->hash(&nr1, &nr2);
+ } while (*(++field_array));
+ return (uint32) nr1;
+}
+
+
/**
  Report partitioning specific errors to the user.

  Handles HA_ERR_NO_PARTITION_FOUND and HA_ERR_ROW_IN_WRONG_PARTITION
  (the latter using the row saved in m_err_rec by ph_update_row /
  ph_delete_row), logging details to the error log as well.

  @param error    Error code from the handler layer.
  @param errflag  Flags forwarded to the error reporting functions.

  @return Whether the caller still needs to report the error.
  @retval true   Not handled here; caller must report it.
  @retval false  Already reported to the user; caller can return.
*/
bool Partition_helper::print_partition_error(int error, myf errflag)
{
  THD *thd= get_thd();
  DBUG_ENTER("Partition_helper::print_partition_error");

  /* Should probably look for my own errors first */
  DBUG_PRINT("enter", ("error: %d", error));

  if ((error == HA_ERR_NO_PARTITION_FOUND) &&
      ! (thd->lex->alter_info.flags & Alter_info::ALTER_TRUNCATE_PARTITION))
  {
    m_part_info->print_no_partition_found(m_table, errflag);
    // print_no_partition_found() reports an error, so we can just return here.
    DBUG_RETURN(false);
  }
  else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
  {
    /*
      Should only happen on DELETE or UPDATE!
      Or in ALTER TABLE REBUILD/REORGANIZE where there are a misplaced
      row that needed to move to an old partition (not in the given set).
    */
    DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
                thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
                thd_sql_command(thd) == SQLCOM_UPDATE ||
                thd_sql_command(thd) == SQLCOM_UPDATE_MULTI ||
                thd_sql_command(thd) == SQLCOM_ALTER_TABLE);
    DBUG_ASSERT(m_err_rec);
    if (m_err_rec)
    {
      size_t max_length;
      char buf[MAX_KEY_LENGTH];
      String str(buf,sizeof(buf),system_charset_info);
      uint32 part_id;
      DBUG_ASSERT(m_last_part < m_tot_parts);
      str.length(0);
      /* Build "was in partition X, should be in partition Y" message. */
      if (thd_sql_command(thd) == SQLCOM_ALTER_TABLE)
      {
        str.append("from REBUILD/REORGANIZED partition: ");
        str.append_ulonglong(m_last_part);
        str.append(" to non included partition (new definition): ");
      }
      else
      {
        str.append_ulonglong(m_last_part);
        str.append(". Correct is ");
      }
      if (get_part_for_delete(m_err_rec,
                              m_table->record[0],
                              m_part_info,
                              &part_id))
      {
        str.append("?");
      }
      else
      {
        str.append_ulonglong(part_id);
      }
      append_row_to_str(str, m_err_rec, m_table);

      /* Log this error, so the DBA can notice it and fix it! */
      sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s\n"
                      "Please REPAIR the table!",
                      m_table->s->table_name.str,
                      str.c_ptr_safe());

      /* Truncate the message (with "...") to fit the error message size. */
      max_length= (MYSQL_ERRMSG_SIZE - strlen(ER(ER_ROW_IN_WRONG_PARTITION)));
      if (str.length() >= max_length)
      {
        str.length(max_length-4);
        str.append(STRING_WITH_LEN("..."));
      }
      my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
      m_err_rec= NULL;
      DBUG_RETURN(false);
    }
  }

  DBUG_RETURN(true);
}
+
+
+/**
+ Implement the partition changes defined by ALTER TABLE of partitions.
+
+ Add and copy if needed a number of partitions, during this operation
+ only read operation is ongoing in the server. This is used by
+ ADD PARTITION all types as well as by REORGANIZE PARTITION. For
+ one-phased implementations it is used also by DROP and COALESCE
+ PARTITIONs.
+ One-phased implementation needs the new frm file, other handlers will
+ get zero length and a NULL reference here.
+
+ @param[in] create_info HA_CREATE_INFO object describing all
+ fields and indexes in table
+ @param[in] path Complete path of db and table name
+ @param[out] copied Output parameter where number of copied
+ records are added
+ @param[out] deleted Output parameter where number of deleted
+ records are added
+
+ @return Operation status
+ @retval 0 Success
+ @retval != 0 Failure
+*/
+
+// FIXME: duplicate of ha_partition::change_partitions
+int Partition_helper::change_partitions(HA_CREATE_INFO *create_info,
+ const char *path,
+ ulonglong * const copied,
+ ulonglong * const deleted)
+{
+ List_iterator<partition_element> part_it(m_part_info->partitions);
+ List_iterator <partition_element> t_it(m_part_info->temp_partitions);
+ char part_name_buff[FN_REFLEN];
+ const char *table_level_data_file_name= create_info->data_file_name;
+ const char *table_level_index_file_name= create_info->index_file_name;
+ const char *table_level_tablespace_name= create_info->tablespace;
+ uint num_parts= m_part_info->partitions.elements;
+ uint num_subparts= m_part_info->num_subparts;
+ uint i= 0;
+ uint num_remain_partitions;
+ uint num_reorged_parts;
+ int error= 1;
+ bool first;
+ uint temp_partitions= m_part_info->temp_partitions.elements;
+ THD *thd= get_thd();
+ DBUG_ENTER("Partition_helper::change_partitions");
+
+ /*
+ Use the read_partitions bitmap for reorganized partitions,
+ i.e. what to copy.
+ */
+ bitmap_clear_all(&m_part_info->read_partitions);
+
+ /*
+ Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
+ */
+ DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_handler, path,
+ part_name_buff)));
+ num_reorged_parts= 0;
+ if (!m_part_info->is_sub_partitioned())
+ num_subparts= 1;
+
+ /*
+ Step 1:
+ Calculate number of reorganized partitions.
+ */
+ if (temp_partitions)
+ {
+ num_reorged_parts= temp_partitions * num_subparts;
+ }
+ else
+ {
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (part_elem->part_state == PART_CHANGED ||
+ part_elem->part_state == PART_REORGED_DROPPED)
+ {
+ num_reorged_parts+= num_subparts;
+ }
+ } while (++i < num_parts);
+ }
+
+ /*
+ Step 2:
+ Calculate number of partitions after change.
+ */
+ num_remain_partitions= 0;
+ if (temp_partitions)
+ {
+ num_remain_partitions= num_parts * num_subparts;
+ }
+ else
+ {
+ part_it.rewind();
+ i= 0;
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (part_elem->part_state == PART_NORMAL ||
+ part_elem->part_state == PART_TO_BE_ADDED ||
+ part_elem->part_state == PART_CHANGED)
+ {
+ num_remain_partitions+= num_subparts;
+ }
+ } while (++i < num_parts);
+ }
+
+ /*
+ Step 3:
+ Set the read_partition bit for all partitions to be copied.
+ */
+ if (num_reorged_parts)
+ {
+ i= 0;
+ first= true;
+ part_it.rewind();
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (part_elem->part_state == PART_CHANGED ||
+ part_elem->part_state == PART_REORGED_DROPPED)
+ {
+ for (uint sp = 0; sp < num_subparts; sp++)
+ {
+ bitmap_set_bit(&m_part_info->read_partitions, i * num_subparts + sp);
+ }
+ DBUG_ASSERT(first);
+ }
+ else if (first && temp_partitions &&
+ part_elem->part_state == PART_TO_BE_ADDED)
+ {
+ /*
+ When doing an ALTER TABLE REORGANIZE PARTITION a number of
+ partitions is to be reorganized into a set of new partitions.
+ The reorganized partitions are in this case in the temp_partitions
+ list. We mark all of them in one batch and thus we only do this
+ until we find the first partition with state PART_TO_BE_ADDED
+ since this is where the new partitions go in and where the old
+ ones used to be.
+ */
+ first= false;
+ DBUG_ASSERT(((i*num_subparts) + num_reorged_parts) <= m_tot_parts);
+ for (uint sp = 0; sp < num_reorged_parts; sp++)
+ {
+ bitmap_set_bit(&m_part_info->read_partitions, i * num_subparts + sp);
+ }
+ }
+ } while (++i < num_parts);
+ }
+
+ /*
+ Step 4:
+ Create the new partitions and also open, lock and call
+ external_lock on them (if needed) to prepare them for copy phase
+ and also for later close calls.
+ No need to create PART_NORMAL partitions since they must not
+ be written to!
+ Only PART_CHANGED and PART_TO_BE_ADDED should be written to!
+ */
+
+ error= prepare_for_new_partitions(num_remain_partitions,
+ num_reorged_parts == 0);
+
+ i= 0;
+ part_it.rewind();
+ do
+ {
+ partition_element *part_elem= part_it++;
+ DBUG_ASSERT(part_elem->part_state >= PART_NORMAL &&
+ part_elem->part_state <= PART_CHANGED);
+ if (part_elem->part_state == PART_TO_BE_ADDED ||
+ part_elem->part_state == PART_CHANGED)
+ {
+ /*
+ A new partition needs to be created PART_TO_BE_ADDED means an
+ entirely new partition and PART_CHANGED means a changed partition
+ that will still exist with either more or less data in it.
+ */
+ uint name_variant= NORMAL_PART_NAME;
+ if (part_elem->part_state == PART_CHANGED ||
+ (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
+ name_variant= TEMP_PART_NAME;
+ if (m_part_info->is_sub_partitioned())
+ {
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ uint j= 0, part;
+ do
+ {
+ partition_element *sub_elem= sub_it++;
+ create_subpartition_name(part_name_buff, path,
+ part_elem->partition_name,
+ sub_elem->partition_name,
+ name_variant);
+ part= i * num_subparts + j;
+ DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
+ /*
+ update_create_info was called previously in
+ mysql_prepare_alter_table. Which may have set data/index_file_name
+ for the partitions to the full partition name, including
+ '#P#<part_name>[#SP#<subpart_name>] suffix. Remove that suffix
+ if it exists.
+ */
+ truncate_partition_filename(sub_elem->data_file_name);
+ truncate_partition_filename(sub_elem->index_file_name);
+ /* Notice that sub_elem is already based on part_elem's defaults. */
+ error= set_up_table_before_create(thd,
+ m_table->s,
+ part_name_buff,
+ create_info,
+ sub_elem);
+ if (error)
+ {
+ goto err;
+ }
+ if ((error= create_new_partition(m_table,
+ create_info,
+ part_name_buff,
+ part,
+ sub_elem)))
+ {
+ goto err;
+ }
+ /* Reset create_info to table level values. */
+ create_info->data_file_name= table_level_data_file_name;
+ create_info->index_file_name= table_level_index_file_name;
+ create_info->tablespace= table_level_tablespace_name;
+ } while (++j < num_subparts);
+ }
+ else
+ {
+ create_partition_name(part_name_buff, path,
+ part_elem->partition_name, name_variant,
+ true);
+ DBUG_PRINT("info", ("Add partition %s", part_name_buff));
+ /* See comment in subpartition branch above! */
+ truncate_partition_filename(part_elem->data_file_name);
+ truncate_partition_filename(part_elem->index_file_name);
+ error= set_up_table_before_create(thd,
+ m_table->s,
+ part_name_buff,
+ create_info,
+ part_elem);
+ if (error)
+ {
+ goto err;
+ }
+ if ((error= create_new_partition(m_table,
+ create_info,
+ (const char *)part_name_buff,
+ i,
+ part_elem)))
+ {
+ goto err;
+ }
+ /* Reset create_info to table level values. */
+ create_info->data_file_name= table_level_data_file_name;
+ create_info->index_file_name= table_level_index_file_name;
+ create_info->tablespace= table_level_tablespace_name;
+ }
+ }
+ } while (++i < num_parts);
+
+ /*
+ Step 5:
+ State update to prepare for next write of the frm file.
+ */
+ i= 0;
+ part_it.rewind();
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (part_elem->part_state == PART_TO_BE_ADDED)
+ part_elem->part_state= PART_IS_ADDED;
+ else if (part_elem->part_state == PART_CHANGED)
+ part_elem->part_state= PART_IS_CHANGED;
+ else if (part_elem->part_state == PART_REORGED_DROPPED)
+ part_elem->part_state= PART_TO_BE_DROPPED;
+ } while (++i < num_parts);
+ for (i= 0; i < temp_partitions; i++)
+ {
+ partition_element *part_elem= t_it++;
+ DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
+ part_elem->part_state= PART_TO_BE_DROPPED;
+ }
+ error= copy_partitions(copied, deleted);
+err:
+ if (error)
+ {
+ m_handler->print_error(error,
+ MYF(error != ER_OUTOFMEMORY ? 0 : ME_FATALERROR));
+ }
+ /*
+ Close and unlock the new temporary partitions.
+ They will later be deleted or renamed through the ddl-log.
+ */
+ close_new_partitions();
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Copy partitions as part of ALTER TABLE of partitions.
+
+ change_partitions has done all the preparations, now it is time to
+ actually copy the data from the reorganized partitions to the new
+ partitions.
+
+ @param[out] copied Number of records copied.
+ @param[out] deleted Number of records deleted.
+
+ @return Operation status
+ @retval 0 Success
+ @retval >0 Error code
+*/
+
+int Partition_helper::copy_partitions(ulonglong * const copied,
+ ulonglong * const deleted)
+{
+ uint new_part= 0;
+ int result= 0;
+ longlong func_value;
+ DBUG_ENTER("Partition_helper::copy_partitions");
+
+ if (m_part_info->linear_hash_ind)
+ {
+ if (m_part_info->part_type == HASH_PARTITION)
+ set_linear_hash_mask(m_part_info, m_part_info->num_parts);
+ else
+ set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
+ }
+
+ /*
+ m_part_info->read_partitions bitmap is setup for all the reorganized
+ partitions to be copied. So we can use the normal handler rnd interface
+ for reading.
+ */
+ if ((result= m_handler->ha_rnd_init(1)))
+ {
+ DBUG_RETURN(result);
+ }
+ while (true)
+ {
+ if ((result= m_handler->ha_rnd_next(m_table->record[0])))
+ {
+ if (result == HA_ERR_RECORD_DELETED)
+ continue; //Probably MyISAM
+ if (result != HA_ERR_END_OF_FILE)
+ goto error;
+ /*
+ End-of-file reached, break out to end the copy process.
+ */
+ break;
+ }
+ /* Found record to insert into new handler */
+ if (m_part_info->get_partition_id(m_part_info, &new_part,
+ &func_value))
+ {
+ /*
+ This record is in the original table but will not be in the new
+ table since it doesn't fit into any partition any longer due to
+ changed partitioning ranges or list values.
+ */
+ (*deleted)++;
+ }
+ else
+ {
+ if ((result= write_row_in_new_part(new_part)))
+ {
+ goto error;
+ }
+ }
+ }
+ m_handler->ha_rnd_end();
+ DBUG_RETURN(false);
+error:
+ m_handler->ha_rnd_end();
+ DBUG_RETURN(result);
+}
+
+
+/**
+ Check/fix misplaced rows.
+
+ @param part_id Partition to check/fix.
+ @param repair If true, move misplaced rows to correct partition.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error
+*/
+
+int Partition_helper::check_misplaced_rows(uint read_part_id, bool repair)
+{
+ int result= 0;
+ THD *thd= get_thd();
+ bool ignore= thd->lex->ignore;
+ uint32 correct_part_id;
+ longlong func_value;
+ ha_rows num_misplaced_rows= 0;
+ ha_rows num_deleted_rows= 0;
+
+ DBUG_ENTER("Partition_helper::check_misplaced_rows");
+
+ if (repair)
+ {
+ /* We must read the full row, if we need to move it! */
+ bitmap_set_all(m_table->read_set);
+ bitmap_set_all(m_table->write_set);
+ }
+ else
+ {
+ /* Only need to read the partitioning fields. */
+ bitmap_union(m_table->read_set, &m_part_info->full_part_field_set);
+#if 0
+ /* Fill the base columns of virtual generated columns if necessary */
+ for (Field **ptr= m_part_info->full_part_field_array; *ptr; ptr++)
+ {
+ if ((*ptr)->is_virtual_gcol())
+ m_table->mark_gcol_in_maps(*ptr);
+ }
+#endif
+ }
+
+ /* Scan the single partition given as read_part_id. */
+ if ((result= rnd_init_in_part(read_part_id, true)))
+ DBUG_RETURN(result);
+
+ while (true)
+ {
+ if ((result= ph_rnd_next_in_part(read_part_id, m_table->record[0])))
+ {
+ /* Skip physically deleted rows; some engines (e.g. MyISAM) report them. */
+ if (result == HA_ERR_RECORD_DELETED)
+ continue;
+ if (result != HA_ERR_END_OF_FILE)
+ break;
+
+ /* Scan done: summarize what was found/fixed before ending. */
+ if (num_misplaced_rows > 0)
+ {
+ if (repair)
+ {
+ if (num_deleted_rows > 0)
+ {
+ print_admin_msg(thd, MI_MAX_MSG_BUF, "warning",
+ m_table->s->db.str, m_table->alias,
+ opt_op_name[REPAIR_PARTS],
+ "Moved %lld misplaced rows, deleted %lld rows",
+ num_misplaced_rows - num_deleted_rows,
+ num_deleted_rows);
+ }
+ else
+ {
+ print_admin_msg(thd, MI_MAX_MSG_BUF, "warning",
+ m_table->s->db.str, m_table->alias,
+ opt_op_name[REPAIR_PARTS],
+ "Moved %lld misplaced rows",
+ num_misplaced_rows);
+ }
+ }
+ else
+ {
+ print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
+ m_table->s->db.str, m_table->alias,
+ opt_op_name[CHECK_PARTS],
+ "Found %lld misplaced rows in partition %u",
+ num_misplaced_rows,
+ read_part_id);
+ }
+ }
+ /* End-of-file reached, all rows are now OK, reset result and break. */
+ result= 0;
+ break;
+ }
+
+ /* Recompute which partition the current row belongs in. */
+ result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
+ &func_value);
+ // TODO: Add code to delete rows not matching any partition.
+ if (result)
+ break;
+
+ if (correct_part_id != read_part_id)
+ {
+ num_misplaced_rows++;
+ /*
+ NOTE(review): m_err_rec is cleared before append_row_to_str below;
+ the helper presumably falls back to m_table->record[0] when given
+ NULL - confirm against append_row_to_str's implementation.
+ */
+ m_err_rec= NULL;
+ if (!repair)
+ {
+ /* Check. */
+ result= HA_ADMIN_NEEDS_UPGRADE;
+ char buf[MAX_KEY_LENGTH];
+ String str(buf,sizeof(buf),system_charset_info);
+ str.length(0);
+ append_row_to_str(str, m_err_rec, m_table);
+ print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
+ m_table->s->db.str, m_table->alias,
+ opt_op_name[CHECK_PARTS],
+ "Found a misplaced row"
+ " in part %d should be in part %d:\n%s",
+ read_part_id,
+ correct_part_id,
+ str.c_ptr_safe());
+ /* Break on first misplaced row, unless ignore is given! */
+ if (!ignore)
+ break;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Moving row from partition %d to %d",
+ read_part_id, correct_part_id));
+
+ /*
+ Insert row into correct partition. Notice that there are no commit
+ for every N row, so the repair will be one large transaction!
+ */
+ if ((result= write_row_in_part(correct_part_id, m_table->record[0])))
+ {
+ /*
+ We have failed to insert a row, it might have been a duplicate!
+ */
+ char buf[MAX_KEY_LENGTH];
+ String str(buf,sizeof(buf),system_charset_info);
+ str.length(0);
+ if (result == HA_ERR_FOUND_DUPP_KEY)
+ {
+ if (ignore)
+ {
+ str.append("Duplicate key found, deleting the record:\n");
+ num_deleted_rows++;
+ }
+ else
+ {
+ str.append("Duplicate key found, "
+ "please update or delete the record:\n");
+ result= HA_ADMIN_CORRUPT;
+ }
+ }
+ append_row_to_str(str, m_err_rec, m_table);
+
+ /*
+ If the engine supports transactions, the failure will be
+ rollbacked.
+ */
+ if (!m_handler->has_transactions() ||
+ ignore || result == HA_ADMIN_CORRUPT)
+ {
+ /* Log this error, so the DBA can notice it and fix it! */
+ sql_print_error("Table '%-192s' failed to move/insert a row"
+ " from part %d into part %d:\n%s",
+ m_table->s->table_name.str,
+ read_part_id,
+ correct_part_id,
+ str.c_ptr_safe());
+ }
+ print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
+ m_table->s->db.str, m_table->alias,
+ opt_op_name[REPAIR_PARTS],
+ "Failed to move/insert a row"
+ " from part %d into part %d:\n%s",
+ read_part_id,
+ correct_part_id,
+ str.c_ptr_safe());
+ /* Duplicate with IGNORE: fall through and delete the source row. */
+ if (!ignore || result != HA_ERR_FOUND_DUPP_KEY)
+ break;
+ }
+
+ /* Delete row from wrong partition. */
+ if ((result= delete_row_in_part(read_part_id, m_table->record[0])))
+ {
+ result= HA_ADMIN_CORRUPT;
+ if (m_handler->has_transactions())
+ break;
+ /*
+ We have introduced a duplicate, since we failed to remove it
+ from the wrong partition.
+ */
+ char buf[MAX_KEY_LENGTH];
+ String str(buf,sizeof(buf),system_charset_info);
+ str.length(0);
+ append_row_to_str(str, m_err_rec, m_table);
+
+ /* Log this error, so the DBA can notice it and fix it! */
+ sql_print_error("Table '%-192s': Delete from part %d failed with"
+ " error %d. But it was already inserted into"
+ " part %d, when moving the misplaced row!"
+ "\nPlease manually fix the duplicate row:\n%s",
+ m_table->s->table_name.str,
+ read_part_id,
+ result,
+ correct_part_id,
+ str.c_ptr_safe());
+ break;
+ }
+ }
+ }
+ }
+
+ /* Always end the scan; the scan error (if any) takes precedence. */
+ int tmp_result= rnd_end_in_part(read_part_id, true);
+ DBUG_RETURN(result ? result : tmp_result);
+}
+
+/**
+ Read next row during full partition scan (scan in random row order).
+
+ This function can evaluate the virtual generated columns. If virtual
+ generated columns are involved, you should not call rnd_next_in_part
+ directly but this one.
+
+ @param part_id Partition to read from.
+ @param[in,out] buf buffer that should be filled with data.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_next_in_part(uint part_id, uchar *buf)
+{
+  /* Delegate the read to the engine-specific per-partition scan. */
+  const int res= rnd_next_in_part(part_id, buf);
+
+#if 0
+  /* Disabled: refresh virtual generated columns after a successful read. */
+  if (!res && m_table->has_gcol())
+    return update_generated_read_fields(buf, m_table);
+#endif
+
+  return res;
+}
+
+
+/** Set used partitions bitmap from Alter_info.
+
+ @return false if success else true.
+*/
+
+bool Partition_helper::set_altered_partitions()
+{
+  /*
+    Restrict read_partitions to the partitions named in
+    ALTER TABLE t <cmd> PARTITION <partition list>.
+    For a full-table admin command (or ALL PARTITIONS) the bitmap
+    already covers every partition, so nothing needs to change.
+  */
+  Alter_info *alter_info= &get_thd()->lex->alter_info;
+  const bool admin_on_subset=
+    (alter_info->flags & Alter_info::ALTER_ADMIN_PARTITION) != 0 &&
+    (alter_info->flags & Alter_info::ALTER_ALL_PARTITION) == 0;
+
+  if (!admin_on_subset)
+    return false;
+
+  return m_part_info->set_read_partitions(&alter_info->partition_names);
+}
+
+#if 0
+/*
+ NOTE(review): this block is compiled out, presumably because the
+ Protocol methods used here (connection_alive/start_row/end_row) come
+ from the MySQL 5.7 API and are not available in this server - confirm
+ before re-enabling. check_misplaced_rows() calls print_admin_msg(), so
+ a definition must exist elsewhere while this one is disabled.
+*/
+/**
+ Print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE.
+
+ Modeled after mi_check_print_msg.
+
+ @param thd Thread context.
+ @param len Needed length for message buffer.
+ @param msg_type Message type.
+ @param db_name Database name.
+ @param table_name Table name.
+ @param op_name Operation name.
+ @param fmt Message (in printf format with additional arguments).
+
+ @return Operation status.
+ @retval false for success else true.
+*/
+
+bool Partition_helper::print_admin_msg(THD* thd,
+ uint len,
+ const char *msg_type,
+ const char *db_name,
+ const char *table_name,
+ const char *op_name,
+ const char *fmt,
+ ...)
+{
+ va_list args;
+ Protocol *protocol= thd->protocol;
+ uint length;
+ size_t msg_length;
+ char name[NAME_LEN*2+2];
+ char *msgbuf;
+ bool error= true;
+
+ if (!(msgbuf= (char*) my_malloc(len, MYF(0))))
+ return true;
+ va_start(args, fmt);
+ msg_length= my_vsnprintf(msgbuf, len, fmt, args);
+ va_end(args);
+ if (msg_length >= (len - 1))
+ goto err;
+ msgbuf[len - 1] = 0; // healthy paranoia
+
+ if (!thd->protocol->connection_alive())
+ {
+ sql_print_error("%s", msgbuf);
+ goto err;
+ }
+
+ length=(uint) (strxmov(name, db_name, ".", table_name,NullS) - name);
+ /*
+ TODO: switch from protocol to push_warning here. The main reason we didn't
+ it yet is parallel repair. Due to following trace:
+ mi_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.
+
+ Also we likely need to lock mutex here (in both cases with protocol and
+ push_warning).
+ */
+ DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name,
+ msg_type, msgbuf));
+ protocol->start_row();
+ protocol->store(name, length, system_charset_info);
+ protocol->store(op_name, system_charset_info);
+ protocol->store(msg_type, system_charset_info);
+ protocol->store(msgbuf, msg_length, system_charset_info);
+ if (protocol->end_row())
+ {
+ sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
+ msgbuf);
+ goto err;
+ }
+ error= false;
+err:
+ my_free(msgbuf);
+ return error;
+}
+#endif
+
+
+/**
+ Set table->read_set taking partitioning expressions into account.
+
+ Used from both the rnd_init and index_init paths; the function itself
+ takes no arguments.
+*/
+
+inline
+void Partition_helper::set_partition_read_set()
+{
+ /*
+ For operations that may need to change data, we may need to extend
+ read_set.
+ */
+ if (m_handler->get_lock_type() == F_WRLCK)
+ {
+ /*
+ If write_set contains any of the fields used in partition and
+ subpartition expression, we need to set all bits in read_set because
+ the row may need to be inserted in a different [sub]partition. In
+ other words update_row() can be converted into write_row(), which
+ requires a complete record.
+ */
+ if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
+ m_table->write_set))
+ {
+ bitmap_set_all(m_table->read_set);
+ }
+ else
+ {
+ /*
+ Some handlers only read fields as specified by the bitmap for the
+ read set. For partitioned handlers we always require that the
+ fields of the partition functions are read such that we can
+ calculate the partition id to place updated and deleted records.
+ */
+ bitmap_union(m_table->read_set, &m_part_info->full_part_field_set);
+ }
+ // Mark virtual generated columns writable
+ // (presumably because a read materializes their value into the record
+ // buffer - confirm against the vcol evaluation code)
+ for (Field **vf= m_table->vfield; vf && *vf; vf++)
+ {
+ if (bitmap_is_set(m_table->read_set, (*vf)->field_index))
+ bitmap_set_bit(m_table->write_set, (*vf)->field_index);
+ }
+ }
+}
+
+
+/****************************************************************************
+ MODULE full table scan
+****************************************************************************/
+
+/**
+ Initialize engine for random reads.
+
+ rnd_init() is called when the server wants the storage engine to do a
+ table scan or when the server wants to access data through rnd_pos.
+
+ When scan is used we will scan one handler partition at a time.
+ When preparing for rnd_pos we will initialize all handler partitions.
+ No extra cache handling is needed when scanning is not performed.
+
+ Before initializing we will call rnd_end to ensure that we clean up from
+ any previous incarnation of a table scan.
+
+ @param scan false for initialize for random reads through rnd_pos()
+ true for initialize for random scan through rnd_next().
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_init(bool scan)
+{
+ /*
+ m_scan_value encodes the init state: 1 = table scan (one partition at
+ a time), 0 = initialized for rnd_pos (all used partitions at once),
+ 2 = not initialized / init failed (set below on error or when the
+ partition set is empty).
+ */
+ int error;
+ uint i= 0;
+ uint part_id;
+ DBUG_ENTER("Partition_helper::ph_rnd_init");
+
+ set_partition_read_set();
+
+ /* Now we see what the index of our first important partition is */
+ DBUG_PRINT("info", ("m_part_info->read_partitions: 0x%lx",
+ (long) m_part_info->read_partitions.bitmap));
+ part_id= m_part_info->get_first_used_partition();
+ DBUG_PRINT("info", ("m_part_spec.start_part %d", part_id));
+
+ if (MY_BIT_NONE == part_id)
+ {
+ error= 0;
+ goto err1;
+ }
+
+ DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+ if (scan)
+ {
+ /* A scan can be restarted without rnd_end() in between! */
+ if (m_scan_value == 1 && m_part_spec.start_part != NOT_A_PARTITION_ID)
+ {
+ /* End previous scan on partition before restart. */
+ if ((error= rnd_end_in_part(m_part_spec.start_part, scan)))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+ m_scan_value= 1;
+ if ((error= rnd_init_in_part(part_id, scan)))
+ goto err;
+ }
+ else
+ {
+ m_scan_value= 0;
+ for (i= part_id;
+ i < MY_BIT_NONE;
+ i= m_part_info->get_next_used_partition(i))
+ {
+ if ((error= rnd_init_in_part(i, scan)))
+ goto err;
+ }
+ }
+ m_part_spec.start_part= part_id;
+ m_part_spec.end_part= m_tot_parts - 1;
+ DBUG_PRINT("info", ("m_scan_value=%d", m_scan_value));
+ DBUG_RETURN(0);
+
+err:
+ /* Call rnd_end for all previously initialized partitions. */
+ /*
+ NOTE(review): i is only advanced in the non-scan branch, so this loop
+ is a no-op when the single-partition (scan) init fails, as intended.
+ */
+ for (;
+ part_id < i;
+ part_id= m_part_info->get_next_used_partition(part_id))
+ {
+ rnd_end_in_part(part_id, scan);
+ }
+err1:
+ m_scan_value= 2;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ DBUG_RETURN(error);
+}
+
+
+/**
+ End of a table scan.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_end()
+{
+ int error= 0;
+ DBUG_ENTER("Partition_helper::ph_rnd_end");
+ /* m_scan_value was set by ph_rnd_init (1 = scan, 0 = rnd_pos, 2 = failed). */
+ switch (m_scan_value) {
+ case 3: // Already ended (set at the bottom of this function); double rnd_end
+ DBUG_ASSERT(0);
+ /* fall through. */
+ case 2: // rnd_init failed or had no used partitions; nothing to end
+ break;
+ case 1:
+ if (NO_CURRENT_PART_ID != m_part_spec.start_part) // Table scan
+ {
+ error= rnd_end_in_part(m_part_spec.start_part, true);
+ }
+ break;
+ case 0:
+ /* rnd_pos mode: every used partition was initialized; end them all,
+ keeping the first error but still closing the rest. */
+ uint i;
+ for (i= m_part_info->get_first_used_partition();
+ i < MY_BIT_NONE;
+ i= m_part_info->get_next_used_partition(i))
+ {
+ int part_error;
+ part_error= rnd_end_in_part(i, false);
+ if (part_error && !error) {
+ error= part_error;
+ }
+ }
+ break;
+ }
+ m_scan_value= 3;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Read next row during full table scan (scan in random row order).
+
+ This is called for each row of the table scan. When you run out of records
+ you should return HA_ERR_END_OF_FILE.
+ The Field structure for the table is the key to getting data into buf
+ in a manner that will allow the server to understand it.
+
+ @param[out] buf buffer that should be filled with data.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_next(uchar *buf)
+{
+ int result= HA_ERR_END_OF_FILE;
+ uint part_id= m_part_spec.start_part;
+ DBUG_ENTER("Partition_helper::ph_rnd_next");
+
+ if (NO_CURRENT_PART_ID == part_id)
+ {
+ /*
+ The original set of partitions to scan was empty and thus we report
+ the result here.
+ */
+ goto end;
+ }
+
+ DBUG_ASSERT(m_scan_value == 1);
+
+ /* Scan the current partition until exhausted, then move to the next one. */
+ while (TRUE)
+ {
+ result= rnd_next_in_part(part_id, buf);
+ if (!result)
+ {
+ m_last_part= part_id;
+ m_part_spec.start_part= part_id;
+ m_table->status= 0;
+ DBUG_RETURN(0);
+ }
+
+ /*
+ if we get here, then the current partition ha_rnd_next returned failure
+ */
+ if (result == HA_ERR_RECORD_DELETED)
+ continue; // Probably MyISAM
+
+ if (result != HA_ERR_END_OF_FILE)
+ goto end_dont_reset_start_part; // Return error
+
+ /* End current partition */
+ DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
+ if ((result= rnd_end_in_part(part_id, true)))
+ break;
+
+ /* Shift to next partition */
+ part_id= m_part_info->get_next_used_partition(part_id);
+ /* get_next_used_partition() yields MY_BIT_NONE (>= m_tot_parts) at end. */
+ if (part_id >= m_tot_parts)
+ {
+ result= HA_ERR_END_OF_FILE;
+ break;
+ }
+ m_last_part= part_id;
+ m_part_spec.start_part= part_id;
+ DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+ if ((result= rnd_init_in_part(part_id, true)))
+ break;
+ }
+
+end:
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+end_dont_reset_start_part:
+ /* start_part is left untouched on this path, presumably so a retried
+ rnd_next can resume in the same partition - confirm with callers. */
+ m_table->status= STATUS_NOT_FOUND;
+ DBUG_RETURN(result);
+}
+
+
+/**
+ Save position of current row.
+
+ position() is called after each call to rnd_next() if the data needs
+ to be ordered or accessed later.
+
+ The server uses ref to store data. ref_length in the above case is
+ the size needed to store current_position. ref is just a byte array
+ that the server will maintain. If you are using offsets to mark rows, then
+ current_position should be the offset. If it is a primary key like in
+ InnoDB, then it needs to be a primary key.
+
+ @param record Current record in MySQL Row Format.
+*/
+
+void Partition_helper::ph_position(const uchar *record)
+{
+ /* The stored ref layout is: 2 bytes partition id (int2store) followed by
+ the engine's own row position of length ref_length - 2. */
+ DBUG_ASSERT(m_part_info->is_partition_used(m_last_part));
+ DBUG_ENTER("Partition_helper::ph_position");
+ DBUG_PRINT("info", ("record: %p", record));
+ DBUG_DUMP("record", record, m_rec_length);
+
+ /*
+ If m_ref_usage is set, then the ref is already stored in the
+ priority queue (m_queue) when doing ordered scans.
+ */
+ if (m_ref_usage != REF_NOT_USED && m_ordered_scan_ongoing)
+ {
+ DBUG_ASSERT(!m_queue->empty());
+ DBUG_ASSERT(m_ordered_rec_buffer);
+ DBUG_ASSERT(!m_curr_key_info[1]);
+ DBUG_ASSERT(uint2korr(m_queue->top()) == m_last_part);
+ /* We already have the ref and part id. */
+ memcpy(m_handler->ref, m_queue->top(), m_handler->ref_length);
+ }
+ else
+ {
+ DBUG_PRINT("info", ("m_last_part: %u", m_last_part));
+ int2store(m_handler->ref, m_last_part);
+ position_in_last_part(m_handler->ref + PARTITION_BYTES_IN_POS, record);
+ }
+ DBUG_DUMP("ref_out", m_handler->ref, m_handler->ref_length);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Read row using position.
+
+ This is like rnd_next, but you are given a position to use to determine
+ the row. The position will be pointing to data of length handler::ref_length
+ that handler::ref was set by position(record). Tables clustered on primary
+ key usually use the full primary key as reference (like InnoDB). Heap based
+ tables usually returns offset in heap file (like MyISAM).
+
+ @param[out] buf buffer that should be filled with record in MySQL format.
+ @param[in] pos position given as handler::ref when position() was called.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_pos(uchar *buf, uchar *pos)
+{
+  DBUG_ENTER("Partition_helper::ph_rnd_pos");
+
+  /* The first PARTITION_BYTES_IN_POS bytes of the ref hold the part id. */
+  const uint part_id= uint2korr(pos);
+  DBUG_ASSERT(part_id < m_tot_parts);
+  DBUG_ASSERT(m_part_info->is_partition_used(part_id));
+  m_last_part= part_id;
+  /* The remainder of the ref is the engine-specific row position. */
+  DBUG_RETURN(rnd_pos_in_part(part_id, buf, pos + PARTITION_BYTES_IN_POS));
+}
+
+
+/**
+ Read row using position using given record to find.
+
+ This works as position()+rnd_pos() functions, but does some extra work,
+ calculating m_last_part - the partition to where the 'record' should go.
+
+ Only useful when position is based on primary key
+ (HA_PRIMARY_KEY_REQUIRED_FOR_POSITION).
+
+ @param record Current record in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_rnd_pos_by_record(uchar *record)
+{
+ DBUG_ENTER("Partition_helper::ph_rnd_pos_by_record");
+
+ DBUG_ASSERT(m_handler->ha_table_flags() &
+ HA_PRIMARY_KEY_REQUIRED_FOR_POSITION);
+ /* TODO: Support HA_READ_BEFORE_WRITE_REMOVAL */
+ /* Set m_last_part correctly. */
+ /* Derive the partition from the record's partitioning fields; any
+ failure here means the row maps to no partition. */
+ if (unlikely(get_part_for_delete(record,
+ m_table->record[0],
+ m_part_info,
+ &m_last_part)))
+ DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+
+ DBUG_RETURN(rnd_pos_by_record_in_last_part(record));
+}
+
+
+/****************************************************************************
+ MODULE index scan
+****************************************************************************/
+/*
+ Positions an index cursor to the index specified in the handle. Fetches the
+ row if available. If the key value is null, begin at the first key of the
+ index.
+
+ There are loads of optimizations possible here for the partition handler.
+ The same optimizations can also be checked for full table scan although
+ only through conditions and not from index ranges.
+ Phase one optimizations:
+ Check if the fields of the partition function are bound. If so only use
+ the single partition it becomes bound to.
+ Phase two optimizations:
+ If it can be deducted through range or list partitioning that only a
+ subset of the partitions are used, then only use those partitions.
+*/
+
+/**
+ Setup the ordered record buffer and the priority queue.
+
+ Call destroy_record_priority_queue() to deallocate or clean-up
+ from failure.
+
+ @return false on success, else true.
+*/
+
+int Partition_helper::init_record_priority_queue()
+{
+ uint used_parts= m_part_info->num_partitions_used();
+ DBUG_ENTER("Partition_helper::init_record_priority_queue");
+ DBUG_ASSERT(!m_ordered_rec_buffer);
+ DBUG_ASSERT(!m_queue);
+ /* Initialize the priority queue. */
+ // TODO: Create test to see the cost of allocating when needed vs
+ // allocate once and keep between statements. Also test on NUMA
+ // machines to see the difference (I guess that allocating when needed
+ // will allocate on 'correct' NUMA node and be faster.)
+ if (!m_queue)
+ {
+ m_queue= new (std::nothrow) Prio_queue(Key_rec_less(m_curr_key_info));
+ if (!m_queue)
+ {
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+ }
+ /* Initialize the ordered record buffer. */
+ if (!m_ordered_rec_buffer)
+ {
+ uint alloc_len;
+ /*
+ Allocate record buffer for each used partition.
+ If PK is clustered index, it is either the primary sort key or is
+ added as secondary sort. So we only need to allocate for part id
+ and a full record per partition.
+ Otherwise if the clustered index was generated, we might need to
+ do a secondary sort by rowid (handler::ref) and must allocate for
+ ref (includes part id) and full record per partition. We don't
+ know yet if we need to do secondary sort by rowid, so we must
+ allocate space for it.
+ TODO: enhance ha_index_init() for HA_EXTRA_SECONDARY_SORT_ROWID to
+ avoid allocating space for handler::ref when not needed.
+ When enhancing ha_index_init() care must be taken on ph_position(),
+ so InnoDB's row_id is correctly handled (taken from m_last_part).
+ */
+ if (m_pkey_is_clustered && m_table->s->primary_key != MAX_KEY)
+ {
+ m_rec_offset= PARTITION_BYTES_IN_POS;
+ m_ref_usage= REF_NOT_USED;
+ }
+ else
+ {
+ m_rec_offset= m_handler->ref_length;
+ m_ref_usage= REF_STORED_IN_PQ;
+ }
+ alloc_len= used_parts * (m_rec_offset + m_rec_length);
+ /* Allocate a key for temporary use when setting up the scan. */
+ alloc_len+= m_table->s->max_key_length;
+
+ m_ordered_rec_buffer= static_cast<uchar*>(
+ my_malloc(alloc_len,
+ MYF(MY_WME)));
+ if (!m_ordered_rec_buffer)
+ {
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+
+ /*
+ We set-up one record per partition and each record has 2 bytes in
+ front where the partition id is written. This is used by ordered
+ index_read.
+ If we need to also sort by rowid (handler::ref), then m_curr_key_info[1]
+ is NULL and we add the rowid before the record.
+ We also set-up a reference to the first record for temporary use in
+ setting up the scan.
+ */
+ char *ptr= (char*) m_ordered_rec_buffer;
+ uint i;
+ for (i= m_part_info->get_first_used_partition();
+ i < MY_BIT_NONE;
+ i= m_part_info->get_next_used_partition(i))
+ {
+ DBUG_PRINT("info", ("init rec-buf for part %u", i));
+ int2store(ptr, i);
+ ptr+= m_rec_offset + m_rec_length;
+ }
+ /* ptr now points at the trailing max_key_length area reserved above;
+ it serves as the temporary start-key buffer for scan set-up. */
+ m_start_key.key= (const uchar*)ptr;
+ /*
+ Initialize priority queue, initialized to reading forward.
+ Start by only sort by KEY, HA_EXTRA_SECONDARY_SORT_ROWID
+ will be given if we should sort by handler::ref too.
+ */
+ m_queue->m_rec_offset= m_rec_offset;
+ if (m_queue->reserve(used_parts))
+ {
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+ }
+ DBUG_RETURN(init_record_priority_queue_for_parts(used_parts));
+}
+
+
+/**
+ Destroy the ordered record buffer and the priority queue.
+*/
+
+void Partition_helper::destroy_record_priority_queue()
+{
+  DBUG_ENTER("Partition_helper::destroy_record_priority_queue");
+  /* Release the priority queue used for ordered index scans. */
+  if (m_queue != NULL)
+  {
+    m_queue->clear();
+    delete m_queue;
+    m_queue= NULL;
+  }
+  /* Release the per-partition ordered record buffer. */
+  if (m_ordered_rec_buffer != NULL)
+  {
+    my_free(m_ordered_rec_buffer);
+    m_ordered_rec_buffer= NULL;
+  }
+  /* Reset ordered-scan state so a later init starts from scratch. */
+  m_ref_usage= REF_NOT_USED;
+  m_ordered_scan_ongoing= false;
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+ Common setup for index_init.
+
+ Set up variables and initialize the record priority queue.
+
+ @param inx Index to be used.
+ @param sorted True if the rows must be returned in index order.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_init_setup(uint inx, bool sorted)
+{
+  /* Fixed malformed trace tag ("Partition_helper:ph_:index_init_setup"). */
+  DBUG_ENTER("Partition_helper::ph_index_init_setup");
+
+  DBUG_ASSERT(inx != MAX_KEY);
+  DBUG_PRINT("info", ("inx %u sorted %u", inx, sorted));
+  /* Reset scan state for the new index scan. */
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  m_start_key.length= 0;
+  m_ordered= sorted;
+  m_ref_usage= REF_NOT_USED;
+  m_curr_key_info[0]= m_table->key_info+inx;
+  m_curr_key_info[1]= NULL;
+  /*
+    There are two cases where it is not enough to only sort on the key:
+    1) For clustered indexes, the optimizer assumes that all keys
+       have the rest of the PK columns appended to the KEY, so it will
+       sort by PK as secondary sort key.
+    2) Rowid-Order-Retrieval access methods, like index_merge_intersect
+       and index_merge_union. These methods requires the index to be sorted
+       on rowid (handler::ref) as secondary sort key.
+  */
+  if (m_pkey_is_clustered && m_table->s->primary_key != MAX_KEY &&
+      inx != m_table->s->primary_key)
+  {
+    /*
+      if PK is clustered, then the key cmp must use the pk to
+      differentiate between equal key in given index.
+    */
+    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
+    m_curr_key_info[1]= m_table->key_info+m_table->s->primary_key;
+  }
+
+  /*
+    Some handlers only read fields as specified by the bitmap for the
+    read set. For partitioned handlers we always require that the
+    fields of the partition functions are read such that we can
+    calculate the partition id to place updated and deleted records.
+  */
+  if (m_handler->get_lock_type() == F_WRLCK)
+    bitmap_union(m_table->read_set, &m_part_info->full_part_field_set);
+
+  DBUG_RETURN(0);
+}
+
+
+/**
+ Initialize handler before start of index scan.
+
+ index_init is always called before starting index scans (except when
+ starting through index_read_idx and using read_range variants).
+
+ @param inx Index number.
+ @param sorted Is rows to be returned in sorted order.
+
+ @return Operation status
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_init(uint inx, bool sorted)
+{
+ int error;
+ uint part_id= m_part_info->get_first_used_partition();
+ DBUG_ENTER("Partition_helper::ph_index_init");
+ m_handler->active_index= inx;
+
+ /* Nothing to initialize when no partition is used. */
+ if (part_id == MY_BIT_NONE)
+ {
+ DBUG_RETURN(0);
+ }
+
+ if ((error= ph_index_init_setup(inx, sorted)))
+ {
+ DBUG_RETURN(error);
+ }
+ if ((error= init_record_priority_queue()))
+ {
+ destroy_record_priority_queue();
+ DBUG_RETURN(error);
+ }
+
+ for (/* part_id already set. */;
+ part_id < MY_BIT_NONE;
+ part_id= m_part_info->get_next_used_partition(part_id))
+ {
+ if ((error= index_init_in_part(part_id, inx, sorted)))
+ goto err;
+
+ DBUG_EXECUTE_IF("partition_fail_index_init", {
+ part_id++;
+ error= HA_ERR_NO_PARTITION_FOUND;
+ goto err;
+ });
+ }
+ /* On success we fall through with error == 0; cleanup only runs on failure. */
+err:
+ if (error)
+ {
+ /* End the previously initialized indexes. */
+ uint j;
+ for (j= m_part_info->get_first_used_partition();
+ j < part_id;
+ j= m_part_info->get_next_used_partition(j))
+ {
+ (void) index_end_in_part(j);
+ }
+ destroy_record_priority_queue();
+ }
+ DBUG_RETURN(error);
+}
+
+
+/**
+ End of index scan.
+
+ index_end is called at the end of an index scan to clean up any
+ things needed to clean up.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_end()
+{
+  int last_error= 0;
+  DBUG_ENTER("Partition_helper::ph_index_end");
+
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  m_ref_usage= REF_NOT_USED;
+  /*
+    End the index scan in every used partition; remember a failure but
+    keep closing the remaining partitions anyway.
+  */
+  for (uint part= m_part_info->get_first_used_partition();
+       part < MY_BIT_NONE;
+       part= m_part_info->get_next_used_partition(part))
+  {
+    const int part_error= index_end_in_part(part);
+    if (part_error)
+      last_error= part_error;
+  }
+  destroy_record_priority_queue();
+  m_handler->active_index= MAX_KEY;
+  DBUG_RETURN(last_error);
+}
+
+
+/**
+ Read one record in an index scan and start an index scan.
+
+ index_read_map starts a new index scan using a start key. The MySQL Server
+ will check the end key on its own. Thus to function properly the
+ partitioned handler need to ensure that it delivers records in the sort
+ order of the MySQL Server.
+ index_read_map can be restarted without calling index_end on the previous
+ index scan and without calling index_init. In this case the index_read_map
+ is on the same index as the previous index_scan. This is particularly
+ used in conjunction with multi read ranges.
+
+ @param[out] buf Read row in MySQL Row Format
+ @param[in] key Key parts in consecutive order
+ @param[in] keypart_map Which part of key is used
+ @param[in] find_flag What type of key condition is used
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_read_map(uchar *buf,
+                                        const uchar *key,
+                                        key_part_map keypart_map,
+                                        enum ha_rkey_function find_flag)
+{
+  /* Fixed trace tag: it wrongly said Partition_handler:: (wrong class). */
+  DBUG_ENTER("Partition_helper::ph_index_read_map");
+  /* A fresh key lookup invalidates any previously set range end. */
+  m_handler->end_range= NULL;
+  m_index_scan_type= PARTITION_INDEX_READ;
+  /* Stash the search key; common_index_read() computes its length. */
+  m_start_key.key= key;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.flag= find_flag;
+  DBUG_RETURN(common_index_read(buf, true));
+}
+
+
+/**
+ Common routine for a number of index_read variants.
+
+ @param[out] buf Buffer where the record should be returned.
+ @param[in] have_start_key TRUE <=> the left endpoint is available, i.e.
+ we're in index_read call or in read_range_first
+ call and the range has left endpoint.
+ FALSE <=> there is no left endpoint (we're in
+ read_range_first() call and the range has no left
+ endpoint).
+
+ @return Operation status
+ @retval 0 OK
+ @retval HA_ERR_END_OF_FILE Whole index scanned, without finding the record.
+ @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
+ @retval other Error code.
+
+ @details
+ Start scanning the range (when invoked from read_range_first()) or doing
+ an index lookup (when invoked from index_read_XXX):
+ - If possible, perform partition selection
+ - Find the set of partitions we're going to use
+ - Depending on whether we need ordering:
+ NO: Get the first record from first used partition (see
+ handle_unordered_scan_next_partition)
+ YES: Fill the priority queue and get the record that is the first in
+ the ordering
+*/
+
+int Partition_helper::common_index_read(uchar *buf, bool have_start_key)
+{
+ int error;
+ m_reverse_order= false;
+ DBUG_ENTER("Partition_helper::common_index_read");
+
+ DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
+ m_ordered, m_ordered_scan_ongoing));
+
+ if (have_start_key)
+ {
+ /* Compute the used key length from the keypart map. */
+ m_start_key.length= calculate_key_len(m_table,
+ m_handler->active_index,
+ NULL,
+ m_start_key.keypart_map);
+ DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
+ m_start_key.keypart_map, m_start_key.flag,
+ m_start_key.length));
+ DBUG_ASSERT(m_start_key.length);
+ }
+ /* Prune and select the set of partitions to scan. */
+ if ((error= partition_scan_set_up(buf, have_start_key)))
+ {
+ DBUG_RETURN(error);
+ }
+
+ /* Backwards-searching flags force an ordered (and reversed) merge scan. */
+ if (have_start_key &&
+ (m_start_key.flag == HA_READ_KEY_OR_PREV ||
+ m_start_key.flag == HA_READ_PREFIX_LAST ||
+ m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ m_start_key.flag == HA_READ_BEFORE_KEY))
+ {
+ m_reverse_order= true;
+ m_ordered_scan_ongoing= true;
+ }
+ DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
+ m_ordered, m_ordered_scan_ongoing, have_start_key));
+ if (!m_ordered_scan_ongoing)
+ {
+ /*
+ We use unordered index scan when read_range is used and flag
+ is set to not use ordered.
+ We also use an unordered index scan when the number of partitions to
+ scan is only one.
+ The unordered index scan will use the partition set created.
+ */
+ DBUG_PRINT("info", ("doing unordered scan"));
+ error= handle_unordered_scan_next_partition(buf);
+ }
+ else
+ {
+ /*
+ In all other cases we will use the ordered index scan. This will use
+ the partition set created by the get_partition_set method.
+ */
+ error= handle_ordered_index_scan(buf);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Start an index scan from leftmost record and return first record.
+
+ index_first() asks for the first key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the leftmost entry and proceeds forward with
+ index_next.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_first(uchar *buf)
+{
+  DBUG_ENTER("Partition_helper::ph_index_first");
+
+  /* Full forward scan from the leftmost entry: no range end, no reverse. */
+  m_handler->end_range= NULL;
+  m_reverse_order= false;
+  m_index_scan_type= PARTITION_INDEX_FIRST;
+  DBUG_RETURN(common_first_last(buf));
+}
+
+
+/**
+ Start an index scan from rightmost record and return first record.
+
+ index_last() asks for the last key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the rightmost entry and proceeds forward with
+ index_prev.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_last(uchar *buf)
+{
+  DBUG_ENTER("Partition_helper::ph_index_last");
+
+  /* Start at the rightmost entry; rows are merged in reverse index order. */
+  m_reverse_order= true;
+  m_index_scan_type= PARTITION_INDEX_LAST;
+  DBUG_RETURN(common_first_last(buf));
+}
+
+
+/**
+ Common routine for index_first/index_last.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::common_first_last(uchar *buf)
+{
+  int setup_error;
+  DBUG_ENTER("Partition_helper::common_first_last");
+
+  if ((setup_error= partition_scan_set_up(buf, false)))
+    DBUG_RETURN(setup_error);
+  /*
+    index_last must always go through the ordered (merge) path; the
+    unordered path only walks partitions front-to-back with index_first.
+  */
+  if (m_ordered_scan_ongoing ||
+      m_index_scan_type == PARTITION_INDEX_LAST)
+  {
+    DBUG_RETURN(handle_ordered_index_scan(buf));
+  }
+  DBUG_RETURN(handle_unordered_scan_next_partition(buf));
+}
+
+
+/**
+ Read last using key.
+
+ This is used in join_read_last_key to optimize away an ORDER BY.
+ Can only be used on indexes supporting HA_READ_ORDER.
+
+ @param[out] buf Read row in MySQL Row Format
+ @param[in] key Key
+ @param[in] keypart_map Which part of key is used
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_read_last_map(uchar *buf,
+                                             const uchar *key,
+                                             key_part_map keypart_map)
+{
+  DBUG_ENTER("Partition_helper::ph_index_read_last_map");
+
+  /* Ordering is the whole point of this call, so force the merge path. */
+  m_ordered= true; // Safety measure
+  m_index_scan_type= PARTITION_INDEX_READ_LAST;
+  m_handler->end_range= NULL;
+  /* Position on the last row matching the given key prefix. */
+  m_start_key.flag= HA_READ_PREFIX_LAST;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.key= key;
+  DBUG_RETURN(common_index_read(buf, true));
+}
+
+
+/**
+ Read index by key and keymap.
+
+ Positions an index cursor to the index specified.
+ Fetches the row if available. If the key value is null,
+ begin at first key of the index.
+
+ Optimization of the default implementation to take advantage of dynamic
+ partition pruning.
+
+ @param[out] buf Read row in MySQL Row Format
+ @param[in] index Index to read from
+ @param[in] key Key
+ @param[in] keypart_map Which part of key is used
+ @param[in] find_flag Direction/how to search.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+int Partition_helper::ph_index_read_idx_map(uchar *buf,
+                                            uint index,
+                                            const uchar *key,
+                                            key_part_map keypart_map,
+                                            enum ha_rkey_function find_flag)
+{
+  int error= HA_ERR_KEY_NOT_FOUND;
+  DBUG_ENTER("Partition_helper::ph_index_read_idx_map");
+
+  if (find_flag == HA_READ_KEY_EXACT)
+  {
+    uint part;
+    m_start_key.key= key;
+    m_start_key.keypart_map= keypart_map;
+    m_start_key.flag= find_flag;
+    m_start_key.length= calculate_key_len(m_table,
+                                          index,
+                                          NULL,
+                                          m_start_key.keypart_map);
+
+    /* Prune the scan down to the partitions that can hold this exact key. */
+    get_partition_set(m_table, buf, index, &m_start_key, &m_part_spec);
+
+    /*
+      We have either found exactly 1 partition
+      (in which case start_part == end_part)
+      or no matching partitions (start_part > end_part)
+    */
+    DBUG_ASSERT(m_part_spec.start_part >= m_part_spec.end_part);
+    /* The start part is must be marked as used. */
+    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
+                m_part_info->is_partition_used(m_part_spec.start_part));
+
+    /* Stop on the first row found, or on any error other than "no row". */
+    for (part= m_part_spec.start_part;
+         part <= m_part_spec.end_part;
+         part= m_part_info->get_next_used_partition(part))
+    {
+      error= index_read_idx_map_in_part(part,
+                                        buf,
+                                        index,
+                                        key,
+                                        keypart_map,
+                                        find_flag);
+      if (error != HA_ERR_KEY_NOT_FOUND &&
+          error != HA_ERR_END_OF_FILE)
+      {
+        break;
+      }
+    }
+    /* Loop left early: remember which partition the row/error came from. */
+    if (part <= m_part_spec.end_part)
+    {
+      m_last_part= part;
+    }
+  }
+  else
+  {
+    /*
+      If not only used with HA_READ_KEY_EXACT, we should investigate if
+      possible to optimize for other find_flag's as well.
+    */
+    DBUG_ASSERT(0);
+    error= HA_ERR_INTERNAL_ERROR;
+  }
+  DBUG_RETURN(error);
+}
+
+
+/**
+ Read next record in a forward index scan.
+
+ Used to read forward through the index (left to right, low to high).
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_next(uchar *buf)
+{
+  DBUG_ENTER("Partition_helper::ph_index_next");
+
+  /*
+    TODO(low priority):
+    If we want partition to work with the HANDLER commands, we
+    must be able to do index_last() -> index_prev() -> index_next()
+    and if direction changes, we must step back those partitions in
+    the record queue so we don't return a value from the wrong direction.
+  */
+  DBUG_ASSERT(m_index_scan_type != PARTITION_INDEX_LAST ||
+              m_table->open_by_handler);
+  /* Dispatch to the merge-ordered or plain per-partition continuation. */
+  if (m_ordered_scan_ongoing)
+    DBUG_RETURN(handle_ordered_next(buf, false));
+  DBUG_RETURN(handle_unordered_next(buf, false));
+}
+
+
+/**
+ Read next same record.
+
+ This routine is used to read the next but only if the key is the same
+ as supplied in the call.
+
+ @param[out] buf Read row in MySQL Row Format.
+ @param[in] key Key.
+ @param[in] keylen Length of key.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_next_same(uchar *buf, const uchar *key, uint keylen)
+{
+  DBUG_ENTER("Partition_helper::ph_index_next_same");
+
+  /* The key must be the one the scan was positioned with. */
+  DBUG_ASSERT(keylen == m_start_key.length);
+  DBUG_ASSERT(m_index_scan_type != PARTITION_INDEX_LAST);
+  if (m_ordered_scan_ongoing)
+    DBUG_RETURN(handle_ordered_next(buf, true));
+  DBUG_RETURN(handle_unordered_next(buf, true));
+}
+
+
+/**
+ Read next record when performing index scan backwards.
+
+ Used to read backwards through the index (right to left, high to low).
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_index_prev(uchar *buf)
+{
+  DBUG_ENTER("Partition_helper::ph_index_prev");
+
+  /* TODO: read comment in index_next */
+  /*
+    Changing direction after a forward-positioned scan is only tolerated
+    for SQL HANDLER; ordinary scans must keep a single direction.
+  */
+  DBUG_ASSERT(m_index_scan_type != PARTITION_INDEX_FIRST ||
+              m_table->open_by_handler);
+  /* Backward reads always use the ordered (merge) path. */
+  DBUG_RETURN(handle_ordered_prev(buf));
+}
+
+
+/**
+ Start a read of one range with start and end key.
+
+ We re-implement read_range_first since we don't want the compare_key
+ check at the end. This is already performed in the partition handler.
+ read_range_next is very much different due to that we need to scan
+ all underlying handlers.
+
+ @param start_key Specification of start key.
+ @param end_key Specification of end key.
+ @param eq_range_arg Is it equal range.
+ @param sorted Should records be returned in sorted order.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_read_range_first(const key_range *start_key,
+                                          const key_range *end_key,
+                                          bool eq_range_arg,
+                                          bool sorted)
+{
+  const bool with_start_key= (start_key != NULL);
+  DBUG_ENTER("Partition_helper::ph_read_range_first");
+
+  /* Bail out early if pruning left no partition to read from. */
+  if (m_part_info->get_first_used_partition() == MY_BIT_NONE)
+  {
+    m_table->status= STATUS_NOT_FOUND;
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+
+  /* Record the range parameters for the upcoming scan. */
+  m_ordered= sorted;
+  set_eq_range(eq_range_arg);
+  m_handler->set_end_range(end_key);
+  set_range_key_part(m_curr_key_info[0]->key_part);
+
+  if (with_start_key)
+    m_start_key= *start_key;
+  else
+    m_start_key.key= NULL;
+
+  m_index_scan_type= PARTITION_READ_RANGE;
+  DBUG_RETURN(common_index_read(m_table->record[0], with_start_key));
+}
+
+
+/**
+ Read next record in read of a range with start and end key.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+*/
+
+int Partition_helper::ph_read_range_next()
+{
+  DBUG_ENTER("Partition_helper::ph_read_range_next");
+
+  /* Range scans always deliver rows into table->record[0]. */
+  if (!m_ordered_scan_ongoing)
+  {
+    DBUG_RETURN(handle_unordered_next(m_table->record[0], get_eq_range()));
+  }
+  DBUG_RETURN(handle_ordered_next(m_table->record[0], get_eq_range()));
+}
+
+
+/**
+ Common routine to set up index scans.
+
+ Find out which partitions we'll need to read when scanning the specified
+ range.
+
+ If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+ as we will not need to do merge ordering.
+
+ @param buf Buffer to later return record in (this function
+ needs it to calculate partitioning function values)
+
+ @param idx_read_flag TRUE <=> m_start_key has range start endpoint which
+ probably can be used to determine the set of
+ partitions to scan.
+ FALSE <=> there is no start endpoint.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval !=0 Error code
+*/
+
+int Partition_helper::partition_scan_set_up(uchar * buf, bool idx_read_flag)
+{
+  DBUG_ENTER("Partition_helper::partition_scan_set_up");
+
+  if (idx_read_flag)
+    get_partition_set(m_table,
+                      buf,
+                      m_handler->active_index,
+                      &m_start_key,
+                      &m_part_spec);
+  else
+  {
+    // TODO: set to get_first_used_part() instead!
+    m_part_spec.start_part= 0;
+    // TODO: Implement bitmap_get_last_set() and use that here!
+    m_part_spec.end_part= m_tot_parts - 1;
+  }
+  if (m_part_spec.start_part > m_part_spec.end_part)
+  {
+    /*
+      We discovered a partition set but the set was empty so we report
+      key not found.
+    */
+    DBUG_PRINT("info", ("scan with no partition to scan"));
+    m_table->status= STATUS_NOT_FOUND;
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+  if (m_part_spec.start_part == m_part_spec.end_part)
+  {
+    /*
+      We discovered a single partition to scan, this never needs to be
+      performed using the ordered index scan.
+    */
+    /* Partition ids are unsigned; %u avoids a printf conversion mismatch. */
+    DBUG_PRINT("info", ("index scan using the single partition %u",
+                        m_part_spec.start_part));
+    m_ordered_scan_ongoing= FALSE;
+  }
+  else
+  {
+    /*
+      Set m_ordered_scan_ongoing according how the scan should be done
+      Only exact partitions are discovered atm by get_partition_set.
+      Verify this, also bitmap must have at least one bit set otherwise
+      the result from this table is the empty set.
+    */
+    uint start_part= m_part_info->get_first_used_partition();
+    if (start_part == MY_BIT_NONE)
+    {
+      DBUG_PRINT("info", ("scan with no partition to scan"));
+      m_table->status= STATUS_NOT_FOUND;
+      DBUG_RETURN(HA_ERR_END_OF_FILE);
+    }
+    /* Dynamic pruning may have moved the first used partition forward. */
+    if (start_part > m_part_spec.start_part)
+      m_part_spec.start_part= start_part;
+    m_ordered_scan_ongoing= m_ordered;
+  }
+  DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
+  DBUG_ASSERT(m_part_spec.end_part < m_tot_parts);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Common routine to handle index_next with unordered results.
+
+  These routines are used to scan partitions without considering order.
+  This is performed in two situations.
+  1) In read_multi_range this is the normal case
+  2) When performing any type of index_read, index_first, index_last where
+  all fields in the partition function is bound. In this case the index
+  scan is performed on only one partition and thus it isn't necessary to
+  perform any sort.
+
+  @param[out] buf           Read row in MySQL Row Format.
+  @param[in]  is_next_same  Called from index_next_same.
+
+  @return Operation status.
+  @retval HA_ERR_END_OF_FILE  End of scan
+  @retval 0                   Success
+  @retval other               Error code
+*/
+
+int Partition_helper::handle_unordered_next(uchar *buf, bool is_next_same)
+{
+  int error;
+  DBUG_ENTER("Partition_helper::handle_unordered_next");
+
+  if (m_part_spec.start_part >= m_tot_parts)
+  {
+    /* Should only happen with SQL HANDLER! */
+    DBUG_ASSERT(m_table->open_by_handler);
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+
+  /*
+    We should consider if this should be split into three functions as
+    partition_read_range and is_next_same are always local constants
+  */
+
+  /* Continue the scan within the current partition (start_part). */
+  if (m_index_scan_type == PARTITION_READ_RANGE)
+  {
+    DBUG_ASSERT(buf == m_table->record[0]);
+    error= read_range_next_in_part(m_part_spec.start_part, NULL);
+  }
+  else if (is_next_same)
+  {
+    error= index_next_same_in_part(m_part_spec.start_part,
+                                   buf,
+                                   m_start_key.key,
+                                   m_start_key.length);
+  }
+  else
+  {
+    error= index_next_in_part(m_part_spec.start_part, buf);
+  }
+
+  if (error == HA_ERR_END_OF_FILE)
+  {
+    /* Current partition exhausted; restart in the next used partition. */
+    m_part_spec.start_part++; // Start using next part
+    error= handle_unordered_scan_next_partition(buf);
+  }
+  else
+  {
+    m_last_part= m_part_spec.start_part;
+  }
+  DBUG_RETURN(error);
+}
+
+
+/**
+  Handle index_next when changing to new partition.
+
+  This routine is used to start the index scan on the next partition.
+  Both initial start and after completing scan on one partition.
+
+  @param[out] buf  Read row in MySQL Row Format
+
+  @return Operation status.
+  @retval HA_ERR_END_OF_FILE  End of scan
+  @retval 0                   Success
+  @retval other               Error code
+*/
+
+int Partition_helper::handle_unordered_scan_next_partition(uchar * buf)
+{
+  uint i= m_part_spec.start_part;
+  int saved_error= HA_ERR_END_OF_FILE;
+  DBUG_ENTER("Partition_helper::handle_unordered_scan_next_partition");
+
+  /* Align i to the first used (non-pruned) partition >= start_part. */
+  if (i)
+    i= m_part_info->get_next_used_partition(i - 1);
+  else
+    i= m_part_info->get_first_used_partition();
+
+  /*
+    Partition ids are unsigned, so all DBUG_PRINT conversions below use %u
+    (the previous %d was a printf conversion mismatch).
+  */
+  for (;
+       i <= m_part_spec.end_part;
+       i= m_part_info->get_next_used_partition(i))
+  {
+    int error;
+    m_part_spec.start_part= i;
+    switch (m_index_scan_type) {
+    case PARTITION_READ_RANGE:
+      DBUG_ASSERT(buf == m_table->record[0]);
+      DBUG_PRINT("info", ("read_range_first on partition %u", i));
+      error= read_range_first_in_part(i,
+                                      NULL,
+                                      m_start_key.key? &m_start_key: NULL,
+                                      m_handler->end_range,
+                                      get_eq_range(),
+                                      false);
+      break;
+    case PARTITION_INDEX_READ:
+      DBUG_PRINT("info", ("index_read on partition %u", i));
+      error= index_read_map_in_part(i,
+                                    buf,
+                                    m_start_key.key,
+                                    m_start_key.keypart_map,
+                                    m_start_key.flag);
+      break;
+    case PARTITION_INDEX_FIRST:
+      DBUG_PRINT("info", ("index_first on partition %u", i));
+      error= index_first_in_part(i, buf);
+      break;
+    case PARTITION_INDEX_FIRST_UNORDERED:
+      /* When is this ever used? */
+      DBUG_ASSERT(0);
+      /*
+        We perform a scan without sorting and this means that we
+        should not use the index_first since not all handlers
+        support it and it is also unnecessary to restrict sort
+        order.
+      */
+      DBUG_PRINT("info", ("read_range_first on partition %u", i));
+      DBUG_ASSERT(buf == m_table->record[0]);
+      error= read_range_first_in_part(i,
+                                      NULL,
+                                      0,
+                                      m_handler->end_range,
+                                      get_eq_range(),
+                                      0);
+      break;
+    default:
+      DBUG_ASSERT(0);
+      DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+    }
+    if (!error)
+    {
+      m_last_part= i;
+      DBUG_RETURN(0);
+    }
+    if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
+      DBUG_RETURN(error);
+
+    /*
+      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
+      HA_ERR_END_OF_FILE, to be able to continue search.
+    */
+    if (saved_error != HA_ERR_KEY_NOT_FOUND)
+      saved_error= error;
+    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %u", i));
+  }
+  if (saved_error == HA_ERR_END_OF_FILE)
+    m_part_spec.start_part= NO_CURRENT_PART_ID;
+  DBUG_RETURN(saved_error);
+}
+
+
+/**
+ Common routine to start index scan with ordered results.
+
+ @param[out] buf Read row in MySQL Row Format
+
+ @return Operation status
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval HA_ERR_KEY_NOT_FOUND End of scan
+ @retval 0 Success
+ @retval other Error code
+
+ @details
+ This part contains the logic to handle index scans that require ordered
+ output. This includes all except those started by read_range_first with
+ the flag ordered set to FALSE. Thus most direct index_read and all
+ index_first and index_last.
+
+ We implement ordering by keeping one record plus a key buffer for each
+ partition. Every time a new entry is requested we will fetch a new
+ entry from the partition that is currently not filled with an entry.
+ Then the entry is put into its proper sort position.
+
+ Returning a record is done by getting the top record, copying the
+ record to the request buffer and setting the partition as empty on
+ entries.
+*/
+
+int Partition_helper::handle_ordered_index_scan(uchar *buf)
+{
+  uint i;
+  std::vector<uchar*> parts;
+  bool found= FALSE;
+  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
+  int saved_error= HA_ERR_END_OF_FILE;
+  DBUG_ENTER("Partition_helper::handle_ordered_index_scan");
+  DBUG_ASSERT(part_rec_buf_ptr);
+
+  /* Fresh scan: forget partitions that missed the key on a previous scan. */
+  if (m_key_not_found)
+  {
+    m_key_not_found= false;
+    bitmap_clear_all(&m_key_not_found_partitions);
+    DBUG_PRINT("info", ("Cleared m_key_not_found_partitions"));
+  }
+  m_top_entry= NO_CURRENT_PART_ID;
+  m_queue->clear();
+  parts.reserve(m_queue->capacity());
+  DBUG_ASSERT(m_part_info->is_partition_used(m_part_spec.start_part));
+
+  /*
+    Position part_rec_buf_ptr to point to the first used partition >=
+    start_part. There may be partitions marked by used_partitions,
+    but is before start_part. These partitions has allocated record buffers
+    but is dynamically pruned, so those buffers must be skipped.
+  */
+  for (i= m_part_info->get_first_used_partition();
+       i < m_part_spec.start_part;
+       i= m_part_info->get_next_used_partition(i))
+  {
+    /* Each slot is a fixed header (m_rec_offset bytes) plus the record. */
+    part_rec_buf_ptr+= m_rec_offset + m_rec_length;
+  }
+  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
+                      m_part_spec.start_part, i));
+  for (/* continue from above */ ;
+       i <= m_part_spec.end_part;
+       i= m_part_info->get_next_used_partition(i))
+  {
+    DBUG_PRINT("info", ("reading from part %u (scan_type: %u inx: %u)",
+                        i, m_index_scan_type, m_handler->active_index));
+    /* The slot header starts with the 2-byte partition id. */
+    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
+    uchar *rec_buf_ptr= part_rec_buf_ptr + m_rec_offset;
+    uchar *read_buf;
+    int error;
+    DBUG_PRINT("info", ("part %u, scan_type %d", i, m_index_scan_type));
+
+    /* ICP relies on Item evaluation, which expects the row in record[0]. */
+    if (m_handler->pushed_idx_cond)
+      read_buf= m_table->record[0];
+    else
+      read_buf= rec_buf_ptr;
+
+    /* Fetch one candidate row from this partition per the scan type. */
+    switch (m_index_scan_type) {
+    case PARTITION_INDEX_READ:
+      error= index_read_map_in_part(i,
+                                    read_buf,
+                                    m_start_key.key,
+                                    m_start_key.keypart_map,
+                                    m_start_key.flag);
+      break;
+    case PARTITION_INDEX_FIRST:
+      error= index_first_in_part(i, read_buf);
+      break;
+    case PARTITION_INDEX_LAST:
+      error= index_last_in_part(i, read_buf);
+      break;
+    case PARTITION_INDEX_READ_LAST:
+      error= index_read_last_map_in_part(i,
+                                         read_buf,
+                                         m_start_key.key,
+                                         m_start_key.keypart_map);
+      break;
+    case PARTITION_READ_RANGE:
+    {
+      /*
+        To enable optimization in derived engines, we provide a read buffer
+        pointer if we want to read into something different than table->record[0]
+        (which read_range_* always uses).
+      */
+      error= read_range_first_in_part(i,
+                                      read_buf == m_table->record[0]
+                                      ? NULL : read_buf,
+                                      m_start_key.key ? &m_start_key : NULL,
+                                      m_handler->end_range,
+                                      get_eq_range(),
+                                      true);
+      break;
+    }
+    default:
+      DBUG_ASSERT(false);
+      DBUG_RETURN(HA_ERR_END_OF_FILE);
+    }
+    DBUG_PRINT("info", ("error %d from partition %u", error, i));
+    /* When using ICP, copy record[0] to the priority queue for sorting. */
+    if (m_handler->pushed_idx_cond)
+      memcpy(rec_buf_ptr, read_buf, m_rec_length);
+    if (!error)
+    {
+      found= true;
+      if (m_ref_usage != REF_NOT_USED)
+      {
+        /* position_in_last_part needs m_last_part set. */
+        m_last_part= i;
+        position_in_last_part(part_rec_buf_ptr + PARTITION_BYTES_IN_POS,
+                              rec_buf_ptr);
+      }
+      /*
+        Save for later insertion in queue;
+      */
+      parts.push_back(part_rec_buf_ptr);
+      DBUG_DUMP("row", read_buf, m_rec_length);
+    }
+    else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+    {
+      DBUG_RETURN(error);
+    }
+    else if (error == HA_ERR_KEY_NOT_FOUND)
+    {
+      /* Remember this partition so index_next/prev can revisit it later. */
+      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
+      bitmap_set_bit(&m_key_not_found_partitions, i);
+      m_key_not_found= true;
+      saved_error= error;
+    }
+    part_rec_buf_ptr+= m_rec_offset + m_rec_length;
+  }
+  if (found)
+  {
+    /*
+      We found at least one partition with data, now sort all entries and
+      after that read the first entry and copy it to the buffer to return in.
+    */
+    m_queue->m_max_at_top= m_reverse_order;
+    m_queue->m_keys= m_curr_key_info;
+    DBUG_ASSERT(m_queue->empty());
+    /*
+      If PK, we should not sort by rowid, since that is already done
+      through the KEY setup.
+    */
+    DBUG_ASSERT(!m_curr_key_info[1] || m_ref_usage == REF_NOT_USED);
+    m_queue->assign(parts);
+    return_top_record(buf);
+    m_table->status= 0;
+    DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(saved_error);
+}
+
+
+/**
+ Return the top record in sort order.
+
+ @param[out] buf Row returned in MySQL Row Format.
+*/
+
+void Partition_helper::return_top_record(uchar *buf)
+{
+  uchar *key_buffer= m_queue->top();
+  uchar *rec_buffer= key_buffer + m_rec_offset;
+  /* The queue slot starts with the 2-byte partition id. */
+  uint part_id= uint2korr(key_buffer);
+
+  copy_cached_row(buf, rec_buffer);
+  DBUG_PRINT("info", ("from part_id %u", part_id));
+  DBUG_DUMP("returned_row", buf, m_table->s->reclength);
+  m_top_entry= part_id;
+  m_last_part= part_id;
+}
+
+
+/**
+ Add index_next/prev results from partitions without exact match.
+
+ If there where any partitions that returned HA_ERR_KEY_NOT_FOUND when
+ ha_index_read_map was done, those partitions must be included in the
+ following index_next/prev call.
+*/
+
+int Partition_helper::handle_ordered_index_scan_key_not_found()
+{
+  int error;
+  uint i;
+  size_t old_elements= m_queue->size();
+  uchar *part_buf= m_ordered_rec_buffer;
+  uchar *curr_rec_buf= NULL;
+  DBUG_ENTER("Partition_helper::handle_ordered_index_scan_key_not_found");
+  DBUG_ASSERT(m_key_not_found);
+  DBUG_ASSERT(part_buf);
+  /*
+    Loop over all used partitions to get the correct offset
+    into m_ordered_rec_buffer.
+  */
+  for (i= m_part_info->get_first_used_partition();
+       i < MY_BIT_NONE;
+       i= m_part_info->get_next_used_partition(i))
+  {
+    if (bitmap_is_set(&m_key_not_found_partitions, i))
+    {
+      /*
+        This partition is used and did return HA_ERR_KEY_NOT_FOUND
+        in index_read_map.
+      */
+      uchar *read_buf;
+      curr_rec_buf= part_buf + m_rec_offset;
+      /* ICP relies on Item evaluation, which expects the row in record[0]. */
+      if (m_handler->pushed_idx_cond)
+        read_buf= m_table->record[0];
+      else
+        read_buf= curr_rec_buf;
+
+      /* Step in the direction of the ongoing scan. */
+      if (m_reverse_order)
+        error= index_prev_in_part(i, read_buf);
+      else
+        error= index_next_in_part(i, read_buf);
+      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
+      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
+      DBUG_PRINT("info", ("Filling from partition %u reverse %u error %d",
+                          i, m_reverse_order, error));
+      if (!error)
+      {
+        /* When using ICP, copy record[0] to the priority queue for sorting. */
+        if (m_handler->pushed_idx_cond)
+          memcpy(curr_rec_buf, read_buf, m_rec_length);
+        if (m_ref_usage != REF_NOT_USED)
+        {
+          /* position_in_last_part needs m_last_part set. */
+          m_last_part= i;
+          position_in_last_part(part_buf + PARTITION_BYTES_IN_POS,
+                                curr_rec_buf);
+        }
+        /* The partition now has a buffered row; merge it into the queue. */
+        m_queue->push(part_buf);
+      }
+      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
+        DBUG_RETURN(error);
+    }
+    /* Advance to the next partition's slot in the ordered buffer. */
+    part_buf+= m_rec_offset + m_rec_length;
+  }
+  DBUG_ASSERT(curr_rec_buf);
+  bitmap_clear_all(&m_key_not_found_partitions);
+  m_key_not_found= false;
+
+  if (m_queue->size() > old_elements)
+  {
+    /* Update m_top_entry, which may have changed. */
+    uchar *key_buffer= m_queue->top();
+    m_top_entry= uint2korr(key_buffer);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Common routine to handle index_next with ordered results.
+
+  @param[out] buf           Read row in MySQL Row Format.
+  @param[in]  is_next_same  Called from index_next_same.
+
+  @return Operation status.
+  @retval HA_ERR_END_OF_FILE  End of scan
+  @retval 0                   Success
+  @retval other               Error code
+*/
+
+int Partition_helper::handle_ordered_next(uchar *buf, bool is_next_same)
+{
+  int error;
+  uint part_id= m_top_entry;
+  uchar *rec_buf= m_queue->empty() ? NULL : m_queue->top() + m_rec_offset;
+  uchar *read_buf;
+  DBUG_ENTER("Partition_helper::handle_ordered_next");
+
+  if (m_reverse_order)
+  {
+    /*
+      TODO: To support change of direction (index_prev -> index_next,
+      index_read_map(HA_READ_KEY_EXACT) -> index_prev etc.)
+      We would need to:
+      - Step back all cursors we have a buffered row from a previous next/prev
+      call (i.e. for all partitions we previously called index_prev, we must
+      call index_next and skip that row.
+      - empty the priority queue and initialize it again with reverse ordering.
+    */
+    DBUG_ASSERT(m_table->open_by_handler);
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  }
+
+  if (m_key_not_found)
+  {
+    if (is_next_same)
+    {
+      /* Only rows which match the key. */
+      m_key_not_found= false;
+      bitmap_clear_all(&m_key_not_found_partitions);
+    }
+    else
+    {
+      /* There are partitions not included in the index record queue. */
+      size_t old_elements= m_queue->size();
+      if ((error= handle_ordered_index_scan_key_not_found()))
+        DBUG_RETURN(error);
+      /*
+        If the queue top changed, i.e. one of the partitions that gave
+        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
+        return it.
+        Otherwise replace the old with a call to index_next (fall through).
+      */
+      if (old_elements != m_queue->size() && part_id != m_top_entry)
+      {
+        return_top_record(buf);
+        DBUG_PRINT("info", ("Returning row from part %u (prev KEY_NOT_FOUND)",
+                            m_top_entry));
+        DBUG_RETURN(0);
+      }
+    }
+  }
+  /* No current top entry (NO_CURRENT_PART_ID): the scan has ended. */
+  if (part_id >= m_tot_parts)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  DBUG_PRINT("info", ("next row from part %u (inx %u)",
+                      part_id, m_handler->active_index));
+
+  /* Assert that buffer for fetch is not NULL */
+  DBUG_ASSERT(rec_buf);
+
+  /* ICP relies on Item evaluation, which expects the row in record[0]. */
+  if (m_handler->pushed_idx_cond)
+    read_buf= m_table->record[0];
+  else
+    read_buf= rec_buf;
+
+
+  /* Refill the top partition's slot with its next row. */
+  if (m_index_scan_type == PARTITION_READ_RANGE)
+  {
+    error= read_range_next_in_part(part_id,
+                                   read_buf == m_table->record[0]
+                                   ? NULL : read_buf);
+  }
+  else if (!is_next_same)
+    error= index_next_in_part(part_id, read_buf);
+  else
+    error= index_next_same_in_part(part_id,
+                                   read_buf,
+                                   m_start_key.key,
+                                   m_start_key.length);
+  if (error)
+  {
+    if (error == HA_ERR_END_OF_FILE)
+    {
+      /* Return next buffered row */
+      if (!m_queue->empty())
+        m_queue->pop();
+      if (m_queue->empty())
+      {
+        /*
+          If priority queue is empty, we have finished fetching rows from all
+          partitions. Reset the value of next partition to NONE. This would
+          imply HA_ERR_END_OF_FILE for all future calls.
+        */
+        m_top_entry= NO_CURRENT_PART_ID;
+      }
+      else
+      {
+        return_top_record(buf);
+        DBUG_PRINT("info", ("Record returned from partition %u (2)",
+                            m_top_entry));
+        m_table->status= 0;
+        error= 0;
+      }
+    }
+    DBUG_RETURN(error);
+  }
+  /* When using ICP, copy record[0] to the priority queue for sorting. */
+  if (m_handler->pushed_idx_cond)
+    memcpy(rec_buf, read_buf, m_rec_length);
+  if (m_ref_usage != REF_NOT_USED)
+  {
+    /* position_in_last_part needs m_last_part set. */
+    m_last_part= part_id;
+    position_in_last_part(rec_buf - m_rec_offset + PARTITION_BYTES_IN_POS,
+                          rec_buf);
+  }
+  DBUG_DUMP("rec_buf", rec_buf, m_rec_length);
+  /* Re-sort the refilled top slot, then return the new smallest row. */
+  m_queue->update_top();
+  return_top_record(buf);
+  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
+  DBUG_RETURN(0);
+}
+
+
+/**
+ Common routine to handle index_prev with ordered results.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+*/
+
+int Partition_helper::handle_ordered_prev(uchar *buf)
+{
+  int error;
+  uint part_id= m_top_entry;
+  uchar *rec_buf= m_queue->empty() ? NULL : m_queue->top() + m_rec_offset;
+  uchar *read_buf;
+  DBUG_ENTER("Partition_helper::handle_ordered_prev");
+
+  /* A direction change from a forward scan is only valid for SQL HANDLER. */
+  if (!m_reverse_order)
+  {
+    /* TODO: See comment in handle_ordered_next(). */
+    DBUG_ASSERT(m_table->open_by_handler);
+    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  }
+
+  if (m_key_not_found)
+  {
+    /* There are partitions not included in the index record queue. */
+    size_t old_elements= m_queue->size();
+    if ((error= handle_ordered_index_scan_key_not_found()))
+      DBUG_RETURN(error);
+    if (old_elements != m_queue->size() && part_id != m_top_entry)
+    {
+      /*
+        Should only be possible for when HA_READ_KEY_EXACT was previously used,
+        which is not supported to have a subsequent call for PREV.
+        I.e. HA_READ_KEY_EXACT is considered to not have reverse order!
+      */
+      DBUG_ASSERT(0);
+      /*
+        If the queue top changed, i.e. one of the partitions that gave
+        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
+        return it.
+        Otherwise replace the old with a call to index_next (fall through).
+      */
+      return_top_record(buf);
+      DBUG_RETURN(0);
+    }
+  }
+
+  if (part_id >= m_tot_parts)
+  {
+    /* This should never happen, except for SQL HANDLER calls! */
+    DBUG_ASSERT(m_table->open_by_handler);
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+
+  /* Assert that buffer for fetch is not NULL */
+  DBUG_ASSERT(rec_buf);
+
+  /* ICP relies on Item evaluation, which expects the row in record[0]. */
+  if (m_handler->pushed_idx_cond)
+    read_buf= m_table->record[0];
+  else
+    read_buf= rec_buf;
+
+  /* Refill the top partition's slot with its previous row. */
+  if ((error= index_prev_in_part(part_id, read_buf)))
+  {
+    if (error == HA_ERR_END_OF_FILE)
+    {
+      /* Partition exhausted: drop it from the queue, serve the new top. */
+      if (!m_queue->empty())
+        m_queue->pop();
+      if (m_queue->empty())
+      {
+        /*
+          If priority queue is empty, we have finished fetching rows from all
+          partitions. Reset the value of next partition to NONE. This would
+          imply HA_ERR_END_OF_FILE for all future calls.
+        */
+        m_top_entry= NO_CURRENT_PART_ID;
+      }
+      else
+      {
+        return_top_record(buf);
+        DBUG_PRINT("info", ("Record returned from partition %d (2)",
+                            m_top_entry));
+        error= 0;
+        m_table->status= 0;
+      }
+    }
+    DBUG_RETURN(error);
+  }
+  /* When using ICP, copy record[0] to the priority queue for sorting. */
+  if (m_handler->pushed_idx_cond)
+    memcpy(rec_buf, read_buf, m_rec_length);
+
+  if (m_ref_usage != REF_NOT_USED)
+  {
+    /* position_in_last_part needs m_last_part set. */
+    m_last_part= part_id;
+    position_in_last_part(rec_buf - m_rec_offset + PARTITION_BYTES_IN_POS,
+                          rec_buf);
+  }
+  /* Re-sort the refilled top slot, then return the new top row. */
+  m_queue->update_top();
+  return_top_record(buf);
+  DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+  DBUG_RETURN(0);
+}
+
+/**
+ Get statistics from a specific partition.
+
+ @param[out] stat_info Area to report values into.
+ @param[out] check_sum Check sum of partition.
+ @param[in] part_id Partition to report from.
+*/
+void
+Partition_helper::get_dynamic_partition_info_low(PARTITION_STATS *stat_info,
+                                                 ha_checksum *check_sum,
+                                                 uint part_id)
+{
+  ha_statistics *part_stat= &m_handler->stats;
+  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions, part_id));
+  /* On entry read_partitions and lock_partitions must be identical sets
+     (mutual subset), since read_partitions is restored from
+     lock_partitions at the end. */
+  DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
+                               &m_part_info->lock_partitions));
+  DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
+                               &m_part_info->read_partitions));
+  /* Temporarily narrow read_partitions to part_id only, so that info()
+     collects statistics for just that partition. */
+  bitmap_clear_all(&m_part_info->read_partitions);
+  bitmap_set_bit(&m_part_info->read_partitions, part_id);
+  m_handler->info(HA_STATUS_TIME |
+                  HA_STATUS_VARIABLE |
+                  HA_STATUS_VARIABLE_EXTRA |
+                  HA_STATUS_NO_LOCK);
+  stat_info->records= part_stat->records;
+  stat_info->mean_rec_length= part_stat->mean_rec_length;
+  stat_info->data_file_length= part_stat->data_file_length;
+  stat_info->max_data_file_length= part_stat->max_data_file_length;
+  stat_info->index_file_length= part_stat->index_file_length;
+  stat_info->delete_length= part_stat->delete_length;
+  stat_info->create_time= part_stat->create_time;
+  stat_info->update_time= part_stat->update_time;
+  stat_info->check_time= part_stat->check_time;
+  /* Only compute the checksum when the engine supports the checksum
+     flavor selected by old_mode. */
+  if (get_thd()->variables.old_mode ?
+      m_handler->ha_table_flags() & HA_HAS_OLD_CHECKSUM :
+      m_handler->ha_table_flags() & HA_HAS_NEW_CHECKSUM)
+  {
+    *check_sum= checksum_in_part(part_id);
+  }
+  /* Restore read_partitions from lock_partitions (equal per the asserts). */
+  bitmap_copy(&m_part_info->read_partitions, &m_part_info->lock_partitions);
+}
+
+
+/**
+ Get checksum for table.
+
+ @return Checksum or 0 if not supported, which also may be a correct checksum!.
+*/
+
+ha_checksum Partition_helper::ph_checksum() const
+{
+  ha_checksum total= 0;
+  /* Pick the checksum capability flag matching the session's old_mode. */
+  const ulonglong required_flag= get_thd()->variables.old_mode ?
+                                 HA_HAS_OLD_CHECKSUM : HA_HAS_NEW_CHECKSUM;
+  if (m_handler->ha_table_flags() & required_flag)
+  {
+    /* Sum the per-partition checksums over every partition. */
+    for (uint part= 0; part < m_tot_parts; part++)
+      total+= checksum_in_part(part);
+  }
+  return total;
+}
diff --git a/sql/partitioning/partition_handler.h b/sql/partitioning/partition_handler.h
new file mode 100644
index 00000000000..cf4e1dcb24b
--- /dev/null
+++ b/sql/partitioning/partition_handler.h
@@ -0,0 +1,1113 @@
+#ifndef PARTITION_HANDLER_INCLUDED
+#define PARTITION_HANDLER_INCLUDED
+
+/*
+ Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; version 2 of
+ the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "my_global.h" // uint etc.
+#include "my_base.h" // ha_rows.
+#include "handler.h" // Handler_share
+#include "sql_partition.h" // part_id_range
+#include "mysqld_error.h" // ER_ILLEGAL_HA
+#include "priority_queue.h"
+#include "key.h" // key_rec_cmp
+#include "ha_partition.h"
+#include <vector>
+
+#define PARTITION_BYTES_IN_POS 2
+
+/* forward declarations */
+typedef struct st_mem_root MEM_ROOT;
+
+static const uint NO_CURRENT_PART_ID= UINT_MAX32;
+
+/**
+ bits in Partition_handler::alter_flags():
+
+ HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
+ supported at all.
+ HA_FAST_CHANGE_PARTITION means that optimized variants of the changes
+ exists but they are not necessarily done online.
+
+ HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both
+ the new partition and to the old partitions when updating through the
+ old partitioning schema while performing a change of the partitioning.
+ This means that we can support updating of the table while performing
+ the copy phase of the change. For no lock at all also a double write
+ from new to old must exist and this is not required when this flag is
+ set.
+ This is actually removed even before it was introduced the first time.
+ The new idea is that handlers will handle the lock level already in
+ store_lock for ALTER TABLE partitions.
+ TODO: Implement this via the alter-inplace api.
+*/
+
+enum enum_part_operation {
+ OPTIMIZE_PARTS= 0,
+ ANALYZE_PARTS,
+ CHECK_PARTS,
+ REPAIR_PARTS,
+ ASSIGN_KEYCACHE_PARTS,
+ PRELOAD_KEYS_PARTS
+};
+
+
+/**
+ Initialize partitioning (currently only PSI keys).
+*/
+void partitioning_init();
+
+
+/**
+ Class for partitioning specific operations.
+
+ Returned from handler::get_partition_handler().
+*/
+class Partition_handler :public Sql_alloc
+{
+public:
+ Partition_handler() {}
+ ~Partition_handler() {}
+
+ bool init(uint num_parts);
+
+ /**
+ Get dynamic table information from partition.
+
+    @param[out] stat_info  Statistics struct to fill in.
+    @param[in]  part_id    Partition to report for.
+
+    @note stat_info is initialized by the caller. The check sum (see
+    get_dynamic_partition_info_low) is only expected to be updated
+    if HA_HAS_CHECKSUM is supported.
+ */
+ virtual void get_dynamic_partition_info(PARTITION_STATS *stat_info,
+ uint part_id) = 0;
+
+ /**
+ Get default number of partitions.
+
+ Used during creating a partitioned table.
+
+ @param info Create info.
+ @return Number of default partitions.
+ */
+ virtual int get_default_num_partitions(HA_CREATE_INFO *info) { return 1;}
+ /**
+ Setup auto partitioning.
+
+ Called for engines with HA_USE_AUTO_PARTITION to setup the partition info
+ object
+
+ @param[in,out] part_info Partition object to setup.
+ */
+ virtual void set_auto_partitions(partition_info *part_info) { return; }
+ /**
+ Get number of partitions for table in SE
+
+ @param name normalized path(same as open) to the table
+
+ @param[out] num_parts Number of partitions
+
+ @retval false for success
+ @retval true for failure, for example table didn't exist in engine
+ */
+ virtual bool get_num_parts(const char *name,
+ uint *num_parts)
+ {
+ *num_parts= 0;
+ return false;
+ }
+ /**
+ Set the partition info object to be used by the handler.
+
+    @param part_info  Partition info to be used by the handler. May be called
+    early, when part_info is only created and parsed but not yet setup,
+    checked or fixed.
+ */
+ virtual void set_part_info(partition_info *part_info) = 0;
+ /**
+ Initialize partition.
+
+ @param mem_root Memory root for memory allocations.
+
+ @return Operation status
+ @retval false Success.
+ @retval true Failure.
+ */
+ virtual bool initialize_partition(MEM_ROOT *mem_root) {return false;}
+
+
+ /**
+ Alter flags.
+
+ Given a set of alter table flags, return which is supported.
+
+ @param flags Alter table operation flags.
+
+ @return Supported alter table flags.
+ */
+ virtual uint alter_flags(uint flags) const
+ { return 0; }
+
+private:
+ /**
+ Truncate partition.
+
+ Low-level primitive for handler, implementing
+ Partition_handler::truncate_partition().
+
+ @return Operation status
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int truncate_partition_low()
+ { return HA_ERR_WRONG_COMMAND; }
+ /**
+    Change partitions.
+
+ Low-level primitive for handler, implementing
+ Partition_handler::change_partitions().
+
+ @param[in] create_info Table create info.
+ @param[in] path Path including table name.
+ @param[out] copied Number of rows copied.
+ @param[out] deleted Number of rows deleted.
+
+ @return Operation status
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int change_partitions_low(HA_CREATE_INFO *create_info,
+ const char *path,
+ ulonglong * const copied,
+ ulonglong * const deleted)
+ {
+ my_error(ER_ILLEGAL_HA, MYF(0), create_info->alias);
+ return HA_ERR_WRONG_COMMAND;
+ }
+ /**
+ Return the table handler.
+
+ For some partitioning specific functions it is still needed to access
+ the handler directly for transaction handling (mark_trx_read_write())
+ and to assert correct locking.
+
+ @return handler or NULL if not supported.
+ */
+ virtual handler *get_handler()
+ { return NULL; }
+};
+
+
+/// Maps compare function to strict weak ordering required by Priority_queue.
+struct Key_rec_less
+{
+ typedef int (*key_compare_fun)(void*, uchar *, uchar *);
+
+ explicit Key_rec_less(KEY **keys)
+ : m_keys(keys), m_fun(key_rec_cmp), m_max_at_top(false)
+ {
+ }
+
+ bool operator()(uchar *first, uchar *second)
+ {
+ const int cmpval=
+ (*m_fun)(m_keys, first + m_rec_offset, second + m_rec_offset);
+ return m_max_at_top ? cmpval < 0 : cmpval > 0;
+ }
+
+ KEY **m_keys;
+ key_compare_fun m_fun;
+ uint m_rec_offset;
+ bool m_max_at_top;
+};
+
+
+/**
+ Partition_helper is a helper class that implements most generic partitioning
+ functionality such as:
+ table scan, index scan (both ordered and non-ordered),
+ insert (write_row()), delete and update.
+ And includes ALTER TABLE ... ADD/COALESCE/DROP/REORGANIZE/... PARTITION
+ support.
+ It also implements a cache for the auto increment value and check/repair for
+ rows in wrong partition.
+
+ How to use it:
+ Inherit it and implement:
+ - *_in_part() functions for row operations.
+ - prepare_for_new_partitions(), create_new_partition(), close_new_partitions()
+ write_row_in_new_part() for handling 'fast' alter partition.
+*/
+class Partition_helper : public Sql_alloc
+{
+ typedef Priority_queue<uchar *, std::vector<uchar*>, Key_rec_less> Prio_queue;
+public:
+ Partition_helper(handler *main_handler);
+ ~Partition_helper();
+
+ /**
+ Set partition info.
+
+ To be called from Partition_handler.
+
+ @param part_info Partition info to use.
+ @param early True if called when part_info only created and parsed,
+ but not setup, checked or fixed.
+ */
+ virtual void set_part_info_low(partition_info *part_info, bool early);
+ /**
+ Initialize variables used before the table is opened.
+
+ @param mem_root Memory root to allocate things from (not yet used).
+
+ @return Operation status.
+ @retval false success.
+ @retval true failure.
+ */
+ inline bool init_partitioning(MEM_ROOT *mem_root)
+ {
+#ifndef DBUG_OFF
+ m_key_not_found_partitions.bitmap= NULL;
+#endif
+ return false;
+ }
+
+
+ /**
+ INSERT/UPDATE/DELETE functions.
+ @see handler.h
+ @{
+ */
+
+ /**
+ Insert a row to the partitioned table.
+
+ @param buf The row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+ */
+ int ph_write_row(uchar *buf);
+ /**
+ Update an existing row in the partitioned table.
+
+ Yes, update_row() does what you expect, it updates a row. old_data will
+ have the previous row record in it, while new_data will have the newest
+ data in it.
+ Keep in mind that the server can do updates based on ordering if an
+ ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+ If the new record belongs to a different partition than the old record
+ then it will be inserted into the new partition and deleted from the old.
+
+ new_data is always record[0]
+ old_data is always record[1]
+
+ @param old_data The old record in MySQL Row Format.
+ @param new_data The new record in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+ */
+ int ph_update_row(const uchar *old_data, uchar *new_data);
+ /**
+ Delete an existing row in the partitioned table.
+
+ This will delete a row. buf will contain a copy of the row to be deleted.
+ The server will call this right after the current row has been read
+ (from either a previous rnd_xxx() or index_xxx() call).
+ If you keep a pointer to the last row or can access a primary key it will
+ make doing the deletion quite a bit easier.
+ Keep in mind that the server does no guarantee consecutive deletions.
+ ORDER BY clauses can be used.
+
+ buf is either record[0] or record[1]
+
+ @param buf The record in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+ */
+ int ph_delete_row(const uchar *buf);
+
+ /** @} */
+
+ /** Release unused auto increment values. */
+ void ph_release_auto_increment();
+ /**
+ Calculate key hash value from an null terminated array of fields.
+ Support function for KEY partitioning.
+
+ @param field_array An array of the fields in KEY partitioning
+
+ @return hash_value calculated
+
+ @note Uses the hash function on the character set of the field.
+ Integer and floating point fields use the binary character set by default.
+ */
+ static uint32 ph_calculate_key_hash_value(Field **field_array);
+ /** Get checksum for table.
+ @return Checksum or 0 if not supported (which also may be a correct checksum!).
+ */
+ ha_checksum ph_checksum() const;
+
+ /**
+ MODULE full table scan
+
+ This module is used for the most basic access method for any table
+ handler. This is to fetch all data through a full table scan. No
+ indexes are needed to implement this part.
+ It contains one method to start the scan (rnd_init) that can also be
+ called multiple times (typical in a nested loop join). Then proceeding
+ to the next record (rnd_next) and closing the scan (rnd_end).
+ To remember a record for later access there is a method (position)
+ and there is a method used to retrieve the record based on the stored
+ position.
+ The position can be a file position, a primary key, a ROWID dependent
+ on the handler below.
+
+ unlike index_init(), rnd_init() can be called two times
+ without rnd_end() in between (it only makes sense if scan=1).
+ then the second call should prepare for the new table scan
+ (e.g if rnd_init allocates the cursor, second call should
+ position it to the start of the table, no need to deallocate
+ and allocate it again.
+ @see handler.h
+ @{
+ */
+
+ int ph_rnd_init(bool scan);
+ int ph_rnd_end();
+ int ph_rnd_next(uchar *buf);
+ void ph_position(const uchar *record);
+ int ph_rnd_pos(uchar *buf, uchar *pos);
+ int ph_rnd_pos_by_record(uchar *record);
+
+ /** @} */
+
+ /**
+ MODULE index scan
+
+ This part of the handler interface is used to perform access through
+ indexes. The interface is defined as a scan interface but the handler
+ can also use key lookup if the index is a unique index or a primary
+ key index.
+ Index scans are mostly useful for SELECT queries but are an important
+ part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+ and so forth.
+    Naturally an index is needed for an index scan and indexes can either
+    be ordered or hash based. Some ordered indexes can return data in order
+ but not necessarily all of them.
+ There are many flags that define the behavior of indexes in the
+ various handlers. These methods are found in the optimizer module.
+ -------------------------------------------------------------------------
+
+ index_read is called to start a scan of an index. The find_flag defines
+ the semantics of the scan. These flags are defined in
+ include/my_base.h
+    index_read_idx is the same but also initializes the index before doing
+ the same thing as index_read. Thus it is similar to index_init followed
+ by index_read. This is also how we implement it.
+
+ index_read/index_read_idx does also return the first row. Thus for
+ key lookups, the index_read will be the only call to the handler in
+ the index scan.
+
+ index_init initializes an index before using it and index_end does
+ any end processing needed.
+ @{
+ */
+
+ int ph_index_init_setup(uint key_nr, bool sorted);
+ int ph_index_init(uint key_nr, bool sorted);
+ int ph_index_end();
+ /*
+ These methods are used to jump to next or previous entry in the index
+ scan. There are also methods to jump to first and last entry.
+ */
+ int ph_index_first(uchar *buf);
+ int ph_index_last(uchar *buf);
+ int ph_index_next(uchar *buf);
+ int ph_index_next_same(uchar *buf, const uchar *key, uint keylen);
+ int ph_index_prev(uchar *buf);
+ int ph_index_read_map(uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ int ph_index_read_last_map(uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map);
+ int ph_index_read_idx_map(uchar *buf,
+ uint index,
+ const uchar *key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ int ph_read_range_first(const key_range *start_key,
+ const key_range *end_key,
+ bool eq_range_arg,
+ bool sorted);
+ int ph_read_range_next();
+ /** @} */
+
+ /**
+ Functions matching Partition_handler API.
+ @{
+ */
+
+ /**
+ Get statistics from a specific partition.
+ @param[out] stat_info Area to report values into.
+ @param[out] check_sum Check sum of partition.
+ @param[in] part_id Partition to report from.
+ */
+ virtual void get_dynamic_partition_info_low(PARTITION_STATS *stat_info,
+ ha_checksum *check_sum,
+ uint part_id);
+
+ /**
+ Implement the partition changes defined by ALTER TABLE of partitions.
+
+ Add and copy if needed a number of partitions, during this operation
+ only read operation is ongoing in the server. This is used by
+ ADD PARTITION all types as well as by REORGANIZE PARTITION. For
+ one-phased implementations it is used also by DROP and COALESCE
+ PARTITIONs.
+ One-phased implementation needs the new frm file, other handlers will
+ get zero length and a NULL reference here.
+
+ @param[in] create_info HA_CREATE_INFO object describing all
+ fields and indexes in table
+ @param[in] path Complete path of db and table name
+ @param[out] copied Output parameter where number of copied
+ records are added
+ @param[out] deleted Output parameter where number of deleted
+ records are added
+
+ @return Operation status
+ @retval 0 Success
+ @retval != 0 Failure
+ */
+ int change_partitions(HA_CREATE_INFO *create_info,
+ const char *path,
+ ulonglong * const copied,
+ ulonglong * const deleted);
+ /** @} */
+
+protected:
+ /* Common helper functions to be used by inheriting engines. */
+
+ /*
+ open/close functions.
+ */
+
+ /**
+    Set m_part_share and allocate internal bitmaps etc. used by open tables.
+
+    @param part_share  Partition share object for auto_inc handling etc.
+
+ @return Operation status.
+ @retval false success.
+ @retval true failure.
+ */
+ bool open_partitioning(Partition_share *part_share);
+ /**
+ Close partitioning for a table.
+
+ Frees memory and release other resources.
+ */
+ void close_partitioning();
+
+ /**
+ Lock auto increment value if needed.
+ */
+ inline void lock_auto_increment()
+ {
+ /* lock already taken */
+ if (m_auto_increment_safe_stmt_log_lock)
+ return;
+ DBUG_ASSERT(!m_auto_increment_lock);
+ if(m_table->s->tmp_table == NO_TMP_TABLE)
+ {
+ m_auto_increment_lock= true;
+ m_part_share->lock_auto_inc();
+ }
+ }
+ /**
+ unlock auto increment.
+ */
+ inline void unlock_auto_increment()
+ {
+ /*
+ If m_auto_increment_safe_stmt_log_lock is true, we have to keep the lock.
+ It will be set to false and thus unlocked at the end of the statement by
+ ha_partition::release_auto_increment.
+ */
+ if(m_auto_increment_lock && !m_auto_increment_safe_stmt_log_lock)
+ {
+ m_part_share->unlock_auto_inc();
+ m_auto_increment_lock= false;
+ }
+ }
+ /**
+ Get auto increment.
+
+ Only to be used for auto increment values that are the first field in
+ an unique index.
+
+ @param[in] increment Increment between generated numbers.
+ @param[in] nb_desired_values Number of values requested.
+ @param[out] first_value First reserved value (ULLONG_MAX on error).
+ @param[out] nb_reserved_values Number of values reserved.
+ */
+ void get_auto_increment_first_field(ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values);
+
+ /**
+ Initialize the record priority queue used for sorted index scans.
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ int init_record_priority_queue();
+ /**
+ Destroy the record priority queue used for sorted index scans.
+ */
+ void destroy_record_priority_queue();
+ /*
+ Administrative support functions.
+ */
+
+ /** Print partitioning specific error.
+ @param error Error code.
+ @param errflag Error flag.
+ @return false if error is printed else true.
+ */
+ bool print_partition_error(int error, myf errflag);
+#if 0
+ /**
+ Print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE.
+
+ Modeled after mi_check_print_msg.
+
+ @param thd Thread context.
+ @param len Needed length for message buffer.
+ @param msg_type Message type.
+ @param db_name Database name.
+ @param table_name Table name.
+ @param op_name Operation name.
+ @param fmt Message (in printf format with additional arguments).
+
+ @return Operation status.
+ @retval false for success else true.
+ */
+ bool print_admin_msg(THD *thd,
+ uint len,
+ const char *msg_type,
+ const char *db_name,
+ const char *table_name,
+ const char *op_name,
+ const char *fmt,
+ ...);
+#endif
+ /**
+ Check/fix misplaced rows.
+
+ @param part_id Partition to check/fix.
+ @param repair If true, move misplaced rows to correct partition.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error
+ */
+ int check_misplaced_rows(uint part_id, bool repair);
+ /**
+ Set used partitions bitmap from Alter_info.
+
+ @return false if success else true.
+ */
+ bool set_altered_partitions();
+
+private:
+ enum partition_index_scan_type
+ {
+ PARTITION_INDEX_READ= 1,
+ PARTITION_INDEX_FIRST,
+ PARTITION_INDEX_FIRST_UNORDERED,
+ PARTITION_INDEX_LAST,
+ PARTITION_INDEX_READ_LAST,
+ PARTITION_READ_RANGE,
+ PARTITION_NO_INDEX_SCAN
+ };
+
+ /** handler to use (ha_partition, ha_innopart etc.) */
+ handler *m_handler;
+ /** Convenience pointer to table from m_handler (i.e. m_handler->table). */
+ TABLE *m_table;
+
+ /*
+ Access methods to protected areas in handler to avoid adding
+ friend class Partition_helper in class handler.
+ */
+ virtual THD *get_thd() const = 0;
+ virtual TABLE *get_table() const = 0;
+ virtual bool get_eq_range() const = 0;
+ virtual void set_eq_range(bool eq_range) = 0;
+ virtual void set_range_key_part(KEY_PART_INFO *key_part) = 0;
+
+ /*
+ Implementation of per partition operation by instantiated engine.
+ These must be implemented in the 'real' partition_helper subclass.
+ */
+
+ /**
+ Write a row in the specified partition.
+
+ @see handler::write_row().
+
+ @param part_id Partition to write to.
+ @param buf Buffer with data to write.
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int write_row_in_part(uint part_id, uchar *buf) = 0;
+ /**
+ Update a row in the specified partition.
+
+ @see handler::update_row().
+
+    @param new_part_id  Partition to update in.
+ @param old_data Buffer containing old row.
+ @param new_data Buffer containing new row.
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int update_row_in_part(uint new_part_id,
+ const uchar *old_data,
+ uchar *new_data) = 0;
+ /**
+ Delete an existing row in the specified partition.
+
+ @see handler::delete_row().
+
+ @param part_id Partition to delete from.
+ @param buf Buffer containing row to delete.
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int delete_row_in_part(uint part_id, const uchar *buf) = 0;
+ /**
+ Initialize the shared auto increment value.
+
+ @param no_lock If HA_STATUS_NO_LOCK should be used in info(HA_STATUS_AUTO).
+
+ Also sets stats.auto_increment_value.
+ */
+ virtual int initialize_auto_increment(bool no_lock) = 0;
+ /** Release auto_increment in all underlying partitions. */
+ virtual void release_auto_increment_all_parts() {}
+ /** Save or persist the current max auto increment. */
+ virtual void save_auto_increment(ulonglong nr) {}
+ /**
+ Per partition equivalent of rnd_* and index_* functions.
+
+ @see class handler.
+ */
+ virtual int rnd_init_in_part(uint part_id, bool table_scan) = 0;
+ int ph_rnd_next_in_part(uint part_id, uchar *buf);
+ virtual int rnd_next_in_part(uint part_id, uchar *buf) = 0;
+ virtual int rnd_end_in_part(uint part_id, bool scan) = 0;
+ virtual void position_in_last_part(uchar *ref, const uchar *row) = 0;
+ /* If ph_rnd_pos is used then this needs to be implemented! */
+ virtual int rnd_pos_in_part(uint part_id, uchar *buf, uchar *pos)
+ { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
+ virtual int rnd_pos_by_record_in_last_part(uchar *row)
+ {
+ /*
+ Not much overhead to use default function. This avoids out-of-sync code.
+ */
+ return m_handler->rnd_pos_by_record(row);
+ }
+ virtual int index_init_in_part(uint part, uint keynr, bool sorted)
+ { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
+ virtual int index_end_in_part(uint part)
+ { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
+ virtual int index_first_in_part(uint part, uchar *buf) = 0;
+ virtual int index_last_in_part(uint part, uchar *buf) = 0;
+ virtual int index_prev_in_part(uint part, uchar *buf) = 0;
+ virtual int index_next_in_part(uint part, uchar *buf) = 0;
+ virtual int index_next_same_in_part(uint part,
+ uchar *buf,
+ const uchar *key,
+ uint length) = 0;
+ virtual int index_read_map_in_part(uint part,
+ uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag) = 0;
+ virtual int index_read_last_map_in_part(uint part,
+ uchar *buf,
+ const uchar *key,
+ key_part_map keypart_map) = 0;
+ /**
+ Do read_range_first in the specified partition.
+ If buf is set, then copy the result there instead of table->record[0].
+ */
+ virtual int read_range_first_in_part(uint part,
+ uchar *buf,
+ const key_range *start_key,
+ const key_range *end_key,
+ bool eq_range,
+ bool sorted) = 0;
+ /**
+ Do read_range_next in the specified partition.
+ If buf is set, then copy the result there instead of table->record[0].
+ */
+ virtual int read_range_next_in_part(uint part, uchar *buf) = 0;
+ virtual int index_read_idx_map_in_part(uint part,
+ uchar *buf,
+ uint index,
+ const uchar *key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag) = 0;
+ /**
+ Initialize engine specific resources for the record priority queue
+    used during ordered index reads for multiple partitions.
+
+ @param used_parts Number of partitions used in query
+ (number of set bits in m_part_info->read_partitions).
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int init_record_priority_queue_for_parts(uint used_parts)
+ {
+ return 0;
+ }
+ /**
+ Destroy and release engine specific resources used by the record
+ priority queue.
+ */
+ virtual void destroy_record_priority_queue_for_parts() {}
+ /**
+ Checksum for a partition.
+
+ @param part_id Partition to checksum.
+ */
+ virtual ha_checksum checksum_in_part(uint part_id) const
+ { DBUG_ASSERT(0); return 0; }
+ /**
+ Copy a cached row.
+
+ Used when copying a row from the record priority queue to the return buffer.
+ For some engines, like InnoDB, only marked columns must be copied,
+ to preserve non-read columns.
+
+ @param[out] to_rec Buffer to copy to.
+ @param[in] from_rec Buffer to copy from.
+ */
+ virtual void copy_cached_row(uchar *to_rec, const uchar *from_rec)
+ { memcpy(to_rec, from_rec, m_rec_length); }
+ /**
+ Prepare for creating new partitions during ALTER TABLE ... PARTITION.
+ @param num_partitions Number of new partitions to be created.
+ @param only_create True if only creating the partition
+ (no open/lock is needed).
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int prepare_for_new_partitions(uint num_partitions,
+ bool only_create) = 0;
+ /**
+ Create a new partition to be filled during ALTER TABLE ... PARTITION.
+ @param table Table to create the partition in.
+ @param create_info Table/partition specific create info.
+ @param part_name Partition name.
+ @param new_part_id Partition id in new table.
+ @param part_elem Partition element.
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int create_new_partition(TABLE *table,
+ HA_CREATE_INFO *create_info,
+ const char *part_name,
+ uint new_part_id,
+ partition_element *part_elem) = 0;
+ /**
+ Close and finalize new partitions.
+ */
+ virtual void close_new_partitions() = 0;
+ /**
+ write row to new partition.
+ @param new_part New partition to write to.
+
+ @return Operation status.
+ @retval 0 Success.
+ @retval != 0 Error code.
+ */
+ virtual int write_row_in_new_part(uint new_part) = 0;
+
+ /* Internal helper functions*/
+ /**
+ Update auto increment value if current row contains a higher value.
+ */
+ inline void set_auto_increment_if_higher();
+ /**
+ Common routine to set up index scans.
+
+ Find out which partitions we'll need to read when scanning the specified
+ range.
+
+ If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+ as we will not need to do merge ordering.
+
+ @param buf Buffer to later return record in (this function
+ needs it to calculate partitioning function values)
+
+ @param idx_read_flag True <=> m_start_key has range start endpoint which
+ probably can be used to determine the set of
+ partitions to scan.
+ False <=> there is no start endpoint.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval !=0 Error code
+ */
+ int partition_scan_set_up(uchar *buf, bool idx_read_flag);
+ /**
+ Common routine to handle index_next with unordered results.
+
+ These routines are used to scan partitions without considering order.
+ This is performed in two situations.
+ 1) In read_multi_range this is the normal case
+ 2) When performing any type of index_read, index_first, index_last where
+ all fields in the partition function is bound. In this case the index
+ scan is performed on only one partition and thus it isn't necessary to
+ perform any sort.
+
+ @param[out] buf Read row in MySQL Row Format.
+    @param[in]  is_next_same  Called from index_next_same.
+
+ @return Operation status.
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_unordered_next(uchar *buf, bool is_next_same);
+ /**
+ Handle index_next when changing to new partition.
+
+ This routine is used to start the index scan on the next partition.
+ Both initial start and after completing scan on one partition.
+
+ @param[out] buf Read row in MySQL Row Format
+
+ @return Operation status.
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_unordered_scan_next_partition(uchar *buf);
+ /**
+ Common routine to start index scan with ordered results.
+
+ @param[out] buf Read row in MySQL Row Format
+
+ @return Operation status
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval HA_ERR_KEY_NOT_FOUND End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_ordered_index_scan(uchar *buf);
+ /**
+ Add index_next/prev results from partitions without exact match.
+
+    If there were any partitions that returned HA_ERR_KEY_NOT_FOUND when
+ ha_index_read_map was done, those partitions must be included in the
+ following index_next/prev call.
+
+ @return Operation status
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_ordered_index_scan_key_not_found();
+ /**
+ Common routine to handle index_prev with ordered results.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_ordered_prev(uchar *buf);
+ /**
+ Common routine to handle index_next with ordered results.
+
+ @param[out] buf Read row in MySQL Row Format.
+    @param[in]  is_next_same  Called from index_next_same.
+
+ @return Operation status.
+ @retval HA_ERR_END_OF_FILE End of scan
+ @retval 0 Success
+ @retval other Error code
+ */
+ int handle_ordered_next(uchar *buf, bool is_next_same);
+ /**
+ Common routine for a number of index_read variants.
+
+ @param[out] buf Buffer where the record should be returned.
+ @param[in] have_start_key TRUE <=> the left endpoint is available, i.e.
+ we're in index_read call or in read_range_first
+ call and the range has left endpoint.
+ FALSE <=> there is no left endpoint (we're in
+ read_range_first() call and the range has no
+ left endpoint).
+
+ @return Operation status
+ @retval 0 OK
+ @retval HA_ERR_END_OF_FILE Whole index scanned, without finding the record.
+ @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
+ @retval other Error code.
+ */
+ int common_index_read(uchar *buf, bool have_start_key);
+ /**
+ Common routine for index_first/index_last.
+
+ @param[out] buf Read row in MySQL Row Format.
+
+ @return Operation status.
+ @retval 0 Success
+ @retval != 0 Error code
+ */
+ int common_first_last(uchar *buf);
+ /**
+ Return the top record in sort order.
+
+ @param[out] buf Row returned in MySQL Row Format.
+ */
+ void return_top_record(uchar *buf);
+ /**
+ Copy partitions as part of ALTER TABLE of partitions.
+
+ change_partitions has done all the preparations, now it is time to
+ actually copy the data from the reorganized partitions to the new
+ partitions.
+
+ @param[out] copied Number of records copied.
+ @param[out] deleted Number of records deleted.
+
+ @return Operation status
+ @retval 0 Success
+ @retval >0 Error code
+ */
+ virtual int copy_partitions(ulonglong * const copied,
+ ulonglong * const deleted);
+
+ /**
+ Set table->read_set taking partitioning expressions into account.
+ */
+ void set_partition_read_set();
+
+ /*
+ These could be private as well,
+ but easier to expose them to derived classes to use.
+ */
+protected:
+ /** All internal partitioning data! @{ */
+ /** Tables partitioning info (same as table->part_info) */
+ partition_info *m_part_info;
+ /** Is primary key clustered. */
+ bool m_pkey_is_clustered;
+ /** Cached value of m_part_info->is_sub_partitioned(). */
+ bool m_is_sub_partitioned;
+ /** Partition share for auto_inc handling. */
+ Partition_share *m_part_share;
+ /** Total number of partitions. */
+ uint m_tot_parts;
+ uint m_last_part; // Last accessed partition.
+ const uchar *m_err_rec; // record which gave error.
+ bool m_auto_increment_safe_stmt_log_lock;
+ bool m_auto_increment_lock;
+ part_id_range m_part_spec; // Which parts to scan
+ uint m_scan_value; // Value passed in rnd_init
+ // call
+ key_range m_start_key; // index read key range
+ enum partition_index_scan_type m_index_scan_type;// What type of index
+ // scan
+ uint m_rec_length; // Local copy of record length
+
+ bool m_ordered; // Ordered/Unordered index scan.
+ bool m_ordered_scan_ongoing; // Ordered index scan ongoing.
+ bool m_reverse_order; // Scanning in reverse order (prev).
+ /** Row and key buffer for ordered index scan. */
+ uchar *m_ordered_rec_buffer;
+ /** Prio queue used by sorted read. */
+ Prio_queue *m_queue;
+ /** Which partition is to deliver next result. */
+ uint m_top_entry;
+ /** Offset in m_ordered_rec_buffer from part buffer to its record buffer. */
+ uint m_rec_offset;
+ /**
+ Current index used for sorting.
+ If clustered PK exists, then it will be used as secondary index to
+ sort on if the first is equal in key_rec_cmp.
+ So if clustered pk: m_curr_key_info[0]= current index and
+ m_curr_key_info[1]= pk and [2]= NULL.
+ Otherwise [0]= current index, [1]= NULL, and we will
+ sort by rowid as secondary sort key if equal first key.
+ */
+ KEY *m_curr_key_info[3];
+ enum enum_using_ref {
+ /** handler::ref is not copied to the PQ. */
+ REF_NOT_USED= 0,
+ /**
+ handler::ref is copied to the PQ but does not need to be used in sorting.
+ */
+ REF_STORED_IN_PQ,
+ /** handler::ref is copied to the PQ and must be used during sorting. */
+ REF_USED_FOR_SORT};
+ /** How handler::ref is used in the priority queue. */
+ enum_using_ref m_ref_usage;
+ /** Set if previous index_* call returned HA_ERR_KEY_NOT_FOUND. */
+ bool m_key_not_found;
+ /** Partitions that returned HA_ERR_KEY_NOT_FOUND. */
+ MY_BITMAP m_key_not_found_partitions;
+ /** @} */
+};
+#endif /* PARTITION_HANDLER_INCLUDED */
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index b30240f64c3..0aeaa058cf9 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -7523,8 +7523,17 @@ WARN_VERS_PARAMETERS
WARN_VERS_PART_ROTATION
eng "Switching from partition %`s to %`s"
+WARN_VERS_TRX_MISSING
+ eng "VTQ missing transaction ID %lu"
+
+WARN_VERS_PART_NON_HISTORICAL
+ eng "Partition %`s contains non-historical data"
+
ER_VERS_NOT_ALLOWED
eng "%`s is not allowed for versioned table"
ER_VERS_WRONG_QUERY_TYPE
eng "%`s works only with %`s query type"
+
+ER_WRONG_TABLESPACE_NAME 42000
+ eng "Incorrect tablespace name `%-.192s`"
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index b358fe3386e..caca441e5e4 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -67,6 +67,7 @@
#include "opt_range.h" // store_key_image_to_rec
#include "sql_alter.h" // Alter_table_ctx
#include "sql_select.h"
+#include "sql_tablespace.h" // check_tablespace_name
#include <algorithm>
using std::max;
@@ -3458,7 +3459,10 @@ int vers_get_partition_id(partition_info *part_info,
{
table->s->busy_rotation= true;
mysql_mutex_unlock(&table->s->LOCK_rotation);
- if (part_info->vers_limit_exceed() || part_info->vers_interval_exceed(sys_trx_end->get_timestamp()))
+ // transaction is not yet pushed to VTQ, so we use now-time
+ my_time_t end_ts= sys_trx_end->table->versioned_by_engine() ?
+ my_time(0) : sys_trx_end->get_timestamp();
+ if (part_info->vers_limit_exceed() || part_info->vers_interval_exceed(end_ts))
{
part_info->vers_part_rotate(thd);
}
@@ -7388,6 +7392,39 @@ err:
}
#endif
+
+/*
+ Prepare for calling val_int on partition function by setting fields to
+ point to the record where the values of the PF-fields are stored.
+
+ SYNOPSIS
+ set_field_ptr()
+ ptr Array of fields to change ptr
+ new_buf New record pointer
+ old_buf Old record pointer
+
+ DESCRIPTION
+ Set ptr in field objects of field array to refer to new_buf record
+ instead of previously old_buf. Used before calling val_int and after
+ it is used to restore pointers to table->record[0].
+ This routine is placed outside of partition code since it can be useful
+ also for other programs.
+*/
+
+void set_field_ptr(Field **ptr, const uchar *new_buf,
+ const uchar *old_buf)
+{
+ my_ptrdiff_t diff= (new_buf - old_buf);
+ DBUG_ENTER("set_field_ptr");
+
+ do
+ {
+ (*ptr)->move_field_offset(diff);
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
/*
Prepare for calling val_int on partition function by setting fields to
point to the record where the values of the PF-fields are stored.
@@ -7426,6 +7463,61 @@ void set_key_field_ptr(KEY *key_info, const uchar *new_buf,
}
+/**
+ Append all fields in read_set to string
+
+ @param[in,out] str String to append to.
+ @param[in] row Row to append.
+ @param[in] table Table containing read_set and fields for the row.
+*/
+void append_row_to_str(String &str, const uchar *row, TABLE *table)
+{
+ Field **fields, **field_ptr;
+ const uchar *rec;
+ uint num_fields= bitmap_bits_set(table->read_set);
+ uint curr_field_index= 0;
+ bool is_rec0= !row || row == table->record[0];
+ if (!row)
+ rec= table->record[0];
+ else
+ rec= row;
+
+ /* Create a new array of all read fields. */
+ fields= (Field**) my_malloc(sizeof(void*) * (num_fields + 1),
+ MYF(0));
+ if (!fields)
+ return;
+ fields[num_fields]= NULL;
+ for (field_ptr= table->field;
+ *field_ptr;
+ field_ptr++)
+ {
+ if (!bitmap_is_set(table->read_set, (*field_ptr)->field_index))
+ continue;
+ fields[curr_field_index++]= *field_ptr;
+ }
+
+
+ if (!is_rec0)
+ set_field_ptr(fields, rec, table->record[0]);
+
+ for (field_ptr= fields;
+ *field_ptr;
+ field_ptr++)
+ {
+ Field *field= *field_ptr;
+ str.append(" ");
+ str.append(field->field_name);
+ str.append(":");
+ field_unpack(&str, field, rec, 0, false);
+ }
+
+ if (!is_rec0)
+ set_field_ptr(fields, table->record[0], rec);
+ my_free(fields);
+}
+
+
/*
SYNOPSIS
mem_alloc_error()
@@ -8595,4 +8687,52 @@ uint get_partition_field_store_length(Field *field)
store_length+= HA_KEY_BLOB_LENGTH;
return store_length;
}
+
+// FIXME: duplicate of ha_partition::set_up_table_before_create
+bool set_up_table_before_create(THD *thd,
+ TABLE_SHARE *share,
+ const char *partition_name_with_path,
+ HA_CREATE_INFO *info,
+ partition_element *part_elem)
+{
+ bool error= false;
+ const char *partition_name;
+ DBUG_ENTER("set_up_table_before_create");
+
+ DBUG_ASSERT(part_elem);
+
+ if (!part_elem)
+ DBUG_RETURN(true);
+ share->max_rows= part_elem->part_max_rows;
+ share->min_rows= part_elem->part_min_rows;
+ partition_name= strrchr(partition_name_with_path, FN_LIBCHAR);
+ if ((part_elem->index_file_name &&
+ (error= append_file_to_dir(thd,
+ const_cast<const char**>(&part_elem->index_file_name),
+ partition_name+1))) ||
+ (part_elem->data_file_name &&
+ (error= append_file_to_dir(thd,
+ const_cast<const char**>(&part_elem->data_file_name),
+ partition_name+1))))
+ {
+ DBUG_RETURN(error);
+ }
+ if (part_elem->index_file_name != NULL)
+ {
+ info->index_file_name= part_elem->index_file_name;
+ }
+ if (part_elem->data_file_name != NULL)
+ {
+ info->data_file_name= part_elem->data_file_name;
+ }
+ if (part_elem->tablespace_name != NULL)
+ {
+ if (check_tablespace_name(part_elem->tablespace_name) != IDENT_NAME_OK)
+ {
+ DBUG_RETURN(true);
+ }
+ info->tablespace= part_elem->tablespace_name;
+ }
+ DBUG_RETURN(error);
+}
#endif
diff --git a/sql/sql_partition.h b/sql/sql_partition.h
index c2665a8366b..aef4a6ce5e1 100644
--- a/sql/sql_partition.h
+++ b/sql/sql_partition.h
@@ -40,6 +40,7 @@ typedef struct st_key_range key_range;
#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1)
#define HA_CAN_PARTITION_UNIQUE (1 << 2)
#define HA_USE_AUTO_PARTITION (1 << 3)
+#define HA_ONLY_VERS_PARTITION (1 << 4)
#define NORMAL_PART_NAME 0
#define TEMP_PART_NAME 1
@@ -127,6 +128,14 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
bool check_part_func_fields(Field **ptr, bool ok_with_charsets);
bool field_is_partition_charset(Field *field);
Item* convert_charset_partition_constant(Item *item, CHARSET_INFO *cs);
+/**
+ Append all fields in read_set to string
+
+ @param[in,out] str String to append to.
+ @param[in] row Row to append.
+ @param[in] table Table containing read_set and fields for the row.
+*/
+void append_row_to_str(String &str, const uchar *row, TABLE *table);
void mem_alloc_error(size_t size);
void truncate_partition_filename(char *path);
@@ -291,6 +300,31 @@ void create_subpartition_name(char *out, const char *in1,
void set_key_field_ptr(KEY *key_info, const uchar *new_buf,
const uchar *old_buf);
+/** Set up table for creating a partition.
+Copy info from partition to the table share so the created partition
+has the correct info.
+ @param thd        THD object
+ @param share      Table share to be updated.
+ @param partition_name_with_path  Partition name with path, used to derive per-partition index/data file names.
+ @param info       Create info to be updated.
+ @param part_elem  partition_element containing the info.
+ @return status
+ @retval TRUE Error
+ @retval FALSE Success
+
+ @details
+ Set up
+ 1) MAX_ROWS, MIN_ROWS on partition
+ 2) Index file name on partition
+ 3) Data file name on partition
+ 4) Tablespace name on partition
+*/
+bool set_up_table_before_create(THD *thd,
+ TABLE_SHARE *share,
+ const char *partition_name_with_path,
+ HA_CREATE_INFO *info,
+ partition_element *part_elem);
+
extern const LEX_STRING partition_keywords[];
#endif /* SQL_PARTITION_INCLUDED */
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index b5cf35ed17c..3a921e0dc79 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -4492,7 +4492,10 @@ handler *mysql_create_frm_image(THD *thd,
part_info->part_info_string= part_syntax_buf;
part_info->part_info_len= syntax_len;
if ((!(engine_type->partition_flags &&
- engine_type->partition_flags() & HA_CAN_PARTITION)) ||
+ ((engine_type->partition_flags() & HA_CAN_PARTITION) ||
+ (part_info->part_type == VERSIONING_PARTITION &&
+ engine_type->partition_flags() & HA_ONLY_VERS_PARTITION))
+ )) ||
create_info->db_type == partition_hton)
{
/*
diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc
index 8b9e14e5a18..318be320640 100644
--- a/sql/sql_tablespace.cc
+++ b/sql/sql_tablespace.cc
@@ -22,6 +22,70 @@
#include "sql_table.h" // write_bin_log
#include "sql_class.h" // THD
+/**
+ Check if tablespace name is valid
+
+ @param tablespace_name Name of the tablespace
+
+ @note Tablespace names are not reflected in the file system, so
+ character case conversion or consideration is not relevant.
+
+ @note Checking for path characters or ending space is not done.
+ The only checks are for identifier length, both in terms of
+ number of characters and number of bytes.
+
+ @retval IDENT_NAME_OK Identifier name is ok (Success)
+ @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0
+ (ER_WRONG_TABLESPACE_NAME)
+ @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater
+ than 64 characters (ER_TOO_LONG_IDENT)
+
+ @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function
+ reports an error (using my_error()).
+*/
+
+enum_ident_name_check check_tablespace_name(const char *tablespace_name)
+{
+ size_t name_length= 0; ///< Length as number of bytes
+ size_t name_length_symbols= 0; ///< Length as number of symbols
+
+ // Name must be != NULL and length must be > 0
+ if (!tablespace_name || (name_length= strlen(tablespace_name)) == 0)
+ {
+ my_error(ER_WRONG_TABLESPACE_NAME, MYF(0), tablespace_name);
+ return IDENT_NAME_WRONG;
+ }
+
+ // If we do not have too many bytes, we must check the number of symbols,
+ // provided the system character set may use more than one byte per symbol.
+ if (name_length <= NAME_LEN && use_mb(system_charset_info))
+ {
+ const char *name= tablespace_name; ///< The actual tablespace name
+ const char *end= name + name_length; ///< Pointer to first byte after name
+
+ // Loop over all symbols as long as we don't have too many already
+ while (name != end && name_length_symbols <= NAME_CHAR_LEN)
+ {
+ int len= my_ismbchar(system_charset_info, name, end);
+ if (len)
+ name += len;
+ else
+ name++;
+
+ name_length_symbols++;
+ }
+ }
+
+ if (name_length_symbols > NAME_CHAR_LEN || name_length > NAME_LEN)
+ {
+ my_error(ER_TOO_LONG_IDENT, MYF(0), tablespace_name);
+ return IDENT_NAME_TOO_LONG;
+ }
+
+ return IDENT_NAME_OK;
+}
+
+
int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
{
int error= HA_ADMIN_NOT_IMPLEMENTED;
diff --git a/sql/sql_tablespace.h b/sql/sql_tablespace.h
index ae77d15cbcb..b97c64f7965 100644
--- a/sql/sql_tablespace.h
+++ b/sql/sql_tablespace.h
@@ -19,6 +19,41 @@
class THD;
class st_alter_tablespace;
+/**
+ Enumerate possible status of a identifier name while determining
+ its validity
+*/
+enum enum_ident_name_check
+{
+ IDENT_NAME_OK,
+ IDENT_NAME_WRONG,
+ IDENT_NAME_TOO_LONG
+};
+
+/**
+ Check if tablespace name is valid
+
+ @param tablespace_name Name of the tablespace
+
+ @note Tablespace names are not reflected in the file system, so
+ character case conversion or consideration is not relevant.
+
+ @note Checking for path characters or ending space is not done.
+ The only checks are for identifier length, both in terms of
+ number of characters and number of bytes.
+
+ @retval IDENT_NAME_OK Identifier name is ok (Success)
+ @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0
+ (ER_WRONG_TABLESPACE_NAME)
+ @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater
+ than 64 characters (ER_TOO_LONG_IDENT)
+
+ @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function
+ reports an error (using my_error()).
+*/
+
+enum_ident_name_check check_tablespace_name(const char *tablespace_name);
+
int mysql_alter_tablespace(THD* thd, st_alter_tablespace *ts_info);
#endif /* SQL_TABLESPACE_INCLUDED */
diff --git a/sql/table.cc b/sql/table.cc
index 354658ba476..b256b3e91b6 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -3261,6 +3261,20 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share,
}
outparam->part_info->is_auto_partitioned= share->auto_partitioned;
DBUG_PRINT("info", ("autopartitioned: %u", share->auto_partitioned));
+ if (outparam->part_info->part_type == VERSIONING_PARTITION &&
+ share->db_type()->vers_upgrade_handler)
+ {
+ outparam->file= share->db_type()->vers_upgrade_handler(
+ outparam->file, &outparam->mem_root);
+ if (!outparam->file)
+ {
+ thd->stmt_arena= backup_stmt_arena_ptr;
+ thd->restore_active_arena(&part_func_arena, &backup_arena);
+ my_error(ER_OUTOFMEMORY, MYF(0), 4095);
+ error_reported= TRUE;
+ goto err;
+ }
+ }
/*
We should perform the fix_partition_func in either local or
caller's arena depending on work_part_info_used value.