Diffstat (limited to 'sql')
-rw-r--r--  sql/Makefile.am  14
-rw-r--r--  sql/examples/ha_tina.cc  19
-rw-r--r--  sql/field.cc  92
-rw-r--r--  sql/field.h  35
-rw-r--r--  sql/ha_berkeley.cc  18
-rw-r--r--  sql/ha_berkeley.h  5
-rw-r--r--  sql/ha_federated.cc  31
-rw-r--r--  sql/ha_federated.h  2
-rw-r--r--  sql/ha_heap.cc  12
-rw-r--r--  sql/ha_heap.h  1
-rw-r--r--  sql/ha_innodb.cc  72
-rw-r--r--  sql/ha_innodb.h  14
-rw-r--r--  sql/ha_myisam.cc  26
-rw-r--r--  sql/ha_myisam.h  1
-rw-r--r--  sql/ha_myisammrg.cc  13
-rw-r--r--  sql/ha_myisammrg.h  1
-rw-r--r--  sql/ha_ndbcluster.cc  712
-rw-r--r--  sql/ha_ndbcluster.h  42
-rw-r--r--  sql/ha_partition.cc  3240
-rw-r--r--  sql/ha_partition.h  922
-rw-r--r--  sql/handler.cc  201
-rw-r--r--  sql/handler.h  527
-rw-r--r--  sql/item.cc  17
-rw-r--r--  sql/item_subselect.cc  4
-rw-r--r--  sql/key.cc  83
-rw-r--r--  sql/lex.h  16
-rw-r--r--  sql/lock.cc  3
-rw-r--r--  sql/log.cc  6
-rw-r--r--  sql/log_event.cc  11
-rw-r--r--  sql/mysql_priv.h  36
-rw-r--r--  sql/mysqld.cc  107
-rw-r--r--  sql/opt_range.cc  76
-rw-r--r--  sql/opt_sum.cc  4
-rw-r--r--  sql/records.cc  68
-rw-r--r--  sql/repl_failsafe.cc  9
-rw-r--r--  sql/rpl_filter.cc  539
-rw-r--r--  sql/rpl_filter.h  113
-rw-r--r--  sql/set_var.cc  5
-rw-r--r--  sql/set_var.h  1
-rw-r--r--  sql/share/errmsg.txt  119
-rw-r--r--  sql/slave.cc  407
-rw-r--r--  sql/slave.h  37
-rw-r--r--  sql/sp.cc  4
-rw-r--r--  sql/sql_acl.cc  52
-rw-r--r--  sql/sql_acl.h  3
-rw-r--r--  sql/sql_base.cc  61
-rw-r--r--  sql/sql_bitmap.h  8
-rw-r--r--  sql/sql_cache.cc  2
-rw-r--r--  sql/sql_class.h  21
-rw-r--r--  sql/sql_delete.cc  26
-rw-r--r--  sql/sql_handler.cc  6
-rw-r--r--  sql/sql_help.cc  4
-rw-r--r--  sql/sql_insert.cc  25
-rw-r--r--  sql/sql_lex.cc  1
-rw-r--r--  sql/sql_lex.h  18
-rw-r--r--  sql/sql_load.cc  17
-rw-r--r--  sql/sql_parse.cc  24
-rw-r--r--  sql/sql_partition.cc  3219
-rw-r--r--  sql/sql_prepare.cc  2
-rw-r--r--  sql/sql_repl.cc  5
-rw-r--r--  sql/sql_repl.h  1
-rw-r--r--  sql/sql_select.cc  93
-rw-r--r--  sql/sql_select.h  1
-rw-r--r--  sql/sql_show.cc  34
-rw-r--r--  sql/sql_table.cc  943
-rw-r--r--  sql/sql_udf.cc  2
-rw-r--r--  sql/sql_update.cc  133
-rw-r--r--  sql/sql_yacc.yy  620
-rw-r--r--  sql/table.cc  47
-rw-r--r--  sql/table.h  7
-rw-r--r--  sql/tztime.cc  10
-rw-r--r--  sql/unireg.cc  57
-rw-r--r--  sql/unireg.h  1
73 files changed, 11945 insertions, 1163 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am
index 4824a75d6fa..e90be7630fa 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -29,9 +29,9 @@ libexec_PROGRAMS = mysqld
noinst_PROGRAMS = gen_lex_hash
bin_PROGRAMS = mysql_tzinfo_to_sql
gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@
-LDADD = $(top_builddir)/myisam/libmyisam.a \
- $(top_builddir)/myisammrg/libmyisammrg.a \
- $(top_builddir)/heap/libheap.a \
+LDADD = $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/storage/myisammrg/libmyisammrg.a \
+ $(top_builddir)/storage/heap/libheap.a \
$(top_builddir)/vio/libvio.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
@@ -55,7 +55,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
ha_ndbcluster.h opt_range.h protocol.h \
sql_select.h structs.h table.h sql_udf.h hash_filo.h\
lex.h lex_symbol.h sql_acl.h sql_crypt.h \
- log_event.h sql_repl.h slave.h \
+ log_event.h sql_repl.h slave.h rpl_filter.h \
stacktrace.h sql_sort.h sql_cache.h set_var.h \
spatial.h gstream.h client_settings.h tzfile.h \
tztime.h my_decimal.h\
@@ -64,7 +64,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
sql_array.h \
examples/ha_example.h examples/ha_archive.h \
examples/ha_tina.h ha_blackhole.h \
- ha_federated.h
+ ha_federated.h ha_partition.h
mysqld_SOURCES = sql_lex.cc sql_handler.cc \
item.cc item_sum.cc item_buff.cc item_func.cc \
item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \
@@ -90,7 +90,8 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \
sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \
sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \
sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \
- slave.cc sql_repl.cc sql_union.cc sql_derived.cc \
+ slave.cc sql_repl.cc rpl_filter.cc \
+ sql_union.cc sql_derived.cc \
client.c sql_client.cc mini_client_errors.c pack.c\
stacktrace.c repl_failsafe.h repl_failsafe.cc \
sql_olap.cc sql_view.cc \
@@ -100,6 +101,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \
sp_cache.cc parse_file.cc sql_trigger.cc \
examples/ha_example.cc examples/ha_archive.cc \
examples/ha_tina.cc ha_blackhole.cc \
+ ha_partition.cc sql_partition.cc \
ha_federated.cc
gen_lex_hash_SOURCES = gen_lex_hash.cc
diff --git a/sql/examples/ha_tina.cc b/sql/examples/ha_tina.cc
index 5c3cbdcf2ca..3a9302483b4 100644
--- a/sql/examples/ha_tina.cc
+++ b/sql/examples/ha_tina.cc
@@ -99,7 +99,8 @@ static byte* tina_get_key(TINA_SHARE *share,uint *length,
int get_mmap(TINA_SHARE *share, int write)
{
DBUG_ENTER("ha_tina::get_mmap");
- if (share->mapped_file && munmap(share->mapped_file, share->file_stat.st_size))
+ if (share->mapped_file && my_munmap(share->mapped_file,
+ share->file_stat.st_size))
DBUG_RETURN(1);
if (my_fstat(share->data_file, &share->file_stat, MYF(MY_WME)) == -1)
@@ -108,13 +109,13 @@ int get_mmap(TINA_SHARE *share, int write)
if (share->file_stat.st_size)
{
if (write)
- share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size,
- PROT_READ|PROT_WRITE, MAP_SHARED,
- share->data_file, 0);
+ share->mapped_file= (byte *)my_mmap(NULL, share->file_stat.st_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED,
+ share->data_file, 0);
else
- share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size,
- PROT_READ, MAP_PRIVATE,
- share->data_file, 0);
+ share->mapped_file= (byte *)my_mmap(NULL, share->file_stat.st_size,
+ PROT_READ, MAP_PRIVATE,
+ share->data_file, 0);
if ((share->mapped_file ==(caddr_t)-1))
{
/*
@@ -222,7 +223,7 @@ static int free_share(TINA_SHARE *share)
if (!--share->use_count){
/* Drop the mapped file */
if (share->mapped_file)
- munmap(share->mapped_file, share->file_stat.st_size);
+ my_munmap(share->mapped_file, share->file_stat.st_size);
result_code= my_close(share->data_file,MYF(0));
hash_delete(&tina_open_tables, (byte*) share);
thr_lock_delete(&share->lock);
@@ -797,7 +798,7 @@ int ha_tina::rnd_end()
if (my_chsize(share->data_file, length, 0, MYF(MY_WME)))
DBUG_RETURN(-1);
- if (munmap(share->mapped_file, length))
+ if (my_munmap(share->mapped_file, length))
DBUG_RETURN(-1);
/* We set it to null so that get_mmap() won't try to unmap it */
diff --git a/sql/field.cc b/sql/field.cc
index 224b6c279f3..270214d4350 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -67,6 +67,7 @@ inline int field_type2index (enum_field_types field_type)
((int)FIELDTYPE_TEAR_FROM) + (field_type - FIELDTYPE_TEAR_TO) - 1);
}
+
static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
{
/* MYSQL_TYPE_DECIMAL -> */
@@ -1233,6 +1234,7 @@ Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
flags=null_ptr ? 0: NOT_NULL_FLAG;
comment.str= (char*) "";
comment.length=0;
+ fieldnr= 0;
}
uint Field::offset()
@@ -5905,6 +5907,26 @@ int Field_str::store(double nr)
}
+uint Field::is_equal(create_field *new_field)
+{
+ return (new_field->sql_type == type());
+}
+
+
+uint Field_str::is_equal(create_field *new_field)
+{
+ if (((new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) &&
+ !(flags & (BINCMP_FLAG | BINARY_FLAG))) ||
+ (!(new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) &&
+ (flags & (BINCMP_FLAG | BINARY_FLAG))))
+ return 0; /* One of the fields is binary and the other one isn't */
+
+ return ((new_field->sql_type == type()) &&
+ new_field->charset == field_charset &&
+ new_field->length == max_length());
+}
+
+
int Field_string::store(longlong nr)
{
char buff[64];
@@ -6283,7 +6305,8 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value)
}
-int Field_varstring::cmp(const char *a_ptr, const char *b_ptr)
+int Field_varstring::cmp_max(const char *a_ptr, const char *b_ptr,
+ uint max_len)
{
uint a_length, b_length;
int diff;
@@ -6298,6 +6321,8 @@ int Field_varstring::cmp(const char *a_ptr, const char *b_ptr)
a_length= uint2korr(a_ptr);
b_length= uint2korr(b_ptr);
}
+ set_if_smaller(a_length, max_len);
+ set_if_smaller(b_length, max_len);
diff= field_charset->coll->strnncollsp(field_charset,
(const uchar*) a_ptr+
length_bytes,
@@ -6661,6 +6686,22 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root,
}
+uint Field_varstring::is_equal(create_field *new_field)
+{
+ if (new_field->sql_type == type() &&
+ new_field->charset == field_charset)
+ {
+ if (new_field->length == max_length())
+ return IS_EQUAL_YES;
+ if (new_field->length > max_length() &&
+ ((new_field->length <= 255 && max_length() <= 255) ||
+ (new_field->length > 255 && max_length() > 255)))
+ return IS_EQUAL_PACK_LENGTH; // VARCHAR, longer variable length
+ }
+ return IS_EQUAL_NO;
+}
+
+
/****************************************************************************
** blob type
** A blob is saved as a length and a pointer. The length is stored in the
@@ -6928,13 +6969,16 @@ int Field_blob::cmp(const char *a,uint32 a_length, const char *b,
}
-int Field_blob::cmp(const char *a_ptr, const char *b_ptr)
+int Field_blob::cmp_max(const char *a_ptr, const char *b_ptr,
+ uint max_length)
{
char *blob1,*blob2;
memcpy_fixed(&blob1,a_ptr+packlength,sizeof(char*));
memcpy_fixed(&blob2,b_ptr+packlength,sizeof(char*));
- return Field_blob::cmp(blob1,get_length(a_ptr),
- blob2,get_length(b_ptr));
+ uint a_len= get_length(a_ptr), b_len= get_length(b_ptr);
+ set_if_smaller(a_len, max_length);
+ set_if_smaller(b_len, max_length);
+ return Field_blob::cmp(blob1,a_len,blob2,b_len);
}
@@ -7767,6 +7811,17 @@ bool Field_num::eq_def(Field *field)
}
+uint Field_num::is_equal(create_field *new_field)
+{
+ return ((new_field->sql_type == type()) &&
+ ((new_field->flags & UNSIGNED_FLAG) == (uint) (flags &
+ UNSIGNED_FLAG)) &&
+ ((new_field->flags & AUTO_INCREMENT_FLAG) ==
+ (uint) (flags & AUTO_INCREMENT_FLAG)) &&
+ (new_field->length >= max_length()));
+}
+
+
/*
Bit field.
@@ -7954,6 +8009,35 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
}
+/*
+ Compare two bit fields using pointers within the record.
+ SYNOPSIS
+ cmp_max()
+ a Pointer to field->ptr in first record
+ b Pointer to field->ptr in second record
+ max_len Maximum length used in index
+ DESCRIPTION
+ This method is used from key_rec_cmp used by merge sorts used
+ by partitioned index read and later other similar places.
+ The a and b pointer must be pointers to the field in a record
+ (not the table->record[0] necessarily)
+*/
+int Field_bit::cmp_max(const char *a, const char *b, uint max_len)
+{
+ my_ptrdiff_t a_diff= a - ptr;
+ my_ptrdiff_t b_diff= b - ptr;
+ if (bit_len)
+ {
+ int flag;
+ uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len);
+ uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len);
+ if ((flag= (int) (bits_a - bits_b)))
+ return flag;
+ }
+ return memcmp(a, b, field_length);
+}
+
+
int Field_bit::key_cmp(const byte *str, uint length)
{
if (bit_len)
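

The cmp_max() implementations above all follow one pattern: clamp both stored lengths to the prefix the index actually holds, then fall through to the normal comparison. A minimal standalone sketch of that pattern (plain C++, not part of the diff; memcmp stands in for the collation-aware strnncollsp() call, and the helper name is illustrative):

#include <algorithm>
#include <cstring>

// Prefix-limited compare in the style of Field_varstring::cmp_max() and
// Field_blob::cmp_max(): both lengths are clamped to max_len first, which
// is what set_if_smaller() does in the patch, so comparisons against a
// prefix index stay consistent with full-column comparisons.
static int cmp_max_sketch(const unsigned char *a, unsigned a_length,
                          const unsigned char *b, unsigned b_length,
                          unsigned max_len)
{
  a_length= std::min(a_length, max_len);
  b_length= std::min(b_length, max_len);
  int diff= std::memcmp(a, b, std::min(a_length, b_length));
  if (diff)
    return diff;
  return (int) a_length - (int) b_length;   // shorter prefix sorts first
}
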
diff --git a/sql/field.h b/sql/field.h
index 2b67ed3f599..c31d70dd651 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -29,6 +29,7 @@
class Send_field;
class Protocol;
+class create_field;
struct st_cache_field;
void field_conv(Field *to,Field *from);
@@ -87,7 +88,11 @@ public:
utype unireg_check;
uint32 field_length; // Length of field
uint field_index; // field number in fields array
- uint16 flags;
+ uint32 flags;
+ /* fieldnr is the id of the field (first field = 1) as is also
+     used in key_part.
+ */
+ uint16 fieldnr;
uchar null_bit; // Bit used to test null bit
Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,uchar null_bit_arg,
@@ -150,6 +155,8 @@ public:
virtual enum_field_types type() const =0;
virtual enum_field_types real_type() const { return type(); }
inline int cmp(const char *str) { return cmp(ptr,str); }
+ virtual int cmp_max(const char *a, const char *b, uint max_len)
+ { return cmp(a, b); }
virtual int cmp(const char *,const char *)=0;
virtual int cmp_binary(const char *a,const char *b, uint32 max_length=~0L)
{ return memcmp(a,b,pack_length()); }
@@ -179,6 +186,12 @@ public:
return test(record[(uint) (null_ptr - (uchar*) table->record[0])] &
null_bit);
}
+ inline bool is_null_in_record_with_offset(my_ptrdiff_t offset)
+ {
+ if (!null_ptr)
+ return 0;
+ return test(null_ptr[offset] & null_bit);
+ }
inline void set_null(int row_offset=0)
{ if (null_ptr) null_ptr[row_offset]|= null_bit; }
inline void set_notnull(int row_offset=0)
@@ -303,6 +316,8 @@ public:
int warn_if_overflow(int op_result);
/* maximum possible display length */
virtual uint32 max_length()= 0;
+
+ virtual uint is_equal(create_field *new_field);
/* convert decimal to longlong with overflow check */
longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag,
int *err);
@@ -343,6 +358,7 @@ public:
bool eq_def(Field *field);
int store_decimal(const my_decimal *);
my_decimal *val_decimal(my_decimal *);
+ uint is_equal(create_field *new_field);
};
@@ -367,6 +383,7 @@ public:
uint32 max_length() { return field_length; }
friend class create_field;
my_decimal *val_decimal(my_decimal *);
+ uint is_equal(create_field *new_field);
};
@@ -1055,7 +1072,11 @@ public:
longlong val_int(void);
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
- int cmp(const char *,const char*);
+ int cmp_max(const char *, const char *, uint max_length);
+ int cmp(const char *a,const char*b)
+ {
+ return cmp_max(a, b, ~0);
+ }
void sort_string(char *buff,uint length);
void get_key_image(char *buff,uint length, imagetype type);
void set_key_image(char *buff,uint length);
@@ -1081,6 +1102,7 @@ public:
Field *new_key_field(MEM_ROOT *root, struct st_table *new_table,
char *new_ptr, uchar *new_null_ptr,
uint new_null_bit);
+ uint is_equal(create_field *new_field);
};
@@ -1111,7 +1133,9 @@ public:
longlong val_int(void);
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
- int cmp(const char *,const char*);
+ int cmp_max(const char *, const char *, uint max_length);
+ int cmp(const char *a,const char*b)
+ { return cmp_max(a, b, ~0); }
int cmp(const char *a, uint32 a_length, const char *b, uint32 b_length);
int cmp_binary(const char *a,const char *b, uint32 max_length=~0L);
int key_cmp(const byte *,const byte*);
@@ -1135,6 +1159,10 @@ public:
{
memcpy_fixed(str,ptr+packlength,sizeof(char*));
}
+ inline void get_ptr(char **str, uint row_offset)
+ {
+ memcpy_fixed(str,ptr+packlength+row_offset,sizeof(char*));
+ }
inline void set_ptr(char *length,char *data)
{
memcpy(ptr,length,packlength);
@@ -1304,6 +1332,7 @@ public:
my_decimal *val_decimal(my_decimal *);
int cmp(const char *a, const char *b)
{ return cmp_binary(a, b); }
+ int cmp_max(const char *a, const char *b, uint max_length);
int key_cmp(const byte *a, const byte *b)
{ return cmp_binary((char *) a, (char *) b); }
int key_cmp(const byte *str, uint length);
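

is_equal() gives the server a per-field verdict that ALTER TABLE can aggregate: IS_EQUAL_NO forces a full table copy, IS_EQUAL_YES means the stored data is untouched, and IS_EQUAL_PACK_LENGTH (the new VARCHAR case) means only the length bytes change. A sketch of a caller acting on that contract (illustrative C++; the IS_EQUAL_* values match their MySQL definitions, the helper itself is assumed):

// Aggregate per-field verdicts into one table-level decision, mirroring
// the "weakest field wins" logic an ALTER TABLE implementation needs.
enum { IS_EQUAL_NO= 0, IS_EQUAL_YES= 1, IS_EQUAL_PACK_LENGTH= 2 };

static unsigned aggregate_is_equal(const unsigned *field_verdicts, unsigned n)
{
  unsigned result= IS_EQUAL_YES;
  for (unsigned i= 0; i < n; i++)
  {
    if (field_verdicts[i] == IS_EQUAL_NO)
      return IS_EQUAL_NO;                   // one incompatible field: copy table
    if (field_verdicts[i] == IS_EQUAL_PACK_LENGTH)
      result= IS_EQUAL_PACK_LENGTH;         // keep data, but repack lengths
  }
  return result;
}
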
diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc
index b8a779c08cf..9a0e0ed1488 100644
--- a/sql/ha_berkeley.cc
+++ b/sql/ha_berkeley.cc
@@ -1367,7 +1367,7 @@ int ha_berkeley::delete_row(const byte * record)
}
-int ha_berkeley::index_init(uint keynr)
+int ha_berkeley::index_init(uint keynr, bool sorted)
{
int error;
DBUG_ENTER("ha_berkeley::index_init");
@@ -1645,7 +1645,7 @@ int ha_berkeley::rnd_init(bool scan)
{
DBUG_ENTER("rnd_init");
current_row.flags=DB_DBT_REALLOC;
- DBUG_RETURN(index_init(primary_key));
+ DBUG_RETURN(index_init(primary_key, 0));
}
int ha_berkeley::rnd_end()
@@ -2153,7 +2153,7 @@ ulonglong ha_berkeley::get_auto_increment()
(void) ha_berkeley::extra(HA_EXTRA_KEYREAD);
/* Set 'active_index' */
- ha_berkeley::index_init(table->s->next_number_index);
+ ha_berkeley::index_init(table->s->next_number_index, 0);
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
@@ -2492,7 +2492,7 @@ void ha_berkeley::get_status()
if (!(share->status & STATUS_PRIMARY_KEY_INIT))
{
(void) extra(HA_EXTRA_KEYREAD);
- index_init(primary_key);
+ index_init(primary_key, 0);
if (!index_last(table->record[1]))
share->auto_ident=uint5korr(current_ident);
index_end();
@@ -2645,4 +2645,14 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2)
return 0;
}
+
+bool ha_berkeley::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ if (table_changes < IS_EQUAL_YES)
+ return COMPATIBLE_DATA_NO;
+ return COMPATIBLE_DATA_YES;
+}
+
+
#endif /* HAVE_BERKELEY_DB */
diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h
index 282641e3f25..aab76accefa 100644
--- a/sql/ha_berkeley.h
+++ b/sql/ha_berkeley.h
@@ -92,7 +92,7 @@ class ha_berkeley: public handler
const char **bas_ext() const;
ulong table_flags(void) const { return int_table_flags; }
uint max_supported_keys() const { return MAX_KEY-1; }
- uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
+ uint extra_rec_buf_length() const { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
ha_rows estimate_rows_upper_bound();
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
bool has_transactions() { return 1;}
@@ -103,7 +103,7 @@ class ha_berkeley: public handler
int write_row(byte * buf);
int update_row(const byte * old_data, byte * new_data);
int delete_row(const byte * buf);
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
@@ -151,6 +151,7 @@ class ha_berkeley: public handler
uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
bool primary_key_is_clustered() { return true; }
int cmp_ref(const byte *ref1, const byte *ref2);
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
};
extern bool berkeley_shared_data;
diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc
index 96cb81fe3ec..87525edd4c9 100644
--- a/sql/ha_federated.cc
+++ b/sql/ha_federated.cc
@@ -1546,9 +1546,6 @@ inline uint field_in_record_is_null(TABLE *table,
int ha_federated::write_row(byte *buf)
{
bool has_fields= FALSE;
- uint all_fields_have_same_query_id= 1;
- ulong current_query_id= 1;
- ulong tmp_query_id= 1;
char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE];
char values_buffer[FEDERATED_QUERY_BUFFER_SIZE];
char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE];
@@ -1577,14 +1574,6 @@ int ha_federated::write_row(byte *buf)
table->timestamp_field->set_time();
/*
- get the current query id - the fields that we add to the insert
- statement to send to the foreign will not be appended unless they match
- this query id
- */
- current_query_id= table->in_use->query_id;
- DBUG_PRINT("info", ("current query id %d", current_query_id));
-
- /*
start both our field and field values strings
*/
insert_string.append(FEDERATED_INSERT);
@@ -1597,21 +1586,8 @@ int ha_federated::write_row(byte *buf)
values_string.append(FEDERATED_OPENPAREN);
/*
- Even if one field is different, all_fields_same_query_id can't remain
- 0 if it remains 0, then that means no fields were specified in the query
- such as in the case of INSERT INTO table VALUES (val1, val2, valN)
-
- */
- for (field= table->field; *field; field++)
- {
- if (field > table->field && tmp_query_id != (*field)->query_id)
- all_fields_have_same_query_id= 0;
-
- tmp_query_id= (*field)->query_id;
- }
- /*
loop through the field pointer array, add any fields to both the values
- list and the fields list that match the current query id
+ list and the fields list that is part of the write set
You might ask "Why an index variable (has_fields) ?" My answer is that
we need to count how many fields we actually need
@@ -1619,8 +1595,7 @@ int ha_federated::write_row(byte *buf)
for (field= table->field; *field; field++)
{
/* if there is a query id and if it's equal to the current query id */
- if (((*field)->query_id && (*field)->query_id == current_query_id)
- || all_fields_have_same_query_id)
+ if (ha_get_bit_in_write_set((*field)->fieldnr))
{
/*
There are some fields. This will be used later to determine
@@ -2086,7 +2061,7 @@ error:
}
/* Initialized at each key walk (called multiple times unlike rnd_init()) */
-int ha_federated::index_init(uint keynr)
+int ha_federated::index_init(uint keynr, bool sorted)
{
int error;
DBUG_ENTER("ha_federated::index_init");
diff --git a/sql/ha_federated.h b/sql/ha_federated.h
index f75fa21b1d6..c94a28219ae 100644
--- a/sql/ha_federated.h
+++ b/sql/ha_federated.h
@@ -248,7 +248,7 @@ public:
int write_row(byte *buf);
int update_row(const byte *old_data, byte *new_data);
int delete_row(const byte *buf);
- int index_init(uint keynr);
+ int index_init(uint keynr, bool sorted);
int index_read(byte *buf, const byte *key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_idx(byte *buf, uint idx, const byte *key,
diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc
index 94ee3f8e656..01e693978db 100644
--- a/sql/ha_heap.cc
+++ b/sql/ha_heap.cc
@@ -629,3 +629,15 @@ ulonglong ha_heap::get_auto_increment()
ha_heap::info(HA_STATUS_AUTO);
return auto_increment_value;
}
+
+
+bool ha_heap::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ /* Check that auto_increment value was not changed */
+ if ((table_changes != IS_EQUAL_YES &&
+ info->used_fields & HA_CREATE_USED_AUTO) &&
+ info->auto_increment_value != 0)
+ return COMPATIBLE_DATA_NO;
+ return COMPATIBLE_DATA_YES;
+}
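

check_if_incompatible_data() is the table-level counterpart of is_equal(): the engine receives the aggregated table_changes verdict plus the HA_CREATE_INFO options and decides whether its existing data files can be kept. A compact restatement of the heap rule above (illustrative C++; the 0/1 encoding of the COMPATIBLE_DATA_* constants is an assumption):

enum { IS_EQUAL_YES= 1 };                   // as in the field-level contract
enum { COMPATIBLE_DATA_YES= 0, COMPATIBLE_DATA_NO= 1 };

// ha_heap accepts any field-level change except when the fields changed
// (table_changes != IS_EQUAL_YES) and the statement also supplied a new,
// non-zero AUTO_INCREMENT start value.
static int heap_compatible_sketch(unsigned table_changes,
                                  bool create_used_auto,
                                  unsigned long long auto_increment_value)
{
  if (table_changes != IS_EQUAL_YES &&
      create_used_auto && auto_increment_value != 0)
    return COMPATIBLE_DATA_NO;
  return COMPATIBLE_DATA_YES;
}
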
diff --git a/sql/ha_heap.h b/sql/ha_heap.h
index 7c4227e952c..24097460a24 100644
--- a/sql/ha_heap.h
+++ b/sql/ha_heap.h
@@ -106,6 +106,7 @@ public:
HEAP_PTR ptr2=*(HEAP_PTR*)ref2;
return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0);
}
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
private:
void update_key_stats();
};
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 4ed5fadb603..638bda49002 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -111,28 +111,28 @@ typedef byte mysql_byte;
/* Include necessary InnoDB headers */
extern "C" {
-#include "../innobase/include/univ.i"
-#include "../innobase/include/os0file.h"
-#include "../innobase/include/os0thread.h"
-#include "../innobase/include/srv0start.h"
-#include "../innobase/include/srv0srv.h"
-#include "../innobase/include/trx0roll.h"
-#include "../innobase/include/trx0trx.h"
-#include "../innobase/include/trx0sys.h"
-#include "../innobase/include/mtr0mtr.h"
-#include "../innobase/include/row0ins.h"
-#include "../innobase/include/row0mysql.h"
-#include "../innobase/include/row0sel.h"
-#include "../innobase/include/row0upd.h"
-#include "../innobase/include/log0log.h"
-#include "../innobase/include/lock0lock.h"
-#include "../innobase/include/dict0crea.h"
-#include "../innobase/include/btr0cur.h"
-#include "../innobase/include/btr0btr.h"
-#include "../innobase/include/fsp0fsp.h"
-#include "../innobase/include/sync0sync.h"
-#include "../innobase/include/fil0fil.h"
-#include "../innobase/include/trx0xa.h"
+#include "../storage/innobase/include/univ.i"
+#include "../storage/innobase/include/os0file.h"
+#include "../storage/innobase/include/os0thread.h"
+#include "../storage/innobase/include/srv0start.h"
+#include "../storage/innobase/include/srv0srv.h"
+#include "../storage/innobase/include/trx0roll.h"
+#include "../storage/innobase/include/trx0trx.h"
+#include "../storage/innobase/include/trx0sys.h"
+#include "../storage/innobase/include/mtr0mtr.h"
+#include "../storage/innobase/include/row0ins.h"
+#include "../storage/innobase/include/row0mysql.h"
+#include "../storage/innobase/include/row0sel.h"
+#include "../storage/innobase/include/row0upd.h"
+#include "../storage/innobase/include/log0log.h"
+#include "../storage/innobase/include/lock0lock.h"
+#include "../storage/innobase/include/dict0crea.h"
+#include "../storage/innobase/include/btr0cur.h"
+#include "../storage/innobase/include/btr0btr.h"
+#include "../storage/innobase/include/fsp0fsp.h"
+#include "../storage/innobase/include/sync0sync.h"
+#include "../storage/innobase/include/fil0fil.h"
+#include "../storage/innobase/include/trx0xa.h"
}
#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
@@ -3081,7 +3081,8 @@ build_template(
goto include_field;
}
- if (thd->query_id == field->query_id) {
+ if (table->file->ha_get_bit_in_read_set(i+1) ||
+ table->file->ha_get_bit_in_write_set(i+1)) {
/* This field is needed in the query */
goto include_field;
@@ -3701,7 +3702,8 @@ int
ha_innobase::index_init(
/*====================*/
/* out: 0 or error number */
- uint keynr) /* in: key (index) number */
+ uint keynr, /* in: key (index) number */
+ bool sorted) /* in: 1 if result MUST be sorted according to index */
{
int error = 0;
DBUG_ENTER("index_init");
@@ -6809,7 +6811,7 @@ ha_innobase::innobase_read_and_init_auto_inc(
}
(void) extra(HA_EXTRA_KEYREAD);
- index_init(table->s->next_number_index);
+ index_init(table->s->next_number_index, 1);
/* Starting from 5.0.9, we use a consistent read to read the auto-inc
column maximum value. This eliminates the spurious deadlocks caused
@@ -7342,4 +7344,24 @@ innobase_set_cursor_view(
(cursor_view_t*) curview);
}
+
+bool ha_innobase::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ if (table_changes != IS_EQUAL_YES)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that auto_increment value was not changed */
+ if ((info->used_fields & HA_CREATE_USED_AUTO) &&
+ info->auto_increment_value != 0)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that row format didn't change */
+ if ((info->used_fields & HA_CREATE_USED_AUTO) &&
+ get_row_type() != info->row_type)
+ return COMPATIBLE_DATA_NO;
+
+ return COMPATIBLE_DATA_YES;
+}
+
#endif /* HAVE_INNOBASE_DB */
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index 672e48d9817..595ab5ccde2 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -124,7 +124,7 @@ class ha_innobase: public handler
int delete_row(const byte * buf);
void unlock_row();
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
@@ -152,6 +152,16 @@ class ha_innobase: public handler
int transactional_table_lock(THD *thd, int lock_type);
int start_stmt(THD *thd);
+ int ha_retrieve_all_cols()
+ {
+ ha_set_all_bits_in_read_set();
+ return extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ }
+ int ha_retrieve_all_pk()
+ {
+ ha_set_primary_key_in_read_set();
+ return extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY);
+ }
void position(byte *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range
*max_key);
@@ -194,6 +204,8 @@ class ha_innobase: public handler
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered() { return true; }
int cmp_ref(const byte *ref1, const byte *ref2);
+ bool check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes);
};
extern struct show_var_st innodb_status_variables[];
diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc
index 8f3970d69e6..715ee3da8b9 100644
--- a/sql/ha_myisam.cc
+++ b/sql/ha_myisam.cc
@@ -27,8 +27,8 @@
#ifndef MASTER
#include "../srclib/myisam/myisamdef.h"
#else
-#include "../myisam/myisamdef.h"
-#include "../myisam/rt_index.h"
+#include "../storage/myisam/myisamdef.h"
+#include "../storage/myisam/rt_index.h"
#endif
ulong myisam_recover_options= HA_RECOVER_NONE;
@@ -1697,3 +1697,25 @@ uint ha_myisam::checksum() const
return (uint)file->s->state.checksum;
}
+
+bool ha_myisam::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ uint options= table->s->db_options_in_use;
+
+ if (info->auto_increment_value != auto_increment_value ||
+ info->raid_type != raid_type ||
+ info->raid_chunks != raid_chunks ||
+ info->raid_chunksize != raid_chunksize ||
+ info->data_file_name != data_file_name ||
+ info->index_file_name != index_file_name ||
+ table_changes == IS_EQUAL_NO)
+ return COMPATIBLE_DATA_NO;
+
+ if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)) !=
+ (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)))
+ return COMPATIBLE_DATA_NO;
+ return COMPATIBLE_DATA_YES;
+}
diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h
index ca684463311..79036893faf 100644
--- a/sql/ha_myisam.h
+++ b/sql/ha_myisam.h
@@ -123,6 +123,7 @@ class ha_myisam: public handler
int backup(THD* thd, HA_CHECK_OPT* check_opt);
int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt);
int preload_keys(THD* thd, HA_CHECK_OPT* check_opt);
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
#ifdef HAVE_REPLICATION
int dump(THD* thd, int fd);
int net_read_dump(NET* net);
diff --git a/sql/ha_myisammrg.cc b/sql/ha_myisammrg.cc
index f92717e11eb..388ecfb331f 100644
--- a/sql/ha_myisammrg.cc
+++ b/sql/ha_myisammrg.cc
@@ -25,7 +25,7 @@
#ifndef MASTER
#include "../srclib/myisammrg/myrg_def.h"
#else
-#include "../myisammrg/myrg_def.h"
+#include "../storage/myisammrg/myrg_def.h"
#endif
/*****************************************************************************
@@ -515,3 +515,14 @@ void ha_myisammrg::append_create_info(String *packet)
}
packet->append(')');
}
+
+
+bool ha_myisammrg::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ /*
+ For myisammrg, we should always re-generate the mapping file as this
+ is trivial to do
+ */
+ return COMPATIBLE_DATA_NO;
+}
diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h
index c762b7c286e..45cf93a7e63 100644
--- a/sql/ha_myisammrg.h
+++ b/sql/ha_myisammrg.h
@@ -82,4 +82,5 @@ class ha_myisammrg: public handler
void update_create_info(HA_CREATE_INFO *create_info);
void append_create_info(String *packet);
MYRG_INFO *myrg_info() { return file; }
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
};
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 146fbee93c8..a036ccdbf14 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -34,6 +34,7 @@
// options from from mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
+extern enum ndb_distribution opt_ndb_distribution_id;
extern const char *opt_ndbcluster_connectstring;
// Default value for parallelism
@@ -41,7 +42,7 @@ static const int parallelism= 0;
// Default value for max number of transactions
// createable against NDB from this handler
-static const int max_transactions= 2;
+static const int max_transactions= 3; // should really be 2 but there is one transaction too many allocated when LOCK TABLES is used
static const char *ha_ndb_ext=".ndb";
@@ -103,6 +104,7 @@ static HASH ndbcluster_open_tables;
static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
my_bool not_used __attribute__((unused)));
+static void ndb_set_fragmentation(NDBTAB & tab, TABLE *table, uint pk_len);
static NDB_SHARE *get_share(const char *table_name);
static void free_share(NDB_SHARE *share);
@@ -858,21 +860,18 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field,
/*
Check if any set or get of blob value in current query.
*/
-bool ha_ndbcluster::uses_blob_value(bool all_fields)
+bool ha_ndbcluster::uses_blob_value()
{
if (table->s->blob_fields == 0)
return FALSE;
- if (all_fields)
- return TRUE;
{
uint no_fields= table->s->fields;
int i;
- THD *thd= current_thd;
// They always put blobs at the end..
for (i= no_fields - 1; i >= 0; i--)
{
- Field *field= table->field[i];
- if (thd->query_id == field->query_id)
+ if ((m_write_op && ha_get_bit_in_write_set(i+1)) ||
+ (!m_write_op && ha_get_bit_in_read_set(i+1)))
{
return TRUE;
}
@@ -1145,7 +1144,7 @@ int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type)
{
if (type >= TL_WRITE_ALLOW_WRITE)
return NdbOperation::LM_Exclusive;
- else if (uses_blob_value(m_retrieve_all_fields))
+ else if (uses_blob_value())
return NdbOperation::LM_Read;
else
return NdbOperation::LM_CommittedRead;
@@ -1297,17 +1296,14 @@ inline
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
uint i;
- THD *thd= current_thd;
-
DBUG_ENTER("define_read_attrs");
// Define attributes to read
for (i= 0; i < table->s->fields; i++)
{
Field *field= table->field[i];
- if ((thd->query_id == field->query_id) ||
- ((field->flags & PRI_KEY_FLAG)) ||
- m_retrieve_all_fields)
+ if (ha_get_bit_in_read_set(i+1) ||
+ ((field->flags & PRI_KEY_FLAG)))
{
if (get_ndb_value(op, field, i, buf))
ERR_RETURN(op->getNdbError());
@@ -1334,11 +1330,13 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
DBUG_RETURN(0);
}
+
/*
Read one record from NDB using primary key
*/
-int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
+int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf,
+ uint32 part_id)
{
uint no_fields= table->s->fields;
NdbConnection *trans= m_active_trans;
@@ -1348,6 +1346,7 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
DBUG_ENTER("pk_read");
DBUG_PRINT("enter", ("key_len: %u", key_len));
DBUG_DUMP("key", (char*)key, key_len);
+ m_write_op= FALSE;
NdbOperation::LockMode lm=
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
@@ -1355,6 +1354,8 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
op->readTuple(lm) != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(part_id);
if (table->s->primary_key == MAX_KEY)
{
// This table has no primary key, use "hidden" primary key
@@ -1392,17 +1393,20 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf)
Read one complementing record from NDB using primary key from old_data
*/
-int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
+int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data,
+ uint32 old_part_id)
{
uint no_fields= table->s->fields, i;
NdbTransaction *trans= m_active_trans;
NdbOperation *op;
- THD *thd= current_thd;
DBUG_ENTER("complemented_pk_read");
+ m_write_op= FALSE;
- if (m_retrieve_all_fields)
+ if (ha_get_all_bit_in_read_set())
+ {
// We have already retrieved all fields, nothing to complement
DBUG_RETURN(0);
+ }
NdbOperation::LockMode lm=
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
@@ -1412,12 +1416,16 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
int res;
if ((res= set_primary_key_from_record(op, old_data)))
ERR_RETURN(trans->getNdbError());
+
+ if (m_use_partition_function)
+ op->setPartitionId(old_part_id);
+
// Read all unreferenced non-key field(s)
for (i= 0; i < no_fields; i++)
{
Field *field= table->field[i];
if (!((field->flags & PRI_KEY_FLAG) ||
- (thd->query_id == field->query_id)))
+ (ha_get_bit_in_read_set(i+1))))
{
if (get_ndb_value(op, field, i, new_data))
ERR_RETURN(trans->getNdbError());
@@ -1441,7 +1449,7 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
{
Field *field= table->field[i];
if (!((field->flags & PRI_KEY_FLAG) ||
- (thd->query_id == field->query_id)))
+ (ha_get_bit_in_read_set(i+1))))
{
m_value[i].ptr= NULL;
}
@@ -1470,6 +1478,17 @@ int ha_ndbcluster::peek_row(const byte *record)
if ((res= set_primary_key_from_record(op, record)))
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ {
+ uint32 part_id;
+ int error;
+ if ((error= m_part_info->get_partition_id(m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ op->setPartitionId(part_id);
+ }
+
if (execute_no_commit_ie(this,trans) != 0)
{
table->status= STATUS_NOT_FOUND;
@@ -1808,7 +1827,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
const key_range *end_key,
- bool sorted, bool descending, byte* buf)
+ bool sorted, bool descending,
+ byte* buf, part_id_range *part_spec)
{
int res;
bool restart;
@@ -1819,6 +1839,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d",
active_index, sorted, descending));
DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
+ m_write_op= FALSE;
// Check that sorted seems to be initialised
DBUG_ASSERT(sorted == 0 || sorted == 1);
@@ -1833,11 +1854,17 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
(const NDBTAB *) m_table)) ||
op->readTuples(lm, 0, parallelism, sorted, descending))
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function && part_spec != NULL &&
+ part_spec->start_part == part_spec->end_part)
+ op->setPartitionId(part_spec->start_part);
m_active_cursor= op;
} else {
restart= TRUE;
op= (NdbIndexScanOperation*)m_active_cursor;
+ if (m_use_partition_function && part_spec != NULL &&
+ part_spec->start_part == part_spec->end_part)
+ op->setPartitionId(part_spec->start_part);
DBUG_ASSERT(op->getSorted() == sorted);
DBUG_ASSERT(op->getLockMode() ==
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
@@ -1878,6 +1905,7 @@ int ha_ndbcluster::full_table_scan(byte *buf)
DBUG_ENTER("full_table_scan");
DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
+ m_write_op= FALSE;
NdbOperation::LockMode lm=
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
@@ -1907,6 +1935,7 @@ int ha_ndbcluster::write_row(byte *record)
NdbOperation *op;
int res;
THD *thd= current_thd;
+ m_write_op= TRUE;
DBUG_ENTER("write_row");
@@ -1935,6 +1964,17 @@ int ha_ndbcluster::write_row(byte *record)
if (res != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ {
+ uint32 part_id;
+ int error;
+ if ((error= m_part_info->get_partition_id(m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ op->setPartitionId(part_id);
+ }
+
if (table->s->primary_key == MAX_KEY)
{
// Table has hidden primary key
@@ -2092,25 +2132,35 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
NdbScanOperation* cursor= m_active_cursor;
NdbOperation *op;
uint i;
+ uint32 old_part_id= 0, new_part_id= 0;
+ int error;
DBUG_ENTER("update_row");
+ m_write_op= TRUE;
statistic_increment(thd->status_var.ha_update_count, &LOCK_status);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
{
table->timestamp_field->set_time();
- // Set query_id so that field is really updated
- table->timestamp_field->query_id= thd->query_id;
+ ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
+ }
+
+ if (m_use_partition_function &&
+ (error= get_parts_for_update(old_data, new_data, table->record[0],
+ m_part_info, &old_part_id, &new_part_id)))
+ {
+ DBUG_RETURN(error);
}
/* Check for update of primary key for special handling */
if ((table->s->primary_key != MAX_KEY) &&
- (key_cmp(table->s->primary_key, old_data, new_data)))
+ (key_cmp(table->s->primary_key, old_data, new_data)) ||
+ (old_part_id != new_part_id))
{
int read_res, insert_res, delete_res, undo_res;
DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert"));
// Get all old fields, since we optimize away fields not in query
- read_res= complemented_pk_read(old_data, new_data);
+ read_res= complemented_pk_read(old_data, new_data, old_part_id);
if (read_res)
{
DBUG_PRINT("info", ("pk read failed"));
@@ -2164,8 +2214,10 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
if (!(op= cursor->updateCurrentTuple()))
ERR_RETURN(trans->getNdbError());
m_ops_pending++;
- if (uses_blob_value(FALSE))
+ if (uses_blob_value())
m_blobs_pending= TRUE;
+ if (m_use_partition_function)
+ cursor->setPartitionId(new_part_id);
}
else
{
@@ -2173,6 +2225,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
op->updateTuple() != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(new_part_id);
if (table->s->primary_key == MAX_KEY)
{
// This table has no primary key, use "hidden" primary key
@@ -2202,7 +2256,7 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
for (i= 0; i < table->s->fields; i++)
{
Field *field= table->field[i];
- if (((thd->query_id == field->query_id) || m_retrieve_all_fields) &&
+ if (ha_get_bit_in_write_set(i+1) &&
(!(field->flags & PRI_KEY_FLAG)) &&
set_ndb_value(op, field, i))
ERR_RETURN(op->getNdbError());
@@ -2228,11 +2282,21 @@ int ha_ndbcluster::delete_row(const byte *record)
NdbTransaction *trans= m_active_trans;
NdbScanOperation* cursor= m_active_cursor;
NdbOperation *op;
+ uint32 part_id;
+ int error;
DBUG_ENTER("delete_row");
+ m_write_op= TRUE;
statistic_increment(thd->status_var.ha_delete_count,&LOCK_status);
m_rows_changed++;
+ if (m_use_partition_function &&
+ (error= get_part_for_delete(record, table->record[0], m_part_info,
+ &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+
if (cursor)
{
/*
@@ -2247,6 +2311,9 @@ int ha_ndbcluster::delete_row(const byte *record)
ERR_RETURN(trans->getNdbError());
m_ops_pending++;
+ if (m_use_partition_function)
+ cursor->setPartitionId(part_id);
+
no_uncommitted_rows_update(-1);
if (!m_primary_key_update)
@@ -2260,6 +2327,9 @@ int ha_ndbcluster::delete_row(const byte *record)
op->deleteTuple() != 0)
ERR_RETURN(trans->getNdbError());
+ if (m_use_partition_function)
+ op->setPartitionId(part_id);
+
no_uncommitted_rows_update(-1);
if (table->s->primary_key == MAX_KEY)
@@ -2385,8 +2455,6 @@ void ha_ndbcluster::print_results()
DBUG_ENTER("print_results");
#ifndef DBUG_OFF
- const NDBTAB *tab= (const NDBTAB*) m_table;
-
if (!_db_on_)
DBUG_VOID_RETURN;
@@ -2441,11 +2509,13 @@ print_value:
}
-int ha_ndbcluster::index_init(uint index)
+int ha_ndbcluster::index_init(uint index, bool sorted)
{
DBUG_ENTER("ha_ndbcluster::index_init");
- DBUG_PRINT("enter", ("index: %u", index));
- DBUG_RETURN(handler::index_init(index));
+ DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted));
+ active_index= index;
+ m_sorted= sorted;
+ DBUG_RETURN(0);
}
@@ -2482,55 +2552,16 @@ int ha_ndbcluster::index_read(byte *buf,
const byte *key, uint key_len,
enum ha_rkey_function find_flag)
{
+ key_range start_key;
+ bool descending= FALSE;
DBUG_ENTER("ha_ndbcluster::index_read");
DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
active_index, key_len, find_flag));
- int error;
- ndb_index_type type= get_index_type(active_index);
- const KEY* key_info= table->key_info+active_index;
- switch (type){
- case PRIMARY_KEY_ORDERED_INDEX:
- case PRIMARY_KEY_INDEX:
- if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len)
- {
- if (m_active_cursor && (error= close_scan()))
- DBUG_RETURN(error);
- DBUG_RETURN(pk_read(key, key_len, buf));
- }
- else if (type == PRIMARY_KEY_INDEX)
- {
- DBUG_RETURN(1);
- }
- break;
- case UNIQUE_ORDERED_INDEX:
- case UNIQUE_INDEX:
- if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len &&
- !check_null_in_key(key_info, key, key_len))
- {
- if (m_active_cursor && (error= close_scan()))
- DBUG_RETURN(error);
- DBUG_RETURN(unique_index_read(key, key_len, buf));
- }
- else if (type == UNIQUE_INDEX)
- {
- DBUG_RETURN(1);
- }
- break;
- case ORDERED_INDEX:
- break;
- default:
- case UNDEFINED_INDEX:
- DBUG_ASSERT(FALSE);
- DBUG_RETURN(1);
- break;
- }
-
- key_range start_key;
start_key.key= key;
start_key.length= key_len;
start_key.flag= find_flag;
- bool descending= FALSE;
+ descending= FALSE;
switch (find_flag) {
case HA_READ_KEY_OR_PREV:
case HA_READ_BEFORE_KEY:
@@ -2541,8 +2572,8 @@ int ha_ndbcluster::index_read(byte *buf,
default:
break;
}
- error= ordered_index_scan(&start_key, 0, TRUE, descending, buf);
- DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error);
+ DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending,
+ m_sorted, buf));
}
@@ -2553,7 +2584,7 @@ int ha_ndbcluster::index_read_idx(byte *buf, uint index_no,
statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status);
DBUG_ENTER("ha_ndbcluster::index_read_idx");
DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len));
- index_init(index_no);
+ index_init(index_no, 0);
DBUG_RETURN(index_read(buf, key, key_len, find_flag));
}
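

index_init() now carries a "sorted" flag because, once a table is partitioned, a result in index order can no longer come from a single cursor: each partition returns its own ordered stream and the layer above must merge them (this is where key_rec_cmp() and the new cmp_max() methods come in). A generic sketch of that merge (illustrative C++ on plain ints; the real code compares key parts inside records):

#include <functional>
#include <queue>
#include <utility>
#include <vector>

// Merge per-partition streams that are each sorted into one sorted
// stream, via a min-heap keyed on each stream's current head.
static std::vector<int> merge_sorted(const std::vector<std::vector<int> > &parts)
{
  typedef std::pair<int, size_t> head;      // (current value, partition no)
  std::priority_queue<head, std::vector<head>, std::greater<head> > heap;
  std::vector<size_t> pos(parts.size(), 0);
  std::vector<int> out;
  for (size_t p= 0; p < parts.size(); p++)
    if (!parts[p].empty())
      heap.push(head(parts[p][0], p));
  while (!heap.empty())
  {
    head h= heap.top(); heap.pop();
    out.push_back(h.first);                 // emit the smallest current head
    size_t p= h.second;
    if (++pos[p] < parts[p].size())
      heap.push(head(parts[p][pos[p]], p));
  }
  return out;
}
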
@@ -2584,7 +2615,7 @@ int ha_ndbcluster::index_first(byte *buf)
// Start the ordered index scan and fetch the first row
// Only HA_READ_ORDER indexes get called by index_first
- DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf));
+ DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL));
}
@@ -2592,7 +2623,7 @@ int ha_ndbcluster::index_last(byte *buf)
{
DBUG_ENTER("ha_ndbcluster::index_last");
statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status);
- DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf));
+ DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL));
}
int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
@@ -2601,66 +2632,76 @@ int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}
-inline
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
const key_range *end_key,
- bool eq_r, bool sorted,
+ bool desc, bool sorted,
byte* buf)
{
- KEY* key_info;
- int error= 1;
+ part_id_range part_spec;
+ ndb_index_type type= get_index_type(active_index);
+ const KEY* key_info= table->key_info+active_index;
+ int error;
DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
- DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted));
+ DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
- switch (get_index_type(active_index)){
+ if (m_use_partition_function)
+ {
+ get_partition_set(table, buf, active_index, start_key, &part_spec);
+ if (part_spec.start_part > part_spec.end_part)
+ {
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ else if (part_spec.start_part == part_spec.end_part)
+ {
+ /*
+ Only one partition needs to be scanned; if sorted was requested we
+ can drop it, since the output of one ordered partitioned index
+ scan is already sorted.
+ */
+ sorted= FALSE;
+ }
+ }
+ m_write_op= FALSE;
+ switch (type){
case PRIMARY_KEY_ORDERED_INDEX:
case PRIMARY_KEY_INDEX:
- key_info= table->key_info + active_index;
if (start_key &&
start_key->length == key_info->key_length &&
start_key->flag == HA_READ_KEY_EXACT)
{
if (m_active_cursor && (error= close_scan()))
DBUG_RETURN(error);
- error= pk_read(start_key->key, start_key->length, buf);
- DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
+ DBUG_RETURN(pk_read(start_key->key, start_key->length, buf,
+ part_spec.start_part));
}
break;
case UNIQUE_ORDERED_INDEX:
case UNIQUE_INDEX:
- key_info= table->key_info + active_index;
if (start_key && start_key->length == key_info->key_length &&
start_key->flag == HA_READ_KEY_EXACT &&
!check_null_in_key(key_info, start_key->key, start_key->length))
{
if (m_active_cursor && (error= close_scan()))
DBUG_RETURN(error);
- error= unique_index_read(start_key->key, start_key->length, buf);
- DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
+ DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf));
}
break;
default:
break;
}
-
// Start the ordered index scan and fetch the first row
- error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf);
- DBUG_RETURN(error);
+ DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
+ &part_spec));
}
-
int ha_ndbcluster::read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_r, bool sorted)
{
byte* buf= table->record[0];
DBUG_ENTER("ha_ndbcluster::read_range_first");
-
- DBUG_RETURN(read_range_first_to_buf(start_key,
- end_key,
- eq_r,
- sorted,
- buf));
+ DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
+ sorted, buf));
}
int ha_ndbcluster::read_range_next()
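

read_range_first_to_buf() now prunes before it scans: get_partition_set() maps the start key to a [start_part, end_part] interval, an empty interval means no partition can contain the key, and a one-partition interval both pins the scan with setPartitionId() and lets the sorted requirement be dropped (a single ordered partition scan is already sorted). A sketch of that pruning decision for a simple hash-partitioned key (illustrative C++; the real get_partition_set() also handles range, list and subpartitioned tables):

// Decide which partitions a lookup must touch. For an exact key we can
// hash to one partition; otherwise every partition stays in the set.
struct part_id_range_sketch { unsigned start_part, end_part; };

static part_id_range_sketch
get_partition_set_sketch(bool exact_key, unsigned long key_hash,
                         unsigned no_parts)
{
  part_id_range_sketch spec;
  if (exact_key)
    spec.start_part= spec.end_part= (unsigned) (key_hash % no_parts);
  else
  {
    spec.start_part= 0;                     // range scan: all partitions
    spec.end_part= no_parts - 1;
  }
  return spec;
}

// Caller pattern, as in the patch: start_part > end_part means "prune
// everything"; start_part == end_part means one pinned, already-sorted scan.
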
@@ -2686,7 +2727,7 @@ int ha_ndbcluster::rnd_init(bool scan)
DBUG_RETURN(-1);
}
}
- index_init(table->s->primary_key);
+ index_init(table->s->primary_key, 0);
DBUG_RETURN(0);
}
@@ -2753,7 +2794,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
&LOCK_status);
// The primary key for the record is stored in pos
// Perform a pk_read using primary key "index"
- DBUG_RETURN(pk_read(pos, ref_length, buf));
+ {
+ part_id_range part_spec;
+ if (m_use_partition_function)
+ {
+ key_range key_spec;
+ KEY *key_info= table->key_info + active_index;
+ key_spec.key= pos;
+ key_spec.length= ref_length;
+ key_spec.flag= HA_READ_KEY_EXACT;
+ get_full_part_id_from_key(table, buf, key_info, &key_spec, &part_spec);
+ DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
+ }
+ DBUG_RETURN(pk_read(pos, ref_length, buf, part_spec.start_part));
+ }
}
@@ -2885,83 +2939,11 @@ int ha_ndbcluster::extra(enum ha_extra_function operation)
{
DBUG_ENTER("extra");
switch (operation) {
- case HA_EXTRA_NORMAL: /* Optimize for space (def) */
- DBUG_PRINT("info", ("HA_EXTRA_NORMAL"));
- break;
- case HA_EXTRA_QUICK: /* Optimize for speed */
- DBUG_PRINT("info", ("HA_EXTRA_QUICK"));
- break;
case HA_EXTRA_RESET: /* Reset database to after open */
DBUG_PRINT("info", ("HA_EXTRA_RESET"));
DBUG_PRINT("info", ("Clearing condition stack"));
cond_clear();
break;
- case HA_EXTRA_CACHE: /* Cash record in HA_rrnd() */
- DBUG_PRINT("info", ("HA_EXTRA_CACHE"));
- break;
- case HA_EXTRA_NO_CACHE: /* End cacheing of records (def) */
- DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE"));
- break;
- case HA_EXTRA_NO_READCHECK: /* No readcheck on update */
- DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK"));
- break;
- case HA_EXTRA_READCHECK: /* Use readcheck (def) */
- DBUG_PRINT("info", ("HA_EXTRA_READCHECK"));
- break;
- case HA_EXTRA_KEYREAD: /* Read only key to database */
- DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
- break;
- case HA_EXTRA_NO_KEYREAD: /* Normal read of records (def) */
- DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
- break;
- case HA_EXTRA_NO_USER_CHANGE: /* No user is allowed to write */
- DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE"));
- break;
- case HA_EXTRA_KEY_CACHE:
- DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE"));
- break;
- case HA_EXTRA_NO_KEY_CACHE:
- DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE"));
- break;
- case HA_EXTRA_WAIT_LOCK: /* Wait until file is avalably (def) */
- DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK"));
- break;
- case HA_EXTRA_NO_WAIT_LOCK: /* If file is locked, return quickly */
- DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK"));
- break;
- case HA_EXTRA_WRITE_CACHE: /* Use write cache in ha_write() */
- DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE"));
- break;
- case HA_EXTRA_FLUSH_CACHE: /* flush write_record_cache */
- DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE"));
- break;
- case HA_EXTRA_NO_KEYS: /* Remove all update of keys */
- DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS"));
- break;
- case HA_EXTRA_KEYREAD_CHANGE_POS: /* Keyread, but change pos */
- DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */
- break;
- case HA_EXTRA_REMEMBER_POS: /* Remember pos for next/prev */
- DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS"));
- break;
- case HA_EXTRA_RESTORE_POS:
- DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS"));
- break;
- case HA_EXTRA_REINIT_CACHE: /* init cache from current record */
- DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE"));
- break;
- case HA_EXTRA_FORCE_REOPEN: /* Datafile have changed on disk */
- DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN"));
- break;
- case HA_EXTRA_FLUSH: /* Flush tables to disk */
- DBUG_PRINT("info", ("HA_EXTRA_FLUSH"));
- break;
- case HA_EXTRA_NO_ROWS: /* Don't write rows */
- DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS"));
- break;
- case HA_EXTRA_RESET_STATE: /* Reset positions */
- DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE"));
- break;
case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/
DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
if (current_thd->lex->sql_command == SQLCOM_REPLACE)
@@ -2980,34 +2962,8 @@ int ha_ndbcluster::extra(enum ha_extra_function operation)
m_use_write= FALSE;
m_ignore_dup_key= FALSE;
break;
- case HA_EXTRA_RETRIEVE_ALL_COLS: /* Retrieve all columns, not just those
- where field->query_id is the same as
- the current query id */
- DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS"));
- m_retrieve_all_fields= TRUE;
- break;
- case HA_EXTRA_PREPARE_FOR_DELETE:
- DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE"));
- break;
- case HA_EXTRA_PREPARE_FOR_UPDATE: /* Remove read cache if problems */
- DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE"));
- break;
- case HA_EXTRA_PRELOAD_BUFFER_SIZE:
- DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE"));
- break;
- case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
- DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY"));
- m_retrieve_primary_key= TRUE;
- break;
- case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
- DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE"));
- break;
- case HA_EXTRA_CHANGE_KEY_TO_DUP:
- DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP"));
- case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
- DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS"));
+ default:
break;
-
}
DBUG_RETURN(0);
@@ -3286,8 +3242,6 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type)
DBUG_ASSERT(m_active_trans);
// Start of transaction
m_rows_changed= 0;
- m_retrieve_all_fields= FALSE;
- m_retrieve_primary_key= FALSE;
m_ops_pending= 0;
{
NDBDICT *dict= ndb->getDictionary();
@@ -3426,8 +3380,6 @@ int ha_ndbcluster::start_stmt(THD *thd)
m_active_trans= trans;
// Start of statement
- m_retrieve_all_fields= FALSE;
- m_retrieve_primary_key= FALSE;
m_ops_pending= 0;
DBUG_RETURN(error);
@@ -3802,56 +3754,6 @@ static int create_ndb_column(NDBCOL &col,
return 0;
}
-/*
- Create a table in NDB Cluster
- */
-
-static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
-{
- if (form->s->max_rows == (ha_rows) 0) /* default setting, don't set fragmentation */
- return;
- /**
- * get the number of fragments right
- */
- uint no_fragments;
- {
-#if MYSQL_VERSION_ID >= 50000
- uint acc_row_size= 25 + /*safety margin*/ 2;
-#else
- uint acc_row_size= pk_length*4;
- /* add acc overhead */
- if (pk_length <= 8) /* main page will set the limit */
- acc_row_size+= 25 + /*safety margin*/ 2;
- else /* overflow page will set the limit */
- acc_row_size+= 4 + /*safety margin*/ 4;
-#endif
- ulonglong acc_fragment_size= 512*1024*1024;
- ulonglong max_rows= form->s->max_rows;
-#if MYSQL_VERSION_ID >= 50100
- no_fragments= (max_rows*acc_row_size)/acc_fragment_size+1;
-#else
- no_fragments= ((max_rows*acc_row_size)/acc_fragment_size+1
- +1/*correct rounding*/)/2;
-#endif
- }
- {
- uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
- NDBTAB::FragmentType ftype;
- if (no_fragments > 2*no_nodes)
- {
- ftype= NDBTAB::FragAllLarge;
- if (no_fragments > 4*no_nodes)
- push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
- "Ndb might have problems storing the max amount of rows specified");
- }
- else if (no_fragments > no_nodes)
- ftype= NDBTAB::FragAllMedium;
- else
- ftype= NDBTAB::FragAllSmall;
- tab.setFragmentType(ftype);
- }
-}
-
int ha_ndbcluster::create(const char *name,
TABLE *form,
HA_CREATE_INFO *info)
@@ -3954,7 +3856,22 @@ int ha_ndbcluster::create(const char *name,
}
}
- ndb_set_fragmentation(tab, form, pk_length);
+ // Check partition info
+ partition_info *part_info= form->s->part_info;
+ if (part_info)
+ {
+ int error;
+ if ((error= set_up_partition_info(part_info, form, (void*)&tab)))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+ else
+ {
+ ndb_set_fragmentation(tab, form, pk_length);
+ }
+
+
if ((my_errno= check_ndb_connection()))
DBUG_RETURN(my_errno);
@@ -4203,11 +4120,12 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg):
HA_NEED_READ_RANGE_BUFFER |
HA_CAN_BIT_FIELD),
m_share(0),
+ m_part_info(NULL),
+ m_use_partition_function(FALSE),
+ m_sorted(FALSE),
m_use_write(FALSE),
m_ignore_dup_key(FALSE),
m_primary_key_update(FALSE),
- m_retrieve_all_fields(FALSE),
- m_retrieve_primary_key(FALSE),
m_rows_to_insert((ha_rows) 1),
m_rows_inserted((ha_rows) 0),
m_bulk_insert_rows((ha_rows) 1024),
@@ -4319,6 +4237,15 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
if (!res)
info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (table->s->part_info)
+ {
+ m_part_info= table->s->part_info;
+ if (!(m_part_info->part_type == HASH_PARTITION &&
+ m_part_info->list_of_part_fields &&
+ !is_sub_partitioned(m_part_info)))
+ m_use_partition_function= TRUE;
+ }
+
DBUG_RETURN(res);
}
@@ -5531,6 +5458,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
HANDLER_BUFFER *buffer)
{
DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
+ m_write_op= FALSE;
int res;
KEY* key_info= table->key_info + active_index;
@@ -5538,7 +5466,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
ulong reclength= table->s->reclength;
NdbOperation* op;
- if (uses_blob_value(m_retrieve_all_fields))
+ if (uses_blob_value())
{
/**
* blobs can't be batched currently
@@ -5590,12 +5518,29 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer;
multi_range_curr++)
{
- switch (index_type){
+ part_id_range part_spec;
+ if (m_use_partition_function)
+ {
+ get_partition_set(table, curr, active_index,
+ &multi_range_curr->start_key,
+ &part_spec);
+ if (part_spec.start_part > part_spec.end_part)
+ {
+ /*
+ We can skip this range since the key won't fit into any
+ partition
+ */
+ curr += reclength;
+ multi_range_curr->range_flag |= SKIP_RANGE;
+ continue;
+ }
+ }
+ switch(index_type){
case PRIMARY_KEY_ORDERED_INDEX:
if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
- goto range;
- /* fall through */
+ multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
+ goto range;
+ // else fall through
case PRIMARY_KEY_INDEX:
{
multi_range_curr->range_flag |= UNIQUE_RANGE;
@@ -5603,7 +5548,9 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
!op->readTuple(lm) &&
!set_primary_key(op, multi_range_curr->start_key.key) &&
!define_read_attrs(curr, op) &&
- (op->setAbortOption(AO_IgnoreError), TRUE))
+ (op->setAbortOption(AO_IgnoreError), TRUE) &&
+ (!m_use_partition_function ||
+ (op->setPartitionId(part_spec.start_part), true)))
curr += reclength;
else
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
@@ -5612,11 +5559,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
break;
case UNIQUE_ORDERED_INDEX:
if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
- !check_null_in_key(key_info, multi_range_curr->start_key.key,
- multi_range_curr->start_key.length)))
- goto range;
- /* fall through */
+ multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
+ !check_null_in_key(key_info, multi_range_curr->start_key.key,
+ multi_range_curr->start_key.length)))
+ goto range;
+ // else fall through
case UNIQUE_INDEX:
{
multi_range_curr->range_flag |= UNIQUE_RANGE;
@@ -5630,8 +5577,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
break;
}
- case ORDERED_INDEX:
- {
+ case ORDERED_INDEX: {
range:
multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
if (scanOp == 0)
@@ -5706,7 +5652,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
}
#if 0
-#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x);
+#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
#else
#define DBUG_MULTI_RANGE(x)
#endif
@@ -5717,6 +5663,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
if (m_disable_multi_read)
{
+ DBUG_MULTI_RANGE(11);
DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
}
@@ -5726,10 +5673,16 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
const NdbOperation* op= m_current_multi_operation;
for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
{
+ DBUG_MULTI_RANGE(12);
+ if (multi_range_curr->range_flag & SKIP_RANGE)
+ continue;
if (multi_range_curr->range_flag & UNIQUE_RANGE)
{
if (op->getNdbError().code == 0)
+ {
+ DBUG_MULTI_RANGE(13);
goto found_next;
+ }
op= m_active_trans->getNextCompletedOperation(op);
m_multi_range_result_ptr += reclength;
@@ -5746,6 +5699,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
}
else
{
+ DBUG_MULTI_RANGE(14);
goto close_scan;
}
}
@@ -5779,6 +5733,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
DBUG_ASSERT(range_no == -1);
if ((res= m_multi_cursor->nextResult(true)))
{
+ DBUG_MULTI_RANGE(15);
goto close_scan;
}
multi_range_curr--; // Will be increased in for-loop
@@ -5806,12 +5761,16 @@ close_scan:
}
else
{
+ DBUG_MULTI_RANGE(9);
DBUG_RETURN(ndb_err(m_active_trans));
}
}
if (multi_range_curr == multi_range_end)
+ {
+ DBUG_MULTI_RANGE(16);
DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
/**
* Read remaining ranges
@@ -7043,6 +7002,8 @@ ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond,
: NULL;
break;
default:
+ field= NULL; //Keep compiler happy
+ DBUG_ASSERT(0);
break;
}
switch ((negated) ?
@@ -7390,4 +7351,211 @@ ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack,
DBUG_RETURN(0);
}
+
+/*
+ Helper routines used when creating a table in NDB Cluster
+ */
+static uint get_no_fragments(ulonglong max_rows)
+{
+#if MYSQL_VERSION_ID >= 50000
+ uint acc_row_size= 25 + /*safety margin*/ 2;
+#else
+ uint acc_row_size= pk_length*4;
+ /* add acc overhead */
+ if (pk_length <= 8) /* main page will set the limit */
+ acc_row_size+= 25 + /*safety margin*/ 2;
+ else /* overflow page will set the limit */
+ acc_row_size+= 4 + /*safety margin*/ 4;
+#endif
+ ulonglong acc_fragment_size= 512*1024*1024;
+#if MYSQL_VERSION_ID >= 50100
+ return (max_rows*acc_row_size)/acc_fragment_size+1;
+#else
+ return ((max_rows*acc_row_size)/acc_fragment_size+1
+ +1/*correct rounding*/)/2;
+#endif
+}
+
+
+/*
+ Routine to adjust the default number of partitions to always be a multiple
+ of the number of nodes and never more than four times the number of nodes.
+*/
+static bool adjusted_frag_count(uint no_fragments, uint no_nodes,
+ uint &reported_frags)
+{
+ uint i= 0;
+ reported_frags= no_nodes;
+ while (reported_frags < no_fragments && ++i < 4 &&
+ (reported_frags + no_nodes) < MAX_PARTITIONS)
+ reported_frags+= no_nodes;
+ return (reported_frags < no_fragments);
+}
+
+int ha_ndbcluster::get_default_no_partitions(ulonglong max_rows)
+{
+ uint reported_frags;
+ uint no_fragments= get_no_fragments(max_rows);
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
+ adjusted_frag_count(no_fragments, no_nodes, reported_frags);
+ return (int)reported_frags;
+}
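
To make the arithmetic above concrete, here is a minimal standalone C++ sketch of the same computation (MAX_PARTITIONS is assumed to be 1024 for the sketch; the server defines its own value). With a max_rows of two billion and four data nodes, roughly 101 fragments are wanted, but the count is capped at 4 * 4 = 16 and the caller would push a warning:

    #include <cstdio>

    // Assumed value for illustration; the server defines its own MAX_PARTITIONS.
    static const unsigned MAX_PARTITIONS_SKETCH= 1024;

    // Mirrors get_no_fragments() for MYSQL_VERSION_ID >= 50100: one fragment
    // per started 512 MB of estimated ACC (index memory) usage.
    static unsigned sketch_no_fragments(unsigned long long max_rows)
    {
      unsigned long long acc_row_size= 25 + 2;               // row + safety margin
      unsigned long long acc_fragment_size= 512ULL * 1024 * 1024;
      return (unsigned) ((max_rows * acc_row_size) / acc_fragment_size + 1);
    }

    // Mirrors adjusted_frag_count(): grow in steps of no_nodes, at most four
    // steps, so the result is a multiple of no_nodes and at most 4 * no_nodes.
    static bool sketch_adjust(unsigned no_fragments, unsigned no_nodes,
                              unsigned &reported_frags)
    {
      unsigned i= 0;
      reported_frags= no_nodes;
      while (reported_frags < no_fragments && ++i < 4 &&
             (reported_frags + no_nodes) < MAX_PARTITIONS_SKETCH)
        reported_frags+= no_nodes;
      return (reported_frags < no_fragments);                // true => push warning
    }

    int main()
    {
      unsigned long long max_rows= 2000000000ULL;            // 2 billion rows
      unsigned no_nodes= 4, reported;
      unsigned wanted= sketch_no_fragments(max_rows);
      bool warn= sketch_adjust(wanted, no_nodes, reported);
      printf("wanted=%u reported=%u warn=%d\n", wanted, reported, warn);
      return 0;                                  // wanted=101 reported=16 warn=1
    }
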
+
+
+/*
+ User-defined partitioning set-up. We need to check how many fragments the
+ user wants defined and which node groups to put them into. Later we also
+ want to attach those partitions to a tablespace.
+
+ All the functionality of the partition function, partition limits and so
+ forth is handled entirely by the MySQL Server. The one exception is
+ PARTITION BY KEY, where NDB handles the hash function and this type can
+ thus be handled transparently also by an NDB API program. For RANGE, HASH
+ and LIST partitioning and for subpartitioning, NDB API programs must
+ implement the mapping function to a partition themselves.
+*/
+
+uint ha_ndbcluster::set_up_partition_info(partition_info *part_info,
+ TABLE *table,
+ void *tab_par)
+{
+ DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
+ ushort node_group[MAX_PARTITIONS];
+ ulong ng_index= 0, i, j;
+ NDBTAB *tab= (NDBTAB*)tab_par;
+ NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
+ partition_element *part_elem;
+
+ if (part_info->part_type == HASH_PARTITION &&
+ part_info->list_of_part_fields == TRUE)
+ {
+ Field **fields= part_info->part_field_array;
+
+ if (part_info->linear_hash_ind)
+ ftype= NDBTAB::DistrKeyLin;
+ else
+ ftype= NDBTAB::DistrKeyHash;
+
+ for (i= 0; i < part_info->part_field_list.elements; i++)
+ {
+ NDBCOL *col= tab->getColumn(fields[i]->fieldnr - 1);
+ DBUG_PRINT("info",("setting dist key on %s", col->getName()));
+ col->setPartitionKey(TRUE);
+ }
+ }
+ List_iterator<partition_element> part_it(part_info->partitions);
+ for (i= 0; i < part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ if (!is_sub_partitioned(part_info))
+ {
+ node_group[ng_index++]= part_elem->nodegroup_id;
+ //Here we should insert tablespace id based on tablespace name
+ }
+ else
+ {
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < part_info->no_subparts; j++)
+ {
+ part_elem= sub_it++;
+ node_group[ng_index++]= part_elem->nodegroup_id;
+ //Here we should insert tablespace id based on tablespace name
+ }
+ }
+ }
+ {
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
+ if (ng_index > 4 * no_nodes)
+ {
+ DBUG_RETURN(1300);
+ }
+ }
+ tab->setNodeGroupIds(&node_group, ng_index);
+ tab->setFragmentType(ftype);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This routine is used to set up fragmentation when the user has specified
+ only ENGINE = NDB and no user-defined partitioning whatsoever. Thus all
+ values will be based on default values. We will choose Linear Hash or
+ Hash with perfect spread depending on a session variable defined in MySQL.
+*/
+
+static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
+{
+ NDBTAB::FragmentType ftype= NDBTAB::DistrKeyHash;
+ ushort node_group[MAX_PARTITIONS];
+ uint no_nodes= g_ndb_cluster_connection->no_db_nodes(), no_fragments, i;
+ DBUG_ENTER("ndb_set_fragmentation");
+
+ if (form->s->max_rows == (ha_rows) 0)
+ {
+ no_fragments= no_nodes;
+ }
+ else
+ {
+ /*
+ Ensure that we get enough fragments to handle all rows and ensure that
+ the table is fully distributed by keeping the number of fragments a
+ multiple of the number of nodes.
+ */
+ uint fragments= get_no_fragments(form->s->max_rows);
+ if (adjusted_frag_count(fragments, no_nodes, no_fragments))
+ {
+ push_warning(current_thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+ "Ndb might have problems storing the max amount of rows specified");
+ }
+ }
+ /*
+ Always start with node group 0 and continue with next node group from
+ there
+ */
+ node_group[0]= 0;
+ for (i= 1; i < no_fragments; i++)
+ node_group[i]= UNDEF_NODEGROUP;
+ switch (opt_ndb_distribution_id)
+ {
+ case ND_KEYHASH:
+ ftype= NDBTAB::DistrKeyHash;
+ break;
+ case ND_LINHASH:
+ ftype= NDBTAB::DistrKeyLin;
+ break;
+ }
+ tab.setFragmentType(ftype);
+ tab.setNodeGroupIds(&node_group, no_fragments);
+ DBUG_VOID_RETURN;
+}
+
+
+bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ /*
+ TODO: Remove the dummy return below, when cluster gets
+ signal from alter table when only .frm is changed. Cluster
+ needs it to manage the copies.
+ */
+ return COMPATIBLE_DATA_NO;
+
+ if (table_changes != IS_EQUAL_YES)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that auto_increment value was not changed */
+ if ((info->used_fields & HA_CREATE_USED_AUTO) &&
+ info->auto_increment_value != 0)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that row format didn't change */
+ if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
+ get_row_type() != info->row_type)
+ return COMPATIBLE_DATA_NO;
+
+ return COMPATIBLE_DATA_YES;
+}
+
#endif /* HAVE_NDBCLUSTER_DB */
diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
index 034bb9292e8..bf9891c364b 100644
--- a/sql/ha_ndbcluster.h
+++ b/sql/ha_ndbcluster.h
@@ -113,6 +113,8 @@ struct negated_function_mapping
NDB_FUNC_TYPE neg_fun;
};
+enum ndb_distribution { ND_KEYHASH= 0, ND_LINHASH= 1 };
+
/*
Define what functions can be negated in condition pushdown.
Note, these HAVE to be in the same order as in definition enum
@@ -463,7 +465,7 @@ class ha_ndbcluster: public handler
int write_row(byte *buf);
int update_row(const byte *old_data, byte *new_data);
int delete_row(const byte *buf);
- int index_init(uint index);
+ int index_init(uint index, bool sorted);
int index_end();
int index_read(byte *buf, const byte *key, uint key_len,
enum ha_rkey_function find_flag);
@@ -505,6 +507,11 @@ class ha_ndbcluster: public handler
const char * table_type() const;
const char ** bas_ext() const;
ulong table_flags(void) const;
+ ulong partition_flags(void) const
+ {
+ return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
+ HA_CAN_PARTITION_UNIQUE);
+ }
ulong index_flags(uint idx, uint part, bool all_parts) const;
uint max_supported_record_length() const;
uint max_supported_keys() const;
@@ -514,6 +521,7 @@ class ha_ndbcluster: public handler
int rename_table(const char *from, const char *to);
int delete_table(const char *name);
int create(const char *name, TABLE *form, HA_CREATE_INFO *info);
+ int get_default_no_partitions(ulonglong max_rows);
THR_LOCK_DATA **store_lock(THD *thd,
THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
@@ -577,6 +585,10 @@ static void set_tabname(const char *pathname, char *tabname);
uint key_length,
qc_engine_callback *engine_callback,
ulonglong *engine_data);
+
+ bool check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes);
+
private:
int alter_table_name(const char *to);
int drop_table();
@@ -592,15 +604,21 @@ private:
NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const;
int check_index_fields_not_null(uint index_no);
- int pk_read(const byte *key, uint key_len, byte *buf);
- int complemented_pk_read(const byte *old_data, byte *new_data);
- int peek_row(const byte *record);
- int unique_index_read(const byte *key, uint key_len,
- byte *buf);
+ uint set_up_partition_info(partition_info *part_info,
+ TABLE *table,
+ void *tab);
+ int complemented_pk_read(const byte *old_data, byte *new_data,
+ uint32 old_part_id);
+ int pk_read(const byte *key, uint key_len, byte *buf, uint32 part_id);
int ordered_index_scan(const key_range *start_key,
const key_range *end_key,
- bool sorted, bool descending, byte* buf);
+ bool sorted, bool descending, byte* buf,
+ part_id_range *part_spec);
int full_table_scan(byte * buf);
+
+ int peek_row(const byte *record);
+ int unique_index_read(const byte *key, uint key_len,
+ byte *buf);
int fetch_next(NdbScanOperation* op);
int next_result(byte *buf);
int define_read_attrs(byte* buf, NdbOperation* op);
@@ -632,7 +650,7 @@ private:
ulonglong get_auto_increment();
void invalidate_dictionary_cache(bool global);
int ndb_err(NdbTransaction*);
- bool uses_blob_value(bool all_fields);
+ bool uses_blob_value();
char *update_table_comment(const char * comment);
@@ -680,11 +698,15 @@ private:
// NdbRecAttr has no reference to blob
typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue;
NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE];
+ partition_info *m_part_info;
+ byte *m_rec0;
+ Field **m_part_field_array;
+ bool m_use_partition_function;
+ bool m_sorted;
bool m_use_write;
bool m_ignore_dup_key;
bool m_primary_key_update;
- bool m_retrieve_all_fields;
- bool m_retrieve_primary_key;
+ bool m_write_op;
ha_rows m_rows_to_insert;
ha_rows m_rows_inserted;
ha_rows m_bulk_insert_rows;
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
new file mode 100644
index 00000000000..39c4f2243a5
--- /dev/null
+++ b/sql/ha_partition.cc
@@ -0,0 +1,3240 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This handler was developed by Mikael Ronström for version 5.1 of MySQL.
+ It is an abstraction layer on top of other handlers such as MyISAM,
+ InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
+ be handled by a storage engine itself. The current example of this is NDB
+ Cluster, which handles partitioning internally. This has the benefit that
+ many loops needed in the partition handler can be avoided.
+
+ Partitioning has an inherent property that cuts both ways: it splits the
+ data into chunks. This makes the data more manageable, queries can easily
+ be parallelised across the parts, and indexes are split such that there
+ are fewer levels in the index trees. The inherent disadvantage is that to
+ use a split index one has to scan all index parts, which is fine for
+ large queries but can be a drawback for small ones.
+
+ Partitioning lays the foundation for more manageable databases that are
+ extremely large. It does also lay the foundation for more parallelism
+ in the execution of queries. This functionality will grow with later
+ versions of MySQL.
+
+ You can enable it in your build by doing the following during your build
+ process:
+ ./configure --with-partition
+
+ The partition handler is set up to use table locks. It implements a
+ partition "SHARE" that is inserted into a hash by table name. You can use
+ this to store state information that any partition handler object using
+ the same table will be able to see.
+
+ Please read the object definition in ha_partition.h before reading the rest
+ of this file.
+*/
+
+#ifdef __GNUC__
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include <mysql_priv.h>
+
+#ifdef HAVE_PARTITION_DB
+#include "ha_partition.h"
+
+static const char *ha_par_ext= ".par";
+#ifdef NOT_USED
+static int free_share(PARTITION_SHARE * share);
+static PARTITION_SHARE *get_share(const char *table_name, TABLE * table);
+#endif
+
+/****************************************************************************
+ MODULE create/delete handler object
+****************************************************************************/
+
+static handlerton partition_hton = {
+ "partition",
+ 0, /* slot */
+ 0, /* savepoint size */
+ NULL /*ndbcluster_close_connection*/,
+ NULL, /* savepoint_set */
+ NULL, /* savepoint_rollback */
+ NULL, /* savepoint_release */
+ NULL /*ndbcluster_commit*/,
+ NULL /*ndbcluster_rollback*/,
+ NULL, /* prepare */
+ NULL, /* recover */
+ NULL, /* commit_by_xid */
+ NULL, /* rollback_by_xid */
+ HTON_NO_FLAGS
+};
+
+ha_partition::ha_partition(TABLE *table)
+ :handler(&partition_hton, table), m_part_info(NULL), m_create_handler(FALSE),
+ m_is_sub_partitioned(0)
+{
+ DBUG_ENTER("ha_partition::ha_partition(table)");
+ init_handler_variables();
+ if (table)
+ {
+ if (table->s->part_info)
+ {
+ m_part_info= table->s->part_info;
+ m_is_sub_partitioned= is_sub_partitioned(m_part_info);
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+ha_partition::ha_partition(partition_info *part_info)
+ :handler(&partition_hton, NULL), m_part_info(part_info), m_create_handler(TRUE),
+ m_is_sub_partitioned(is_sub_partitioned(m_part_info))
+
+{
+ DBUG_ENTER("ha_partition::ha_partition(part_info)");
+ init_handler_variables();
+ DBUG_ASSERT(m_part_info);
+ DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::init_handler_variables()
+{
+ active_index= MAX_KEY;
+ m_file_buffer= NULL;
+ m_name_buffer_ptr= NULL;
+ m_engine_array= NULL;
+ m_file= NULL;
+ m_tot_parts= 0;
+ m_has_transactions= 0;
+ m_pkey_is_clustered= 0;
+ m_lock_type= F_UNLCK;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_scan_value= 2;
+ m_ref_length= 0;
+ m_part_spec.end_part= NO_CURRENT_PART_ID;
+ m_index_scan_type= partition_no_index_scan;
+ m_start_key.key= NULL;
+ m_start_key.length= 0;
+ m_myisam= FALSE;
+ m_innodb= FALSE;
+ m_extra_cache= FALSE;
+ m_extra_cache_size= 0;
+ m_table_flags= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;
+ m_low_byte_first= 1;
+ m_part_field_array= NULL;
+ m_ordered_rec_buffer= NULL;
+ m_top_entry= NO_CURRENT_PART_ID;
+ m_rec_length= 0;
+ m_last_part= 0;
+ m_rec0= 0;
+ m_curr_key_info= 0;
+
+#ifdef DONT_HAVE_TO_BE_INITALIZED
+ m_start_key.flag= 0;
+ m_ordered= TRUE;
+#endif
+}
+
+
+ha_partition::~ha_partition()
+{
+ DBUG_ENTER("ha_partition::~ha_partition()");
+ if (m_file != NULL)
+ {
+ uint i;
+ for (i= 0; i < m_tot_parts; i++)
+ delete m_file[i];
+ }
+ my_free((char*) m_ordered_rec_buffer, MYF(MY_ALLOW_ZERO_PTR));
+
+ clear_handler_file();
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ The partition handler is only a layer on top of other engines. Thus it
+ can't really perform anything without the underlying handlers, so we
+ add this method as part of the allocation of a handler object.
+
+ 1) Allocation of underlying handlers
+ If we have access to the partition info we will allocate one handler
+ instance for each partition.
+ 2) Allocation without partition info
+ The cases where we don't have access to this information is when called
+ in preparation for delete_table and rename_table and in that case we
+ only need to set HA_FILE_BASED. In that case we will use the .par file
+ that contains information about the partitions and their engines and
+ the names of each partition.
+ 3) Table flags initialisation
+ We need also to set table flags for the partition handler. This is not
+ static since it depends on what storage engines are used as underlying
+ handlers.
+ The table flags are set in this routine to simulate the behaviour of a
+ normal storage engine.
+ The flag HA_FILE_BASED will be set independently of the underlying handlers.
+ 4) Index flags initialisation
+ When knowledge exists on the indexes it is also possible to initialise the
+ index flags. Again the index flags must be initialised by using the under-
+ lying handlers since this is storage engine dependent.
+ The flag HA_READ_ORDER will be reset for the time being to indicate no
+ ordered output is available from partition handler indexes. Later a merge
+ sort will be performed using the underlying handlers.
+ 5) primary_key_is_clustered, has_transactions and low_byte_first are
+ calculated here.
+*/
+
+int ha_partition::ha_initialise()
+{
+ handler **file_array, *file;
+ DBUG_ENTER("ha_partition::set_up_constants");
+
+ if (m_part_info)
+ {
+ m_tot_parts= get_tot_partitions(m_part_info);
+ DBUG_ASSERT(m_tot_parts > 0);
+ if (m_create_handler)
+ {
+ if (new_handlers_from_part_info())
+ DBUG_RETURN(1);
+ }
+ else if (get_from_handler_file(table->s->path))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), 129); //Temporary fix TODO print_error
+ DBUG_RETURN(1);
+ }
+ /*
+ We create all underlying table handlers here. We only do it if we have
+ access to the partition info. We do it in this special method to be
+ able to report allocation errors.
+ */
+ /*
+ Set up table_flags, low_byte_first, primary_key_is_clustered and
+ has_transactions since they are called often in all kinds of places,
+ other parameters are calculated on demand.
+ HA_FILE_BASED is always set for partition handler since we use a
+ special file for handling names of partitions, engine types.
+ HA_CAN_GEOMETRY, HA_CAN_FULLTEXT, HA_CAN_SQL_HANDLER,
+ HA_CAN_INSERT_DELAYED is disabled until further investigated.
+ */
+ m_table_flags= m_file[0]->table_flags();
+ m_low_byte_first= m_file[0]->low_byte_first();
+ m_has_transactions= TRUE;
+ m_pkey_is_clustered= TRUE;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ if (m_low_byte_first != file->low_byte_first())
+ {
+ // Cannot have handlers with different endian
+ my_error(ER_MIX_HANDLER_ERROR, MYF(0));
+ DBUG_RETURN(1);
+ }
+ if (!file->has_transactions())
+ m_has_transactions= FALSE;
+ if (!file->primary_key_is_clustered())
+ m_pkey_is_clustered= FALSE;
+ m_table_flags&= file->table_flags();
+ } while (*(++file_array));
+ m_table_flags&= ~(HA_CAN_GEOMETRY & HA_CAN_FULLTEXT &
+ HA_CAN_SQL_HANDLER & HA_CAN_INSERT_DELAYED);
+ /*
+ TODO RONM:
+ Make sure that the tree works without partition defined, compiles
+ and goes through mysql-test-run.
+ */
+ }
+ m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;
+ DBUG_RETURN(0);
+}
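
One detail of the flag masking above deserves a worked example. HA_CAN_GEOMETRY, HA_CAN_FULLTEXT and friends are distinct single-bit flags, so AND-ing them together yields zero, and negating zero masks out nothing; the masks must be OR-ed before negating, which is what the statement above does. A minimal sketch with invented flag values:

    #include <cstdio>

    int main()
    {
      // Invented one-bit capability flags, standing in for the HA_CAN_* values.
      const unsigned long CAN_GEOMETRY= 1UL << 0;
      const unsigned long CAN_FULLTEXT= 1UL << 1;
      const unsigned long CAN_OTHER=    1UL << 2;
      unsigned long flags= CAN_GEOMETRY | CAN_FULLTEXT | CAN_OTHER;

      // Wrong: two distinct bits AND-ed give 0, and ~0 clears nothing.
      unsigned long broken= flags & ~(CAN_GEOMETRY & CAN_FULLTEXT);
      // Right: OR the masks together, then negate.
      unsigned long cleared= flags & ~(CAN_GEOMETRY | CAN_FULLTEXT);

      printf("broken=0x%lx cleared=0x%lx\n", broken, cleared);  // 0x7 vs 0x4
      return 0;
    }
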
+
+/****************************************************************************
+ MODULE meta data changes
+****************************************************************************/
+/*
+ This method is used to calculate the partition name, a service routine for
+ the del_ren_cre_table method.
+*/
+
+static void create_partition_name(char *out, const char *in1, const char *in2)
+{
+ strxmov(out, in1, "_", in2, NullS);
+}
+
+/*
+ This method is used to calculate the subpartition name, a service routine
+ for the del_ren_cre_table method.
+*/
+
+static void create_subpartition_name(char *out, const char *in1,
+ const char *in2, const char *in3)
+{
+ strxmov(out, in1, "_", in2, "_", in3, NullS);
+}
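
For illustration, a self-contained sketch of the naming scheme produced by the two helpers above (snprintf stands in for strxmov, and the table path "./test/t1" with partition "p0" and subpartition "sp0" is invented for the example): the resulting per-partition names are ./test/t1_p0 and ./test/t1_p0_sp0.

    #include <cstdio>

    int main()
    {
      char part_name[512], subpart_name[512];
      // create_partition_name: "<table path>_<partition>"
      snprintf(part_name, sizeof(part_name), "%s_%s", "./test/t1", "p0");
      // create_subpartition_name: "<table path>_<partition>_<subpartition>"
      snprintf(subpart_name, sizeof(subpart_name), "%s_%s_%s",
               "./test/t1", "p0", "sp0");
      printf("%s\n%s\n", part_name, subpart_name);
      return 0;
    }
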
+
+
+/*
+ Used to delete a table. By the time delete_table() has been called all
+ opened references to this table will have been closed (and your globally
+ shared references released). The variable name will just be the name of
+ the table. You will need to remove any files you have created at this
+ point.
+
+ If you do not implement this, the default delete_table() is called from
+ handler.cc and it will delete all files with the file extensions returned
+ by bas_ext().
+
+ Called from handler.cc by delete_table and ha_create_table(). Only used
+ during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
+ the storage engine.
+*/
+
+int ha_partition::delete_table(const char *name)
+{
+ int error;
+ DBUG_ENTER("ha_partition::delete_table");
+ if ((error= del_ren_cre_table(name, NULL, NULL, NULL)))
+ DBUG_RETURN(error);
+ DBUG_RETURN(handler::delete_table(name));
+}
+
+
+/*
+ Renames a table from one name to another from alter table call.
+
+ If you do not implement this, the default rename_table() is called from
+ handler.cc and it will rename all files with the file extensions returned
+ by bas_ext().
+
+ Called from sql_table.cc by mysql_rename_table().
+*/
+
+int ha_partition::rename_table(const char *from, const char *to)
+{
+ int error;
+ DBUG_ENTER("ha_partition::rename_table");
+ if ((error= del_ren_cre_table(from, to, NULL, NULL)))
+ DBUG_RETURN(error);
+ DBUG_RETURN(handler::rename_table(from, to));
+}
+
+
+/*
+ create_handler_files is called to create any handler specific files
+ before opening the file with openfrm to later call ::create on the
+ file object.
+ In the partition handler this is used to store the names of partitions
+ and types of engines in the partitions.
+*/
+
+int ha_partition::create_handler_files(const char *name)
+{
+ DBUG_ENTER("ha_partition::create_handler_files()");
+
+ /*
+ We need to update total number of parts since we might write the handler
+ file as part of a partition management command
+ */
+ m_tot_parts= get_tot_partitions(m_part_info);
+ if (create_handler_file(name))
+ {
+ my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ create() is called to create a table. The variable name will have the name
+ of the table. When create() is called you do not need to worry about
+ opening the table. Also, the FRM file will have already been created so
+ adjusting create_info will not do you any good. You can overwrite the frm
+ file at this point if you wish to change the table definition, but there
+ are no methods currently provided for doing that.
+
+ Called from handler.cc by ha_create_table().
+*/
+
+int ha_partition::create(const char *name, TABLE *table_arg,
+ HA_CREATE_INFO *create_info)
+{
+ char t_name[FN_REFLEN];
+ DBUG_ENTER("ha_partition::create");
+
+ strmov(t_name, name);
+ *fn_ext(t_name)= 0;
+ if (del_ren_cre_table(t_name, NULL, table_arg, create_info))
+ {
+ handler::delete_table(t_name);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+int ha_partition::drop_partitions(const char *path)
+{
+ List_iterator<partition_element> part_it(m_part_info->partitions);
+ char part_name_buff[FN_REFLEN];
+ uint no_parts= m_part_info->no_parts;
+ uint no_subparts= m_part_info->no_subparts, i= 0;
+ int error= 1;
+ DBUG_ENTER("ha_partition::drop_partitions()");
+
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (part_elem->part_state == PART_IS_DROPPED)
+ {
+ /*
+ This part is to be dropped, meaning the part or all its subparts.
+ */
+ if (is_sub_partitioned(m_part_info))
+ {
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ uint j= 0, part;
+ do
+ {
+ partition_element *sub_elem= sub_it++;
+ create_subpartition_name(part_name_buff, path,
+ part_elem->partition_name,
+ sub_elem->partition_name);
+ part= i * no_subparts + j;
+ DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
+ error= m_file[part]->delete_table((const char *) part_name_buff);
+ } while (++j < no_subparts);
+ }
+ else
+ {
+ create_partition_name(part_name_buff, path,
+ part_elem->partition_name);
+ DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
+ error= m_file[i]->delete_table((const char *) part_name_buff);
+ }
+ }
+ } while (++i < no_parts);
+ DBUG_RETURN(error);
+}
+
+void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
+{
+ return;
+}
+
+
+char *ha_partition::update_table_comment(const char *comment)
+{
+ return (char*) comment; // Nothing to change
+}
+
+
+
+/*
+ Common routine to handle delete_table, rename_table and create.
+ The routine uses the partition handler file to get the
+ names of the partition instances. Delete and rename are
+ called after creating the handler without a table
+ object and thus the file is needed to discover the
+ names of the partitions and the underlying storage engines.
+*/
+
+uint ha_partition::del_ren_cre_table(const char *from,
+ const char *to,
+ TABLE *table_arg,
+ HA_CREATE_INFO *create_info)
+{
+ int save_error= 0, error;
+ char from_buff[FN_REFLEN], to_buff[FN_REFLEN];
+ char *name_buffer_ptr;
+ uint i;
+ handler **file;
+ DBUG_ENTER("del_ren_cre_table()");
+
+ if (get_from_handler_file(from))
+ DBUG_RETURN(TRUE);
+ DBUG_ASSERT(m_file_buffer);
+ name_buffer_ptr= m_name_buffer_ptr;
+ file= m_file;
+ i= 0;
+ do
+ {
+ create_partition_name(from_buff, from, name_buffer_ptr);
+ if (to != NULL)
+ { // Rename branch
+ create_partition_name(to_buff, to, name_buffer_ptr);
+ error= (*file)->rename_table((const char*) from_buff,
+ (const char*) to_buff);
+ }
+ else if (table_arg == NULL) // delete branch
+ error= (*file)->delete_table((const char*) from_buff);
+ else
+ {
+ set_up_table_before_create(table_arg, create_info, i);
+ error= (*file)->create(from_buff, table_arg, create_info);
+ }
+ name_buffer_ptr= strend(name_buffer_ptr) + 1;
+ if (error)
+ save_error= error;
+ i++;
+ } while (*(++file));
+ DBUG_RETURN(save_error);
+}
+
+
+partition_element *ha_partition::find_partition_element(uint part_id)
+{
+ uint i;
+ uint curr_part_id= 0;
+ List_iterator_fast < partition_element > part_it(m_part_info->partitions);
+
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ partition_element *part_elem;
+ part_elem= part_it++;
+ if (m_is_sub_partitioned)
+ {
+ uint j;
+ List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ part_elem= sub_it++;
+ if (part_id == curr_part_id++)
+ return part_elem;
+ }
+ }
+ else if (part_id == curr_part_id++)
+ return part_elem;
+ }
+ DBUG_ASSERT(0);
+ current_thd->fatal_error(); // Abort
+ return NULL;
+}
+
+
+void ha_partition::set_up_table_before_create(TABLE *table,
+ HA_CREATE_INFO *info,
+ uint part_id)
+{
+ /*
+ Set up
+ 1) Comment on partition
+ 2) MAX_ROWS, MIN_ROWS on partition
+ 3) Index file name on partition
+ 4) Data file name on partition
+ */
+ partition_element *part_elem= find_partition_element(part_id);
+ if (!part_elem)
+ return; // Fatal error
+ table->s->max_rows= part_elem->part_max_rows;
+ table->s->min_rows= part_elem->part_min_rows;
+ info->index_file_name= part_elem->index_file_name;
+ info->data_file_name= part_elem->data_file_name;
+}
+
+
+/*
+ Routine used to add two names with '_' in between them. A service routine
+ for create_handler_file.
+ Include the NUL byte in the count of characters since it is needed as a
+ separator between the partition names.
+*/
+
+static uint name_add(char *dest, const char *first_name, const char *sec_name)
+{
+ return (uint) (strxmov(dest, first_name, "_", sec_name, NullS) -dest) + 1;
+}
+
+
+/*
+ Method used to create handler file with names of partitions, their
+ engine types and the number of partitions.
+*/
+
+bool ha_partition::create_handler_file(const char *name)
+{
+ partition_element *part_elem, *subpart_elem;
+ uint i, j, part_name_len, subpart_name_len;
+ uint tot_partition_words, tot_name_len;
+ uint tot_len_words, tot_len_byte, chksum, tot_name_words;
+ char *name_buffer_ptr;
+ uchar *file_buffer, *engine_array;
+ bool result= TRUE;
+ char file_name[FN_REFLEN];
+ File file;
+ List_iterator_fast < partition_element > part_it(m_part_info->partitions);
+ DBUG_ENTER("create_handler_file");
+
+ DBUG_PRINT("info", ("table name = %s", name));
+ tot_name_len= 0;
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ part_name_len= strlen(part_elem->partition_name);
+ if (!m_is_sub_partitioned)
+ tot_name_len+= part_name_len + 1;
+ else
+ {
+ List_iterator_fast<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ subpart_elem= sub_it++;
+ subpart_name_len= strlen(subpart_elem->partition_name);
+ tot_name_len+= part_name_len + subpart_name_len + 2;
+ }
+ }
+ }
+ /*
+ File format:
+ Length in words 4 byte
+ Checksum 4 byte
+ Total number of partitions 4 byte
+ Array of engine types n * 4 bytes where
+ n = (m_tot_parts + 3)/4
+ Length of name part in bytes 4 bytes
+ Name part m * 4 bytes where
+ m = ((length_name_part + 3)/4)*4
+
+ All padding bytes are zeroed
+ */
+ tot_partition_words= (m_tot_parts + 3) / 4;
+ tot_name_words= (tot_name_len + 3) / 4;
+ tot_len_words= 4 + tot_partition_words + tot_name_words;
+ tot_len_byte= 4 * tot_len_words;
+ if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
+ DBUG_RETURN(TRUE);
+ engine_array= (file_buffer + 12);
+ name_buffer_ptr= (char*) (file_buffer + ((4 + tot_partition_words) * 4));
+ part_it.rewind();
+ for (i= 0; i < m_part_info->no_parts; i++)
+ {
+ part_elem= part_it++;
+ if (!m_is_sub_partitioned)
+ {
+ name_buffer_ptr= strmov(name_buffer_ptr, part_elem->partition_name)+1;
+ *engine_array= (uchar) part_elem->engine_type;
+ DBUG_PRINT("info", ("engine: %u", *engine_array));
+ engine_array++;
+ }
+ else
+ {
+ List_iterator_fast<partition_element> sub_it(part_elem->subpartitions);
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ subpart_elem= sub_it++;
+ name_buffer_ptr+= name_add(name_buffer_ptr,
+ part_elem->partition_name,
+ subpart_elem->partition_name);
+ *engine_array= (uchar) part_elem->engine_type;
+ engine_array++;
+ }
+ }
+ }
+ chksum= 0;
+ int4store(file_buffer, tot_len_words);
+ int4store(file_buffer + 8, m_tot_parts);
+ int4store(file_buffer + 12 + (tot_partition_words * 4), tot_name_len);
+ for (i= 0; i < tot_len_words; i++)
+ chksum^= uint4korr(file_buffer + 4 * i);
+ int4store(file_buffer + 4, chksum);
+ /*
+ Remove .frm extension and replace with .par
+ Create and write and close file
+ to be used at open, delete_table and rename_table
+ */
+ fn_format(file_name, name, "", ".par", MYF(MY_REPLACE_EXT));
+ if ((file= my_create(file_name, CREATE_MODE, O_RDWR | O_TRUNC,
+ MYF(MY_WME))) >= 0)
+ {
+ result= my_write(file, (byte *) file_buffer, tot_len_byte,
+ MYF(MY_WME | MY_NABP));
+ VOID(my_close(file, MYF(0)));
+ }
+ else
+ result= TRUE;
+ my_free((char*) file_buffer, MYF(0));
+ DBUG_RETURN(result);
+}
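
A minimal standalone sketch of the .par layout documented in create_handler_file() above. It lays out the same header words for a hypothetical two-partition table (engine code 9 is invented for the sketch) and shows that XOR-ing all 32-bit words of a well-formed file, including the checksum word itself, yields zero, which is exactly the check performed when the file is read back:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Stand-ins for int4store/uint4korr; assumes a little-endian host for brevity.
    static void store4(uint8_t *p, uint32_t v) { memcpy(p, &v, 4); }
    static uint32_t load4(const uint8_t *p)
    { uint32_t v; memcpy(&v, p, 4); return v; }

    int main()
    {
      uint32_t tot_parts= 2;               // two partitions: "p0", "p1"
      uint32_t tot_name_len= 3 + 3;        // each name plus its NUL separator
      uint32_t tot_partition_words= (tot_parts + 3) / 4;
      uint32_t tot_name_words= (tot_name_len + 3) / 4;
      uint32_t tot_len_words= 4 + tot_partition_words + tot_name_words;

      std::vector<uint8_t> buf(4 * tot_len_words, 0);
      store4(&buf[0], tot_len_words);      // length in words
      store4(&buf[8], tot_parts);          // total number of partitions
      buf[12]= 9;                          // one engine-code byte per partition,
      buf[13]= 9;                          // value invented for the sketch
      store4(&buf[12 + 4 * tot_partition_words], tot_name_len);
      memcpy(&buf[16 + 4 * tot_partition_words], "p0\0p1\0", 6);

      uint32_t chksum= 0;                  // XOR all words while checksum is 0
      for (uint32_t i= 0; i < tot_len_words; i++)
        chksum^= load4(&buf[4 * i]);
      store4(&buf[4], chksum);

      uint32_t verify= 0;                  // reader's check: XOR must come to 0
      for (uint32_t i= 0; i < tot_len_words; i++)
        verify^= load4(&buf[4 * i]);
      printf("words=%u chksum=0x%x verify=%u (0 means intact)\n",
             tot_len_words, chksum, verify);
      return 0;
    }
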
+
+
+void ha_partition::clear_handler_file()
+{
+ my_free((char*) m_file_buffer, MYF(MY_ALLOW_ZERO_PTR));
+ m_file_buffer= NULL;
+ m_name_buffer_ptr= NULL;
+ m_engine_array= NULL;
+}
+
+
+bool ha_partition::create_handlers()
+{
+ uint i;
+ uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+ DBUG_ENTER("create_handlers");
+
+ if (!(m_file= (handler **) sql_alloc(alloc_len)))
+ DBUG_RETURN(TRUE);
+ bzero(m_file, alloc_len);
+ for (i= 0; i < m_tot_parts; i++)
+ {
+ if (!(m_file[i]= get_new_handler(table, (enum db_type) m_engine_array[i])))
+ DBUG_RETURN(TRUE);
+ DBUG_PRINT("info", ("engine_type: %u", m_engine_array[i]));
+ }
+ m_file[m_tot_parts]= 0;
+ /* For the moment we only support partition over the same table engine */
+ if (m_engine_array[0] == (uchar) DB_TYPE_MYISAM)
+ {
+ DBUG_PRINT("info", ("MyISAM"));
+ m_myisam= TRUE;
+ }
+ else if (m_engine_array[0] == (uchar) DB_TYPE_INNODB)
+ {
+ DBUG_PRINT("info", ("InnoDB"));
+ m_innodb= TRUE;
+ }
+ DBUG_RETURN(FALSE);
+}
+
+
+bool ha_partition::new_handlers_from_part_info()
+{
+ uint i, j;
+ partition_element *part_elem;
+ uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+ List_iterator_fast <partition_element> part_it(m_part_info->partitions);
+ DBUG_ENTER("ha_partition::new_handlers_from_part_info");
+
+ if (!(m_file= (handler **) sql_alloc(alloc_len)))
+ goto error;
+ bzero(m_file, alloc_len);
+ DBUG_ASSERT(m_part_info->no_parts > 0);
+
+ i= 0;
+ /*
+ We don't know the size of the underlying storage engine allocation, so
+ invent a number of bytes to report in the error message if allocation fails
+ */
+ alloc_len= 128;
+ do
+ {
+ part_elem= part_it++;
+ if (!(m_file[i]= get_new_handler(table, part_elem->engine_type)))
+ goto error;
+ DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type));
+ if (m_is_sub_partitioned)
+ {
+ for (j= 0; j < m_part_info->no_subparts; j++)
+ {
+ if (!(m_file[i]= get_new_handler(table, part_elem->engine_type)))
+ goto error;
+ DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type));
+ }
+ }
+ } while (++i < m_part_info->no_parts);
+ if (part_elem->engine_type == DB_TYPE_MYISAM)
+ {
+ DBUG_PRINT("info", ("MyISAM"));
+ m_myisam= TRUE;
+ }
+ DBUG_RETURN(FALSE);
+error:
+ my_error(ER_OUTOFMEMORY, MYF(0), alloc_len);
+ DBUG_RETURN(TRUE);
+}
+
+
+/*
+ Open handler file to get partition names, engine types and number of
+ partitions.
+*/
+
+bool ha_partition::get_from_handler_file(const char *name)
+{
+ char buff[FN_REFLEN], *address_tot_name_len;
+ File file;
+ char *file_buffer, *name_buffer_ptr;
+ uchar *engine_array;
+ uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
+ DBUG_ENTER("ha_partition::get_from_handler_file");
+ DBUG_PRINT("enter", ("table name: '%s'", name));
+
+ if (m_file_buffer)
+ DBUG_RETURN(FALSE);
+ fn_format(buff, name, "", ha_par_ext, MYF(0));
+
+ /* Following could be done with my_stat to read in whole file */
+ if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
+ DBUG_RETURN(TRUE);
+ if (my_read(file, (byte *) & buff[0], 8, MYF(MY_NABP)))
+ goto err1;
+ len_words= uint4korr(buff);
+ len_bytes= 4 * len_words;
+ if (!(file_buffer= my_malloc(len_bytes, MYF(0))))
+ goto err1;
+ VOID(my_seek(file, 0, MY_SEEK_SET, MYF(0)));
+ if (my_read(file, (byte *) file_buffer, len_bytes, MYF(MY_NABP)))
+ goto err2;
+
+ chksum= 0;
+ for (i= 0; i < len_words; i++)
+ chksum ^= uint4korr((file_buffer) + 4 * i);
+ if (chksum)
+ goto err2;
+ m_tot_parts= uint4korr((file_buffer) + 8);
+ tot_partition_words= (m_tot_parts + 3) / 4;
+ engine_array= (uchar *) ((file_buffer) + 12);
+ address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words;
+ tot_name_words= (uint4korr(address_tot_name_len) + 3) / 4;
+ if (len_words != (tot_partition_words + tot_name_words + 4))
+ goto err2;
+ name_buffer_ptr= file_buffer + 16 + 4 * tot_partition_words;
+ VOID(my_close(file, MYF(0)));
+ m_file_buffer= file_buffer; // Will be freed in clear_handler_file()
+ m_name_buffer_ptr= name_buffer_ptr;
+ m_engine_array= engine_array;
+ if (!m_file && create_handlers())
+ {
+ clear_handler_file();
+ DBUG_RETURN(TRUE);
+ }
+ DBUG_RETURN(FALSE);
+
+err2:
+ my_free(file_buffer, MYF(0));
+err1:
+ VOID(my_close(file, MYF(0)));
+ DBUG_RETURN(TRUE);
+}
+
+/****************************************************************************
+ MODULE open/close object
+****************************************************************************/
+/*
+ Used for opening tables. The name will be the name of the file.
+ A table is opened when it needs to be opened. For instance
+ when a request comes in for a select on the table (tables are not
+ open and closed for each request, they are cached).
+
+ Called from handler.cc by handler::ha_open(). The server opens all tables
+ by calling ha_open() which then calls the handler specific open().
+*/
+
+int ha_partition::open(const char *name, int mode, uint test_if_locked)
+{
+ int error;
+ char name_buff[FN_REFLEN];
+ char *name_buffer_ptr= m_name_buffer_ptr;
+ handler **file;
+ uint alloc_len;
+ DBUG_ENTER("ha_partition::open");
+
+ ref_length= 0;
+ m_part_field_array= m_part_info->full_part_field_array;
+ if (get_from_handler_file(name))
+ DBUG_RETURN(1);
+ m_start_key.length= 0;
+ m_rec0= table->record[0];
+ m_rec_length= table->s->reclength;
+ alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
+ alloc_len+= table->s->max_key_length;
+ if (!m_ordered_rec_buffer)
+ {
+ if (!(m_ordered_rec_buffer= my_malloc(alloc_len, MYF(MY_WME))))
+ {
+ DBUG_RETURN(1);
+ }
+ {
+ /*
+ We set up one record per partition and each record has 2 bytes in
+ front where the partition id is written. This is used by ordered
+ index_read.
+ We also set-up a reference to the first record for temporary use in
+ setting up the scan.
+ */
+ char *ptr= m_ordered_rec_buffer;
+ uint i= 0;
+ do
+ {
+ int2store(ptr, i);
+ ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+ } while (++i < m_tot_parts);
+ m_start_key.key= ptr;
+ }
+ }
+ file= m_file;
+ do
+ {
+ create_partition_name(name_buff, name, name_buffer_ptr);
+ if ((error= (*file)->ha_open((const char*) name_buff, mode,
+ test_if_locked)))
+ goto err_handler;
+ name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
+ set_if_bigger(ref_length, ((*file)->ref_length));
+ } while (*(++file));
+ /*
+ Add 2 bytes for partition id in position ref length.
+ ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
+ */
+ ref_length+= PARTITION_BYTES_IN_POS;
+ m_ref_length= ref_length;
+ /*
+ Release buffer read from .par file. It will not be reused again after
+ being opened once.
+ */
+ clear_handler_file();
+ /*
+ Initialise priority queue, initialised to reading forward.
+ */
+ if ((error= init_queue(&queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
+ 0, key_rec_cmp, (void*)this)))
+ goto err_handler;
+ /*
+ Some handlers update statistics as part of the open call. This will in
+ some cases corrupt the statistics of the partition handler and thus
+ to ensure we have correct statistics we call info from open after
+ calling open on all individual handlers.
+ */
+ info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ DBUG_RETURN(0);
+
+err_handler:
+ while (file-- != m_file)
+ (*file)->close();
+ DBUG_RETURN(error);
+}
+
+/*
+ Closes a table. We call the free_share() function to free any resources
+ that we have allocated in the "shared" structure.
+
+ Called from sql_base.cc, sql_select.cc, and table.cc.
+ In sql_select.cc it is only used to close up temporary tables or during
+ the process where a temporary table is converted over to being a
+ myisam table.
+ For sql_base.cc look at close_data_tables().
+*/
+
+int ha_partition::close(void)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::close");
+ file= m_file;
+ do
+ {
+ (*file)->close();
+ } while (*(++file));
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ MODULE start/end statement
+****************************************************************************/
+/*
+ A number of methods to define various constants for the handler. In
+ the case of the partition handler we need to use some max and min
+ of the underlying handlers in most cases.
+*/
+
+/*
+ First you should go read the section "locking functions for mysql" in
+ lock.cc to understand this.
+ This creates a lock on the table. If you are implementing a storage engine
+ that can handle transactions look at ha_berkeley.cc to see how you will
+ want to go about doing this. Otherwise you should consider calling
+ flock() here.
+ Originally this method was used to set locks on file level to enable
+ several MySQL Servers to work on the same data. For transactional
+ engines it has been "abused" to also mean start and end of statements
+ to enable proper rollback of statements and transactions. When LOCK
+ TABLES has been issued the start_stmt method takes over the role of
+ indicating start of statement but in this case there is no end of
+ statement indicator(?).
+
+ Called from lock.cc by lock_external() and unlock_external(). Also called
+ from sql_table.cc by copy_data_between_tables().
+*/
+
+int ha_partition::external_lock(THD *thd, int lock_type)
+{
+ uint error;
+ handler **file;
+ DBUG_ENTER("ha_partition::external_lock");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->external_lock(thd, lock_type)))
+ {
+ if (lock_type != F_UNLCK)
+ goto err_handler;
+ }
+ } while (*(++file));
+ m_lock_type= lock_type; // For the future (2009?)
+ DBUG_RETURN(0);
+
+err_handler:
+ while (file-- != m_file)
+ (*file)->external_lock(thd, F_UNLCK);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ The idea with handler::store_lock() is the following:
+
+ The statement decided which locks we should need for the table
+ for updates/deletes/inserts we get WRITE locks, for SELECT... we get
+ read locks.
+
+ Before adding the lock into the table lock handler (see thr_lock.c)
+ mysqld calls store lock with the requested locks. Store lock can now
+ modify a write lock to a read lock (or some other lock), ignore the
+ lock (if we don't want to use MySQL table locks at all) or add locks
+ for many tables (like we do when we are using a MERGE handler).
+
+ Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE
+ (which signals that we are doing WRITES, but we are still allowing other
+ readers and writers).
+
+ When releasing locks, store_lock() is also called. In this case one
+ usually doesn't have to do anything.
+
+ store_lock is called when holding a global mutex to ensure that only
+ one thread at a time changes the locking information of tables.
+
+ In some exceptional cases MySQL may send a request for a TL_IGNORE;
+ This means that we are requesting the same lock as last time and this
+ should also be ignored. (This may happen when someone does a flush
+ table when we have opened a part of the tables, in which case mysqld
+ closes and reopens the tables and tries to get the same locks as last
+ time). In the future we will probably try to remove this.
+
+ Called from lock.cc by get_lock_data().
+*/
+
+THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
+ THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::store_lock");
+ file= m_file;
+ do
+ {
+ to= (*file)->store_lock(thd, to, lock_type);
+ } while (*(++file));
+ DBUG_RETURN(to);
+}
+
+
+int ha_partition::start_stmt(THD *thd)
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::start_stmt");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->start_stmt(thd)))
+ break;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Returns the number of store locks needed in call to store lock.
+ We return number of partitions since we call store_lock on each
+ underlying handler. Assists the above functions in allocating
+ sufficient space for lock structures.
+*/
+
+uint ha_partition::lock_count() const
+{
+ DBUG_ENTER("ha_partition::lock_count");
+ DBUG_RETURN(m_tot_parts);
+}
+
+
+/*
+ Record currently processed was not in the result set of the statement
+ and is thus unlocked. Used for UPDATE and DELETE queries.
+*/
+
+void ha_partition::unlock_row()
+{
+ m_file[m_last_part]->unlock_row();
+ return;
+}
+
+
+/****************************************************************************
+ MODULE change record
+****************************************************************************/
+
+/*
+ write_row() inserts a row. buf is a byte array of data, normally record[0].
+
+ You can use the field information to extract the data from the native byte
+ array type.
+
+ Example of this would be:
+ for (Field **field=table->field ; *field ; field++)
+ {
+ ...
+ }
+
+ See ha_tina.cc for an example of extracting all of the data as strings.
+ ha_berkeley.cc has an example of how to store it intact by "packing" it
+ for ha_berkeley's own native storage type.
+
+ See the note for update_row() on auto_increments and timestamps. This
+ case also applied to write_row().
+
+ Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
+ sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
+
+ ADDITIONAL INFO:
+
+ Most handlers set the timestamp when calling write_row if any such fields
+ exist. Since we are calling an underlying handler we assume the
+ underlying handler will assume this responsibility.
+
+ Underlying handlers will also call update_auto_increment to calculate
+ the new auto increment value. We will catch the call to
+ get_auto_increment and ensure this increment value is maintained by
+ only one of the underlying handlers.
+*/
+
+int ha_partition::write_row(byte * buf)
+{
+ uint32 part_id;
+ int error;
+#ifdef NOT_NEEDED
+ byte *rec0= m_rec0;
+#endif
+ DBUG_ENTER("ha_partition::write_row");
+ DBUG_ASSERT(buf == m_rec0);
+
+#ifdef NOT_NEEDED
+ if (likely(buf == rec0))
+#endif
+ error= m_part_info->get_partition_id(m_part_info, &part_id);
+#ifdef NOT_NEEDED
+ else
+ {
+ set_field_ptr(m_part_field_array, buf, rec0);
+ error= m_part_info->get_partition_id(m_part_info, &part_id);
+ set_field_ptr(m_part_field_array, rec0, buf);
+ }
+#endif
+ if (unlikely(error))
+ DBUG_RETURN(error);
+ m_last_part= part_id;
+ DBUG_PRINT("info", ("Insert in partition %d", part_id));
+ DBUG_RETURN(m_file[part_id]->write_row(buf));
+}
+
+
+/*
+ Yes, update_row() does what you expect, it updates a row. old_data will
+ have the previous row record in it, while new_data will have the newest
+ data in it.
+ Keep in mind that the server can do updates based on ordering if an
+ ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+ Currently new_data will not have an updated auto_increment record, or
+ an updated timestamp field. You can do these yourself, for example:
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+ if (table->next_number_field && record == table->record[0])
+ update_auto_increment();
+
+ Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+ new_data is always record[0]
+ old_data is normally record[1] but may be anything
+
+*/
+
+int ha_partition::update_row(const byte *old_data, byte *new_data)
+{
+ uint32 new_part_id, old_part_id;
+ int error;
+ DBUG_ENTER("ha_partition::update_row");
+
+ if ((error= get_parts_for_update(old_data, new_data, table->record[0],
+ m_part_info, &old_part_id, &new_part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+
+ /*
+ TODO:
+ set_internal_auto_increment=
+ max(set_internal_auto_increment, new_data->auto_increment)
+ */
+ m_last_part= new_part_id;
+ if (new_part_id == old_part_id)
+ {
+ DBUG_PRINT("info", ("Update in partition %d", new_part_id));
+ DBUG_RETURN(m_file[new_part_id]->update_row(old_data, new_data));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Update from partition %d to partition %d",
+ old_part_id, new_part_id));
+ if ((error= m_file[new_part_id]->write_row(new_data)))
+ DBUG_RETURN(error);
+ if ((error= m_file[old_part_id]->delete_row(old_data)))
+ {
+#ifdef IN_THE_FUTURE
+ (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
+#endif
+ DBUG_RETURN(error);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This will delete a row. buf will contain a copy of the row to be deleted.
+ The server will call this right after the current row has been read
+ (from either a previous rnd_xxx() or index_xxx() call).
+ If you keep a pointer to the last row or can access a primary key it will
+ make doing the deletion quite a bit easier.
+ Keep in mind that the server does not guarantee consecutive deletions.
+ ORDER BY clauses can be used.
+
+ Called in sql_acl.cc and sql_udf.cc to manage internal table information.
+ Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
+ it is used for removing duplicates while in insert it is used for REPLACE
+ calls.
+
+ buf is either record[0] or record[1]
+
+*/
+
+int ha_partition::delete_row(const byte *buf)
+{
+ uint32 part_id;
+ int error;
+ DBUG_ENTER("ha_partition::delete_row");
+
+ if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ m_last_part= part_id;
+ DBUG_RETURN(m_file[part_id]->delete_row(buf));
+}
+
+
+/*
+ Used to delete all rows in a table. Both for cases of truncate and
+ for cases where the optimizer realizes that all rows will be
+ removed as a result of a SQL statement.
+
+ Called from item_sum.cc by Item_func_group_concat::clear(),
+ Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
+ Called from sql_delete.cc by mysql_delete().
+ Called from sql_select.cc by JOIN::reinit().
+ Called from sql_union.cc by st_select_lex_unit::exec().
+*/
+
+int ha_partition::delete_all_rows()
+{
+ int error;
+ handler **file;
+ DBUG_ENTER("ha_partition::delete_all_rows");
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->delete_all_rows()))
+ DBUG_RETURN(error);
+ } while (*(++file));
+ DBUG_RETURN(0);
+}
+
+/*
+ rows == 0 means we will probably insert many rows
+*/
+
+void ha_partition::start_bulk_insert(ha_rows rows)
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::start_bulk_insert");
+ if (!rows)
+ {
+ /* Avoid allocating big caches in all underlying handlers */
+ DBUG_VOID_RETURN;
+ }
+ rows= rows/m_tot_parts + 1;
+ file= m_file;
+ do
+ {
+ (*file)->start_bulk_insert(rows);
+ } while (*(++file));
+ DBUG_VOID_RETURN;
+}
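
The per-partition estimate above is simply rows / m_tot_parts + 1, on the heuristic (not guaranteed) assumption that inserted rows spread roughly evenly across partitions. A trivial sketch of the split:

    #include <cstdio>

    int main()
    {
      unsigned long long rows= 100000;     // caller's total row estimate
      unsigned tot_parts= 7;               // number of partitions
      unsigned long long per_part= rows / tot_parts + 1;
      printf("each of %u partitions is told to expect %llu rows\n",
             tot_parts, per_part);         // 14286 rows per partition
      return 0;
    }
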
+
+
+int ha_partition::end_bulk_insert()
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::end_bulk_insert");
+
+ file= m_file;
+ do
+ {
+ int tmp;
+ /* We want to execute end_bulk_insert() on all handlers */
+ if ((tmp= (*file)->end_bulk_insert()))
+ error= tmp;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+/****************************************************************************
+ MODULE full table scan
+****************************************************************************/
+/*
+ Initialize engine for random reads
+
+ SYNOPSIS
+ ha_partition::rnd_init()
+ scan 0 Initialize for random reads through rnd_pos()
+ 1 Initialize for random scan through rnd_next()
+
+ NOTES
+ rnd_init() is called when the server wants the storage engine to do a
+ table scan or when the server wants to access data through rnd_pos.
+
+ When scan is used we will scan one handler partition at a time.
+ When preparing for rnd_pos we will init all handler partitions.
+ No extra cache handling is needed when scanning is not performed.
+
+ Before initialising we will call rnd_end to ensure that we clean up from
+ any previous incarnation of a table scan.
+ Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+ sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_init(bool scan)
+{
+ int error;
+ handler **file;
+ DBUG_ENTER("ha_partition::rnd_init");
+
+ include_partition_fields_in_used_fields();
+ if (scan)
+ {
+ /*
+ rnd_end() is needed for partitioning to reset internal data if scan
+ is already in use
+ */
+
+ rnd_end();
+ if (partition_scan_set_up(rec_buf(0), FALSE))
+ {
+ /*
+ The set of partitions to scan is empty. We return success and return
+ end of file on first rnd_next.
+ */
+ DBUG_RETURN(0);
+ }
+ /*
+ We will use the partition set in our scan, using the start and stop
+ partition and checking each partition before starting the scan, based
+ on the bitfields.
+ */
+ late_extra_cache(m_part_spec.start_part);
+ DBUG_PRINT("info", ("rnd_init on partition %d",m_part_spec.start_part));
+ error= m_file[m_part_spec.start_part]->ha_rnd_init(1);
+ m_scan_value= 1; // Scan active
+ if (error)
+ m_scan_value= 2; // No scan active
+ DBUG_RETURN(error);
+ }
+ file= m_file;
+ do
+ {
+ if ((error= (*file)->ha_rnd_init(0)))
+ goto err;
+ } while (*(++file));
+ m_scan_value= 0;
+ DBUG_RETURN(0);
+
+err:
+ while (file--)
+ (*file)->ha_rnd_end();
+ DBUG_RETURN(error);
+}
+
+
+int ha_partition::rnd_end()
+{
+ handler **file;
+ DBUG_ENTER("ha_partition::rnd_end");
+ switch (m_scan_value) {
+ case 2: // Error
+ break;
+ case 1: // Table scan
+ if (m_part_spec.start_part != NO_CURRENT_PART_ID)
+ {
+ late_extra_no_cache(m_part_spec.start_part);
+ m_file[m_part_spec.start_part]->ha_rnd_end();
+ }
+ break;
+ case 0:
+ file= m_file;
+ do
+ {
+ (*file)->ha_rnd_end();
+ } while (*(++file));
+ break;
+ }
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_scan_value= 2;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ read next row during full table scan (scan in random row order)
+
+ SYNOPSIS
+ rnd_next()
+ buf buffer that should be filled with data
+
+ This is called for each row of the table scan. When you run out of records
+ you should return HA_ERR_END_OF_FILE.
+ The Field structure for the table is the key to getting data into buf
+ in a manner that will allow the server to understand it.
+
+ Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+ sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_next(byte *buf)
+{
+ uint part_id= m_part_spec.start_part; // Cache of this variable
+ handler *file= m_file[part_id];
+ int result= HA_ERR_END_OF_FILE;
+ DBUG_ENTER("ha_partition::rnd_next");
+
+ DBUG_ASSERT(m_scan_value == 1);
+
+ if (part_id > m_part_spec.end_part)
+ {
+ /*
+ The original set of partitions to scan was empty and thus we report
+ the result here.
+ */
+ goto end;
+ }
+ while (TRUE)
+ {
+ if ((result= file->rnd_next(buf)))
+ {
+ if (result == HA_ERR_RECORD_DELETED)
+ continue; // Probably MyISAM
+
+ if (result != HA_ERR_END_OF_FILE)
+ break; // Return error
+
+ /* End current partition */
+ late_extra_no_cache(part_id);
+ DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
+ if ((result= file->ha_rnd_end()))
+ break;
+ /* Shift to next partition */
+ if (++part_id > m_part_spec.end_part)
+ {
+ result= HA_ERR_END_OF_FILE;
+ break;
+ }
+ file= m_file[part_id];
+ DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+ if ((result= file->ha_rnd_init(1)))
+ break;
+ late_extra_cache(part_id);
+ }
+ else
+ {
+ m_part_spec.start_part= part_id;
+ m_last_part= part_id;
+ table->status= 0;
+ DBUG_RETURN(0);
+ }
+ }
+
+end:
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ table->status= STATUS_NOT_FOUND;
+ DBUG_RETURN(result);
+}
+
+
+inline void store_part_id_in_pos(byte *pos, uint part_id)
+{
+ int2store(pos, part_id);
+}
+
+inline uint get_part_id_from_pos(const byte *pos)
+{
+ return uint2korr(pos);
+}
+
+/*
+ position() is called after each call to rnd_next() if the data needs
+ to be ordered. You can do something like the following to store
+ the position:
+ ha_store_ptr(ref, ref_length, current_position);
+
+ The server uses ref to store data. ref_length in the above case is
+ the size needed to store current_position. ref is just a byte array
+  that the server will maintain. If you are using offsets to mark rows, then
+  current_position should be the offset. If the position is a primary key,
+  as in BDB, then ref must contain that primary key.
+
+ Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
+*/
+
+void ha_partition::position(const byte *record)
+{
+ handler *file= m_file[m_last_part];
+ DBUG_ENTER("ha_partition::position");
+ file->position(record);
+ store_part_id_in_pos(ref, m_last_part);
+ memcpy((ref + PARTITION_BYTES_IN_POS), file->ref,
+ (ref_length - PARTITION_BYTES_IN_POS));
+
+#ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES
+#ifdef HAVE_purify
+ bzero(ref + PARTITION_BYTES_IN_POS + ref_length, max_ref_length-ref_length);
+#endif /* HAVE_purify */
+#endif
+ DBUG_VOID_RETURN;
+}
+
+/*
+ This is like rnd_next, but you are given a position to use
+ to determine the row. The position will be of the type that you stored in
+ ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
+ or position you saved when position() was called.
+  Called from filesort.cc, records.cc, sql_insert.cc, sql_select.cc and
+  sql_update.cc.
+*/
+
+int ha_partition::rnd_pos(byte * buf, byte *pos)
+{
+ uint part_id;
+ handler *file;
+ DBUG_ENTER("ha_partition::rnd_pos");
+
+ part_id= get_part_id_from_pos((const byte *) pos);
+ DBUG_ASSERT(part_id < m_tot_parts);
+ file= m_file[part_id];
+ m_last_part= part_id;
+ DBUG_RETURN(file->rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
+}
+
+
+/****************************************************************************
+ MODULE index scan
+****************************************************************************/
+/*
+ Positions an index cursor to the index specified in the handle. Fetches the
+ row if available. If the key value is null, begin at the first key of the
+ index.
+
+ There are loads of optimisations possible here for the partition handler.
+  The same optimisations can also be applied to full table scans, although
+  there only through conditions and not through index ranges.
+ Phase one optimisations:
+ Check if the fields of the partition function are bound. If so only use
+ the single partition it becomes bound to.
+ Phase two optimisations:
+    If it can be deduced through range or list partitioning that only a
+ subset of the partitions are used, then only use those partitions.
+*/
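+
+/*
+  A hypothetical illustration of the two phases, assuming a table like
+
+    CREATE TABLE t1 (a INT)
+    PARTITION BY RANGE (a)
+    (PARTITION p0 VALUES LESS THAN (10),
+     PARTITION p1 VALUES LESS THAN (20),
+     PARTITION p2 VALUES LESS THAN MAXVALUE);
+
+  Phase one: SELECT * FROM t1 WHERE a = 17 binds the only field of the
+  partition function, so just p1 has to be scanned.
+  Phase two: SELECT * FROM t1 WHERE a BETWEEN 5 AND 15 can be deduced to
+  touch only p0 and p1, so p2 is excluded from the scan.
+*/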
+
+/*
+ index_init is always called before starting index scans (except when
+ starting through index_read_idx and using read_range variants).
+*/
+
+int ha_partition::index_init(uint inx, bool sorted)
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::index_init");
+
+ active_index= inx;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ m_start_key.length= 0;
+ m_ordered= sorted;
+ m_curr_key_info= table->key_info+inx;
+ include_partition_fields_in_used_fields();
+
+ file= m_file;
+ do
+ {
+ /* TODO RONM: Change to index_init() when code is stable */
+ if ((error= (*file)->ha_index_init(inx, sorted)))
+ {
+ DBUG_ASSERT(0); // Should never happen
+ break;
+ }
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+  index_end is called at the end of an index scan to clean up anything
+  that needs cleaning up.
+*/
+
+int ha_partition::index_end()
+{
+ int error= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::index_end");
+
+ active_index= MAX_KEY;
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ file= m_file;
+ do
+ {
+ int tmp;
+ /* We want to execute index_end() on all handlers */
+ /* TODO RONM: Change to index_end() when code is stable */
+ if ((tmp= (*file)->ha_index_end()))
+ error= tmp;
+ } while (*(++file));
+ DBUG_RETURN(error);
+}
+
+
+/*
+ index_read starts a new index scan using a start key. The MySQL Server
+ will check the end key on its own. Thus to function properly the
+  partitioned handler needs to ensure that it delivers records in the sort
+ order of the MySQL Server.
+ index_read can be restarted without calling index_end on the previous
+ index scan and without calling index_init. In this case the index_read
+  is on the same index as the previous index scan. This is particularly
+  used in conjunction with multi-read ranges.
+*/
+
+int ha_partition::index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag)
+{
+ DBUG_ENTER("ha_partition::index_read");
+ end_range= 0;
+ DBUG_RETURN(common_index_read(buf, key, key_len, find_flag));
+}
+
+
+int ha_partition::common_index_read(byte *buf, const byte *key, uint key_len,
+ enum ha_rkey_function find_flag)
+{
+ int error;
+ DBUG_ENTER("ha_partition::common_index_read");
+
+ memcpy((void*)m_start_key.key, key, key_len);
+ m_start_key.length= key_len;
+ m_start_key.flag= find_flag;
+ m_index_scan_type= partition_index_read;
+
+ if ((error= partition_scan_set_up(buf, TRUE)))
+ {
+ DBUG_RETURN(error);
+ }
+
+ if (!m_ordered_scan_ongoing ||
+ (find_flag == HA_READ_KEY_EXACT &&
+ (key_len >= m_curr_key_info->key_length ||
+ key_len == 0)))
+ {
+ /*
+      We use an unordered index scan either when read_range is used with
+      the flag set to not use ordering, or when an exact key is used; in
+      that case all matching records sort as equal and thus the sort order
+      of the resulting records doesn't matter.
+ We also use an unordered index scan when the number of partitions to
+ scan is only one.
+ The unordered index scan will use the partition set created.
+ Need to set unordered scan ongoing since we can come here even when
+ it isn't set.
+ */
+ m_ordered_scan_ongoing= FALSE;
+ error= handle_unordered_scan_next_partition(buf);
+ }
+ else
+ {
+ /*
+ In all other cases we will use the ordered index scan. This will use
+ the partition set created by the get_partition_set method.
+ */
+ error= handle_ordered_index_scan(buf);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/*
+ index_first() asks for the first key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the leftmost entry and proceeds forward with
+ index_next.
+
+ Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+ and sql_select.cc.
+*/
+
+int ha_partition::index_first(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_first");
+ end_range= 0;
+ m_index_scan_type= partition_index_first;
+ DBUG_RETURN(common_first_last(buf));
+}
+
+
+/*
+ index_last() asks for the last key in the index.
+ This is similar to index_read except that there is no start key since
+ the scan starts from the rightmost entry and proceeds forward with
+ index_prev.
+
+ Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+ and sql_select.cc.
+*/
+
+int ha_partition::index_last(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_last");
+ m_index_scan_type= partition_index_last;
+ DBUG_RETURN(common_first_last(buf));
+}
+
+int ha_partition::common_first_last(byte *buf)
+{
+ int error;
+ if ((error= partition_scan_set_up(buf, FALSE)))
+ return error;
+ if (!m_ordered_scan_ongoing)
+ return handle_unordered_scan_next_partition(buf);
+ return handle_ordered_index_scan(buf);
+}
+
+/*
+ Positions an index cursor to the index specified in key. Fetches the
+ row if any. This is only used to read whole keys.
+ TODO: Optimise this code to avoid index_init and index_end
+*/
+
+int ha_partition::index_read_idx(byte * buf, uint index, const byte * key,
+ uint key_len,
+ enum ha_rkey_function find_flag)
+{
+ int res;
+ DBUG_ENTER("ha_partition::index_read_idx");
+ index_init(index, 0);
+ res= index_read(buf, key, key_len, find_flag);
+ index_end();
+ DBUG_RETURN(res);
+}
+
+/*
+ This is used in join_read_last_key to optimise away an ORDER BY.
+ Can only be used on indexes supporting HA_READ_ORDER
+*/
+
+int ha_partition::index_read_last(byte *buf, const byte *key, uint keylen)
+{
+ DBUG_ENTER("ha_partition::index_read_last");
+ m_ordered= TRUE; // Safety measure
+ DBUG_RETURN(index_read(buf, key, keylen, HA_READ_PREFIX_LAST));
+}
+
+
+/*
+ Used to read forward through the index.
+*/
+
+int ha_partition::index_next(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_next");
+ /*
+ TODO(low priority):
+ If we want partition to work with the HANDLER commands, we
+ must be able to do index_last() -> index_prev() -> index_next()
+ */
+ DBUG_ASSERT(m_index_scan_type != partition_index_last);
+ if (!m_ordered_scan_ongoing)
+ {
+ DBUG_RETURN(handle_unordered_next(buf, FALSE));
+ }
+ DBUG_RETURN(handle_ordered_next(buf, FALSE));
+}
+
+
+/*
+ This routine is used to read the next but only if the key is the same
+ as supplied in the call.
+*/
+
+int ha_partition::index_next_same(byte *buf, const byte *key, uint keylen)
+{
+ DBUG_ENTER("ha_partition::index_next_same");
+ DBUG_ASSERT(keylen == m_start_key.length);
+ DBUG_ASSERT(m_index_scan_type != partition_index_last);
+ if (!m_ordered_scan_ongoing)
+ DBUG_RETURN(handle_unordered_next(buf, TRUE));
+ DBUG_RETURN(handle_ordered_next(buf, TRUE));
+}
+
+/*
+ Used to read backwards through the index.
+*/
+
+int ha_partition::index_prev(byte * buf)
+{
+ DBUG_ENTER("ha_partition::index_prev");
+ /* TODO: read comment in index_next */
+ DBUG_ASSERT(m_index_scan_type != partition_index_first);
+ DBUG_RETURN(handle_ordered_prev(buf));
+}
+
+
+/*
+ We reimplement read_range_first since we don't want the compare_key
+ check at the end. This is already performed in the partition handler.
+ read_range_next is very much different due to that we need to scan
+ all underlying handlers.
+*/
+
+int ha_partition::read_range_first(const key_range *start_key,
+ const key_range *end_key,
+ bool eq_range_arg, bool sorted)
+{
+ int error;
+ DBUG_ENTER("ha_partition::read_range_first");
+ m_ordered= sorted;
+ eq_range= eq_range_arg;
+ end_range= 0;
+ if (end_key)
+ {
+ end_range= &save_end_range;
+ save_end_range= *end_key;
+ key_compare_result_on_equal=
+ ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
+ (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
+ }
+ range_key_part= m_curr_key_info->key_part;
+
+ if (!start_key) // Read first record
+ {
+ m_index_scan_type= partition_index_first;
+ error= common_first_last(m_rec0);
+ }
+ else
+ {
+ error= common_index_read(m_rec0,
+ start_key->key,
+ start_key->length, start_key->flag);
+ }
+ DBUG_RETURN(error);
+}
+
+
+int ha_partition::read_range_next()
+{
+ DBUG_ENTER("ha_partition::read_range_next");
+ if (m_ordered)
+ {
+ DBUG_RETURN(handler::read_range_next());
+ }
+ DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+}
+
+
+int ha_partition::partition_scan_set_up(byte * buf, bool idx_read_flag)
+{
+ DBUG_ENTER("ha_partition::partition_scan_set_up");
+
+ if (idx_read_flag)
+ get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec);
+ else
+ get_partition_set(table, buf, MAX_KEY, 0, &m_part_spec);
+ if (m_part_spec.start_part > m_part_spec.end_part)
+ {
+ /*
+ We discovered a partition set but the set was empty so we report
+ key not found.
+ */
+ DBUG_PRINT("info", ("scan with no partition to scan"));
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ if (m_part_spec.start_part == m_part_spec.end_part)
+ {
+ /*
+ We discovered a single partition to scan, this never needs to be
+ performed using the ordered index scan.
+ */
+ DBUG_PRINT("info", ("index scan using the single partition %d",
+ m_part_spec.start_part));
+ m_ordered_scan_ongoing= FALSE;
+ }
+ else
+ {
+ /*
+      Set m_ordered_scan_ongoing according to how the scan should be done
+ */
+ m_ordered_scan_ongoing= m_ordered;
+ }
+ DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
+ m_part_spec.end_part < m_tot_parts);
+ DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+ Unordered Index Scan Routines
+****************************************************************************/
+/*
+ These routines are used to scan partitions without considering order.
+ This is performed in two situations.
+ 1) In read_multi_range this is the normal case
+ 2) When performing any type of index_read, index_first, index_last where
+     all fields in the partition function are bound. In this case the index
+ scan is performed on only one partition and thus it isn't necessary to
+ perform any sort.
+*/
+
+int ha_partition::handle_unordered_next(byte *buf, bool next_same)
+{
+  handler *file= m_file[m_part_spec.start_part];
+ int error;
+ DBUG_ENTER("ha_partition::handle_unordered_next");
+
+ /*
+ We should consider if this should be split into two functions as
+    next_same is always a local constant
+ */
+ if (next_same)
+ {
+ if (!(error= file->index_next_same(buf, m_start_key.key,
+ m_start_key.length)))
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0);
+ }
+ }
+ else if (!(error= file->index_next(buf)))
+ {
+ if (compare_key(end_range) <= 0)
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0); // Row was in range
+ }
+ error= HA_ERR_END_OF_FILE;
+ }
+
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ m_part_spec.start_part++; // Start using next part
+ error= handle_unordered_scan_next_partition(buf);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/*
+ This routine is used to start the index scan on the next partition.
+ Both initial start and after completing scan on one partition.
+*/
+
+int ha_partition::handle_unordered_scan_next_partition(byte * buf)
+{
+ uint i;
+ DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");
+
+ for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
+ {
+ int error;
+ handler *file= m_file[i];
+
+ m_part_spec.start_part= i;
+ switch (m_index_scan_type) {
+ case partition_index_read:
+ DBUG_PRINT("info", ("index_read on partition %d", i));
+ error= file->index_read(buf, m_start_key.key,
+ m_start_key.length,
+ m_start_key.flag);
+ break;
+ case partition_index_first:
+ DBUG_PRINT("info", ("index_first on partition %d", i));
+ error= file->index_first(buf);
+ break;
+ default:
+ DBUG_ASSERT(FALSE);
+ DBUG_RETURN(1);
+ }
+ if (!error)
+ {
+ if (compare_key(end_range) <= 0)
+ {
+ m_last_part= i;
+ DBUG_RETURN(0);
+ }
+ error= HA_ERR_END_OF_FILE;
+ }
+ if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
+ DBUG_RETURN(error);
+ DBUG_PRINT("info", ("HA_ERR_END_OF_FILE on partition %d", i));
+ }
+ m_part_spec.start_part= NO_CURRENT_PART_ID;
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+}
+
+
+/*
+  This part contains the logic to handle index scans that require ordered
+  output. This includes all scans except those started by read_range_first
+  with the ordered flag set to FALSE; thus most direct index_read calls and
+  all index_first and index_last calls.
+
+ We implement ordering by keeping one record plus a key buffer for each
+ partition. Every time a new entry is requested we will fetch a new
+ entry from the partition that is currently not filled with an entry.
+ Then the entry is put into its proper sort position.
+
+  Returning a record is done by taking the top record, copying it to the
+  request buffer and marking that partition's queue entry as empty so it
+  can be refilled.
+*/
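+
+/*
+  The mechanism is a classical k-way merge of the per-partition ordered
+  scans. A minimal standalone sketch of the algorithm, using the C++
+  standard library instead of the mysys QUEUE used below (so all names
+  here are illustrative, not server code):
+
+    #include <cstdio>
+    #include <queue>
+    #include <vector>
+
+    struct PartRow { int part_id; int key; };
+    struct ByKey      // invert the comparison to get a min-heap on key
+    {
+      bool operator()(const PartRow &a, const PartRow &b) const
+      { return a.key > b.key; }
+    };
+
+    int main()
+    {
+      std::vector<std::vector<int> > parts(2);  // two sorted "partitions"
+      int p0[]= {1, 4, 9}, p1[]= {2, 3, 7};
+      parts[0].assign(p0, p0 + 3);
+      parts[1].assign(p1, p1 + 3);
+      std::vector<size_t> pos(parts.size(), 0);
+
+      std::priority_queue<PartRow, std::vector<PartRow>, ByKey> q;
+      for (int i= 0; i < (int) parts.size(); i++) // seed one row per part
+      {
+        PartRow r= { i, parts[i][pos[i]++] };
+        q.push(r);
+      }
+      while (!q.empty())                          // pop, emit, refill
+      {
+        PartRow top= q.top(); q.pop();
+        printf("%d from partition %d\n", top.key, top.part_id);
+        if (pos[top.part_id] < parts[top.part_id].size())
+        {
+          PartRow r= { top.part_id, parts[top.part_id][pos[top.part_id]++] };
+          q.push(r);
+        }
+      }
+      return 0;                // prints 1, 2, 3, 4, 7, 9 in sorted order
+    }
+
+  handle_ordered_index_scan below corresponds to the seeding loop, and
+  handle_ordered_next/handle_ordered_prev to the pop-refill step, one
+  row per call.
+*/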
+
+int ha_partition::handle_ordered_index_scan(byte *buf)
+{
+ uint i, j= 0;
+ bool found= FALSE;
+ bool reverse_order= FALSE;
+ DBUG_ENTER("ha_partition::handle_ordered_index_scan");
+
+ m_top_entry= NO_CURRENT_PART_ID;
+ queue_remove_all(&queue);
+ for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
+ {
+ int error;
+ byte *rec_buf_ptr= rec_buf(i);
+ handler *file= m_file[i];
+
+ switch (m_index_scan_type) {
+ case partition_index_read:
+ error= file->index_read(rec_buf_ptr,
+ m_start_key.key,
+ m_start_key.length,
+ m_start_key.flag);
+ reverse_order= FALSE;
+ break;
+ case partition_index_first:
+ error= file->index_first(rec_buf_ptr);
+ reverse_order= FALSE;
+ break;
+ case partition_index_last:
+ error= file->index_last(rec_buf_ptr);
+ reverse_order= TRUE;
+ break;
+ default:
+ DBUG_ASSERT(FALSE);
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ if (!error)
+ {
+ found= TRUE;
+ /*
+ Initialise queue without order first, simply insert
+ */
+ queue_element(&queue, j++)= (byte*)queue_buf(i);
+ }
+ else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+ {
+ DBUG_RETURN(error);
+ }
+ }
+ if (found)
+ {
+ /*
+ We found at least one partition with data, now sort all entries and
+ after that read the first entry and copy it to the buffer to return in.
+ */
+ queue_set_max_at_top(&queue, reverse_order);
+ queue_set_cmp_arg(&queue, (void*)m_curr_key_info);
+ queue.elements= j;
+ queue_fix(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+}
+
+
+void ha_partition::return_top_record(byte *buf)
+{
+ uint part_id;
+ byte *key_buffer= queue_top(&queue);
+ byte *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS;
+ part_id= uint2korr(key_buffer);
+ memcpy(buf, rec_buffer, m_rec_length);
+ m_last_part= part_id;
+ m_top_entry= part_id;
+}
+
+
+int ha_partition::handle_ordered_next(byte *buf, bool next_same)
+{
+ int error;
+ uint part_id= m_top_entry;
+ handler *file= m_file[part_id];
+ DBUG_ENTER("ha_partition::handle_ordered_next");
+
+ if (!next_same)
+ error= file->index_next(rec_buf(part_id));
+ else
+ error= file->index_next_same(rec_buf(part_id), m_start_key.key,
+ m_start_key.length);
+ if (error)
+ {
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ /* Return next buffered row */
+ queue_remove(&queue, (uint) 0);
+ if (queue.elements)
+ {
+ DBUG_PRINT("info", ("Record returned from partition %u (2)",
+ m_top_entry));
+ return_top_record(buf);
+ error= 0;
+ }
+ }
+ DBUG_RETURN(error);
+ }
+ queue_replaced(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
+ DBUG_RETURN(0);
+}
+
+
+int ha_partition::handle_ordered_prev(byte *buf)
+{
+ int error;
+ uint part_id= m_top_entry;
+ handler *file= m_file[part_id];
+ DBUG_ENTER("ha_partition::handle_ordered_prev");
+ if ((error= file->index_prev(rec_buf(part_id))))
+ {
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ queue_remove(&queue, (uint) 0);
+ if (queue.elements)
+ {
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d (2)",
+ m_top_entry));
+ error= 0;
+ }
+ }
+ DBUG_RETURN(error);
+ }
+ queue_replaced(&queue);
+ return_top_record(buf);
+ DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
+ DBUG_RETURN(0);
+}
+
+
+void ha_partition::include_partition_fields_in_used_fields()
+{
+ DBUG_ENTER("ha_partition::include_partition_fields_in_used_fields");
+ Field **ptr= m_part_field_array;
+ do
+ {
+ ha_set_bit_in_read_set((*ptr)->fieldnr);
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ MODULE information calls
+****************************************************************************/
+
+/*
+ These are all first approximations of the extra, info, scan_time
+ and read_time calls
+*/
+
+/*
+ ::info() is used to return information to the optimizer.
+ Currently this table handler doesn't implement most of the fields
+  really needed. SHOW also makes use of this data.
+  Another note: if your handler doesn't provide an exact record count,
+ you will probably want to have the following in your code:
+ if (records < 2)
+ records = 2;
+ The reason is that the server will optimize for cases of only a single
+ record. If in a table scan you don't know the number of records
+ it will probably be better to set records to two so you can return
+ as many records as you need.
+
+ Along with records a few more variables you may wish to set are:
+ records
+ deleted
+ data_file_length
+ index_file_length
+ delete_length
+ check_time
+ Take a look at the public variables in handler.h for more information.
+
+ Called in:
+    filesort.cc
+    ha_heap.cc
+    item_sum.cc
+    opt_sum.cc
+    sql_delete.cc
+    sql_derived.cc
+    sql_select.cc
+    sql_show.cc
+    sql_table.cc
+    sql_union.cc
+    sql_update.cc
+
+ Some flags that are not implemented
+ HA_STATUS_POS:
+    This parameter is never used from the MySQL Server. It is checked in
+    one place in MyISAM, so it could potentially be used by MyISAM-specific
+    programs.
+ HA_STATUS_NO_LOCK:
+    This is declared and often passed, but only MyISAM acts on it.
+ It means that MySQL doesn't need the absolute latest statistics
+ information. This may save the handler from doing internal locks while
+ retrieving statistics data.
+*/
+
+void ha_partition::info(uint flag)
+{
+ handler *file, **file_array;
+ DBUG_ENTER("ha_partition:info");
+
+ if (flag & HA_STATUS_AUTO)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_AUTO"));
+ /*
+ The auto increment value is only maintained by the first handler
+ so we will only call this.
+ */
+ m_file[0]->info(HA_STATUS_AUTO);
+ }
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
+ /*
+ Calculates statistical variables
+ records: Estimate of number records in table
+ We report sum (always at least 2)
+ deleted: Estimate of number holes in the table due to
+ deletes
+ We report sum
+ data_file_length: Length of data file, in principle bytes in table
+ We report sum
+ index_file_length: Length of index file, in principle bytes in
+ indexes in the table
+ We report sum
+ mean_record_length:Mean record length in the table
+ We calculate this
+ check_time: Time of last check (only applicable to MyISAM)
+ We report last time of all underlying handlers
+ */
+ records= 0;
+ deleted= 0;
+ data_file_length= 0;
+ index_file_length= 0;
+ check_time= 0;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ file->info(HA_STATUS_VARIABLE);
+ records+= file->records;
+ deleted+= file->deleted;
+ data_file_length+= file->data_file_length;
+ index_file_length+= file->index_file_length;
+ if (file->check_time > check_time)
+ check_time= file->check_time;
+ } while (*(++file_array));
+ if (records < 2)
+ records= 2;
+ mean_rec_length= (ulong) (data_file_length / records);
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ DBUG_PRINT("info", ("HA_STATUS_CONST"));
+ /*
+ Recalculate loads of constant variables. MyISAM also sets things
+ directly on the table share object.
+
+ Check whether this should be fixed since handlers should not
+ change things directly on the table object.
+
+      Monty comment: This should NOT be changed! It's the handler's
+      responsibility to correct table->s->keys_xxxx information if keys
+ have been disabled.
+
+      The most important parameters set here are records per key on
+      all indexes, block_size and primary key ref_length.
+
+ For each index there is an array of rec_per_key.
+ As an example if we have an index with three attributes a,b and c
+ we will have an array of 3 rec_per_key.
+ rec_per_key[0] is an estimate of number of records divided by
+ number of unique values of the field a.
+ rec_per_key[1] is an estimate of the number of records divided
+ by the number of unique combinations of the fields a and b.
+ rec_per_key[2] is an estimate of the number of records divided
+ by the number of unique combinations of the fields a,b and c.
+
+ Many handlers only set the value of rec_per_key when all fields
+ are bound (rec_per_key[2] in the example above).
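+
+      As a hypothetical numeric illustration: with 1000 records, 10
+      distinct values of a, 100 distinct combinations of (a,b) and a
+      unique (a,b,c), the estimates would be rec_per_key[0]= 100,
+      rec_per_key[1]= 10 and rec_per_key[2]= 1.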
+
+ If the handler doesn't support statistics, it should set all of the
+ above to 0.
+
+ We will allow the first handler to set the rec_per_key and use
+ this as an estimate on the total table.
+
+ max_data_file_length: Maximum data file length
+ We ignore it, is only used in
+ SHOW TABLE STATUS
+ max_index_file_length: Maximum index file length
+ We ignore it since it is never used
+ block_size: Block size used
+ We set it to the value of the first handler
+ sortkey: Never used at any place so ignored
+ ref_length: We set this to the value calculated
+ and stored in local object
+ raid_type: Set by first handler (MyISAM)
+ raid_chunks: Set by first handler (MyISAM)
+ raid_chunksize: Set by first handler (MyISAM)
+ create_time: Creation time of table
+ Set by first handler
+
+ So we calculate these constants by using the variables on the first
+ handler.
+ */
+
+ file= m_file[0];
+ file->info(HA_STATUS_CONST);
+ create_time= file->create_time;
+ raid_type= file->raid_type;
+ raid_chunks= file->raid_chunks;
+ raid_chunksize= file->raid_chunksize;
+ ref_length= m_ref_length;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ handler *file= m_file[m_last_part];
+ DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
+ /*
+ This flag is used to get index number of the unique index that
+ reported duplicate key
+ We will report the errkey on the last handler used and ignore the rest
+ */
+ file->info(HA_STATUS_ERRKEY);
+ if (file->errkey != (uint) -1)
+ errkey= file->errkey;
+ }
+ if (flag & HA_STATUS_TIME)
+ {
+ DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
+ /*
+ This flag is used to set the latest update time of the table.
+ Used by SHOW commands
+ We will report the maximum of these times
+ */
+ update_time= 0;
+ file_array= m_file;
+ do
+ {
+ file= *file_array;
+ file->info(HA_STATUS_TIME);
+ if (file->update_time > update_time)
+ update_time= file->update_time;
+ } while (*(++file_array));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ extra() is called whenever the server wishes to send a hint to
+ the storage engine. The MyISAM engine implements the most hints.
+
+ We divide the parameters into the following categories:
+ 1) Parameters used by most handlers
+ 2) Parameters used by some non-MyISAM handlers
+ 3) Parameters used only by MyISAM
+ 4) Parameters only used by temporary tables for query processing
+ 5) Parameters only used by MyISAM internally
+ 6) Parameters not used at all
+
+ The partition handler need to handle category 1), 2) and 3).
+
+ 1) Parameters used by most handlers
+ -----------------------------------
+ HA_EXTRA_RESET:
+ This option is used by most handlers and it resets the handler state
+ to the same state as after an open call. This includes releasing
+ any READ CACHE or WRITE CACHE or other internal buffer used.
+
+ It is called from the reset method in the handler interface. There are
+ three instances where this is called.
+ 1) After completing a INSERT ... SELECT ... query the handler for the
+ table inserted into is reset
+ 2) It is called from close_thread_table which in turn is called from
+ close_thread_tables except in the case where the tables are locked
+ in which case ha_commit_stmt is called instead.
+ It is only called from here if flush_version hasn't changed and the
+ table is not an old table when calling close_thread_table.
+ close_thread_tables is called from many places as a general clean up
+ function after completing a query.
+ 3) It is called when deleting the QUICK_RANGE_SELECT object if the
+ QUICK_RANGE_SELECT object had its own handler object. It is called
+       immediately before the close of this local handler object.
+ HA_EXTRA_KEYREAD:
+ HA_EXTRA_NO_KEYREAD:
+ These parameters are used to provide an optimisation hint to the handler.
+ If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
+ many handlers this means that the index-only scans can be used and it
+ is not necessary to use the real records to satisfy this part of the
+    query. Index-only scans are a very important optimisation for disk-based
+ indexes. For main-memory indexes most indexes contain a reference to the
+ record and thus KEYREAD only says that it is enough to read key fields.
+    HA_EXTRA_NO_KEYREAD disables this for the handler; HA_EXTRA_RESET
+    will also disable this option.
+ The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
+ feature is supported.
+ HA_EXTRA_FLUSH:
+ Indication to flush tables to disk, called at close_thread_table to
+ ensure disk based tables are flushed at end of query execution.
+
+ 2) Parameters used by some non-MyISAM handlers
+ ----------------------------------------------
+ HA_EXTRA_RETRIEVE_ALL_COLS:
+ Many handlers have implemented optimisations to avoid fetching all
+ fields when retrieving data. In certain situations all fields need
+ to be retrieved even though the query_id is not set on all field
+ objects.
+
+ It is called from copy_data_between_tables where all fields are
+ copied without setting query_id before calling the handlers.
+    It is called from UPDATE statements when the fields of the index
+    used are updated, or when ORDER BY is used with UPDATE.
+ And finally when calculating checksum of a table using the CHECKSUM
+ command.
+ HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+ In some situations it is mandatory to retrieve primary key fields
+ independent of the query id's. This extra flag specifies that fetch
+ of primary key fields is mandatory.
+ HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ This is a strictly InnoDB feature that is more or less undocumented.
+ When it is activated InnoDB copies field by field from its fetch
+ cache instead of all fields in one memcpy. Have no idea what the
+ purpose of this is.
+ Cut from include/my_base.h:
+ When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
+ other fields intact. When this is off (by default) InnoDB will use memcpy
+ to overwrite entire row.
+ HA_EXTRA_IGNORE_DUP_KEY:
+ HA_EXTRA_NO_IGNORE_DUP_KEY:
+    Informs the handler that we will not stop the transaction if we get
+    duplicate key errors during insert/update.
+ Always called in pair, triggered by INSERT IGNORE and other similar
+ SQL constructs.
+ Not used by MyISAM.
+
+ 3) Parameters used only by MyISAM
+ ---------------------------------
+ HA_EXTRA_NORMAL:
+ Only used in MyISAM to reset quick mode, not implemented by any other
+ handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
+
+ It is called after completing a successful DELETE query if the QUICK
+ option is set.
+
+ HA_EXTRA_QUICK:
+ When the user does DELETE QUICK FROM table where-clause; this extra
+ option is called before the delete query is performed and
+ HA_EXTRA_NORMAL is called after the delete query is completed.
+    Temporary tables used internally in MySQL always set this option.
+
+ The meaning of quick mode is that when deleting in a B-tree no merging
+ of leafs is performed. This is a common method and many large DBMS's
+ actually only support this quick mode since it is very difficult to
+ merge leaves in a tree used by many threads concurrently.
+
+ HA_EXTRA_CACHE:
+ This flag is usually set with extra_opt along with a cache size.
+ The size of this buffer is set by the user variable
+ record_buffer_size. The value of this cache size is the amount of
+ data read from disk in each fetch when performing a table scan.
+ This means that before scanning a table it is normal to call
+ extra with HA_EXTRA_CACHE and when the scan is completed to call
+ HA_EXTRA_NO_CACHE to release the cache memory.
+
+ Some special care is taken when using this extra parameter since there
+ could be a write ongoing on the table in the same statement. In this
+ one has to take special care since there might be a WRITE CACHE as
+ well. HA_EXTRA_CACHE specifies using a READ CACHE and using
+ READ CACHE and WRITE CACHE at the same time is not possible.
+
+ Only MyISAM currently use this option.
+
+ It is set when doing full table scans using rr_sequential and
+ reset when completing such a scan with end_read_record
+ (resetting means calling extra with HA_EXTRA_NO_CACHE).
+
+ It is set in filesort.cc for MyISAM internal tables and it is set in
+ a multi-update where HA_EXTRA_CACHE is called on a temporary result
+ table and after that ha_rnd_init(0) on table to be updated
+ and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
+
+ Apart from that it is always used from init_read_record but not when
+ used from UPDATE statements. It is not used from DELETE statements
+ with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
+    statements. The reason here is that DELETEs in MyISAM don't move
+    existing data rows.
+
+ It is also set in copy_data_between_tables when scanning the old table
+ to copy over to the new table.
+ And it is set in join_init_read_record where quick objects are used
+ to perform a scan on the table. In this case the full table scan can
+ even be performed multiple times as part of the nested loop join.
+
+ For purposes of the partition handler it is obviously necessary to have
+    special treatment of this extra call. If we simply passed this
+    extra call down to each handler we would allocate
+    cache size * number of partitions bytes of memory, which is not
+ necessary since we will only scan one partition at a time when doing
+ full table scans.
+
+ Thus we treat it by first checking whether we have MyISAM handlers in
+ the table, if not we simply ignore the call and if we have we will
+ record the call but will not call any underlying handler yet. Then
+ when performing the sequential scan we will check this recorded value
+ and call extra_opt whenever we start scanning a new partition.
+
+    monty: Needs to be fixed so that it's passed to all handlers when we
+ move to another partition during table scan.
+
+ HA_EXTRA_NO_CACHE:
+ When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
+ flush method in the select_union class.
+    It is used to some extent by INSERT DELAYED.
+ See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
+
+ It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
+    if they are MyISAM handlers. For other handlers we can ignore the
+    call. If no cache is in use they will quickly return after finding
+ this out. And we also ensure that all caches are disabled and no one
+ is left by mistake.
+    In the future this call will probably be deleted and we will instead call
+ ::reset();
+
+ HA_EXTRA_WRITE_CACHE:
+ See above, called from various places. It is mostly used when we
+ do INSERT ... SELECT
+ No special handling to save cache space is developed currently.
+
+ HA_EXTRA_PREPARE_FOR_UPDATE:
+ This is called as part of a multi-table update. When the table to be
+ updated is also scanned then this informs MyISAM handler to drop any
+ caches if dynamic records are used (fixed size records do not care
+ about this call). We pass this along to all underlying MyISAM handlers
+ and ignore it for the rest.
+
+ HA_EXTRA_PREPARE_FOR_DELETE:
+ Only used by MyISAM, called in preparation for a DROP TABLE.
+ It's used mostly by Windows that cannot handle dropping an open file.
+ On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
+
+ HA_EXTRA_READCHECK:
+ HA_EXTRA_NO_READCHECK:
+ Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
+ this is not needed in SQL. The reason for this call is that MyISAM sets
+ the READ_CHECK_USED in the open call so the call is needed for MyISAM
+ to reset this feature.
+ The idea with this parameter was to inform of doing/not doing a read
+ check before applying an update. Since SQL always performs a read before
+    applying the update, no read check is needed in MyISAM as well.
+
+ This is a cut from Docs/myisam.txt
+ Sometimes you might want to force an update without checking whether
+ another user has changed the record since you last read it. This is
+ somewhat dangerous, so it should ideally not be used. That can be
+ accomplished by wrapping the mi_update() call in two calls to mi_extra(),
+ using these functions:
+ HA_EXTRA_NO_READCHECK=5 No readcheck on update
+ HA_EXTRA_READCHECK=6 Use readcheck (def)
+
+ HA_EXTRA_FORCE_REOPEN:
+ Only used by MyISAM, called when altering table, closing tables to
+ enforce a reopen of the table files.
+
+ 4) Parameters only used by temporary tables for query processing
+ ----------------------------------------------------------------
+ HA_EXTRA_RESET_STATE:
+ Same as HA_EXTRA_RESET except that buffers are not released. If there is
+ a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
+ or to change type of cache between READ CACHE and WRITE CACHE.
+
+ This extra function is always called immediately before calling
+ delete_all_rows on the handler for temporary tables.
+ There are cases however when HA_EXTRA_RESET_STATE isn't called in
+ a similar case for a temporary table in sql_union.cc and in two other
+ cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
+ called afterwards.
+ The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
+ disable caching, delete all rows and enable WRITE CACHE. This is
+ used for temporary tables containing distinct sums and a
+ functional group.
+
+ The only case that delete_all_rows is called on non-temporary tables
+ is in sql_delete.cc when DELETE FROM table; is called by a user.
+ In this case no special extra calls are performed before or after this
+ call.
+
+ The partition handler should not need to bother about this one. It
+ should never be called.
+
+ HA_EXTRA_NO_ROWS:
+ Don't insert rows indication to HEAP and MyISAM, only used by temporary
+ tables used in query processing.
+ Not handled by partition handler.
+
+ 5) Parameters only used by MyISAM internally
+ --------------------------------------------
+ HA_EXTRA_REINIT_CACHE:
+ This call reinitialises the READ CACHE described above if there is one
+ and otherwise the call is ignored.
+
+ We can thus safely call it on all underlying handlers if they are
+ MyISAM handlers. It is however never called so we don't handle it at all.
+ HA_EXTRA_FLUSH_CACHE:
+    Flush WRITE CACHE in MyISAM. It is only called from one place in the code.
+ This is in sql_insert.cc where it is called if the table_flags doesn't
+ contain HA_DUPP_POS. The only handler having the HA_DUPP_POS set is the
+ MyISAM handler and so the only handler not receiving this call is MyISAM.
+ Thus in effect this call is called but never used. Could be removed
+ from sql_insert.cc
+ HA_EXTRA_NO_USER_CHANGE:
+ Only used by MyISAM, never called.
+ Simulates lock_type as locked.
+ HA_EXTRA_WAIT_LOCK:
+ HA_EXTRA_WAIT_NOLOCK:
+ Only used by MyISAM, called from MyISAM handler but never from server
+ code on top of the handler.
+ Sets lock_wait on/off
+ HA_EXTRA_NO_KEYS:
+    Only used by MyISAM, only used internally in the MyISAM handler, never called
+ from server level.
+ HA_EXTRA_KEYREAD_CHANGE_POS:
+ HA_EXTRA_REMEMBER_POS:
+ HA_EXTRA_RESTORE_POS:
+ HA_EXTRA_PRELOAD_BUFFER_SIZE:
+ HA_EXTRA_CHANGE_KEY_TO_DUP:
+ HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ Only used by MyISAM, never called.
+
+ 6) Parameters not used at all
+ -----------------------------
+ HA_EXTRA_KEY_CACHE:
+ HA_EXTRA_NO_KEY_CACHE:
+    These parameters are no longer used and could be removed.
+*/
+
+int ha_partition::extra(enum ha_extra_function operation)
+{
+ DBUG_ENTER("ha_partition:extra");
+ DBUG_PRINT("info", ("operation: %d", (int) operation));
+
+ switch (operation) {
+ /* Category 1), used by most handlers */
+ case HA_EXTRA_KEYREAD:
+ case HA_EXTRA_NO_KEYREAD:
+ case HA_EXTRA_FLUSH:
+ DBUG_RETURN(loop_extra(operation));
+
+ /* Category 2), used by non-MyISAM handlers */
+ case HA_EXTRA_IGNORE_DUP_KEY:
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ case HA_EXTRA_RETRIEVE_ALL_COLS:
+ case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+ case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ {
+ if (!m_myisam)
+ DBUG_RETURN(loop_extra(operation));
+ break;
+ }
+
+ /* Category 3), used by MyISAM handlers */
+ case HA_EXTRA_NORMAL:
+ case HA_EXTRA_QUICK:
+ case HA_EXTRA_NO_READCHECK:
+ case HA_EXTRA_PREPARE_FOR_UPDATE:
+ case HA_EXTRA_PREPARE_FOR_DELETE:
+ case HA_EXTRA_FORCE_REOPEN:
+ {
+ if (m_myisam)
+ DBUG_RETURN(loop_extra(operation));
+ break;
+ }
+ case HA_EXTRA_CACHE:
+ {
+ prepare_extra_cache(0);
+ break;
+ }
+ case HA_EXTRA_NO_CACHE:
+ {
+ m_extra_cache= FALSE;
+ m_extra_cache_size= 0;
+ DBUG_RETURN(loop_extra(operation));
+ }
+ default:
+ {
+ /* Temporary crash to discover what is wrong */
+ DBUG_ASSERT(0);
+ break;
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This will in the future be called instead of extra(HA_EXTRA_RESET) as this
+ is such a common call
+*/
+
+int ha_partition::reset(void)
+{
+ int result= 0, tmp;
+ handler **file;
+ DBUG_ENTER("ha_partition::reset");
+ file= m_file;
+ do
+ {
+ if ((tmp= (*file)->reset()))
+ result= tmp;
+ } while (*(++file));
+ DBUG_RETURN(result);
+}
+
+
+int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
+{
+ DBUG_ENTER("ha_partition::extra_opt()");
+ DBUG_ASSERT(HA_EXTRA_CACHE == operation);
+ prepare_extra_cache(cachesize);
+ DBUG_RETURN(0);
+}
+
+
+void ha_partition::prepare_extra_cache(uint cachesize)
+{
+ DBUG_ENTER("ha_partition::prepare_extra_cache()");
+
+ m_extra_cache= TRUE;
+ m_extra_cache_size= cachesize;
+ if (m_part_spec.start_part != NO_CURRENT_PART_ID)
+ {
+ DBUG_ASSERT(m_part_spec.start_part == 0);
+ late_extra_cache(0);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::loop_extra(enum ha_extra_function operation)
+{
+ int result= 0, tmp;
+ handler **file;
+ DBUG_ENTER("ha_partition::loop_extra()");
+ for (file= m_file; *file; file++)
+ {
+ if ((tmp= (*file)->extra(operation)))
+ result= tmp;
+ }
+ DBUG_RETURN(result);
+}
+
+
+void ha_partition::late_extra_cache(uint partition_id)
+{
+ handler *file;
+ DBUG_ENTER("ha_partition::late_extra_cache");
+ if (!m_extra_cache)
+ DBUG_VOID_RETURN;
+ file= m_file[partition_id];
+ if (m_extra_cache_size == 0)
+ VOID(file->extra(HA_EXTRA_CACHE));
+ else
+ VOID(file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size));
+ DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::late_extra_no_cache(uint partition_id)
+{
+ handler *file;
+ DBUG_ENTER("ha_partition::late_extra_no_cache");
+ if (!m_extra_cache)
+ DBUG_VOID_RETURN;
+ file= m_file[partition_id];
+ VOID(file->extra(HA_EXTRA_NO_CACHE));
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ MODULE optimiser support
+****************************************************************************/
+
+const key_map *ha_partition::keys_to_use_for_scanning()
+{
+ DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
+ DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
+}
+
+double ha_partition::scan_time()
+{
+ double scan_time= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::scan_time");
+
+ for (file= m_file; *file; file++)
+ scan_time+= (*file)->scan_time();
+ DBUG_RETURN(scan_time);
+}
+
+
+/*
+  This will be optimised later to include whether or not the index can
+  be used with partitioning. To achieve this we need to add another
+  parameter that specifies how many of the index fields are bound in the
+  ranges. Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+ DBUG_ENTER("ha_partition::read_time");
+ DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+}
+
+/*
+  Given a starting key and an ending key, estimate the number of rows that
+  will exist between the two. end_key may be empty, in which case we
+  determine whether start_key matches any rows.
+
+ Called from opt_range.cc by check_quick_keys().
+
+ monty: MUST be called for each range and added.
+ Note that MySQL will assume that if this returns 0 there is no
+ matching rows for the range!
+*/
+
+ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
+{
+ ha_rows in_range= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::records_in_range");
+
+ file= m_file;
+ do
+ {
+ in_range+= (*file)->records_in_range(inx, min_key, max_key);
+ } while (*(++file));
+ DBUG_RETURN(in_range);
+}
+
+
+ha_rows ha_partition::estimate_rows_upper_bound()
+{
+ ha_rows rows, tot_rows= 0;
+ handler **file;
+ DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
+
+ file= m_file;
+ do
+ {
+ rows= (*file)->estimate_rows_upper_bound();
+ if (rows == HA_POS_ERROR)
+ DBUG_RETURN(HA_POS_ERROR);
+ tot_rows+= rows;
+ } while (*(++file));
+ DBUG_RETURN(tot_rows);
+}
+
+
+uint8 ha_partition::table_cache_type()
+{
+ DBUG_ENTER("ha_partition::table_cache_type");
+ DBUG_RETURN(m_file[0]->table_cache_type());
+}
+
+
+/****************************************************************************
+ MODULE print messages
+****************************************************************************/
+
+const char *ha_partition::index_type(uint inx)
+{
+ DBUG_ENTER("ha_partition::index_type");
+ DBUG_RETURN(m_file[0]->index_type(inx));
+}
+
+
+void ha_partition::print_error(int error, myf errflag)
+{
+ DBUG_ENTER("ha_partition::print_error");
+ /* Should probably look for my own errors first */
+ /* monty: needs to be called for the last used partition ! */
+ m_file[0]->print_error(error, errflag);
+ DBUG_VOID_RETURN;
+}
+
+
+bool ha_partition::get_error_message(int error, String *buf)
+{
+ DBUG_ENTER("ha_partition::get_error_message");
+ /* Should probably look for my own errors first */
+ /* monty: needs to be called for the last used partition ! */
+ DBUG_RETURN(m_file[0]->get_error_message(error, buf));
+}
+
+
+/****************************************************************************
+ MODULE handler characteristics
+****************************************************************************/
+/*
+  If frm_error() is called then we will use this to find out what file
+ extensions exist for the storage engine. This is also used by the default
+ rename_table and delete_table method in handler.cc.
+*/
+
+static const char *ha_partition_ext[]=
+{
+ ha_par_ext, NullS
+};
+
+const char **ha_partition::bas_ext() const
+{ return ha_partition_ext; }
+
+
+uint ha_partition::min_of_the_max_uint(uint (handler::*operator_func)(void) const) const
+{
+ handler **file;
+ uint min_of_the_max= ((*m_file)->*operator_func)();
+
+ for (file= m_file+1; *file; file++)
+ {
+ uint tmp= ((*file)->*operator_func)();
+ set_if_smaller(min_of_the_max, tmp);
+ }
+ return min_of_the_max;
+}
+
+
+uint ha_partition::max_supported_key_parts() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_parts);
+}
+
+
+uint ha_partition::max_supported_key_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_length);
+}
+
+
+uint ha_partition::max_supported_key_part_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_key_part_length);
+}
+
+
+uint ha_partition::max_supported_record_length() const
+{
+ return min_of_the_max_uint(&handler::max_supported_record_length);
+}
+
+
+uint ha_partition::max_supported_keys() const
+{
+ return min_of_the_max_uint(&handler::max_supported_keys);
+}
+
+
+uint ha_partition::extra_rec_buf_length() const
+{
+ handler **file;
+ uint max= (*m_file)->extra_rec_buf_length();
+  for (file= m_file + 1; *file; file++)
+ if (max < (*file)->extra_rec_buf_length())
+ max= (*file)->extra_rec_buf_length();
+ return max;
+}
+
+
+uint ha_partition::min_record_length(uint options) const
+{
+ handler **file;
+ uint max= (*m_file)->min_record_length(options);
+  for (file= m_file + 1; *file; file++)
+ if (max < (*file)->min_record_length(options))
+ max= (*file)->min_record_length(options);
+ return max;
+}
+
+
+/****************************************************************************
+ MODULE compare records
+****************************************************************************/
+/*
+ We get two references and need to check if those records are the same.
+ If they belong to different partitions we decide that they are not
+ the same record. Otherwise we use the particular handler to decide if
+ they are the same. Sort in partition id order if not equal.
+*/
+
+int ha_partition::cmp_ref(const byte *ref1, const byte *ref2)
+{
+ uint part_id;
+ my_ptrdiff_t diff1, diff2;
+ handler *file;
+ DBUG_ENTER("ha_partition::cmp_ref");
+ if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
+ {
+ part_id= get_part_id_from_pos(ref1);
+ file= m_file[part_id];
+ DBUG_ASSERT(part_id < m_tot_parts);
+ DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
+ (ref2 + PARTITION_BYTES_IN_POS)));
+ }
+ diff1= ref2[1] - ref1[1];
+ diff2= ref2[0] - ref1[0];
+ if (diff1 > 0)
+ {
+ DBUG_RETURN(-1);
+ }
+ if (diff1 < 0)
+ {
+ DBUG_RETURN(+1);
+ }
+ if (diff2 > 0)
+ {
+ DBUG_RETURN(-1);
+ }
+ DBUG_RETURN(+1);
+}
+
+
+/****************************************************************************
+ MODULE auto increment
+****************************************************************************/
+
+void ha_partition::restore_auto_increment()
+{
+ DBUG_ENTER("ha_partition::restore_auto_increment");
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ This method is called by update_auto_increment which in turn is called
+ by the individual handlers as part of write_row. We will always let
+ the first handler keep track of the auto increment value for all
+ partitions.
+*/
+
+ulonglong ha_partition::get_auto_increment()
+{
+ DBUG_ENTER("ha_partition::get_auto_increment");
+ DBUG_RETURN(m_file[0]->get_auto_increment());
+}
+
+
+/****************************************************************************
+ MODULE initialise handler for HANDLER call
+****************************************************************************/
+
+void ha_partition::init_table_handle_for_HANDLER()
+{
+ return;
+}
+
+
+/****************************************************************************
+ MODULE Partition Share
+****************************************************************************/
+/*
+ Service routines for ... methods.
+-------------------------------------------------------------------------
+ Variables for partition share methods. A hash used to track open tables.
+ A mutex for the hash table and an init variable to check if hash table
+ is initialised.
+ There is also a constant ending of the partition handler file name.
+*/
+
+#ifdef NOT_USED
+static HASH partition_open_tables;
+static pthread_mutex_t partition_mutex;
+static int partition_init= 0;
+
+
+/*
+ Function we use in the creation of our hash to get key.
+*/
+static byte *partition_get_key(PARTITION_SHARE *share, uint *length,
+ my_bool not_used __attribute__ ((unused)))
+{
+ *length= share->table_name_length;
+ return (byte *) share->table_name;
+}
+
+/*
+  Example of simple lock controls. The "share" it creates is a structure we
+ will pass to each partition handler. Do you have to have one of these?
+ Well, you have pieces that are used for locking, and they are needed to
+ function.
+*/
+
+
+static PARTITION_SHARE *get_share(const char *table_name, TABLE *table)
+{
+ PARTITION_SHARE *share;
+ uint length;
+ char *tmp_name;
+
+ /*
+ So why does this exist? There is no way currently to init a storage
+ engine.
+ Innodb and BDB both have modifications to the server to allow them to
+ do this. Since you will not want to do this, this is probably the next
+ best method.
+ */
+ if (!partition_init)
+ {
+ /* Hijack a mutex for init'ing the storage engine */
+ pthread_mutex_lock(&LOCK_mysql_create_db);
+ if (!partition_init)
+ {
+ partition_init++;
+ VOID(pthread_mutex_init(&partition_mutex, MY_MUTEX_INIT_FAST));
+ (void) hash_init(&partition_open_tables, system_charset_info, 32, 0, 0,
+ (hash_get_key) partition_get_key, 0, 0);
+ }
+ pthread_mutex_unlock(&LOCK_mysql_create_db);
+ }
+ pthread_mutex_lock(&partition_mutex);
+ length= (uint) strlen(table_name);
+
+ if (!(share= (PARTITION_SHARE *) hash_search(&partition_open_tables,
+ (byte *) table_name, length)))
+ {
+ if (!(share= (PARTITION_SHARE *)
+ my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
+ &share, sizeof(*share),
+ &tmp_name, length + 1, NullS)))
+ {
+ pthread_mutex_unlock(&partition_mutex);
+ return NULL;
+ }
+
+ share->use_count= 0;
+ share->table_name_length= length;
+ share->table_name= tmp_name;
+ strmov(share->table_name, table_name);
+ if (my_hash_insert(&partition_open_tables, (byte *) share))
+ goto error;
+ thr_lock_init(&share->lock);
+ pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
+ }
+ share->use_count++;
+ pthread_mutex_unlock(&partition_mutex);
+
+ return share;
+
+error:
+ pthread_mutex_unlock(&partition_mutex);
+ my_free((gptr) share, MYF(0));
+
+ return NULL;
+}
+
+
+/*
+ Free lock controls. We call this whenever we close a table. If the table
+ had the last reference to the share then we free memory associated with
+ it.
+*/
+
+static int free_share(PARTITION_SHARE *share)
+{
+ pthread_mutex_lock(&partition_mutex);
+ if (!--share->use_count)
+ {
+ hash_delete(&partition_open_tables, (byte *) share);
+ thr_lock_delete(&share->lock);
+ pthread_mutex_destroy(&share->mutex);
+ my_free((gptr) share, MYF(0));
+ }
+ pthread_mutex_unlock(&partition_mutex);
+
+ return 0;
+}
+#endif /* NOT_USED */
+#endif /* HAVE_PARTITION_DB */
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
new file mode 100644
index 00000000000..858bf09ecaa
--- /dev/null
+++ b/sql/ha_partition.h
@@ -0,0 +1,922 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifdef __GNUC__
+#pragma interface /* gcc class implementation */
+#endif
+
+/*
+  PARTITION_SHARE is a structure that will be shared among all open handlers.
+  The partition handler implements the minimum of what you will probably need.
+*/
+
+typedef struct st_partition_share
+{
+ char *table_name;
+ uint table_name_length, use_count;
+ pthread_mutex_t mutex;
+ THR_LOCK lock;
+} PARTITION_SHARE;
+
+
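+/*
+  In the ref/pos buffer the underlying handler's ref is prefixed by the
+  partition id, stored with int2store; this constant is the number of
+  bytes reserved for that prefix (two bytes, so at most 65536 partitions
+  can be addressed through a position).
+*/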
+#define PARTITION_BYTES_IN_POS 2
+class ha_partition :public handler
+{
+private:
+ enum partition_index_scan_type
+ {
+ partition_index_read= 0,
+ partition_index_first= 1,
+ partition_index_last= 2,
+ partition_no_index_scan= 3
+ };
+ /* Data for the partition handler */
+ char *m_file_buffer; // Buffer with names
+ char *m_name_buffer_ptr; // Pointer to first partition name
+ uchar *m_engine_array; // Array of types of the handlers
+ handler **m_file; // Array of references to handler inst.
+ partition_info *m_part_info; // local reference to partition
+ byte *m_start_key_ref; // Reference of start key in current
+ // index scan info
+ Field **m_part_field_array; // Part field array locally to save acc
+ byte *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
+ KEY *m_curr_key_info; // Current index
+ byte *m_rec0; // table->record[0]
+ QUEUE queue; // Prio queue used by sorted read
+ /*
+ Since the partition handler is a handler on top of other handlers, it
+ is necessary to keep information about what the underlying handler
+    characteristics are. It is not possible to keep any handler instances
+    for this since the MySQL Server sometimes allocates handler objects
+    without freeing them.
+ */
+ u_long m_table_flags;
+ u_long m_low_byte_first;
+
+  uint m_tot_parts; // Total number of partitions
+  uint m_last_part; // Last file that we update/write
+ int m_lock_type; // Remembers type of last
+ // external_lock
+ part_id_range m_part_spec; // Which parts to scan
+ uint m_scan_value; // Value passed in rnd_init
+ // call
+ uint m_ref_length; // Length of position in this
+ // handler object
+ key_range m_start_key; // index read key range
+ enum partition_index_scan_type m_index_scan_type;// What type of index
+ // scan
+ uint m_top_entry; // Which partition is to
+ // deliver next result
+ uint m_rec_length; // Local copy of record length
+
+ bool m_ordered; // Ordered/Unordered index scan
+ bool m_has_transactions; // Can we support transactions
+ bool m_pkey_is_clustered; // Is primary key clustered
+ bool m_create_handler; // Handler used to create table
+ bool m_is_sub_partitioned; // Is subpartitioned
+ bool m_ordered_scan_ongoing;
+ bool m_use_bit_array;
+
+ /*
+    We keep track of whether all underlying handlers are MyISAM since
+    MyISAM has a great number of extra flags not needed by other handlers.
+ */
+ bool m_myisam; // Are all underlying handlers
+ // MyISAM
+ /*
+ We keep track of InnoDB handlers below since it requires proper setting
+ of query_id in fields at index_init and index_read calls.
+ */
+ bool m_innodb; // Are all underlying handlers
+ // InnoDB
+ /*
+    When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying
+    handlers immediately. Instead we cache it and call the underlying
+    handler immediately before starting the scan on a partition. This is to
+    prevent allocating a READ CACHE for each partition in parallel when
+    performing a full table scan on a MyISAM partitioned table.
+    This state is cleared by extra(HA_EXTRA_NO_CACHE).
+ */
+ bool m_extra_cache;
+ uint m_extra_cache_size;
+
+ void init_handler_variables();
+ /*
+ Variables for lock structures.
+ */
+ THR_LOCK_DATA lock; /* MySQL lock */
+ PARTITION_SHARE *share; /* Shared lock info */
+
+public:
+ /*
+ -------------------------------------------------------------------------
+ MODULE create/delete handler object
+ -------------------------------------------------------------------------
+    Object create/delete methods. The normal constructor is called when a
+    table object exists. There is also a constructor that creates the
+    handler object with only partition information. This is used from
+    mysql_create_table when the table is to be created and the engine type
+    is deduced to be the partition handler.
+ -------------------------------------------------------------------------
+ */
+ ha_partition(TABLE * table);
+ ha_partition(partition_info * part_info);
+ ~ha_partition();
+ /*
+ A partition handler has no characteristics in itself. It only inherits
+    those from the underlying handlers. Here we set up those constants to
+    enable later calls of the methods to retrieve constants from the under-
+    lying handlers. Returns TRUE if not successful.
+ */
+ int ha_initialise();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE meta data changes
+ -------------------------------------------------------------------------
+    Meta-data routines to CREATE, DROP and RENAME a table, often also used
+    at ALTER TABLE (update_create_info is used from ALTER TABLE and SHOW ..).
+
+    update_table_comment is used in SHOW TABLE commands to give the
+    handler a chance to add any interesting comments to the table
+    comments beyond those provided by the user's comment.
+
+    create_handler_files is called before opening a new handler object
+    with openfrm to call create. It is used to create any local handler
+    objects needed when opening the object in openfrm.
+ -------------------------------------------------------------------------
+ */
+ virtual int delete_table(const char *from);
+ virtual int rename_table(const char *from, const char *to);
+ virtual int create(const char *name, TABLE * form,
+ HA_CREATE_INFO * create_info);
+ virtual int create_handler_files(const char *name);
+ virtual void update_create_info(HA_CREATE_INFO * create_info);
+ virtual char *update_table_comment(const char *comment);
+ virtual int drop_partitions(const char *path);
+private:
+ /*
+    delete_table, rename_table and create use very similar logic, which
+    is packed into this routine.
+ */
+ uint del_ren_cre_table(const char *from,
+ const char *to= NULL,
+ TABLE * table_arg= NULL,
+ HA_CREATE_INFO * create_info= NULL);
+ /*
+    One method to create the table_name.par file containing the names of
+    the underlying partitions, their engines and the number of partitions,
+    and one method to read it back in.
+ */
+ bool create_handler_file(const char *name);
+ bool get_from_handler_file(const char *name);
+ bool new_handlers_from_part_info();
+ bool create_handlers();
+ void clear_handler_file();
+ void set_up_table_before_create(TABLE * table_arg, HA_CREATE_INFO * info,
+ uint part_id);
+ partition_element *find_partition_element(uint part_id);
+public:
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE open/close object
+ -------------------------------------------------------------------------
+    Open and close handler object to ensure that all underlying files and
+    objects allocated and deallocated for query handling are handled
+    properly.
+    -------------------------------------------------------------------------
+
+    A handler object is opened as part of its initialisation and before
+    being used for normal queries (though not always before meta-data
+    changes). If the object was opened it will also be closed before being
+    deleted.
+ */
+ virtual int open(const char *name, int mode, uint test_if_locked);
+ virtual int close(void);
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE start/end statement
+ -------------------------------------------------------------------------
+ This module contains methods that are used to understand start/end of
+ statements, transaction boundaries, and aid for proper concurrency
+ control.
+ The partition handler need not implement abort and commit since this
+ will be handled by any underlying handlers implementing transactions.
+ There is only one call to each handler type involved per transaction
+ and these go directly to the handlers supporting transactions
+    (currently InnoDB, BDB and NDB).
+ -------------------------------------------------------------------------
+ */
+ virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+ enum thr_lock_type lock_type);
+ virtual int external_lock(THD * thd, int lock_type);
+ /*
+ When table is locked a statement is started by calling start_stmt
+ instead of external_lock
+ */
+ virtual int start_stmt(THD * thd);
+ /*
+    Lock count is the number of locked underlying handlers (I assume).
+ */
+ virtual uint lock_count(void) const;
+ /*
+    Called to unlock rows that are not to be updated in the transaction.
+ */
+ virtual void unlock_row();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE change record
+ -------------------------------------------------------------------------
+    This part of the handler interface is used to change the records
+    after INSERT, DELETE, UPDATE and REPLACE method calls but also for
+    other special meta-data operations such as ALTER TABLE, LOAD DATA and
+    TRUNCATE.
+    -------------------------------------------------------------------------
+
+    These methods are used for insert (write_row), update (update_row)
+    and delete (delete_row). All methods to change data always work on
+    one row at a time. update_row and delete_row also receive the old
+    row.
+    delete_all_rows will delete all rows in the table in one call as a
+    special optimisation for DELETE FROM table;
+
+    Bulk inserts are supported if all underlying handlers support them.
+    start_bulk_insert and end_bulk_insert are called before and after a
+    number of calls to write_row. This is not implemented yet, though.
+ */
+ virtual int write_row(byte * buf);
+ virtual int update_row(const byte * old_data, byte * new_data);
+ virtual int delete_row(const byte * buf);
+ virtual int delete_all_rows(void);
+ virtual void start_bulk_insert(ha_rows rows);
+ virtual int end_bulk_insert();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE full table scan
+ -------------------------------------------------------------------------
+ This module is used for the most basic access method for any table
+ handler. This is to fetch all data through a full table scan. No
+ indexes are needed to implement this part.
+ It contains one method to start the scan (rnd_init) that can also be
+ called multiple times (typical in a nested loop join). Then proceeding
+ to the next record (rnd_next) and closing the scan (rnd_end).
+ To remember a record for later access there is a method (position)
+ and there is a method used to retrieve the record based on the stored
+ position.
+    The position can be a file position, a primary key or a ROWID, depending
+    on the handler below.
+ -------------------------------------------------------------------------
+ */
+ /*
+    Unlike index_init(), rnd_init() can be called two times
+    without rnd_end() in between (it only makes sense if scan=1).
+    Then the second call should prepare for the new table scan
+    (e.g. if rnd_init allocates the cursor, the second call should
+    position it at the start of the table; there is no need to
+    deallocate and allocate it again).
+ */
+ virtual int rnd_init(bool scan);
+ virtual int rnd_end();
+ virtual int rnd_next(byte * buf);
+ virtual int rnd_pos(byte * buf, byte * pos);
+ virtual void position(const byte * record);
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE index scan
+ -------------------------------------------------------------------------
+ This part of the handler interface is used to perform access through
+ indexes. The interface is defined as a scan interface but the handler
+ can also use key lookup if the index is a unique index or a primary
+ key index.
+    Index scans are mostly useful for SELECT queries but are also an
+    important part of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+ and so forth.
+    Naturally an index is needed for an index scan and indexes can be
+    either ordered or hash based. Some ordered indexes can return data in
+    order but not necessarily all of them.
+    There are many flags that define the behavior of indexes in the
+    various handlers. The methods that use these flags are found in the
+    optimizer module.
+ -------------------------------------------------------------------------
+
+ index_read is called to start a scan of an index. The find_flag defines
+ the semantics of the scan. These flags are defined in
+ include/my_base.h
+    index_read_idx is the same but also initializes the index before
+    performing the read. Thus it is similar to index_init followed
+    by index_read. This is also how we implement it.
+
+    index_read/index_read_idx also return the first row. Thus for
+ key lookups, the index_read will be the only call to the handler in
+ the index scan.
+
+ index_init initializes an index before using it and index_end does
+ any end processing needed.
+ */
+ virtual int index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ virtual int index_read_idx(byte * buf, uint idx, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ virtual int index_init(uint idx, bool sorted);
+ virtual int index_end();
+
+ /*
+ These methods are used to jump to next or previous entry in the index
+ scan. There are also methods to jump to first and last entry.
+ */
+ virtual int index_next(byte * buf);
+ virtual int index_prev(byte * buf);
+ virtual int index_first(byte * buf);
+ virtual int index_last(byte * buf);
+ virtual int index_next_same(byte * buf, const byte * key, uint keylen);
+ virtual int index_read_last(byte * buf, const byte * key, uint keylen);
+
+ /*
+    read_first_row is a virtual method but is only implemented in
+    handler.cc; no storage engine has implemented it, so neither
+    will the partition handler.
+
+ virtual int read_first_row(byte *buf, uint primary_key);
+ */
+
+ /*
+    We don't implement multi read range yet; that will come later.
+ virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+ KEY_MULTI_RANGE *ranges, uint range_count,
+ bool sorted, HANDLER_BUFFER *buffer);
+ virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+ */
+
+
+ virtual int read_range_first(const key_range * start_key,
+ const key_range * end_key,
+ bool eq_range, bool sorted);
+ virtual int read_range_next();
+
+private:
+ int common_index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ int common_first_last(byte * buf);
+ int partition_scan_set_up(byte * buf, bool idx_read_flag);
+ int handle_unordered_next(byte * buf, bool next_same);
+ int handle_unordered_scan_next_partition(byte * buf);
+ byte *queue_buf(uint part_id)
+ {
+ return (m_ordered_rec_buffer +
+ (part_id * (m_rec_length + PARTITION_BYTES_IN_POS)));
+ }
+ byte *rec_buf(uint part_id)
+ {
+ return (queue_buf(part_id) +
+ PARTITION_BYTES_IN_POS);
+ }
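+  /*
+    Illustrative layout sketch (not part of the interface): with
+    PARTITION_BYTES_IN_POS = 2 the ordered record buffer holds one entry
+    per partition:
+
+      [2 bytes partition id][m_rec_length bytes record] ...
+
+    For example, with m_rec_length = 100 partition 3 has its id bytes at
+    offset 3 * 102 and its record at offset 3 * 102 + 2, which is exactly
+    what queue_buf() and rec_buf() compute above.
+  */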
+ int handle_ordered_index_scan(byte * buf);
+ int handle_ordered_next(byte * buf, bool next_same);
+ int handle_ordered_prev(byte * buf);
+ void return_top_record(byte * buf);
+ void include_partition_fields_in_used_fields();
+public:
+ /*
+ -------------------------------------------------------------------------
+ MODULE information calls
+ -------------------------------------------------------------------------
+    These calls are used to inform the handler of specifics of the ongoing
+ scans and other actions. Most of these are used for optimisation
+ purposes.
+ -------------------------------------------------------------------------
+ */
+ virtual void info(uint);
+ virtual int extra(enum ha_extra_function operation);
+ virtual int extra_opt(enum ha_extra_function operation, ulong cachesize);
+ virtual int reset(void);
+
+private:
+ static const uint NO_CURRENT_PART_ID= 0xFFFFFFFF;
+ int loop_extra(enum ha_extra_function operation);
+ void late_extra_cache(uint partition_id);
+ void late_extra_no_cache(uint partition_id);
+ void prepare_extra_cache(uint cachesize);
+public:
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE optimiser support
+ -------------------------------------------------------------------------
+ -------------------------------------------------------------------------
+ */
+
+ /*
+ NOTE !!!!!!
+ -------------------------------------------------------------------------
+ -------------------------------------------------------------------------
+    One important part of the public handler interface that is not depicted
+    in the methods is the attribute 'records', which is defined in the base
+    class. It is read directly and is presumably set by calling
+    info(HA_STATUS_INFO).
+ -------------------------------------------------------------------------
+ */
+
+ /*
+ keys_to_use_for_scanning can probably be implemented as the
+ intersection of all underlying handlers if mixed handlers are used.
+ This method is used to derive whether an index can be used for
+ index-only scanning when performing an ORDER BY query.
+ Only called from one place in sql_select.cc
+ */
+ virtual const key_map *keys_to_use_for_scanning();
+
+ /*
+ Called in test_quick_select to determine if indexes should be used.
+ */
+ virtual double scan_time();
+
+ /*
+ The next method will never be called if you do not implement indexes.
+ */
+ virtual double read_time(uint index, uint ranges, ha_rows rows);
+ /*
+    Estimates how many records are in the given range. Used by the
+    optimiser to calculate the cost of using a particular index.
+ */
+ virtual ha_rows records_in_range(uint inx, key_range * min_key,
+ key_range * max_key);
+
+ /*
+    The upper bound of the number of records returned in a scan is the
+    sum of the upper bounds of all underlying handlers.
+ */
+ virtual ha_rows estimate_rows_upper_bound();
+
+ /*
+ table_cache_type is implemented by the underlying handler but all
+ underlying handlers must have the same implementation for it to work.
+ */
+ virtual uint8 table_cache_type();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE print messages
+ -------------------------------------------------------------------------
+    This module contains various methods that return text messages for
+    table types, index types and error messages.
+ -------------------------------------------------------------------------
+ */
+ /*
+ The name of the index type that will be used for display
+ Here we must ensure that all handlers use the same index type
+ for each index created.
+ */
+ virtual const char *index_type(uint inx);
+
+ /* The name of the table type that will be used for display purposes */
+ virtual const char *table_type() const
+ { return "PARTITION"; }
+
+ /*
+ Handler specific error messages
+ */
+ virtual void print_error(int error, myf errflag);
+ virtual bool get_error_message(int error, String * buf);
+ /*
+ -------------------------------------------------------------------------
+ MODULE handler characteristics
+ -------------------------------------------------------------------------
+ This module contains a number of methods defining limitations and
+ characteristics of the handler. The partition handler will calculate
+    these characteristics based on the underlying handlers' characteristics.
+ -------------------------------------------------------------------------
+
+ This is a list of flags that says what the storage engine
+ implements. The current table flags are documented in handler.h
+ The partition handler will support whatever the underlying handlers
+ support except when specifically mentioned below about exceptions
+ to this rule.
+
+ HA_READ_RND_SAME:
+ Not currently used. (Means that the handler supports the rnd_same() call)
+ (MyISAM, HEAP)
+
+ HA_TABLE_SCAN_ON_INDEX:
+ Used to avoid scanning full tables on an index. If this flag is set then
+ the handler always has a primary key (hidden if not defined) and this
+ index is used for scanning rather than a full table scan in all
+ situations.
+ (InnoDB, BDB, Federated)
+
+ HA_REC_NOT_IN_SEQ:
+    This flag is set for handlers that cannot guarantee that the rows are
+    returned according to incremental positions (0, 1, 2, 3...).
+    This also means that rnd_next() should return HA_ERR_RECORD_DELETED
+    if it finds a deleted row.
+    (MyISAM (not fixed length row), BDB, HEAP, NDB, InnoDB)
+
+ HA_CAN_GEOMETRY:
+    Can the storage engine handle spatial data?
+    Used to check that no spatial attributes are declared unless
+    the storage engine is capable of handling them.
+ (MyISAM)
+
+ HA_FAST_KEY_READ:
+ Setting this flag indicates that the handler is equally fast in
+ finding a row by key as by position.
+    This flag is used in a very special situation in conjunction with
+    filesort. For further explanation see the intro to init_read_record.
+ (BDB, HEAP, InnoDB)
+
+ HA_NULL_IN_KEY:
+    Are NULL values allowed in indexes?
+    If this is not allowed then it is not possible to use an index on a
+    NULLable field.
+ (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+ HA_DUPP_POS:
+    Tells that the position of the conflicting duplicate-key record is
+    stored in table->file->dupp_ref. (insert uses rnd_pos() on this to
+    find the duplicated row)
+ (MyISAM)
+
+ HA_CAN_INDEX_BLOBS:
+    Is the storage engine capable of defining an index on a prefix of
+    a BLOB attribute?
+ (BDB, Federated, MyISAM, InnoDB)
+
+ HA_AUTO_PART_KEY:
+ Auto increment fields can be part of a multi-part key. For second part
+ auto-increment keys, the auto_incrementing is done in handler.cc
+ (BDB, Federated, MyISAM, NDB)
+
+ HA_REQUIRE_PRIMARY_KEY:
+ Can't define a table without primary key (and cannot handle a table
+ with hidden primary key)
+ (No handler has this limitation currently)
+
+ HA_NOT_EXACT_COUNT:
+    Does the counter of records after the info call specify an exact
+    value or not? If it doesn't, this flag is set.
+    Only MyISAM and HEAP use an exact count.
+ (MyISAM, HEAP, BDB, InnoDB, NDB, Federated)
+
+ HA_CAN_INSERT_DELAYED:
+    Can the storage engine support delayed inserts?
+    To start with, the partition handler will not support delayed inserts.
+    Further investigation is needed.
+ (HEAP, MyISAM)
+
+ HA_PRIMARY_KEY_IN_READ_INDEX:
+ This parameter is set when the handler will also return the primary key
+ when doing read-only-key on another index.
+
+ HA_NOT_DELETE_WITH_CACHE:
+ Seems to be an old MyISAM feature that is no longer used. No handler
+ has it defined but it is checked in init_read_record.
+ Further investigation needed.
+ (No handler defines it)
+
+ HA_NO_PREFIX_CHAR_KEYS:
+    Indexes on prefixes of character fields are not allowed.
+ (NDB)
+
+ HA_CAN_FULLTEXT:
+ Does the storage engine support fulltext indexes
+ The partition handler will start by not supporting fulltext indexes.
+ (MyISAM)
+
+ HA_CAN_SQL_HANDLER:
+ Can the HANDLER interface in the MySQL API be used towards this
+ storage engine.
+ (MyISAM, InnoDB)
+
+ HA_NO_AUTO_INCREMENT:
+ Set if the storage engine does not support auto increment fields.
+ (Currently not set by any handler)
+
+ HA_HAS_CHECKSUM:
+ Special MyISAM feature. Has special SQL support in CREATE TABLE.
+ No special handling needed by partition handler.
+ (MyISAM)
+
+ HA_FILE_BASED:
+    Should file names always be in lower case (used by engines that map
+    table names to file names)?
+    Since the partition handler has a local file this flag is set.
+ (BDB, Federated, MyISAM)
+
+ HA_CAN_BIT_FIELD:
+ Is the storage engine capable of handling bit fields?
+ (MyISAM, NDB)
+
+ HA_NEED_READ_RANGE_BUFFER:
+ Is Read Multi-Range supported => need multi read range buffer
+ This parameter specifies whether a buffer for read multi range
+ is needed by the handler. Whether the handler supports this
+    feature or not depends on whether the handler implements
+ read_multi_range* calls or not. The only handler currently
+ supporting this feature is NDB so the partition handler need
+ not handle this call. There are methods in handler.cc that will
+ transfer those calls into index_read and other calls in the
+ index scan module.
+ (NDB)
+ */
+ virtual ulong alter_table_flags(void) const
+ {
+ //return HA_ONLINE_ADD_EMPTY_PARTITION + HA_ONLINE_DROP_PARTITION;
+ return HA_ONLINE_DROP_PARTITION;
+ }
+ virtual ulong table_flags() const
+ { return m_table_flags; }
+ /*
+ HA_CAN_PARTITION:
+ Used by storage engines that can handle partitioning without this
+ partition handler
+ (Partition, NDB)
+
+ HA_CAN_UPDATE_PARTITION_KEY:
+ Set if the handler can update fields that are part of the partition
+ function.
+
+ HA_CAN_PARTITION_UNIQUE:
+ Set if the handler can handle unique indexes where the fields of the
+ unique key are not part of the fields of the partition function. Thus
+ a unique key can be set on all fields.
+ */
+ virtual ulong partition_flags() const
+ { return HA_CAN_PARTITION; }
+
+ /*
+ This is a bitmap of flags that says how the storage engine
+ implements indexes. The current index flags are documented in
+ handler.h. If you do not implement indexes, just return zero
+ here.
+
+    part is the key part to check. First key part is 0.
+    If all_parts is set, MySQL wants to know the flags for the combined
+    index, up to and including 'part'.
+
+ HA_READ_NEXT:
+    Does the index support read next? This is assumed in the server
+    code and never checked, so all indexes must support this.
+ Note that the handler can be used even if it doesn't have any index.
+ (BDB, HEAP, MyISAM, Federated, NDB, InnoDB)
+
+ HA_READ_PREV:
+ Can the index be used to scan backwards.
+ (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+ HA_READ_ORDER:
+    Can the index deliver its records in index order? Typically true for
+    all ordered indexes and not true for hash indexes.
+    As a first step this is not true for the partition handler until a
+    merge sort has been implemented in it.
+    Used to set the key_map part_of_sortkey.
+    This key_map is only used to find indexes usable for resolving an ORDER
+    BY in the query. Thus in most cases index_read will work just fine
+    without ordering the result. When this flag is set it is however safe
+    to order all output started by index_read since most engines do this.
+    With read_multi_range calls there is a specific flag setting order or
+    no order, so in those cases ordering of index output can be avoided.
+ (BDB, InnoDB, HEAP, MyISAM, NDB)
+
+ HA_READ_RANGE:
+ Specify whether index can handle ranges, typically true for all
+ ordered indexes and not true for hash indexes.
+    Used by the optimiser to check if ranges (such as key >= 5) can be
+    optimised by using the index.
+ (BDB, InnoDB, NDB, MyISAM, HEAP)
+
+ HA_ONLY_WHOLE_INDEX:
+ Can't use part key searches. This is typically true for hash indexes
+ and typically not true for ordered indexes.
+ (Federated, NDB, HEAP)
+
+ HA_KEYREAD_ONLY:
+ Does the storage engine support index-only scans on this index.
+ Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD
+    Used to set the key_map keys_for_keyread and checked in the optimiser
+    for index-only scans. When doing a read under HA_EXTRA_KEYREAD the
+    handler only has to fill in the columns the key covers. If
+ HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
+ must be updated in the row.
+ (BDB, InnoDB, MyISAM)
+ */
+ virtual ulong index_flags(uint inx, uint part, bool all_parts) const
+ {
+ return m_file[0]->index_flags(inx, part, all_parts);
+ }
+
+ /*
+ extensions of table handler files
+ */
+ virtual const char **bas_ext() const;
+ /*
+ unireg.cc will call the following to make sure that the storage engine
+ can handle the data it is about to send.
+
+    Each maximum supported value is the minimum over all handlers in the
+    table.
+ */
+ uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
+ virtual uint max_supported_record_length() const;
+ virtual uint max_supported_keys() const;
+ virtual uint max_supported_key_parts() const;
+ virtual uint max_supported_key_length() const;
+ virtual uint max_supported_key_part_length() const;
+
+ /*
+ All handlers in a partitioned table must have the same low_byte_first
+ */
+ virtual bool low_byte_first() const
+ { return m_low_byte_first; }
+
+ /*
+ The extra record buffer length is the maximum needed by all handlers.
+    The minimum record length is the maximum over all involved handlers.
+ */
+ virtual uint extra_rec_buf_length() const;
+ virtual uint min_record_length(uint options) const;
+
+ /*
+    Transactions on the table are supported if all handlers below support
+ transactions.
+ */
+ virtual bool has_transactions()
+ { return m_has_transactions; }
+
+ /*
+    primary_key_is_clustered() can only return true if all underlying
+    handlers have this feature.
+ */
+ virtual bool primary_key_is_clustered()
+ { return m_pkey_is_clustered; }
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE compare records
+ -------------------------------------------------------------------------
+ cmp_ref checks if two references are the same. For most handlers this is
+    a simple memcmp of the reference. However some handlers use the primary
+    key as reference and the keys can be the same even if memcmp says the
+    bytes are different. This is due to character sets, end spaces and so
+    forth.
+    For the partition handler the reference consists of two bytes providing
+    the partition identity of the referred record, followed by the reference
+    of the underlying handler.
+    Thus cmp_ref for the partition handler always reports records in
+    different partitions as unequal and uses cmp_ref on the underlying
+    handler to check whether the rest of the reference is also the same.
+ -------------------------------------------------------------------------
+ */
+ virtual int cmp_ref(const byte * ref1, const byte * ref2);
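+  /*
+    A minimal sketch of the comparison described above, assuming the
+    partition id occupies the first two bytes of the reference (the real
+    implementation lives in ha_partition.cc and may differ in details):
+
+      int ha_partition::cmp_ref(const byte *ref1, const byte *ref2)
+      {
+        // Records in different partitions can never be the same
+        int diff= memcmp(ref1, ref2, PARTITION_BYTES_IN_POS);
+        if (diff)
+          return diff;
+        // Same partition: let that partition's handler compare the rest
+        uint part_id= uint2korr(ref1);   // assumes 2-byte korr encoding
+        return m_file[part_id]->cmp_ref(ref1 + PARTITION_BYTES_IN_POS,
+                                        ref2 + PARTITION_BYTES_IN_POS);
+      }
+  */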
+ /*
+ -------------------------------------------------------------------------
+ MODULE auto increment
+ -------------------------------------------------------------------------
+ This module is used to handle the support of auto increments.
+
+    The variable auto_increment_column_changed in the handler is used as
+    part of the handler interface. It is maintained by the parent handler
+    object and should not be touched by child handler objects (see
+    handler.cc for its use).
+ -------------------------------------------------------------------------
+ */
+ virtual void restore_auto_increment();
+ virtual ulonglong get_auto_increment();
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE initialise handler for HANDLER call
+ -------------------------------------------------------------------------
+ This method is a special InnoDB method called before a HANDLER query.
+ -------------------------------------------------------------------------
+ */
+ virtual void init_table_handle_for_HANDLER();
+
+ /*
+ The remainder of this file defines the handler methods not implemented
+ by the partition handler
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE foreign key support
+ -------------------------------------------------------------------------
+ The following methods are used to implement foreign keys as supported by
+    InnoDB. Implement this?
+    get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual
+    description of how the CREATE TABLE part that defines FOREIGN KEYs is
+    done.
+    free_foreign_key_create_info is used to free the memory area that provided
+    this description.
+ -------------------------------------------------------------------------
+
+ virtual char* get_foreign_key_create_info()
+ virtual void free_foreign_key_create_info(char* str)
+
+ virtual int get_foreign_key_list(THD *thd,
+ List<FOREIGN_KEY_INFO> *f_key_list)
+ virtual uint referenced_by_foreign_key()
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE fulltext index
+ -------------------------------------------------------------------------
+    Fulltext support is not implemented yet.
+ -------------------------------------------------------------------------
+ virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
+ virtual FT_INFO *ft_init_ext(uint flags,uint inx,const byte *key,
+ uint keylen)
+ { return NULL; }
+ virtual int ft_read(byte *buf) { return HA_ERR_WRONG_COMMAND; }
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE restart full table scan at position (MyISAM)
+ -------------------------------------------------------------------------
+ The following method is only used by MyISAM when used as
+ temporary tables in a join.
+ virtual int restart_rnd_next(byte *buf, byte *pos);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE on-line ALTER TABLE
+ -------------------------------------------------------------------------
+    These methods are in the handler interface but never used (yet).
+    They are to be used by on-line ALTER TABLE add/drop index:
+ -------------------------------------------------------------------------
+ virtual ulong index_ddl_flags(KEY *wanted_index) const
+ virtual int add_index(TABLE *table_arg,KEY *key_info,uint num_of_keys);
+ virtual int drop_index(TABLE *table_arg,uint *key_num,uint num_of_keys);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE tablespace support
+ -------------------------------------------------------------------------
+    Admin of table spaces is not applicable to the partition handler (InnoDB).
+ This means that the following method is not implemented:
+ -------------------------------------------------------------------------
+ virtual int discard_or_import_tablespace(my_bool discard)
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE admin MyISAM
+ -------------------------------------------------------------------------
+ Admin commands not supported currently (almost purely MyISAM routines)
+ This means that the following methods are not implemented:
+ -------------------------------------------------------------------------
+
+ virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
+  virtual int backup(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
+ virtual int preload_keys(THD *thd, HA_CHECK_OPT *check_opt);
+ virtual bool check_and_repair(THD *thd);
+ virtual int dump(THD* thd, int fd = -1);
+ virtual int net_read_dump(NET* net);
+ virtual uint checksum() const;
+ virtual bool is_crashed() const;
+ virtual bool auto_repair() const;
+
+ -------------------------------------------------------------------------
+ MODULE enable/disable indexes
+ -------------------------------------------------------------------------
+ Enable/Disable Indexes are not supported currently (Heap, MyISAM)
+ This means that the following methods are not implemented:
+ -------------------------------------------------------------------------
+ virtual int disable_indexes(uint mode);
+ virtual int enable_indexes(uint mode);
+ virtual int indexes_are_disabled(void);
+ */
+
+ /*
+ -------------------------------------------------------------------------
+ MODULE append_create_info
+ -------------------------------------------------------------------------
+ append_create_info is only used by MyISAM MERGE tables and the partition
+    handler will not support MyISAM MERGE as an underlying handler.
+    Implement this?
+ -------------------------------------------------------------------------
+ virtual void append_create_info(String *packet)
+ */
+};
diff --git a/sql/handler.cc b/sql/handler.cc
index 3acca812a13..3e85e73cab5 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -34,6 +34,9 @@
#ifdef HAVE_EXAMPLE_DB
#include "examples/ha_example.h"
#endif
+#ifdef HAVE_PARTITION_DB
+#include "ha_partition.h"
+#endif
#ifdef HAVE_ARCHIVE_DB
#include "examples/ha_archive.h"
#endif
@@ -170,7 +173,13 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type,
{
if (ha_storage_engine_is_enabled(database_type))
return database_type;
-
+#ifdef HAVE_PARTITION_DB
+ /*
+    The partition handler is not in the list of handlers shown since it
+    is an internal handler
+ */
+ if (database_type == DB_TYPE_PARTITION_DB)
+ return database_type;
+#endif
if (no_substitute)
{
if (report_error)
@@ -203,47 +212,66 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type,
handler *get_new_handler(TABLE *table, enum db_type db_type)
{
+ handler *file;
switch (db_type) {
#ifndef NO_HASH
case DB_TYPE_HASH:
- return new ha_hash(table);
+ file= new ha_hash(table);
+ break;
#endif
case DB_TYPE_MRG_ISAM:
- return new ha_myisammrg(table);
+ file= new ha_myisammrg(table);
+ break;
#ifdef HAVE_BERKELEY_DB
case DB_TYPE_BERKELEY_DB:
- return new ha_berkeley(table);
+ file= new ha_berkeley(table);
+ break;
#endif
#ifdef HAVE_INNOBASE_DB
case DB_TYPE_INNODB:
- return new ha_innobase(table);
+ file= new ha_innobase(table);
+ break;
#endif
#ifdef HAVE_EXAMPLE_DB
case DB_TYPE_EXAMPLE_DB:
- return new ha_example(table);
+ file= new ha_example(table);
+ break;
+#endif
+#ifdef HAVE_PARTITION_DB
+ case DB_TYPE_PARTITION_DB:
+ {
+ file= new ha_partition(table);
+ break;
+ }
#endif
#ifdef HAVE_ARCHIVE_DB
case DB_TYPE_ARCHIVE_DB:
- return new ha_archive(table);
+ file= new ha_archive(table);
+ break;
#endif
#ifdef HAVE_BLACKHOLE_DB
case DB_TYPE_BLACKHOLE_DB:
- return new ha_blackhole(table);
+ file= new ha_blackhole(table);
+ break;
#endif
#ifdef HAVE_FEDERATED_DB
case DB_TYPE_FEDERATED_DB:
- return new ha_federated(table);
+ file= new ha_federated(table);
+ break;
#endif
#ifdef HAVE_CSV_DB
case DB_TYPE_CSV_DB:
- return new ha_tina(table);
+ file= new ha_tina(table);
+ break;
#endif
#ifdef HAVE_NDBCLUSTER_DB
case DB_TYPE_NDBCLUSTER:
- return new ha_ndbcluster(table);
+ file= new ha_ndbcluster(table);
+ break;
#endif
case DB_TYPE_HEAP:
- return new ha_heap(table);
+ file= new ha_heap(table);
+ break;
default: // should never happen
{
enum db_type def=(enum db_type) current_thd->variables.table_type;
@@ -253,12 +281,46 @@ handler *get_new_handler(TABLE *table, enum db_type db_type)
}
/* Fall back to MyISAM */
case DB_TYPE_MYISAM:
- return new ha_myisam(table);
+ file= new ha_myisam(table);
+ break;
case DB_TYPE_MRG_MYISAM:
- return new ha_myisammrg(table);
+ file= new ha_myisammrg(table);
+ break;
+ }
+ if (file)
+ {
+ if (file->ha_initialise())
+ {
+ delete file;
+ file=0;
+ }
}
+ return file;
}
+
+#ifdef HAVE_PARTITION_DB
+handler *get_ha_partition(partition_info *part_info)
+{
+ ha_partition *partition;
+ DBUG_ENTER("get_ha_partition");
+ if ((partition= new ha_partition(part_info)))
+ {
+ if (partition->ha_initialise())
+ {
+ delete partition;
+ partition= 0;
+ }
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(ha_partition));
+ }
+ DBUG_RETURN(((handler*) partition));
+}
+#endif
+
+
/*
Register handler error messages for use with my_error().
@@ -1354,6 +1416,111 @@ int handler::ha_open(const char *name, int mode, int test_if_locked)
DBUG_RETURN(error);
}
+int handler::ha_initialise()
+{
+ DBUG_ENTER("ha_initialise");
+ if (table && table->s->fields &&
+ ha_allocate_read_write_set(table->s->fields))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ DBUG_RETURN(FALSE);
+}
+
+int handler::ha_allocate_read_write_set(ulong no_fields)
+{
+ uint bitmap_size= 4*(((no_fields+1)+31)/32);
+ uint32 *read_buf, *write_buf;
+#ifndef DEBUG_OFF
+ my_bool r;
+#endif
+ DBUG_ENTER("ha_allocate_read_write_set");
+ DBUG_PRINT("enter", ("no_fields = %d", no_fields));
+
+ if (table)
+ {
+ if (table->read_set == NULL)
+ {
+ read_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
+ write_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP));
+ read_buf= (uint32*)sql_alloc(bitmap_size);
+ write_buf= (uint32*)sql_alloc(bitmap_size);
+ if (!read_set || !write_set || !read_buf || !write_buf)
+ {
+ ha_deallocate_read_write_set();
+ DBUG_RETURN(TRUE);
+ }
+#ifndef DEBUG_OFF
+ r =
+#endif
+ bitmap_init(read_set, read_buf, no_fields+1, FALSE);
+ DBUG_ASSERT(!r /*bitmap_init(read_set...)*/);
+#ifndef DEBUG_OFF
+ r =
+#endif
+ bitmap_init(write_set, write_buf, no_fields+1, FALSE);
+ DBUG_ASSERT(!r /*bitmap_init(write_set...)*/);
+ table->read_set= read_set;
+ table->write_set= write_set;
+ ha_clear_all_set();
+ }
+ else
+ {
+ read_set= table->read_set;
+ write_set= table->write_set;
+ }
+ }
+ DBUG_RETURN(FALSE);
+}
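+/*
+  Worked example of the size computation above (illustrative numbers):
+  for a table with no_fields = 50, bitmap_size = 4*(((50+1)+31)/32)
+  = 4 * (82/32) = 4 * 2 = 8 bytes, i.e. two 32-bit words, enough for
+  bit 0 (which ha_clear_all_set() always sets) plus field numbers 1..50.
+*/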
+
+void handler::ha_deallocate_read_write_set()
+{
+ DBUG_ENTER("ha_deallocate_read_write_set");
+ read_set=write_set=0;
+ DBUG_VOID_RETURN;
+}
+
+void handler::ha_clear_all_set()
+{
+ DBUG_ENTER("ha_clear_all_set");
+ bitmap_clear_all(read_set);
+ bitmap_clear_all(write_set);
+ bitmap_set_bit(read_set, 0);
+ bitmap_set_bit(write_set, 0);
+ DBUG_VOID_RETURN;
+}
+
+int handler::ha_retrieve_all_cols()
+{
+ DBUG_ENTER("handler::ha_retrieve_all_cols");
+ bitmap_set_all(read_set);
+ DBUG_RETURN(0);
+}
+
+int handler::ha_retrieve_all_pk()
+{
+ DBUG_ENTER("ha_retrieve_all_pk");
+ ha_set_primary_key_in_read_set();
+ DBUG_RETURN(0);
+}
+
+void handler::ha_set_primary_key_in_read_set()
+{
+ ulong prim_key= table->s->primary_key;
+ DBUG_ENTER("handler::ha_set_primary_key_in_read_set");
+ DBUG_PRINT("info", ("Primary key = %d", prim_key));
+ if (prim_key != MAX_KEY)
+ {
+ KEY_PART_INFO *key_part= table->key_info[prim_key].key_part;
+ KEY_PART_INFO *key_part_end= key_part +
+ table->key_info[prim_key].key_parts;
+ for (;key_part != key_part_end; ++key_part)
+ ha_set_bit_in_read_set(key_part->fieldnr);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
/*
Read first row (only) from a table
This is never called for InnoDB or BDB tables, as these table types
@@ -1382,7 +1549,7 @@ int handler::read_first_row(byte * buf, uint primary_key)
else
{
/* Find the first row through the primary key */
- (void) ha_index_init(primary_key);
+ (void) ha_index_init(primary_key, 0);
error=index_first(buf);
(void) ha_index_end();
}
@@ -1566,7 +1733,7 @@ ulonglong handler::get_auto_increment()
int error;
(void) extra(HA_EXTRA_KEYREAD);
- index_init(table->s->next_number_index);
+ index_init(table->s->next_number_index, 1);
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
error=index_last(table->record[1]);
@@ -2390,7 +2557,7 @@ int handler::compare_key(key_range *range)
int handler::index_read_idx(byte * buf, uint index, const byte * key,
uint key_len, enum ha_rkey_function find_flag)
{
- int error= ha_index_init(index);
+ int error= ha_index_init(index, 0);
if (!error)
error= index_read(buf, key, key_len, find_flag);
if (!error)
diff --git a/sql/handler.h b/sql/handler.h
index 811791a498b..fca0717011f 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -89,6 +89,11 @@
#define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */
#define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30)
+/* Flags for partition handlers */
+#define HA_CAN_PARTITION (1 << 0) /* Partition support */
+#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1)
+#define HA_CAN_PARTITION_UNIQUE (1 << 2)
+
/* bits in index_flags(index_number) for what you can do with index */
#define HA_READ_NEXT 1 /* TODO really use this flag */
@@ -98,6 +103,10 @@
#define HA_ONLY_WHOLE_INDEX 16 /* Can't use part key searches */
#define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */
+/* bits in alter_table_flags */
+#define HA_ONLINE_ADD_EMPTY_PARTITION 1
+#define HA_ONLINE_DROP_PARTITION 2
+
/* operations for disable/enable indexes */
#define HA_KEY_SWITCH_NONUNIQ 0
#define HA_KEY_SWITCH_ALL 1
@@ -174,6 +183,7 @@ enum db_type
DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB,
DB_TYPE_FEDERATED_DB,
DB_TYPE_BLACKHOLE_DB,
+ DB_TYPE_PARTITION_DB,
DB_TYPE_DEFAULT // Must be last
};
@@ -220,6 +230,9 @@ typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MAXGTRIDSIZE 64
#define MAXBQUALSIZE 64
+#define COMPATIBLE_DATA_YES 0
+#define COMPATIBLE_DATA_NO 1
+
struct xid_t {
long formatID;
long gtrid_length;
@@ -379,6 +392,221 @@ typedef struct st_thd_trans
enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED,
ISO_REPEATABLE_READ, ISO_SERIALIZABLE};
+
+typedef struct {
+ uint32 start_part;
+ uint32 end_part;
+ bool use_bit_array;
+} part_id_range;
+/**
+ * An enum and a struct to handle partitioning and subpartitioning.
+ */
+enum partition_type {
+ NOT_A_PARTITION= 0,
+ RANGE_PARTITION,
+ HASH_PARTITION,
+ LIST_PARTITION
+};
+
+enum partition_state {
+ PART_NORMAL= 0,
+ PART_IS_DROPPED= 1,
+ PART_TO_BE_DROPPED= 2,
+ PART_DROPPING= 3,
+ PART_IS_ADDED= 4,
+ PART_ADDING= 5,
+ PART_ADDED= 6
+};
+
+#define UNDEF_NODEGROUP 65535
+class Item;
+
+class partition_element :public Sql_alloc {
+public:
+ List<partition_element> subpartitions;
+ List<longlong> list_val_list;
+ ulonglong part_max_rows;
+ ulonglong part_min_rows;
+ char *partition_name;
+ char *tablespace_name;
+ longlong range_value;
+ char* part_comment;
+ char* data_file_name;
+ char* index_file_name;
+ enum db_type engine_type;
+ enum partition_state part_state;
+ uint16 nodegroup_id;
+
+ partition_element()
+ : part_max_rows(0), part_min_rows(0), partition_name(NULL),
+ tablespace_name(NULL), range_value(0), part_comment(NULL),
+ data_file_name(NULL), index_file_name(NULL),
+ engine_type(DB_TYPE_UNKNOWN),part_state(PART_NORMAL),
+ nodegroup_id(UNDEF_NODEGROUP)
+ {
+ subpartitions.empty();
+ list_val_list.empty();
+ }
+ ~partition_element() {}
+};
+
+typedef struct {
+ longlong list_value;
+ uint partition_id;
+} LIST_PART_ENTRY;
+enum Item_result;
+
+class partition_info;
+
+typedef bool (*get_part_id_func)(partition_info *part_info,
+ uint32 *part_id);
+typedef uint32 (*get_subpart_id_func)(partition_info *part_info);
+
+class partition_info :public Sql_alloc {
+public:
+ /*
+ * Here comes a set of definitions needed for partitioned table handlers.
+ */
+ List<partition_element> partitions;
+ List<partition_element> temp_partitions;
+
+ List<char> part_field_list;
+ List<char> subpart_field_list;
+
+ get_part_id_func get_partition_id;
+ get_part_id_func get_part_partition_id;
+ get_subpart_id_func get_subpartition_id;
+
+ Field **part_field_array;
+ Field **subpart_field_array;
+ Field **full_part_field_array;
+
+ Item *part_expr;
+ Item *subpart_expr;
+
+ Item *item_free_list;
+
+ union {
+ longlong *range_int_array;
+ LIST_PART_ENTRY *list_array;
+ };
+ char* part_info_string;
+
+ char *part_func_string;
+ char *subpart_func_string;
+
+ partition_element *curr_part_elem;
+ partition_element *current_partition;
+ /*
+ These key_map's are used for Partitioning to enable quick decisions
+ on whether we can derive more information about which partition to
+ scan just by looking at what index is used.
+ */
+ key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF;
+ key_map some_fields_in_PF;
+
+ enum db_type default_engine_type;
+ Item_result part_result_type;
+ partition_type part_type;
+ partition_type subpart_type;
+
+ uint part_info_len;
+ uint part_func_len;
+ uint subpart_func_len;
+
+ uint no_parts;
+ uint no_subparts;
+ uint count_curr_parts;
+ uint count_curr_subparts;
+
+ uint part_error_code;
+
+ uint no_list_values;
+
+ uint no_part_fields;
+ uint no_subpart_fields;
+ uint no_full_part_fields;
+
+ uint16 linear_hash_mask;
+
+ bool use_default_partitions;
+ bool use_default_subpartitions;
+ bool defined_max_value;
+ bool list_of_part_fields;
+ bool list_of_subpart_fields;
+ bool linear_hash_ind;
+
+ partition_info()
+ : get_partition_id(NULL), get_part_partition_id(NULL),
+ get_subpartition_id(NULL),
+ part_field_array(NULL), subpart_field_array(NULL),
+ full_part_field_array(NULL),
+ part_expr(NULL), subpart_expr(NULL), item_free_list(NULL),
+ list_array(NULL),
+ part_info_string(NULL),
+ part_func_string(NULL), subpart_func_string(NULL),
+ curr_part_elem(NULL), current_partition(NULL),
+ default_engine_type(DB_TYPE_UNKNOWN),
+ part_result_type(INT_RESULT),
+ part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION),
+ part_info_len(0), part_func_len(0), subpart_func_len(0),
+ no_parts(0), no_subparts(0),
+ count_curr_parts(0), count_curr_subparts(0), part_error_code(0),
+ no_list_values(0), no_part_fields(0), no_subpart_fields(0),
+ no_full_part_fields(0), linear_hash_mask(0),
+ use_default_partitions(TRUE),
+ use_default_subpartitions(TRUE), defined_max_value(FALSE),
+ list_of_part_fields(FALSE), list_of_subpart_fields(FALSE),
+ linear_hash_ind(FALSE)
+ {
+ all_fields_in_PF.clear_all();
+ all_fields_in_PPF.clear_all();
+ all_fields_in_SPF.clear_all();
+ some_fields_in_PF.clear_all();
+ partitions.empty();
+ temp_partitions.empty();
+ part_field_list.empty();
+ subpart_field_list.empty();
+ }
+ ~partition_info() {}
+};
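+/*
+  Illustrative example (hypothetical values): for a table created with
+  PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (10),
+  PARTITION p1 VALUES LESS THAN (20)) one would expect a partition_info
+  with part_type == RANGE_PARTITION, no_parts == 2, range_int_array
+  holding { 10, 20 } and two partition_element objects in 'partitions'
+  carrying the partition names and range values.
+*/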
+
+
+#ifdef HAVE_PARTITION_DB
+/*
+  Answers the question of whether subpartitioning is used for a certain table
+ SYNOPSIS
+ is_sub_partitioned()
+ part_info A reference to the partition_info struct
+ RETURN VALUE
+    Returns true if subpartitioning is used and false otherwise
+ DESCRIPTION
+ A routine to check for subpartitioning for improved readability of code
+*/
+inline
+bool is_sub_partitioned(partition_info *part_info)
+{ return (part_info->subpart_type == NOT_A_PARTITION ? FALSE : TRUE); }
+
+
+/*
+ Returns the total number of partitions on the leaf level.
+ SYNOPSIS
+ get_tot_partitions()
+ part_info A reference to the partition_info struct
+ RETURN VALUE
+ Returns the number of partitions
+ DESCRIPTION
+    A routine to return the number of partitions, for improved readability
+ of code
+*/
+inline
+uint get_tot_partitions(partition_info *part_info)
+{
+ return part_info->no_parts *
+ (is_sub_partitioned(part_info) ? part_info->no_subparts : 1);
+}
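+
+/*
+  Usage sketch with hypothetical numbers: a table partitioned into 4
+  partitions, each subpartitioned into 2 subpartitions, has
+  no_parts == 4 and no_subparts == 2, so is_sub_partitioned() returns
+  TRUE and get_tot_partitions() returns 4 * 2 = 8 leaf partitions.
+*/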
+#endif
+
typedef struct st_ha_create_information
{
CHARSET_INFO *table_charset, *default_table_charset;
@@ -427,6 +655,38 @@ typedef struct st_ha_check_opt
} HA_CHECK_OPT;
+#ifdef HAVE_PARTITION_DB
+bool is_partition_in_list(char *part_name, List<char> list_part_names);
+bool is_partitions_in_table(partition_info *new_part_info,
+ partition_info *old_part_info);
+bool set_up_defaults_for_partitioning(partition_info *part_info,
+ handler *file,
+ ulonglong max_rows,
+ uint start_no);
+handler *get_ha_partition(partition_info *part_info);
+int get_parts_for_update(const byte *old_data, byte *new_data,
+ const byte *rec0, partition_info *part_info,
+ uint32 *old_part_id, uint32 *new_part_id);
+int get_part_for_delete(const byte *buf, const byte *rec0,
+ partition_info *part_info, uint32 *part_id);
+bool check_partition_info(partition_info *part_info,enum db_type eng_type,
+ handler *file, ulonglong max_rows);
+bool fix_partition_func(THD *thd, const char *name, TABLE *table);
+char *generate_partition_syntax(partition_info *part_info,
+ uint *buf_length, bool use_sql_alloc);
+bool partition_key_modified(TABLE *table, List<Item> &fields);
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+ const key_range *key_spec,
+ part_id_range *part_spec);
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+ KEY *key_info,
+ const key_range *key_spec,
+ part_id_range *part_spec);
+bool mysql_unpack_partition(File file, THD *thd, uint part_info_len,
+ TABLE *table);
+#endif
+
+
/*
This is a buffer area that the handler can use to store rows.
'end_of_used_area' should be kept updated after calls to
@@ -444,10 +704,13 @@ typedef struct st_handler_buffer
class handler :public Sql_alloc
{
+#ifdef HAVE_PARTITION_DB
+ friend class ha_partition;
+#endif
protected:
struct st_table *table; /* The table definition */
- virtual int index_init(uint idx) { active_index=idx; return 0; }
+ virtual int index_init(uint idx, bool sorted) { active_index=idx; return 0; }
virtual int index_end() { active_index=MAX_KEY; return 0; }
/*
rnd_init() can be called two times without rnd_end() in between
@@ -459,6 +722,8 @@ class handler :public Sql_alloc
virtual int rnd_init(bool scan) =0;
virtual int rnd_end() { return 0; }
+private:
+ virtual int reset() { return extra(HA_EXTRA_RESET); }
public:
const handlerton *ht; /* storage engine of this handler */
byte *ref; /* Pointer to current row */
@@ -501,6 +766,8 @@ public:
bool auto_increment_column_changed;
bool implicit_emptied; /* Can be !=0 only if HEAP */
const COND *pushed_cond;
+ MY_BITMAP *read_set;
+ MY_BITMAP *write_set;
handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg),
ht(ht_arg),
@@ -513,7 +780,12 @@ public:
raid_type(0), ft_handler(0), inited(NONE), implicit_emptied(0),
pushed_cond(NULL)
{}
- virtual ~handler(void) { /* TODO: DBUG_ASSERT(inited == NONE); */ }
+ virtual ~handler(void)
+ {
+ ha_deallocate_read_write_set();
+ /* TODO: DBUG_ASSERT(inited == NONE); */
+ }
+ virtual int ha_initialise();
int ha_open(const char *name, int mode, int test_if_locked);
bool update_auto_increment();
virtual void print_error(int error, myf errflag);
@@ -526,7 +798,7 @@ public:
{ return rows2double(ranges+rows); }
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
virtual bool has_transactions(){ return 0;}
- virtual uint extra_rec_buf_length() { return 0; }
+ virtual uint extra_rec_buf_length() const { return 0; }
/*
Return upper bound of current number of records in the table
@@ -545,12 +817,12 @@ public:
virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}
- int ha_index_init(uint idx)
+ int ha_index_init(uint idx, bool sorted)
{
DBUG_ENTER("ha_index_init");
DBUG_ASSERT(inited==NONE);
inited=INDEX;
- DBUG_RETURN(index_init(idx));
+ DBUG_RETURN(index_init(idx, sorted));
}
int ha_index_end()
{
@@ -573,11 +845,140 @@ public:
inited=NONE;
DBUG_RETURN(rnd_end());
}
+ int ha_reset()
+ {
+ DBUG_ENTER("ha_reset");
+ ha_clear_all_set();
+ DBUG_RETURN(reset());
+ }
+
/* this is necessary in many places, e.g. in HANDLER command */
int ha_index_or_rnd_end()
{
return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0;
}
+ /*
+ These are a set of routines used to enable handlers to only read/write
+ partial lists of the fields in the table. The bit vector is maintained
+ by the server part and is used by the handler at calls to read/write
+ data in the table.
+ It replaces the use of query id's for this purpose. The benefit is that
+ the handler can also set bits in the read/write set if it has special
+ needs and it is also easy for other parts of the server to interact
+ with the handler (e.g. the replication part for row-level logging).
+    The routines are all part of the general handler and cannot be
+    overridden by a handler. A handler can however set/reset bits by
+ calling these routines.
+
+ The methods ha_retrieve_all_cols and ha_retrieve_all_pk are made
+ virtual to handle InnoDB specifics. If InnoDB doesn't need the
+ extra parameters HA_EXTRA_RETRIEVE_ALL_COLS and
+ HA_EXTRA_RETRIEVE_PRIMARY_KEY anymore then these methods need not be
+ virtual anymore.
+ */
+ virtual int ha_retrieve_all_cols();
+ virtual int ha_retrieve_all_pk();
+ void ha_set_all_bits_in_read_set()
+ {
+ DBUG_ENTER("ha_set_all_bits_in_read_set");
+ bitmap_set_all(read_set);
+ DBUG_VOID_RETURN;
+ }
+ void ha_set_all_bits_in_write_set()
+ {
+ DBUG_ENTER("ha_set_all_bits_in_write_set");
+ bitmap_set_all(write_set);
+ DBUG_VOID_RETURN;
+ }
+ void ha_set_bit_in_read_set(uint fieldnr)
+ {
+ DBUG_ENTER("ha_set_bit_in_read_set");
+ DBUG_PRINT("info", ("fieldnr = %d", fieldnr));
+ bitmap_set_bit(read_set, fieldnr);
+ DBUG_VOID_RETURN;
+ }
+ void ha_clear_bit_in_read_set(uint fieldnr)
+ {
+ DBUG_ENTER("ha_clear_bit_in_read_set");
+ DBUG_PRINT("info", ("fieldnr = %d", fieldnr));
+ bitmap_clear_bit(read_set, fieldnr);
+ DBUG_VOID_RETURN;
+ }
+ void ha_set_bit_in_write_set(uint fieldnr)
+ {
+ DBUG_ENTER("ha_set_bit_in_write_set");
+ DBUG_PRINT("info", ("fieldnr = %d", fieldnr));
+ bitmap_set_bit(write_set, fieldnr);
+ DBUG_VOID_RETURN;
+ }
+ void ha_clear_bit_in_write_set(uint fieldnr)
+ {
+ DBUG_ENTER("ha_clear_bit_in_write_set");
+ DBUG_PRINT("info", ("fieldnr = %d", fieldnr));
+ bitmap_clear_bit(write_set, fieldnr);
+ DBUG_VOID_RETURN;
+ }
+ void ha_set_bit_in_rw_set(uint fieldnr, bool write_op)
+ {
+ DBUG_ENTER("ha_set_bit_in_rw_set");
+ DBUG_PRINT("info", ("Set bit %u in read set", fieldnr));
+ bitmap_set_bit(read_set, fieldnr);
+ if (!write_op) {
+ DBUG_VOID_RETURN;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Set bit %u in read and write set", fieldnr));
+ bitmap_set_bit(write_set, fieldnr);
+ }
+ DBUG_VOID_RETURN;
+ }
+ bool ha_get_bit_in_read_set(uint fieldnr)
+ {
+ bool bit_set=bitmap_is_set(read_set,fieldnr);
+ DBUG_ENTER("ha_get_bit_in_read_set");
+ DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set));
+ DBUG_RETURN(bit_set);
+ }
+ bool ha_get_bit_in_write_set(uint fieldnr)
+ {
+ bool bit_set=bitmap_is_set(write_set,fieldnr);
+ DBUG_ENTER("ha_get_bit_in_write_set");
+ DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set));
+ DBUG_RETURN(bit_set);
+ }
+ bool ha_get_all_bit_in_read_set()
+ {
+ bool all_bits_set= bitmap_is_set_all(read_set);
+ DBUG_ENTER("ha_get_all_bit_in_read_set");
+ DBUG_PRINT("info", ("all bits set = %u", all_bits_set));
+ DBUG_RETURN(all_bits_set);
+ }
+ bool ha_get_all_bit_in_read_clear()
+ {
+ bool all_bits_set= bitmap_is_clear_all(read_set);
+ DBUG_ENTER("ha_get_all_bit_in_read_clear");
+ DBUG_PRINT("info", ("all bits clear = %u", all_bits_set));
+ DBUG_RETURN(all_bits_set);
+ }
+ bool ha_get_all_bit_in_write_set()
+ {
+ bool all_bits_set= bitmap_is_set_all(write_set);
+ DBUG_ENTER("ha_get_all_bit_in_write_set");
+ DBUG_PRINT("info", ("all bits set = %u", all_bits_set));
+ DBUG_RETURN(all_bits_set);
+ }
+ bool ha_get_all_bit_in_write_clear()
+ {
+ bool all_bits_set= bitmap_is_clear_all(write_set);
+ DBUG_ENTER("ha_get_all_bit_in_write_clear");
+ DBUG_PRINT("info", ("all bits clear = %u", all_bits_set));
+ DBUG_RETURN(all_bits_set);
+ }
+ void ha_set_primary_key_in_read_set();
+ int ha_allocate_read_write_set(ulong no_fields);
+ void ha_deallocate_read_write_set();
+ void ha_clear_all_set();
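+  /*
+    A hedged usage sketch of the routines above: server code that knows a
+    statement will read field number 3 and update field number 5 could
+    prime the sets as follows (field numbers are hypothetical):
+
+      file->ha_clear_all_set();             // start from empty sets
+      file->ha_set_bit_in_read_set(3);      // field 3 will be read
+      file->ha_set_bit_in_rw_set(5, TRUE);  // field 5 is read and written
+
+    after which a handler honouring the sets may skip all other fields.
+  */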
uint get_index(void) const { return active_index; }
virtual int open(const char *name, int mode, uint test_if_locked)=0;
virtual int close(void)=0;
@@ -586,6 +987,85 @@ public:
{ return HA_ERR_WRONG_COMMAND; }
virtual int delete_row(const byte * buf)
{ return HA_ERR_WRONG_COMMAND; }
+ /*
+ SYNOPSIS
+ start_bulk_update()
+ RETURN
+ 0 Bulk update used by handler
+ 1 Bulk update not used, normal operation used
+ */
+ virtual bool start_bulk_update() { return 1; }
+ /*
+ SYNOPSIS
+ start_bulk_delete()
+ RETURN
+ 0 Bulk delete used by handler
+ 1 Bulk delete not used, normal operation used
+ */
+ virtual bool start_bulk_delete() { return 1; }
+ /*
+ SYNOPSIS
+ This method is similar to update_row, however the handler doesn't need
+ to execute the updates at this point in time. The handler can be certain
+ that another call to bulk_update_row will occur OR a call to
+ exec_bulk_update before the set of updates in this query is concluded.
+
+ bulk_update_row()
+ old_data Old record
+ new_data New record
+ dup_key_found Number of duplicate keys found
+ RETURN
+ 0 Bulk delete used by handler
+ 1 Bulk delete not used, normal operation used
+ */
+ virtual int bulk_update_row(const byte *old_data, byte *new_data,
+ uint *dup_key_found)
+ {
+ DBUG_ASSERT(FALSE);
+ return HA_ERR_WRONG_COMMAND;
+ }
+ /*
+ SYNOPSIS
+ After this call all outstanding updates must be performed. The number
+    of duplicate key errors is reported in the dup_key_found parameter.
+    It is allowed to continue the batched update after this call; the
+    handler has to wait until end_bulk_update before changing state.
+
+ exec_bulk_update()
+ dup_key_found Number of duplicate keys found
+ RETURN
+ 0 Success
+ >0 Error code
+ */
+ virtual int exec_bulk_update(uint *dup_key_found)
+ {
+ DBUG_ASSERT(FALSE);
+ return HA_ERR_WRONG_COMMAND;
+ }
+ /*
+ SYNOPSIS
+    Perform any needed clean-up; no outstanding updates remain at this
+    point.
+
+ end_bulk_update()
+ RETURN
+ Nothing
+ */
+ virtual void end_bulk_update() { return; }
+ /*
+ SYNOPSIS
+ Execute all outstanding deletes and close down the bulk delete.
+
+ end_bulk_delete()
+ RETURN
+ 0 Success
+ >0 Error code
+ */
+ virtual int end_bulk_delete()
+ {
+ DBUG_ASSERT(FALSE);
+ return HA_ERR_WRONG_COMMAND;
+ }
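+  /*
+    Sketch of the intended call sequence for a batched UPDATE, pieced
+    together from the synopses above (hypothetical driver code):
+
+      uint dup_keys= 0;
+      if (!file->start_bulk_update())          // 0 => handler batches
+      {
+        // for each qualifying row:
+        file->bulk_update_row(old_data, new_data, &dup_keys);
+        // ...
+        file->exec_bulk_update(&dup_keys);     // flush outstanding updates
+        file->end_bulk_update();               // final clean-up
+      }
+      else
+        ;                                      // fall back to update_row()
+  */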
virtual int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag)
{ return HA_ERR_WRONG_COMMAND; }
@@ -636,7 +1116,6 @@ public:
{ return 0; }
virtual int extra_opt(enum ha_extra_function operation, ulong cache_size)
{ return extra(operation); }
- virtual int reset() { return extra(HA_EXTRA_RESET); }
virtual int external_lock(THD *thd, int lock_type) { return 0; }
virtual void unlock_row() {}
virtual int start_stmt(THD *thd) {return 0;}
@@ -698,6 +1177,20 @@ public:
virtual char *update_table_comment(const char * comment)
{ return (char*) comment;}
virtual void append_create_info(String *packet) {}
+ /*
+ SYNOPSIS
+ is_fk_defined_on_table_or_index()
+ index Index to check if foreign key uses it
+ RETURN VALUE
+ TRUE Foreign key defined on table or index
+ FALSE No foreign key defined
+    DESCRIPTION
+      If index == MAX_KEY, the check is made for the table as a whole.
+      If index < MAX_KEY, the check is whether the table has foreign
+      keys and whether a foreign key uses this index (in which case the
+      index cannot be dropped).
+ */
+ virtual bool is_fk_defined_on_table_or_index(uint index)
+ { return FALSE; }
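
A sketch of the intended use in ALTER TABLE processing; the surrounding
check and the error reporting are assumptions, not taken from this patch:

  /* Sketch: refuse to drop an index that a foreign key depends on */
  if (table->file->is_fk_defined_on_table_or_index(key_to_drop))
  {
    my_error(ER_DROP_INDEX_FK, MYF(0), key_name);  /* assumed error call */
    DBUG_RETURN(TRUE);
  }
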
virtual char* get_foreign_key_create_info()
{ return(NULL);} /* gets foreign key create string from InnoDB */
/* used in ALTER TABLE; 1 if changing storage engine is allowed */
@@ -713,6 +1206,11 @@ public:
virtual const char *table_type() const =0;
virtual const char **bas_ext() const =0;
virtual ulong table_flags(void) const =0;
+ virtual ulong alter_table_flags(void) const { return 0; }
+#ifdef HAVE_PARTITION_DB
+ virtual ulong partition_flags(void) const { return 0;}
+ virtual int get_default_no_partitions(ulonglong max_rows) { return 1;}
+#endif
virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0;
virtual ulong index_ddl_flags(KEY *wanted_index) const
{ return (HA_DDL_SUPPORT); }
@@ -752,7 +1250,21 @@ public:
virtual int delete_table(const char *name);
virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0;
+ virtual int create_handler_files(const char *name) { return FALSE;}
+ /*
+ SYNOPSIS
+ drop_partitions()
+ path Complete path of db and table name
+    RETURN VALUE
+      >0               Error code
+      0                Success
+    DESCRIPTION
+      Drop a partition; during this operation no other activity on the
+      table is ongoing in this server.
+ */
+ virtual int drop_partitions(const char *path)
+ { return HA_ERR_WRONG_COMMAND; }
/* lock_count() can be more than one if the table is a MERGE */
virtual uint lock_count(void) const { return 1; }
virtual THR_LOCK_DATA **store_lock(THD *thd,
@@ -816,6 +1328,9 @@ public:
Pops the top if condition stack, if stack is not empty
*/
virtual void cond_pop() { return; };
+ virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
+ uint table_changes)
+ { return COMPATIBLE_DATA_NO; }
};
/* Some extern variables used with handlers */
diff --git a/sql/item.cc b/sql/item.cc
index e7da646ae73..1cb6734d373 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -3407,13 +3407,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
set_field(from_field);
}
- else if (thd->set_query_id && field->query_id != thd->query_id)
+ else if (thd->set_query_id)
{
- /* We only come here in unions */
- TABLE *table=field->table;
- field->query_id=thd->query_id;
- table->used_fields++;
- table->used_keys.intersect(field->part_of_key);
+ TABLE *table= field->table;
+ table->file->ha_set_bit_in_rw_set(field->fieldnr,
+ (bool)(thd->set_query_id-1));
+ if (field->query_id != thd->query_id)
+ {
+ /* We only come here in unions */
+ field->query_id=thd->query_id;
+ table->used_fields++;
+ table->used_keys.intersect(field->part_of_key);
+ }
}
#ifndef NO_EMBEDDED_ACCESS_CHECKS
if (any_privileges)
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 52a74b6f4c6..aead2d67c2c 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1603,7 +1603,7 @@ int subselect_uniquesubquery_engine::exec()
}
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 0);
error= table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
@@ -1656,7 +1656,7 @@ int subselect_indexsubquery_engine::exec()
}
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 1);
error= table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
diff --git a/sql/key.cc b/sql/key.cc
index 4bd71d2fa47..731df134efa 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -429,3 +429,86 @@ int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length)
}
return 0; // Keys are equal
}
+
+
+/*
+  Compare two records in index order
+  SYNOPSIS
+    key_rec_cmp()
+    key                 Index information
+    first_rec           Pointer to first record to compare
+    second_rec          Pointer to record to compare against first_rec
+  DESCRIPTION
+    This method is set up such that it can be called directly from the
+    priority queue, and it is optimised as much as possible since it
+    will be called O(N * log N) times while performing a merge sort in
+    various places in the code.
+
+    We retrieve the pointer to table->record[0] using the fact that
+    key_parts have an offset that makes it possible to calculate the
+    start of the record. We then compute the offset of each compared
+    record relative to it, since neither of the records being compared
+    is stored in table->record[0].
+
+    We first check for NULL values; if there are no NULL values we use
+    a compare method that takes two field pointers and a max length
+    and returns the result of the comparison.
+*/
+
+int key_rec_cmp(void *key, byte *first_rec, byte *second_rec)
+{
+ KEY *key_info= (KEY*)key;
+ uint key_parts= key_info->key_parts, i= 0;
+ KEY_PART_INFO *key_part= key_info->key_part;
+ char *rec0= key_part->field->ptr - key_part->offset;
+ my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0;
+ int result= 0;
+ DBUG_ENTER("key_rec_cmp");
+
+ do
+ {
+ Field *field= key_part->field;
+ uint length;
+
+ if (key_part->null_bit)
+ {
+ /* The key_part can contain NULL values */
+ bool first_is_null= field->is_null_in_record_with_offset(first_diff);
+ bool sec_is_null= field->is_null_in_record_with_offset(sec_diff);
+      /*
+        NULL is smaller than everything, so if the first is NULL and the
+        other is not we should return -1, and in the opposite case we
+        should return +1. If both are NULL we call it equality, although
+        it is a strange form of equality: we have equally little
+        information about the real values.
+      */
+ if (!first_is_null)
+ {
+ if (!sec_is_null)
+ ; /* Fall through, no NULL fields */
+ else
+ {
+ DBUG_RETURN(+1);
+ }
+ }
+ else if (!sec_is_null)
+ {
+ DBUG_RETURN(-1);
+ }
+ else
+ goto next_loop; /* Both were NULL */
+ }
+ /*
+ No null values in the fields
+ We use the virtual method cmp_max with a max length parameter.
+ For most field types this translates into a cmp without
+ max length. The exceptions are the BLOB and VARCHAR field types
+ that take the max length into account.
+ */
+ result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
+ key_part->length);
+next_loop:
+ key_part++;
+ } while (!result && ++i < key_parts);
+ DBUG_RETURN(result);
+}
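
Because key_rec_cmp matches the compare-function signature used by the
mysys priority queue, a handler that merge-sorts ordered streams (for
example, one per partition) can plug it in directly. A minimal sketch,
assuming key_info and the stream count are set up by the caller:

  /* Sketch: ordered merge using key_rec_cmp as the queue comparator */
  QUEUE queue;
  init_queue(&queue, no_of_streams, 0, 0 /* smallest on top */,
             key_rec_cmp, (void*) key_info);
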
diff --git a/sql/lex.h b/sql/lex.h
index a5366742fd9..9b8f94f61bf 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -110,6 +110,7 @@ static SYMBOL symbols[] = {
{ "CIPHER", SYM(CIPHER_SYM)},
{ "CLIENT", SYM(CLIENT_SYM)},
{ "CLOSE", SYM(CLOSE_SYM)},
+ { "COALESCE", SYM(COALESCE)},
{ "COLLATE", SYM(COLLATE_SYM)},
{ "COLLATION", SYM(COLLATION_SYM)},
{ "COLUMN", SYM(COLUMN_SYM)},
@@ -274,11 +275,14 @@ static SYMBOL symbols[] = {
{ "LEAVE", SYM(LEAVE_SYM)},
{ "LEAVES", SYM(LEAVES)},
{ "LEFT", SYM(LEFT)},
+ { "LESS", SYM(LESS_SYM)},
{ "LEVEL", SYM(LEVEL_SYM)},
{ "LIKE", SYM(LIKE)},
{ "LIMIT", SYM(LIMIT)},
+ { "LINEAR", SYM(LINEAR_SYM)},
{ "LINES", SYM(LINES)},
{ "LINESTRING", SYM(LINESTRING)},
+ { "LIST", SYM(LIST_SYM)},
{ "LOAD", SYM(LOAD)},
{ "LOCAL", SYM(LOCAL_SYM)},
{ "LOCALTIME", SYM(NOW_SYM)},
@@ -312,6 +316,7 @@ static SYMBOL symbols[] = {
{ "MAX_ROWS", SYM(MAX_ROWS)},
{ "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)},
{ "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)},
+ { "MAXVALUE", SYM(MAX_VALUE_SYM)},
{ "MEDIUM", SYM(MEDIUM_SYM)},
{ "MEDIUMBLOB", SYM(MEDIUMBLOB)},
{ "MEDIUMINT", SYM(MEDIUMINT)},
@@ -343,6 +348,7 @@ static SYMBOL symbols[] = {
{ "NEW", SYM(NEW_SYM)},
{ "NEXT", SYM(NEXT_SYM)},
{ "NO", SYM(NO_SYM)},
+ { "NODEGROUP", SYM(NODEGROUP_SYM)},
{ "NONE", SYM(NONE_SYM)},
{ "NOT", SYM(NOT_SYM)},
{ "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)},
@@ -365,6 +371,10 @@ static SYMBOL symbols[] = {
{ "OUTFILE", SYM(OUTFILE)},
{ "PACK_KEYS", SYM(PACK_KEYS_SYM)},
{ "PARTIAL", SYM(PARTIAL)},
+#ifdef HAVE_PARTITION_DB
+ { "PARTITION", SYM(PARTITION_SYM)},
+#endif
+ { "PARTITIONS", SYM(PARTITIONS_SYM)},
{ "PASSWORD", SYM(PASSWORD)},
{ "PHASE", SYM(PHASE_SYM)},
{ "POINT", SYM(POINT_SYM)},
@@ -385,6 +395,7 @@ static SYMBOL symbols[] = {
{ "RAID_CHUNKS", SYM(RAID_CHUNKS)},
{ "RAID_CHUNKSIZE", SYM(RAID_CHUNKSIZE)},
{ "RAID_TYPE", SYM(RAID_TYPE)},
+ { "RANGE", SYM(RANGE_SYM)},
{ "READ", SYM(READ_SYM)},
{ "READS", SYM(READS_SYM)},
{ "REAL", SYM(REAL)},
@@ -398,6 +409,7 @@ static SYMBOL symbols[] = {
{ "RELEASE", SYM(RELEASE_SYM)},
{ "RELOAD", SYM(RELOAD)},
{ "RENAME", SYM(RENAME)},
+ { "REORGANISE", SYM(REORGANISE_SYM)},
{ "REPAIR", SYM(REPAIR)},
{ "REPEATABLE", SYM(REPEATABLE_SYM)},
{ "REPLACE", SYM(REPLACE)},
@@ -476,6 +488,8 @@ static SYMBOL symbols[] = {
{ "STRING", SYM(STRING_SYM)},
{ "STRIPED", SYM(RAID_STRIPED_SYM)},
{ "SUBJECT", SYM(SUBJECT_SYM)},
+ { "SUBPARTITION", SYM(SUBPARTITION_SYM)},
+ { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)},
{ "SUPER", SYM(SUPER_SYM)},
{ "SUSPEND", SYM(SUSPEND_SYM)},
{ "TABLE", SYM(TABLE_SYM)},
@@ -485,6 +499,7 @@ static SYMBOL symbols[] = {
{ "TEMPTABLE", SYM(TEMPTABLE_SYM)},
{ "TERMINATED", SYM(TERMINATED)},
{ "TEXT", SYM(TEXT_SYM)},
+ { "THAN", SYM(THAN_SYM)},
{ "THEN", SYM(THEN_SYM)},
{ "TIME", SYM(TIME_SYM)},
{ "TIMESTAMP", SYM(TIMESTAMP)},
@@ -576,7 +591,6 @@ static SYMBOL sql_functions[] = {
{ "CENTROID", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_centroid)},
{ "CHAR_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)},
{ "CHARACTER_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)},
- { "COALESCE", SYM(COALESCE)},
{ "COERCIBILITY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_coercibility)},
{ "COMPRESS", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_compress)},
{ "CONCAT", SYM(CONCAT)},
diff --git a/sql/lock.cc b/sql/lock.cc
index 941d7baa76e..0aba7fddb51 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -72,7 +72,7 @@ TODO:
#ifndef MASTER
#include "../srclib/myisammrg/myrg_def.h"
#else
-#include "../myisammrg/myrg_def.h"
+#include "../storage/myisammrg/myrg_def.h"
#endif
static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count,
@@ -211,7 +211,6 @@ static int lock_external(THD *thd, TABLE **tables, uint count)
((*tables)->reginfo.lock_type >= TL_READ &&
(*tables)->reginfo.lock_type <= TL_READ_NO_INSERT))
lock_type=F_RDLCK;
-
if ((error=(*tables)->file->external_lock(thd,lock_type)))
{
print_lock_error(error, (*tables)->file->table_type());
diff --git a/sql/log.cc b/sql/log.cc
index 920a3fcff42..9d9f500fe80 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -24,6 +24,7 @@
#include "mysql_priv.h"
#include "sql_repl.h"
+#include "rpl_filter.h"
#include <my_dir.h>
#include <stdarg.h>
@@ -1613,10 +1614,11 @@ bool MYSQL_LOG::write(Log_event *event_info)
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
*/
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
- (!db_ok(local_db, binlog_do_db, binlog_ignore_db)))
+ (!binlog_filter->db_ok(local_db)))
{
VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_PRINT("error",("!db_ok('%s')", local_db));
+ DBUG_PRINT("info",("db_ok('%s')==%d", local_db,
+ binlog_filter->db_ok(local_db)));
DBUG_RETURN(0);
}
#endif /* HAVE_REPLICATION */
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 5cb4c289a10..7ee505939f0 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -23,6 +23,7 @@
#include "mysql_priv.h"
#include "slave.h"
+#include "rpl_filter.h"
#include <my_dir.h>
#endif /* MYSQL_CLIENT */
@@ -1535,7 +1536,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query
*/
thd->catalog= catalog_len ? (char *) catalog : (char *)"";
thd->db_length= db_len;
- thd->db= (char*) rewrite_db(db, &thd->db_length);
+ thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length);
thd->variables.auto_increment_increment= auto_increment_increment;
thd->variables.auto_increment_offset= auto_increment_offset;
@@ -1564,7 +1565,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query
::exec_event(), then the companion SET also have so we
don't need to reset_one_shot_variables().
*/
- if (db_ok(thd->db, replicate_do_db, replicate_ignore_db))
+ if (rpl_filter->db_ok(thd->db))
{
thd->set_time((time_t)when);
thd->query_length= q_len_arg;
@@ -2685,7 +2686,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
bool use_rli_only_for_errors)
{
thd->db_length= db_len;
- thd->db= (char*) rewrite_db(db, &thd->db_length);
+ thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length);
DBUG_ASSERT(thd->query == 0);
thd->query_length= 0; // Should not be needed
thd->query_error= 0;
@@ -2724,7 +2725,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
::exec_event(), then the companion SET also have so we
don't need to reset_one_shot_variables().
*/
- if (db_ok(thd->db, replicate_do_db, replicate_ignore_db))
+ if (rpl_filter->db_ok(thd->db))
{
thd->set_time((time_t)when);
VOID(pthread_mutex_lock(&LOCK_thread_count));
@@ -2746,7 +2747,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
tables.updating= 1;
// the table will be opened in mysql_load
- if (table_rules_on && !tables_ok(thd, &tables))
+ if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db, &tables))
{
// TODO: this is a bug - this needs to be moved to the I/O thread
if (net)
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 0bddf92e6aa..497382d8c96 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -398,6 +398,13 @@ void debug_sync_point(const char* lock_name, uint lock_timeout);
#define STRING_BUFFER_USUAL_SIZE 80
+/*
+  Exit codes for the ::is_equal family of functions.
+*/
+#define IS_EQUAL_NO 0
+#define IS_EQUAL_YES 1
+#define IS_EQUAL_PACK_LENGTH 2
+
enum enum_parsing_place
{
NO_MATTER,
@@ -626,6 +633,22 @@ bool check_table_access(THD *thd, ulong want_access, TABLE_LIST *tables,
bool no_errors);
bool check_global_access(THD *thd, ulong want_access);
+/*
+ Support routine for SQL parser on partitioning syntax
+*/
+my_bool is_partition_management(LEX *lex);
+/*
+  General routines to change field->ptr of a NULL-terminated array of Field
+  objects. Useful when val_int, val_str or similar must be called and the
+  field data is not in table->record[0] but in some other structure.
+  set_key_field_ptr changes all fields of an index using a key_info object.
+  Both routines presume that there is at least one field to change.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf, const byte *old_buf);
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+ const byte *old_buf);
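
A minimal sketch of the intended usage pattern (the buffer name is an
assumption): point the fields at an alternate record buffer, read the
values, then restore the original pointers.

  /* Sketch: evaluate a field against a buffer other than record[0] */
  set_field_ptr(table->field, other_buf, table->record[0]);
  longlong val= table->field[0]->val_int();        /* reads other_buf */
  set_field_ptr(table->field, table->record[0], other_buf);
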
+
bool mysql_backup_table(THD* thd, TABLE_LIST* table_list);
bool mysql_restore_table(THD* thd, TABLE_LIST* table_list);
@@ -786,6 +809,10 @@ Field *
find_field_in_table(THD *thd, TABLE *table, const char *name,
uint length, bool check_grants, bool allow_rowid,
uint *cached_field_index_ptr);
+
+Field *
+find_field_in_table_sef(TABLE *table, const char *name);
+
#ifdef HAVE_OPENSSL
#include <openssl/des.h>
struct st_des_keyblock
@@ -921,10 +948,10 @@ bool setup_tables(THD *thd, Name_resolution_context *context,
int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
List<Item> *sum_func_list, uint wild_num);
bool setup_fields(THD *thd, Item** ref_pointer_array,
- List<Item> &item, bool set_query_id,
+ List<Item> &item, ulong set_query_id,
List<Item> *sum_func_list, bool allow_sum_func);
inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array,
- List<Item> &item, bool set_query_id,
+ List<Item> &item, ulong set_query_id,
List<Item> *sum_func_list,
bool allow_sum_func)
{
@@ -1044,6 +1071,7 @@ bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length);
void key_unpack(String *to,TABLE *form,uint index);
bool check_if_key_used(TABLE *table, uint idx, List<Item> &fields);
int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length);
+int key_rec_cmp(void *key_info, byte *a, byte *b);
bool init_errmessage(void);
void sql_perror(const char *message);
@@ -1204,7 +1232,6 @@ extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
extern String null_string;
extern HASH open_cache;
extern TABLE *unused_tables;
-extern I_List<i_string> binlog_do_db, binlog_ignore_db;
extern const char* any_db;
extern struct my_option my_long_options[];
extern const LEX_STRING view_type;
@@ -1221,6 +1248,7 @@ extern SHOW_COMP_OPTION have_query_cache;
extern SHOW_COMP_OPTION have_geometry, have_rtree_keys;
extern SHOW_COMP_OPTION have_crypt;
extern SHOW_COMP_OPTION have_compress;
+extern SHOW_COMP_OPTION have_partition_db;
#ifndef __WIN__
extern pthread_t signal_thread;
@@ -1274,7 +1302,7 @@ int rea_create_table(THD *thd, my_string file_name,
const char *db, const char *table,
HA_CREATE_INFO *create_info,
List<create_field> &create_field,
- uint key_count,KEY *key_info);
+ uint key_count,KEY *key_info, handler *file);
int format_number(uint inputflag,uint max_length,my_string pos,uint length,
my_string *errpos);
int openfrm(THD *thd, const char *name,const char *alias,uint filestat,
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index f7e9b21076e..aaed7b64377 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -19,6 +19,7 @@
#include <my_dir.h>
#include "slave.h"
#include "sql_repl.h"
+#include "rpl_filter.h"
#include "repl_failsafe.h"
#include "stacktrace.h"
#include "mysqld_suffix.h"
@@ -362,6 +363,12 @@ my_bool opt_ndb_shm, opt_ndb_optimized_node_selection;
ulong opt_ndb_cache_check_time;
const char *opt_ndb_mgmd;
ulong opt_ndb_nodeid;
+
+const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS};
+TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1,
+ "", ndb_distribution_names, NULL };
+const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH];
+enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH;
#endif
my_bool opt_readonly, use_temp_pool, relay_log_purge;
my_bool opt_sync_frm, opt_allow_suspicious_udfs;
@@ -450,12 +457,10 @@ FILE *bootstrap_file;
int bootstrap_error;
FILE *stderror_file=0;
-I_List<i_string_pair> replicate_rewrite_db;
-I_List<i_string> replicate_do_db, replicate_ignore_db;
-// allow the user to tell us which db to replicate and which to ignore
-I_List<i_string> binlog_do_db, binlog_ignore_db;
I_List<THD> threads;
I_List<NAMED_LIST> key_caches;
+Rpl_filter* rpl_filter;
+Rpl_filter* binlog_filter;
struct system_variables global_system_variables;
struct system_variables max_system_variables;
@@ -470,6 +475,7 @@ CHARSET_INFO *national_charset_info, *table_alias_charset;
SHOW_COMP_OPTION have_berkeley_db, have_innodb, have_isam, have_ndbcluster,
have_example_db, have_archive_db, have_csv_db;
SHOW_COMP_OPTION have_federated_db;
+SHOW_COMP_OPTION have_partition_db;
SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache;
SHOW_COMP_OPTION have_geometry, have_rtree_keys;
SHOW_COMP_OPTION have_crypt, have_compress;
@@ -1084,12 +1090,9 @@ void clean_up(bool print_message)
free_max_user_conn();
#ifdef HAVE_REPLICATION
end_slave_list();
- free_list(&replicate_do_db);
- free_list(&replicate_ignore_db);
- free_list(&binlog_do_db);
- free_list(&binlog_ignore_db);
- free_list(&replicate_rewrite_db);
#endif
+ delete binlog_filter;
+ delete rpl_filter;
#ifdef HAVE_OPENSSL
if (ssl_acceptor_fd)
my_free((gptr) ssl_acceptor_fd, MYF(MY_ALLOW_ZERO_PTR));
@@ -3111,6 +3114,15 @@ int main(int argc, char **argv)
#endif
{
DEBUGGER_OFF;
+
+ rpl_filter= new Rpl_filter;
+ binlog_filter= new Rpl_filter;
+ if (!rpl_filter || !binlog_filter)
+ {
+ sql_perror("Could not allocate replication and binlog filters");
+ exit(1);
+ }
+
MY_INIT(argv[0]); // init my_sys library & pthreads
#ifdef _CUSTOMSTARTUPCONFIG_
@@ -3466,7 +3478,6 @@ default_service_handling(char **argv,
int main(int argc, char **argv)
{
-
/*
When several instances are running on the same machine, we
need to have an unique named hEventShudown through the
@@ -4323,6 +4334,7 @@ enum options_mysqld
OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ,
OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME,
OPT_NDB_MGMD, OPT_NDB_NODEID,
+ OPT_NDB_DISTRIBUTION,
OPT_SKIP_SAFEMALLOC,
OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE,
OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS,
@@ -4409,6 +4421,7 @@ enum options_mysqld
OPT_ENABLE_SHARED_MEMORY,
OPT_SHARED_MEMORY_BASE_NAME,
OPT_OLD_PASSWORDS,
+ OPT_OLD_ALTER_TABLE,
OPT_EXPIRE_LOGS_DAYS,
OPT_GROUP_CONCAT_MAX_LEN,
OPT_DEFAULT_COLLATION,
@@ -4895,6 +4908,11 @@ Disable with --skip-ndbcluster (will save memory).",
(gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz,
(gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz,
0, GET_ULONG, REQUIRED_ARG, 32, 1, 256, 0, 0, 0},
+ {"ndb-distibution", OPT_NDB_DISTRIBUTION,
+ "Default distribution for new tables in ndb",
+ (gptr*) &opt_ndb_distribution,
+ (gptr*) &opt_ndb_distribution,
+ 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"ndb-force-send", OPT_NDB_FORCE_SEND,
"Force send of buffers to ndb immediately without waiting for "
"other threads.",
@@ -4941,6 +4959,11 @@ Disable with --skip-ndbcluster (will save memory).",
(gptr*) &opt_no_mix_types, (gptr*) &opt_no_mix_types, 0, GET_BOOL, NO_ARG,
0, 0, 0, 0, 0, 0},
#endif
+ {"old-alter-table", OPT_OLD_ALTER_TABLE,
+ "Use old, non-optimized alter table.",
+ (gptr*) &global_system_variables.old_alter_table,
+ (gptr*) &max_system_variables.old_alter_table, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
{"old-passwords", OPT_OLD_PASSWORDS, "Use old password encryption method (needed for 4.0 and older clients).",
(gptr*) &global_system_variables.old_passwords,
(gptr*) &max_system_variables.old_passwords, 0, GET_BOOL, NO_ARG,
@@ -6105,13 +6128,6 @@ static void mysql_init_variables(void)
exit(1);
multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */
- /* Initialize structures that is used when processing options */
- replicate_rewrite_db.empty();
- replicate_do_db.empty();
- replicate_ignore_db.empty();
- binlog_do_db.empty();
- binlog_ignore_db.empty();
-
/* Set directory paths */
strmake(language, LANGUAGE, sizeof(language)-1);
strmake(mysql_real_data_home, get_relative_path(DATADIR),
@@ -6144,6 +6160,7 @@ static void mysql_init_variables(void)
global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
global_system_variables.old_passwords= 0;
+ global_system_variables.old_alter_table= 0;
/* Variables that depends on compile options */
#ifndef DBUG_OFF
@@ -6167,6 +6184,11 @@ static void mysql_init_variables(void)
#else
have_example_db= SHOW_OPTION_NO;
#endif
+#ifdef HAVE_PARTITION_DB
+ have_partition_db= SHOW_OPTION_YES;
+#else
+ have_partition_db= SHOW_OPTION_NO;
+#endif
#ifdef HAVE_ARCHIVE_DB
have_archive_db= SHOW_OPTION_YES;
#else
@@ -6357,14 +6379,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
}
case (int)OPT_REPLICATE_IGNORE_DB:
{
- i_string *db = new i_string(argument);
- replicate_ignore_db.push_back(db);
+ rpl_filter->add_ignore_db(argument);
break;
}
case (int)OPT_REPLICATE_DO_DB:
{
- i_string *db = new i_string(argument);
- replicate_do_db.push_back(db);
+ rpl_filter->add_do_db(argument);
break;
}
case (int)OPT_REPLICATE_REWRITE_DB:
@@ -6397,71 +6417,54 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
exit(1);
}
- i_string_pair *db_pair = new i_string_pair(key, val);
- replicate_rewrite_db.push_back(db_pair);
+ rpl_filter->add_db_rewrite(key, val);
break;
}
case (int)OPT_BINLOG_IGNORE_DB:
{
- i_string *db = new i_string(argument);
- binlog_ignore_db.push_back(db);
+ binlog_filter->add_ignore_db(argument);
break;
}
case (int)OPT_BINLOG_DO_DB:
{
- i_string *db = new i_string(argument);
- binlog_do_db.push_back(db);
+ binlog_filter->add_do_db(argument);
break;
}
case (int)OPT_REPLICATE_DO_TABLE:
{
- if (!do_table_inited)
- init_table_rule_hash(&replicate_do_table, &do_table_inited);
- if (add_table_rule(&replicate_do_table, argument))
+ if (rpl_filter->add_do_table(argument))
{
fprintf(stderr, "Could not add do table rule '%s'!\n", argument);
exit(1);
}
- table_rules_on = 1;
break;
}
case (int)OPT_REPLICATE_WILD_DO_TABLE:
{
- if (!wild_do_table_inited)
- init_table_rule_array(&replicate_wild_do_table,
- &wild_do_table_inited);
- if (add_wild_table_rule(&replicate_wild_do_table, argument))
+ if (rpl_filter->add_wild_do_table(argument))
{
fprintf(stderr, "Could not add do table rule '%s'!\n", argument);
exit(1);
}
- table_rules_on = 1;
break;
}
case (int)OPT_REPLICATE_WILD_IGNORE_TABLE:
{
- if (!wild_ignore_table_inited)
- init_table_rule_array(&replicate_wild_ignore_table,
- &wild_ignore_table_inited);
- if (add_wild_table_rule(&replicate_wild_ignore_table, argument))
+ if (rpl_filter->add_wild_ignore_table(argument))
{
fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument);
exit(1);
}
- table_rules_on = 1;
break;
}
case (int)OPT_REPLICATE_IGNORE_TABLE:
{
- if (!ignore_table_inited)
- init_table_rule_hash(&replicate_ignore_table, &ignore_table_inited);
- if (add_table_rule(&replicate_ignore_table, argument))
+ if (rpl_filter->add_ignore_table(argument))
{
fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument);
exit(1);
}
- table_rules_on = 1;
break;
}
#endif /* HAVE_REPLICATION */
@@ -6699,6 +6702,20 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
opt_ndb_constrbuf[opt_ndb_constrbuf_len]= 0;
opt_ndbcluster_connectstring= opt_ndb_constrbuf;
break;
+ case OPT_NDB_DISTRIBUTION:
+ int id;
+ if ((id= find_type(argument, &ndb_distribution_typelib, 2)) <= 0)
+ {
+ fprintf(stderr,
+ "Unknown ndb distribution type: '%s' "
+ "(should be '%s' or '%s')\n",
+ argument,
+ ndb_distribution_names[ND_KEYHASH],
+ ndb_distribution_names[ND_LINHASH]);
+ exit(1);
+ }
+ opt_ndb_distribution_id= (enum ndb_distribution)(id-1);
+ break;
#endif
case OPT_INNODB:
#ifdef HAVE_INNOBASE_DB
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index cb250251155..b69822d201b 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -751,7 +751,7 @@ int QUICK_RANGE_SELECT::init()
DBUG_ENTER("QUICK_RANGE_SELECT::init");
if (file->inited == handler::NONE)
- DBUG_RETURN(error= file->ha_index_init(index));
+ DBUG_RETURN(error= file->ha_index_init(index, 1));
error= 0;
DBUG_RETURN(0);
}
@@ -778,9 +778,10 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
free_file));
- file->reset();
+ file->ha_reset();
file->external_lock(current_thd, F_UNLCK);
file->close();
+ delete file;
}
}
delete_dynamic(&ranges); /* ranges are allocated in alloc */
@@ -916,7 +917,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
{
DBUG_PRINT("info", ("Reusing handler %p", file));
if (file->extra(HA_EXTRA_KEYREAD) ||
- file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) ||
+ file->ha_retrieve_all_pk() ||
init() || reset())
{
DBUG_RETURN(1);
@@ -944,7 +945,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
goto failure;
if (file->extra(HA_EXTRA_KEYREAD) ||
- file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) ||
+ file->ha_retrieve_all_pk() ||
init() || reset())
{
file->external_lock(thd, F_UNLCK);
@@ -956,6 +957,8 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
DBUG_RETURN(0);
failure:
+ if (file)
+ delete file;
file= save_file;
DBUG_RETURN(1);
}
@@ -1562,9 +1565,10 @@ static int fill_used_fields_bitmap(PARAM *param)
{
TABLE *table= param->table;
param->fields_bitmap_size= (table->s->fields/8 + 1);
- uchar *tmp;
+ uint32 *tmp;
uint pk;
- if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) ||
+ if (!(tmp= (uint32*)alloc_root(param->mem_root,
+ bytes_word_aligned(param->fields_bitmap_size))) ||
bitmap_init(&param->needed_fields, tmp, param->fields_bitmap_size*8,
FALSE))
return 1;
@@ -2307,7 +2311,7 @@ static
ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
{
ROR_SCAN_INFO *ror_scan;
- uchar *bitmap_buf;
+ uint32 *bitmap_buf;
uint keynr;
DBUG_ENTER("make_ror_scan");
@@ -2322,8 +2326,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ror_scan->sel_arg= sel_arg;
ror_scan->records= param->table->quick_rows[keynr];
- if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root,
- param->fields_bitmap_size)))
+ if (!(bitmap_buf= (uint32*)alloc_root(param->mem_root,
+ bytes_word_aligned(param->fields_bitmap_size))))
DBUG_RETURN(NULL);
if (bitmap_init(&ror_scan->covered_fields, bitmap_buf,
@@ -2437,12 +2441,13 @@ static
ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
{
ROR_INTERSECT_INFO *info;
- uchar* buf;
+ uint32* buf;
if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
sizeof(ROR_INTERSECT_INFO))))
return NULL;
info->param= param;
- if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size)))
+ if (!(buf= (uint32*)alloc_root(param->mem_root,
+ bytes_word_aligned(param->fields_bitmap_size))))
return NULL;
if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
FALSE))
@@ -2459,7 +2464,7 @@ void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
{
dst->param= src->param;
memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
- src->covered_fields.bitmap_size);
+ no_bytes_in_map(&src->covered_fields));
dst->out_rows= src->out_rows;
dst->is_covering= src->is_covering;
dst->index_records= src->index_records;
@@ -3001,9 +3006,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
/*I=set of all covering indexes */
ror_scan_mark= tree->ror_scans;
- uchar buf[MAX_KEY/8+1];
+ uint32 int_buf[MAX_KEY/32+1];
MY_BITMAP covered_fields;
- if (bitmap_init(&covered_fields, buf, nbits, FALSE))
+ if (bitmap_init(&covered_fields, int_buf, nbits, FALSE))
DBUG_RETURN(0);
bitmap_clear_all(&covered_fields);
@@ -5767,7 +5772,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
(This also creates a deficiency - it is possible that we will retrieve
parts of key that are not used by current query at all.)
*/
- if (head->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY))
+ if (head->file->ha_retrieve_all_pk())
DBUG_RETURN(1);
cur_quick_it.rewind();
@@ -6037,7 +6042,7 @@ int QUICK_RANGE_SELECT::reset()
in_range= FALSE;
cur_range= (QUICK_RANGE**) ranges.buffer;
- if (file->inited == handler::NONE && (error= file->ha_index_init(index)))
+ if (file->inited == handler::NONE && (error= file->ha_index_init(index,1)))
DBUG_RETURN(error);
/* Do not allocate the buffers twice. */
@@ -6296,7 +6301,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
(byte*) range->min_key,
range->min_length,
(ha_rkey_function)(range->flag ^ GEOM_FLAG));
- if (result != HA_ERR_KEY_NOT_FOUND)
+ if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
DBUG_RETURN(result);
range=0; // Not found, to next range
}
@@ -6439,7 +6444,7 @@ int QUICK_SELECT_DESC::get_next()
}
if (result)
{
- if (result != HA_ERR_KEY_NOT_FOUND)
+ if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
DBUG_RETURN(result);
range=0; // Not found, to next range
continue;
@@ -8111,7 +8116,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */
- result= file->ha_index_init(index);
+ result= file->ha_index_init(index, 1);
result= file->index_last(record);
if (result == HA_ERR_END_OF_FILE)
DBUG_RETURN(0);
@@ -8187,7 +8192,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
DBUG_ASSERT(is_last_prefix <= 0);
if (result == HA_ERR_KEY_NOT_FOUND)
continue;
- else if (result)
+ if (result)
break;
if (have_min)
@@ -8217,10 +8222,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
HA_READ_KEY_EXACT);
result= have_min ? min_res : have_max ? max_res : result;
- }
- while (result == HA_ERR_KEY_NOT_FOUND && is_last_prefix != 0);
+ } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ is_last_prefix != 0);
if (result == 0)
+ {
/*
Partially mimic the behavior of end_select_send. Copy the
field data from Item_field::field into Item_field::result_field
@@ -8228,6 +8234,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
other fields in non-ANSI SQL mode).
*/
copy_fields(&join->tmp_table_param);
+ }
else if (result == HA_ERR_KEY_NOT_FOUND)
result= HA_ERR_END_OF_FILE;
@@ -8254,6 +8261,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
RETURN
0 on success
HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
+ HA_ERR_END_OF_FILE - "" -
other if some error occurred
*/
@@ -8307,7 +8315,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
key_restore(record, tmp_record, index_info, 0);
}
- else if (result == HA_ERR_KEY_NOT_FOUND)
+ else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
result= 0; /* There is a result in any case. */
}
}
@@ -8332,6 +8340,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
RETURN
0 on success
HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
+ HA_ERR_END_OF_FILE - "" -
other if some error occurred
*/
@@ -8432,6 +8441,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
0 on success
HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
the ranges
+ HA_ERR_END_OF_FILE - "" -
other if some error
*/
@@ -8476,11 +8486,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
result= file->index_read(record, group_prefix, search_prefix_len,
find_flag);
- if ((result == HA_ERR_KEY_NOT_FOUND) &&
- (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
- continue; /* Check the next range. */
- else if (result)
+ if (result)
{
+ if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
+ continue; /* Check the next range. */
+
/*
In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
@@ -8507,7 +8518,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
/* Check if record belongs to the current group. */
if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
{
- result = HA_ERR_KEY_NOT_FOUND;
+ result= HA_ERR_KEY_NOT_FOUND;
continue;
}
@@ -8525,7 +8536,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) ||
(cmp_res <= 0)))
{
- result = HA_ERR_KEY_NOT_FOUND;
+ result= HA_ERR_KEY_NOT_FOUND;
continue;
}
}
@@ -8564,6 +8575,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
0 on success
HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
the ranges
+ HA_ERR_END_OF_FILE - "" -
other if some error
*/
@@ -8609,10 +8621,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
result= file->index_read(record, group_prefix, search_prefix_len,
find_flag);
- if ((result == HA_ERR_KEY_NOT_FOUND) && (cur_range->flag & EQ_RANGE))
- continue; /* Check the next range. */
if (result)
{
+ if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
+ (cur_range->flag & EQ_RANGE))
+ continue; /* Check the next range. */
+
/*
         If no key was found with this upper bound, there certainly are no keys
in the ranges to the left.
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index 33c8eadc065..9802bbddde6 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -181,7 +181,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
- error= table->file->ha_index_init((uint) ref.key);
+ error= table->file->ha_index_init((uint) ref.key, 1);
if (!ref.key_length)
error= table->file->index_first(table->record[0]);
@@ -253,7 +253,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
- error= table->file->ha_index_init((uint) ref.key);
+ error= table->file->ha_index_init((uint) ref.key, 1);
if (!ref.key_length)
error= table->file->index_last(table->record[0]);
diff --git a/sql/records.cc b/sql/records.cc
index 9b05dc3e291..b3610cf1bbf 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -31,6 +31,74 @@ static int rr_cmp(uchar *a,uchar *b);
/* init struct for read with info->read_record */
+/*
+  init_read_record is used to set up a scan using one of a number of
+  different methods. Which method to use is decided in this call, so that
+  later calls to info->read_record will invoke the appropriate method
+  through a function pointer.
+
+  Five of the methods relate entirely to the sort function filesort; the
+  result of a filesort is retrieved using read_record calls. The other two
+  methods are used for normal table access.
+
+  A filesort produces references to the sorted records; these references
+  can be stored in memory or in a temporary file.
+
+  The temporary file is normally used when the references don't fit into
+  a properly sized memory buffer. For most small queries the references
+  are stored in the memory buffer.
+
+  The temporary file is also used when performing an update where a key is
+  modified.
+
+  Methods used when refs are in memory (using rr_from_pointers):
+    rr_unpack_from_buffer:
+    ----------------------
+      This method is used when table->sort.addon_field is allocated.
+      This is allocated for most SELECT queries not involving any BLOBs.
+      In this case the records are fetched from a memory buffer.
+    rr_from_pointers:
+    -----------------
+      Used when the above does not hold: for UPDATE, DELETE and so forth,
+      and for SELECTs involving BLOBs. It is also used when the
+      addon_field buffer was not allocated because its size exceeded the
+      session variable max_length_for_sort_data.
+      In this case the record data is fetched from the handler via the
+      saved reference, using the rnd_pos handler call.
+
+  Methods used when refs are in a temporary file (using rr_from_tempfile):
+    rr_unpack_from_tempfile:
+    ------------------------
+      Same as rr_unpack_from_buffer except that references are fetched
+      from a temporary file. Should rarely happen other than in strange
+      configurations.
+
+    rr_from_tempfile:
+    -----------------
+      Same as rr_from_pointers except that references are fetched from a
+      temporary file instead of from memory.
+    rr_from_cache:
+    --------------
+      This is a special variant of rr_from_tempfile that can be used for
+      handlers that do not set the HA_FAST_KEY_READ table flag. Instead
+      of reading the references one by one from the temporary file, it
+      reads a set of them, sorts them and reads all of them into a buffer
+      which is then used for a number of subsequent calls to
+      rr_from_cache. It is only used for SELECT queries, subject to a
+      number of additional conditions, among them table size.
+
+  All other accesses use either index access methods (rr_quick) or a full
+  table scan (rr_sequential).
+    rr_quick:
+    ---------
+      rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
+      perform an index scan. There is a lot of functionality hidden in
+      these quick classes; they handle all index scans of various kinds.
+    rr_sequential:
+    --------------
+      This is the most basic access method of a table, using rnd_init,
+      rnd_next and rnd_end. No indexes are used.
+*/
void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
SQL_SELECT *select,
int use_record_cache, bool print_error)
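
For reference, the standard driver loop built on top of init_read_record
looks roughly as follows (a sketch of the usual caller pattern, with error
handling elided):

  /* Sketch: scan a table through whatever method init_read_record chose */
  READ_RECORD info;
  init_read_record(&info, thd, table, select, 1, 1);
  while (!info.read_record(&info))
  {
    /* process table->record[0] */
  }
  end_read_record(&info);
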
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc
index 0b6e44c0272..855520fb2e4 100644
--- a/sql/repl_failsafe.cc
+++ b/sql/repl_failsafe.cc
@@ -20,6 +20,7 @@
#include "repl_failsafe.h"
#include "sql_repl.h"
#include "slave.h"
+#include "rpl_filter.h"
#include "log_event.h"
#include <mysql.h>
@@ -735,14 +736,14 @@ static int fetch_db_tables(THD *thd, MYSQL *mysql, const char *db,
TABLE_LIST table;
const char* table_name= row[0];
int error;
- if (table_rules_on)
+ if (rpl_filter->is_on())
{
bzero((char*) &table, sizeof(table)); //just for safe
table.db= (char*) db;
table.table_name= (char*) table_name;
table.updating= 1;
- if (!tables_ok(thd, &table))
+ if (!rpl_filter->tables_ok(thd->db, &table))
continue;
}
/* download master's table and overwrite slave's table */
@@ -860,8 +861,8 @@ bool load_master_data(THD* thd)
data from master
*/
- if (!db_ok(db, replicate_do_db, replicate_ignore_db) ||
- !db_ok_with_wild_table(db) ||
+ if (!rpl_filter->db_ok(db) ||
+ !rpl_filter->db_ok_with_wild_table(db) ||
!strcmp(db,"mysql"))
{
*cur_table_res = 0;
diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc
new file mode 100644
index 00000000000..f9f8a3e98a7
--- /dev/null
+++ b/sql/rpl_filter.cc
@@ -0,0 +1,539 @@
+/* Copyright (C) 2000-2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysql_priv.h"
+#include "rpl_filter.h"
+
+#define TABLE_RULE_HASH_SIZE 16
+#define TABLE_RULE_ARR_SIZE 16
+
+Rpl_filter::Rpl_filter() :
+ table_rules_on(0), do_table_inited(0), ignore_table_inited(0),
+ wild_do_table_inited(0), wild_ignore_table_inited(0)
+{
+ do_db.empty();
+ ignore_db.empty();
+ rewrite_db.empty();
+}
+
+
+Rpl_filter::~Rpl_filter()
+{
+ if (do_table_inited)
+ hash_free(&do_table);
+ if (ignore_table_inited)
+ hash_free(&ignore_table);
+ if (wild_do_table_inited)
+ free_string_array(&wild_do_table);
+ if (wild_ignore_table_inited)
+ free_string_array(&wild_ignore_table);
+ free_list(&do_db);
+ free_list(&ignore_db);
+ free_list(&rewrite_db);
+}
+
+
+/*
+ Returns true if table should be logged/replicated
+
+ SYNOPSIS
+ tables_ok()
+ db db to use if db in TABLE_LIST is undefined for a table
+ tables list of tables to check
+
+ NOTES
+ Changing table order in the list can lead to different results.
+
+  Note also the order of precedence of the do/ignore rules (see code).
+  For that reason, users should not set conflicting rules, because they
+  may get unpredictable results (the precedence order is explained in
+  the manual).
+
+  If no table in the list is marked "updating", then we always
+  return 0, because there is no reason to execute this statement on
+  the slave if it updates nothing. (Currently, this can only happen if
+  the statement is a multi-delete (SQLCOM_DELETE_MULTI) and "tables"
+  are the tables in the FROM.)
+
+ In the case of SQLCOM_DELETE_MULTI, there will be a second call to
+ tables_ok(), with tables having "updating==TRUE" (those after the
+ DELETE), so this second call will make the decision (because
+ all_tables_not_ok() = !tables_ok(1st_list) &&
+ !tables_ok(2nd_list)).
+
+ TODO
+ "Include all tables like "abc.%" except "%.EFG"". (Can't be done now.)
+ If we supported Perl regexps, we could do it with pattern: /^abc\.(?!EFG)/
+ (I could not find an equivalent in the regex library MySQL uses).
+
+ RETURN VALUES
+ 0 should not be logged/replicated
+ 1 should be logged/replicated
+*/
+
+bool
+Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables)
+{
+ bool some_tables_updating= 0;
+ DBUG_ENTER("Rpl_filter::tables_ok");
+
+ for (; tables; tables= tables->next_global)
+ {
+ char hash_key[2*NAME_LEN+2];
+ char *end;
+ uint len;
+
+ if (!tables->updating)
+ continue;
+ some_tables_updating= 1;
+ end= strmov(hash_key, tables->db ? tables->db : db);
+ *end++= '.';
+ len= (uint) (strmov(end, tables->table_name) - hash_key);
+ if (do_table_inited) // if there are any do's
+ {
+ if (hash_search(&do_table, (byte*) hash_key, len))
+ DBUG_RETURN(1);
+ }
+ if (ignore_table_inited) // if there are any ignores
+ {
+ if (hash_search(&ignore_table, (byte*) hash_key, len))
+ DBUG_RETURN(0);
+ }
+ if (wild_do_table_inited &&
+ find_wild(&wild_do_table, hash_key, len))
+ DBUG_RETURN(1);
+ if (wild_ignore_table_inited &&
+ find_wild(&wild_ignore_table, hash_key, len))
+ DBUG_RETURN(0);
+ }
+
+ /*
+    If no table was to be updated, ignore the statement (there is no
+    reason to execute it on the slave; the slave is supposed to
+    replicate _changes_ only).
+    If no explicit rule was found and there was a do list, do not
+    replicate. If there was no do list, go ahead.
+ */
+ DBUG_RETURN(some_tables_updating &&
+ !do_table_inited && !wild_do_table_inited);
+}
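
A condensed sketch of the typical caller, mirroring how the slave code
elsewhere in this patch consults the filter before applying a statement:

  /* Sketch: filter a replicated statement by the tables it updates */
  if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db, tables))
  {
    /* nothing we replicate is updated: skip the statement */
  }
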
+
+
+/*
+ Checks whether a db matches some do_db and ignore_db rules
+
+ SYNOPSIS
+ db_ok()
+ db name of the db to check
+
+ RETURN VALUES
+ 0 should not be logged/replicated
+ 1 should be logged/replicated
+*/
+
+bool
+Rpl_filter::db_ok(const char* db)
+{
+ DBUG_ENTER("Rpl_filter::db_ok");
+
+ if (do_db.is_empty() && ignore_db.is_empty())
+ DBUG_RETURN(1); // Ok to replicate if the user puts no constraints
+
+ /*
+ If the user has specified restrictions on which databases to replicate
+ and db was not selected, do not replicate.
+ */
+ if (!db)
+ DBUG_RETURN(0);
+
+ if (!do_db.is_empty()) // if the do's are not empty
+ {
+ I_List_iterator<i_string> it(do_db);
+ i_string* tmp;
+
+ while ((tmp=it++))
+ {
+ if (!strcmp(tmp->ptr, db))
+ DBUG_RETURN(1); // match
+ }
+ DBUG_RETURN(0);
+ }
+  else // there are some elements in the ignore list, otherwise we could not get here
+ {
+ I_List_iterator<i_string> it(ignore_db);
+ i_string* tmp;
+
+ while ((tmp=it++))
+ {
+ if (!strcmp(tmp->ptr, db))
+ DBUG_RETURN(0); // match
+ }
+ DBUG_RETURN(1);
+ }
+}
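
This predicate is what MYSQL_LOG::write now consults, as the log.cc hunk
above shows; a condensed sketch of that call site:

  /* Sketch: suppress binlogging when the current db is filtered out */
  if (!binlog_filter->db_ok(local_db))
    DBUG_RETURN(0);   /* the event is not written to the binary log */
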
+
+
+/*
+ Checks whether a db matches wild_do_table and wild_ignore_table
+ rules (for replication)
+
+ SYNOPSIS
+ db_ok_with_wild_table()
+ db name of the db to check.
+ Is tested with check_db_name() before calling this function.
+
+ NOTES
+ Here is the reason for this function.
+    We advise users who want to safely exclude a database 'db1' to do it
+    with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db
+    or replicate_ignore_db, because the latter two only check the
+    selected db, which won't work in that case:
+       USE db2;
+       UPDATE db1.t SET ... #this will be replicated and should not
+    whereas replicate_wild_ignore_table will work in all cases.
+    With replicate_wild_ignore_table, we only check tables. When one
+    does 'DROP DATABASE db1', no tables are involved and the statement
+    will be replicated, while users could expect it would not (as it
+    roughly means 'DROP db1.first_table, DROP db1.second_table...').
+ In other words, we want to interpret 'db1.%' as "everything touching db1".
+ That is why we want to match 'db1' against 'db1.%' wild table rules.
+
+ RETURN VALUES
+ 0 should not be logged/replicated
+ 1 should be logged/replicated
+*/
+
+bool
+Rpl_filter::db_ok_with_wild_table(const char *db)
+{
+ DBUG_ENTER("Rpl_filter::db_ok_with_wild_table");
+
+ char hash_key[NAME_LEN+2];
+ char *end;
+ int len;
+ end= strmov(hash_key, db);
+ *end++= '.';
+  len= end - hash_key;
+ if (wild_do_table_inited && find_wild(&wild_do_table, hash_key, len))
+ {
+ DBUG_PRINT("return",("1"));
+ DBUG_RETURN(1);
+ }
+ if (wild_ignore_table_inited && find_wild(&wild_ignore_table, hash_key, len))
+ {
+ DBUG_PRINT("return",("0"));
+ DBUG_RETURN(0);
+ }
+
+ /*
+    If no explicit rule was found and there was a do list, do not
+    replicate. If there was no do list, go ahead.
+ */
+ DBUG_PRINT("return",("db=%s,retval=%d", db, !wild_do_table_inited));
+ DBUG_RETURN(!wild_do_table_inited);
+}
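
Expressed through the filter API, the rule recommended in the note above
looks as follows (equivalent to --replicate-wild-ignore-table=db1.%; the
call site is assumed):

  /* Sketch: exclude everything touching db1, including DROP DATABASE */
  rpl_filter->add_wild_ignore_table("db1.%");
  /* db_ok_with_wild_table("db1") now returns 0 */
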
+
+
+bool
+Rpl_filter::is_on()
+{
+ return table_rules_on;
+}
+
+
+int
+Rpl_filter::add_do_table(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_do_table");
+ if (!do_table_inited)
+ init_table_rule_hash(&do_table, &do_table_inited);
+ table_rules_on= 1;
+ DBUG_RETURN(add_table_rule(&do_table, table_spec));
+}
+
+
+int
+Rpl_filter::add_ignore_table(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_ignore_table");
+ if (!ignore_table_inited)
+ init_table_rule_hash(&ignore_table, &ignore_table_inited);
+ table_rules_on= 1;
+ DBUG_RETURN(add_table_rule(&ignore_table, table_spec));
+}
+
+
+int
+Rpl_filter::add_wild_do_table(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_wild_do_table");
+ if (!wild_do_table_inited)
+ init_table_rule_array(&wild_do_table, &wild_do_table_inited);
+ table_rules_on= 1;
+ DBUG_RETURN(add_wild_table_rule(&wild_do_table, table_spec));
+}
+
+
+int
+Rpl_filter::add_wild_ignore_table(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_wild_ignore_table");
+ if (!wild_ignore_table_inited)
+ init_table_rule_array(&wild_ignore_table, &wild_ignore_table_inited);
+ table_rules_on= 1;
+ DBUG_RETURN(add_wild_table_rule(&wild_ignore_table, table_spec));
+}
+
+
+void
+Rpl_filter::add_db_rewrite(const char* from_db, const char* to_db)
+{
+ i_string_pair *db_pair = new i_string_pair(from_db, to_db);
+ rewrite_db.push_back(db_pair);
+}
+
+
+int
+Rpl_filter::add_table_rule(HASH* h, const char* table_spec)
+{
+ const char* dot = strchr(table_spec, '.');
+ if (!dot) return 1;
+  // len is always > 0 because we know there exists a '.'
+ uint len = (uint)strlen(table_spec);
+ TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT)
+ + len, MYF(MY_WME));
+ if (!e) return 1;
+ e->db= (char*)e + sizeof(TABLE_RULE_ENT);
+ e->tbl_name= e->db + (dot - table_spec) + 1;
+ e->key_len= len;
+ memcpy(e->db, table_spec, len);
+
+ return my_hash_insert(h, (byte*)e);
+}
+
+
+/*
+ Add table expression with wildcards to dynamic array
+*/
+
+int
+Rpl_filter::add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec)
+{
+ const char* dot = strchr(table_spec, '.');
+ if (!dot) return 1;
+ uint len = (uint)strlen(table_spec);
+ TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT)
+ + len, MYF(MY_WME));
+ if (!e) return 1;
+ e->db= (char*)e + sizeof(TABLE_RULE_ENT);
+ e->tbl_name= e->db + (dot - table_spec) + 1;
+ e->key_len= len;
+ memcpy(e->db, table_spec, len);
+ insert_dynamic(a, (gptr)&e);
+ return 0;
+}
+
+
+void
+Rpl_filter::add_do_db(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_do_db");
+ i_string *db = new i_string(table_spec);
+ do_db.push_back(db);
+}
+
+
+void
+Rpl_filter::add_ignore_db(const char* table_spec)
+{
+ DBUG_ENTER("Rpl_filter::add_ignore_db");
+ i_string *db = new i_string(table_spec);
+ ignore_db.push_back(db);
+}
+
+
+static byte* get_table_key(const byte* a, uint* len,
+ my_bool __attribute__((unused)))
+{
+ TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a;
+
+ *len= e->key_len;
+ return (byte*)e->db;
+}
+
+
+static void free_table_ent(void* a)
+{
+ TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a;
+
+ my_free((gptr) e, MYF(0));
+}
+
+
+void
+Rpl_filter::init_table_rule_hash(HASH* h, bool* h_inited)
+{
+ hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0,
+ get_table_key, free_table_ent, 0);
+ *h_inited = 1;
+}
+
+
+void
+Rpl_filter::init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited)
+{
+ my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE,
+ TABLE_RULE_ARR_SIZE);
+ *a_inited = 1;
+}
+
+
+TABLE_RULE_ENT*
+Rpl_filter::find_wild(DYNAMIC_ARRAY *a, const char* key, int len)
+{
+ uint i;
+ const char* key_end= key + len;
+
+ for (i= 0; i < a->elements; i++)
+ {
+    TABLE_RULE_ENT* e;
+ get_dynamic(a, (gptr)&e, i);
+ if (!my_wildcmp(system_charset_info, key, key_end,
+ (const char*)e->db,
+ (const char*)(e->db + e->key_len),
+ '\\',wild_one,wild_many))
+ return e;
+ }
+
+ return 0;
+}
+
+
+void
+Rpl_filter::free_string_array(DYNAMIC_ARRAY *a)
+{
+ uint i;
+ for (i= 0; i < a->elements; i++)
+ {
+ char* p;
+ get_dynamic(a, (gptr) &p, i);
+ my_free(p, MYF(MY_WME));
+ }
+ delete_dynamic(a);
+}
+
+
+/*
+ Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other
+ hash, as it assumes that the hash entries are TABLE_RULE_ENT.
+
+ SYNOPSIS
+ table_rule_ent_hash_to_str()
+ s pointer to the String to fill
+ h pointer to the HASH to read
+
+ RETURN VALUES
+ none
+*/
+
+void
+Rpl_filter::table_rule_ent_hash_to_str(String* s, HASH* h)
+{
+ s->length(0);
+ for (uint i= 0; i < h->records; i++)
+ {
+ TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i);
+ if (s->length())
+ s->append(',');
+ s->append(e->db,e->key_len);
+ }
+}
+
+
+void
+Rpl_filter::table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a)
+{
+ s->length(0);
+ for (uint i= 0; i < a->elements; i++)
+ {
+ TABLE_RULE_ENT* e;
+ get_dynamic(a, (gptr)&e, i);
+ if (s->length())
+ s->append(',');
+ s->append(e->db,e->key_len);
+ }
+}
+
+
+void
+Rpl_filter::get_do_table(String* str)
+{
+ table_rule_ent_hash_to_str(str, &do_table);
+}
+
+
+void
+Rpl_filter::get_ignore_table(String* str)
+{
+ table_rule_ent_hash_to_str(str, &ignore_table);
+}
+
+
+void
+Rpl_filter::get_wild_do_table(String* str)
+{
+ table_rule_ent_dynamic_array_to_str(str, &wild_do_table);
+}
+
+
+void
+Rpl_filter::get_wild_ignore_table(String* str)
+{
+ table_rule_ent_dynamic_array_to_str(str, &wild_ignore_table);
+}
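
These getters exist so that status commands can display the active rules as
one comma-separated string; a minimal sketch of the assumed use (the
Protocol context is illustrative):

  /* Sketch: render the do-table rules, e.g. for SHOW SLAVE STATUS */
  String do_table_str;
  rpl_filter->get_do_table(&do_table_str);
  protocol->store(&do_table_str);
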
+
+
+const char*
+Rpl_filter::get_rewrite_db(const char* db, uint32 *new_len)
+{
+ if (rewrite_db.is_empty() || !db)
+ return db;
+ I_List_iterator<i_string_pair> it(rewrite_db);
+ i_string_pair* tmp;
+
+ while ((tmp=it++))
+ {
+ if (!strcmp(tmp->key, db))
+ {
+ *new_len= strlen(tmp->val);
+ return tmp->val;
+ }
+ }
+ return db;
+}
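
The rewrite is applied when a replicated event picks its database, exactly
as in the Query_log_event::exec_event hunk earlier in this patch:

  /* Sketch: map the master's db name to the slave's configured name */
  thd->db_length= db_len;
  thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length);
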
+
+
+I_List<i_string>*
+Rpl_filter::get_do_db()
+{
+ return &do_db;
+}
+
+
+I_List<i_string>*
+Rpl_filter::get_ignore_db()
+{
+ return &ignore_db;
+}
diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h
new file mode 100644
index 00000000000..cfcb3b43607
--- /dev/null
+++ b/sql/rpl_filter.h
@@ -0,0 +1,113 @@
+/* Copyright (C) 2000-2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef RPL_FILTER_H
+#define RPL_FILTER_H
+
+#include "mysql.h"
+#include "my_list.h"
+
+typedef struct st_table_rule_ent
+{
+ char* db;
+ char* tbl_name;
+ uint key_len;
+} TABLE_RULE_ENT;
+
+/*
+ Rpl_filter
+
+  Inclusion and exclusion rules for tables and databases.
+  Also handles database name rewrites.
+ Used for replication and binlogging.
+ */
+class Rpl_filter
+{
+public:
+ Rpl_filter();
+ ~Rpl_filter();
+ Rpl_filter(Rpl_filter const&);
+ Rpl_filter& operator=(Rpl_filter const&);
+
+ /* Checks - returns true if ok to replicate/log */
+
+ bool tables_ok(const char* db, TABLE_LIST* tables);
+ bool db_ok(const char* db);
+ bool db_ok_with_wild_table(const char *db);
+
+ bool is_on();
+
+ /* Setters - add filtering rules */
+
+ int add_do_table(const char* table_spec);
+ int add_ignore_table(const char* table_spec);
+
+ int add_wild_do_table(const char* table_spec);
+ int add_wild_ignore_table(const char* table_spec);
+
+ void add_do_db(const char* db_spec);
+ void add_ignore_db(const char* db_spec);
+
+ void add_db_rewrite(const char* from_db, const char* to_db);
+
+ /* Getters - to get information about current rules */
+
+ void get_do_table(String* str);
+ void get_ignore_table(String* str);
+
+ void get_wild_do_table(String* str);
+ void get_wild_ignore_table(String* str);
+
+ const char* get_rewrite_db(const char* db, uint32 *new_len);
+
+ I_List<i_string>* get_do_db();
+ I_List<i_string>* get_ignore_db();
+
+private:
+ bool table_rules_on;
+
+ void init_table_rule_hash(HASH* h, bool* h_inited);
+ void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited);
+
+ int add_table_rule(HASH* h, const char* table_spec);
+ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec);
+
+ void free_string_array(DYNAMIC_ARRAY *a);
+
+ void table_rule_ent_hash_to_str(String* s, HASH* h);
+ void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a);
+ TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len);
+
+ HASH do_table;
+ HASH ignore_table;
+ DYNAMIC_ARRAY wild_do_table;
+ DYNAMIC_ARRAY wild_ignore_table;
+
+ bool do_table_inited;
+ bool ignore_table_inited;
+ bool wild_do_table_inited;
+ bool wild_ignore_table_inited;
+
+ I_List<i_string> do_db;
+ I_List<i_string> ignore_db;
+
+ I_List<i_string_pair> rewrite_db;
+};
+
+extern Rpl_filter *rpl_filter;
+extern Rpl_filter *binlog_filter;
+
+#endif // RPL_FILTER_H
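
A minimal sketch of how callers use the new filter object (the rpl_filter
pointer and all methods are declared above; the two wrapper functions are
hypothetical, added only for illustration):

    #include "rpl_filter.h"

    /* Combine the db-level checks the way the replication code does. */
    static bool db_replicated(const char *db)
    {
      return rpl_filter->db_ok(db) &&
             rpl_filter->db_ok_with_wild_table(db);
    }

    /* get_rewrite_db() returns the mapped name when a rewrite rule
       matches, otherwise the input pointer unchanged. */
    static const char *effective_db(const char *db)
    {
      uint32 new_len;
      return rpl_filter->get_rewrite_db(db, &new_len);
    }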
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 774062dedf2..f1ab86ace51 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -282,6 +282,8 @@ sys_var_thd_ulong sys_net_retry_count("net_retry_count",
&SV::net_retry_count,
0, fix_net_retry_count);
sys_var_thd_bool sys_new_mode("new", &SV::new_mode);
+sys_var_thd_bool sys_old_alter_table("old_alter_table",
+ &SV::old_alter_table);
sys_var_thd_bool sys_old_passwords("old_passwords", &SV::old_passwords);
sys_var_thd_ulong sys_optimizer_prune_level("optimizer_prune_level",
&SV::optimizer_prune_level);
@@ -636,6 +638,7 @@ sys_var *sys_variables[]=
&sys_net_wait_timeout,
&sys_net_write_timeout,
&sys_new_mode,
+ &sys_old_alter_table,
&sys_old_passwords,
&sys_optimizer_prune_level,
&sys_optimizer_search_depth,
@@ -796,6 +799,7 @@ struct show_var_st init_vars[]= {
{"have_isam", (char*) &have_isam, SHOW_HAVE},
{"have_ndbcluster", (char*) &have_ndbcluster, SHOW_HAVE},
{"have_openssl", (char*) &have_openssl, SHOW_HAVE},
+ {"have_partition_engine", (char*) &have_partition_db, SHOW_HAVE},
{"have_query_cache", (char*) &have_query_cache, SHOW_HAVE},
{"have_raid", (char*) &have_raid, SHOW_HAVE},
{"have_rtree_keys", (char*) &have_rtree_keys, SHOW_HAVE},
@@ -912,6 +916,7 @@ struct show_var_st init_vars[]= {
{sys_net_retry_count.name, (char*) &sys_net_retry_count, SHOW_SYS},
{sys_net_write_timeout.name,(char*) &sys_net_write_timeout, SHOW_SYS},
{sys_new_mode.name, (char*) &sys_new_mode, SHOW_SYS},
+ {sys_old_alter_table.name, (char*) &sys_old_alter_table, SHOW_SYS},
{sys_old_passwords.name, (char*) &sys_old_passwords, SHOW_SYS},
{"open_files_limit", (char*) &open_files_limit, SHOW_LONG},
{sys_optimizer_prune_level.name, (char*) &sys_optimizer_prune_level,
diff --git a/sql/set_var.h b/sql/set_var.h
index 40ff4c8583f..8c1444870eb 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -880,6 +880,7 @@ public:
/* updated in sql_acl.cc */
+extern sys_var_thd_bool sys_old_alter_table;
extern sys_var_thd_bool sys_old_passwords;
extern LEX_STRING default_key_cache_base;
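
A sketch of how the new session variable is consulted at runtime
(SV::old_alter_table is the member added above; the helper is hypothetical):

    /* When old_alter_table is set, ALTER TABLE should take the old
       copy-based code path rather than any optimized variant. */
    static bool use_old_alter_table(THD *thd)
    {
      return test(thd->variables.old_alter_table);
    }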
diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt
index 5f1f7035af9..681b3b5b572 100644
--- a/sql/share/errmsg.txt
+++ b/sql/share/errmsg.txt
@@ -5403,3 +5403,122 @@ ER_VIEW_PREVENT_UPDATE
eng "The definition of table '%-.64s' prevents operation %s on table '%-.64s'."
ER_PS_NO_RECURSION
eng "The prepared statement contains a stored routine call that refers to that same statement. It's not allowed to execute a prepared statement in such a recursive manner"
+ER_PARTITION_REQUIRES_VALUES_ERROR
+ eng "%s PARTITIONING requires definition of VALUES %s for each partition"
+ swe "%s PARTITIONering kräver definition av VALUES %s för varje partition"
+ER_PARTITION_WRONG_VALUES_ERROR
+ eng "Only %s PARTITIONING can use VALUES %s in partition definition"
+ swe "Endast %s partitionering kan använda VALUES %s i definition av partitionen"
+ER_PARTITION_MAXVALUE_ERROR
+ eng "MAXVALUE can only be used in last partition definition"
+ swe "MAXVALUE kan bara användas i definitionen av den sista partitionen"
+ER_PARTITION_SUBPARTITION_ERROR
+ eng "Subpartitions can only be hash partitions and by key"
+ swe "Subpartitioner kan bara vara hash och key partitioner"
+ER_PARTITION_WRONG_NO_PART_ERROR
+ eng "Wrong number of partitions defined, mismatch with previous setting"
+ swe "Antal partitioner definierade och antal partitioner är inte lika"
+ER_PARTITION_WRONG_NO_SUBPART_ERROR
+ eng "Wrong number of subpartitions defined, mismatch with previous setting"
+ swe "Antal subpartitioner definierade och antal subpartitioner är inte lika"
+ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR
+ eng "Constant/Random expression in (sub)partitioning function is not allowed"
+ swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner"
+ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR
+ eng "Expression in RANGE/LIST VALUES must be constant"
+ swe "Uttryck i RANGE/LIST VALUES måste vara ett konstant uttryck"
+ER_FIELD_NOT_FOUND_PART_ERROR
+ eng "Field in list of fields for partition function not found in table"
+ swe "Fält i listan av fält för partitionering med key inte funnen i tabellen"
+ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR
+ eng "List of fields is only allowed in KEY partitions"
+ swe "En lista av fält är endast tillåtet för KEY partitioner"
+ER_INCONSISTENT_PARTITION_INFO_ERROR
+ eng "The partition info in the frm file is not consistent with what can be written into the frm file"
+ swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen"
+ER_PARTITION_FUNC_NOT_ALLOWED_ERROR
+ eng "The %s function returns the wrong type"
+ swe "%s-funktionen returnerar felaktig typ"
+ER_PARTITIONS_MUST_BE_DEFINED_ERROR
+ eng "For %s partitions each partition must be defined"
+ swe "För %s partitionering så måste varje partition definieras"
+ER_RANGE_NOT_INCREASING_ERROR
+ eng "VALUES LESS THAN value must be strictly increasing for each partition"
+ swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition"
+ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR
+ eng "VALUES value must be of same type as partition function"
+ swe "Värden i VALUES måste vara av samma typ som partitioneringsfunktionen"
+ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR
+ eng "Multiple definition of same constant in list partitioning"
+ swe "Multipel definition av samma konstant i list partitionering"
+ER_PARTITION_ENTRY_ERROR
+ eng "Partitioning can not be used stand-alone in query"
+ swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga"
+ER_MIX_HANDLER_ERROR
+ eng "The mix of handlers in the partitions is not allowed in this version of MySQL"
+ swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MySQL"
+ER_PARTITION_NOT_DEFINED_ERROR
+ eng "For the partitioned engine it is necessary to define all %s"
+ swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %s"
+ER_TOO_MANY_PARTITIONS_ERROR
+ eng "Too many partitions were defined"
+ swe "För många partitioner definierades"
+ER_SUBPARTITION_ERROR
+ eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning"
+ swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering"
+ER_CANT_CREATE_HANDLER_FILE
+ eng "Failed to create specific handler file"
+ swe "Misslyckades med att skapa specifik fil i lagringsmotor"
+ER_BLOB_FIELD_IN_PART_FUNC_ERROR
+ eng "A BLOB field is not allowed in partition function"
+ swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner"
+ER_CHAR_SET_IN_PART_FIELD_ERROR
+ eng "VARCHAR only allowed if binary collation for partition functions"
+ swe "VARCHAR endast tillåten med binär collation för partitioneringsfunktion"
+ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF
+ eng "A %s need to include all fields in the partition function"
+ swe "En %s behöver inkludera alla fält i partitioneringsfunktionen för denna lagringsmotor"
+ER_NO_PARTS_ERROR
+ eng "Number of %s = 0 is not an allowed value"
+ swe "Antal %s = 0 är inte ett tillåten värde"
+ER_PARTITION_MGMT_ON_NONPARTITIONED
+ eng "Partition management on a not partitioned table is not possible"
+ swe "Partitioneringskommando på en opartitionerad tabell är inte möjligt"
+ER_DROP_PARTITION_NON_EXISTENT
+ eng "Error in list of partitions to change"
+ swe "Fel i listan av partitioner att förändra"
+ER_DROP_LAST_PARTITION
+ eng "Cannot remove all partitions, use DROP TABLE instead"
+ swe "Det är inte tillåtet att ta bort alla partitioner, använd DROP TABLE istället"
+ER_COALESCE_ONLY_ON_HASH_PARTITION
+ eng "COALESCE PARTITION can only be used on HASH/KEY partitions"
+ swe "COALESCE PARTITION kan bara användas på HASH/KEY partitioner"
+ER_ONLY_ON_RANGE_LIST_PARTITION
+ eng "%s PARTITION can only be used on RANGE/LIST partitions"
+ swe "%s PARTITION kan bara användas på RANGE/LIST-partitioner"
+ER_ADD_PARTITION_SUBPART_ERROR
+ eng "Trying to Add partition(s) with wrong number of subpartitions"
+ swe "ADD PARTITION med fel antal subpartitioner"
+ER_ADD_PARTITION_NO_NEW_PARTITION
+ eng "At least one partition must be added"
+ swe "Åtminstone en partition måste läggas till vid ADD PARTITION"
+ER_COALESCE_PARTITION_NO_PARTITION
+ eng "At least one partition must be coalesced"
+ swe "Åtminstone en partition måste slås ihop vid COALESCE PARTITION"
+ER_REORG_PARTITION_NOT_EXIST
+ eng "More partitions to reorganise than there are partitions"
+ swe "Fler partitioner att reorganisera än det finns partitioner"
+ER_SAME_NAME_PARTITION
+ eng "All partitions must have unique names in the table"
+ swe "Alla partitioner i tabellen måste ha unika namn"
+ER_CONSECUTIVE_REORG_PARTITIONS
+ eng "When reorganising a set of partitions they must be in consecutive order"
+ swe "När ett antal partitioner omorganiseras måste de vara i konsekutiv ordning"
+ER_REORG_OUTSIDE_RANGE
+ eng "The new partitions cover a bigger range then the reorganised partitions do"
+ swe "De nya partitionerna täcker ett större intervall än de omorganiserade partitionerna"
+ER_DROP_PARTITION_FAILURE
+ eng "Drop partition not supported in this version for this handler"
+ER_DROP_PARTITION_WHEN_FK_DEFINED
+ eng "Cannot drop a partition when a foreign key constraint is defined on the table"
+ swe "Kan inte ta bort en partition när en främmande nyckel är definierad på tabellen"
diff --git a/sql/slave.cc b/sql/slave.cc
index d2a60076cef..757d8bc212d 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -22,6 +22,7 @@
#include <myisam.h>
#include "slave.h"
#include "sql_repl.h"
+#include "rpl_filter.h"
#include "repl_failsafe.h"
#include <thr_alarm.h>
#include <my_dir.h>
@@ -36,11 +37,7 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
volatile bool slave_sql_running = 0, slave_io_running = 0;
char* slave_load_tmpdir = 0;
MASTER_INFO *active_mi;
-HASH replicate_do_table, replicate_ignore_table;
-DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table;
-bool do_table_inited = 0, ignore_table_inited = 0;
-bool wild_do_table_inited = 0, wild_ignore_table_inited = 0;
-bool table_rules_on= 0, replicate_same_server_id;
+bool replicate_same_server_id;
ulonglong relay_log_space_limit = 0;
/*
@@ -194,20 +191,6 @@ err:
}
-static void free_table_ent(TABLE_RULE_ENT* e)
-{
- my_free((gptr) e, MYF(0));
-}
-
-
-static byte* get_table_key(TABLE_RULE_ENT* e, uint* len,
- my_bool not_used __attribute__((unused)))
-{
- *len = e->key_len;
- return (byte*)e->db;
-}
-
-
/*
Open the given relay log
@@ -809,245 +792,6 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
}
-void init_table_rule_hash(HASH* h, bool* h_inited)
-{
- hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0,
- (hash_get_key) get_table_key,
- (hash_free_key) free_table_ent, 0);
- *h_inited = 1;
-}
-
-
-void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited)
-{
- my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE,
- TABLE_RULE_ARR_SIZE);
- *a_inited = 1;
-}
-
-
-static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len)
-{
- uint i;
- const char* key_end = key + len;
-
- for (i = 0; i < a->elements; i++)
- {
- TABLE_RULE_ENT* e ;
- get_dynamic(a, (gptr)&e, i);
- if (!my_wildcmp(system_charset_info, key, key_end,
- (const char*)e->db,
- (const char*)(e->db + e->key_len),
- '\\',wild_one,wild_many))
- return e;
- }
-
- return 0;
-}
-
-
-/*
- Checks whether tables match some (wild_)do_table and (wild_)ignore_table
- rules (for replication)
-
- SYNOPSIS
- tables_ok()
- thd thread (SQL slave thread normally). Mustn't be null.
- tables list of tables to check
-
- NOTES
- Note that changing the order of the tables in the list can lead to
- different results. Note also the order of precedence of the do/ignore
- rules (see code below). For that reason, users should not set conflicting
- rules because they may get unpredicted results (precedence order is
- explained in the manual).
- If no table of the list is marked "updating" (so far this can only happen
- if the statement is a multi-delete (SQLCOM_DELETE_MULTI) and the "tables"
- is the tables in the FROM): then we always return 0, because there is no
- reason we play this statement on this slave if it updates nothing. In the
- case of SQLCOM_DELETE_MULTI, there will be a second call to tables_ok(),
- with tables having "updating==TRUE" (those after the DELETE), so this
- second call will make the decision (because
- all_tables_not_ok() = !tables_ok(1st_list) && !tables_ok(2nd_list)).
-
- Thought which arose from a question of a big customer "I want to include
- all tables like "abc.%" except the "%.EFG"". This can't be done now. If we
- supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/
- (I could not find an equivalent in the regex library MySQL uses).
-
- RETURN VALUES
- 0 should not be logged/replicated
- 1 should be logged/replicated
-*/
-
-bool tables_ok(THD* thd, TABLE_LIST* tables)
-{
- bool some_tables_updating= 0;
- DBUG_ENTER("tables_ok");
-
- /*
- In routine, can't reliably pick and choose substatements, so always
- replicate.
- We can't reliably know if one substatement should be executed or not:
- consider the case of this substatement: a SELECT on a non-replicated
- constant table; if we don't execute it maybe it was going to fill a
- variable which was going to be used by the next substatement to update
- a replicated table? If we execute it maybe the constant non-replicated
- table does not exist (and so we'll fail) while there was no need to
- execute this as this SELECT does not influence replicated tables in the
- rest of the routine? In other words: users are used to replicate-*-table
- specifying how to handle updates to tables, these options don't say
- anything about reads to tables; we can't guess.
- */
- if (thd->spcont)
- DBUG_RETURN(1);
-
- for (; tables; tables= tables->next_global)
- {
- char hash_key[2*NAME_LEN+2];
- char *end;
- uint len;
-
- if (!tables->updating)
- continue;
- some_tables_updating= 1;
- end= strmov(hash_key, tables->db ? tables->db : thd->db);
- *end++= '.';
- len= (uint) (strmov(end, tables->table_name) - hash_key);
- if (do_table_inited) // if there are any do's
- {
- if (hash_search(&replicate_do_table, (byte*) hash_key, len))
- DBUG_RETURN(1);
- }
- if (ignore_table_inited) // if there are any ignores
- {
- if (hash_search(&replicate_ignore_table, (byte*) hash_key, len))
- DBUG_RETURN(0);
- }
- if (wild_do_table_inited && find_wild(&replicate_wild_do_table,
- hash_key, len))
- DBUG_RETURN(1);
- if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table,
- hash_key, len))
- DBUG_RETURN(0);
- }
-
- /*
- If no table was to be updated, ignore statement (no reason we play it on
- slave, slave is supposed to replicate _changes_ only).
- If no explicit rule found and there was a do list, do not replicate.
- If there was no do list, go ahead
- */
- DBUG_RETURN(some_tables_updating &&
- !do_table_inited && !wild_do_table_inited);
-}
-
-
-/*
- Checks whether a db matches wild_do_table and wild_ignore_table
- rules (for replication)
-
- SYNOPSIS
- db_ok_with_wild_table()
- db name of the db to check.
- Is tested with check_db_name() before calling this function.
-
- NOTES
- Here is the reason for this function.
- We advise users who want to exclude a database 'db1' safely to do it
- with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or
- replicate_ignore_db because the two lasts only check for the selected db,
- which won't work in that case:
- USE db2;
- UPDATE db1.t SET ... #this will be replicated and should not
- whereas replicate_wild_ignore_table will work in all cases.
- With replicate_wild_ignore_table, we only check tables. When
- one does 'DROP DATABASE db1', tables are not involved and the
- statement will be replicated, while users could expect it would not (as it
- rougly means 'DROP db1.first_table, DROP db1.second_table...').
- In other words, we want to interpret 'db1.%' as "everything touching db1".
- That is why we want to match 'db1' against 'db1.%' wild table rules.
-
- RETURN VALUES
- 0 should not be logged/replicated
- 1 should be logged/replicated
- */
-
-int db_ok_with_wild_table(const char *db)
-{
- char hash_key[NAME_LEN+2];
- char *end;
- int len;
- end= strmov(hash_key, db);
- *end++= '.';
- len= end - hash_key ;
- if (wild_do_table_inited && find_wild(&replicate_wild_do_table,
- hash_key, len))
- return 1;
- if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table,
- hash_key, len))
- return 0;
-
- /*
- If no explicit rule found and there was a do list, do not replicate.
- If there was no do list, go ahead
- */
- return !wild_do_table_inited;
-}
-
-
-int add_table_rule(HASH* h, const char* table_spec)
-{
- const char* dot = strchr(table_spec, '.');
- if (!dot) return 1;
- // len is always > 0 because we know the there exists a '.'
- uint len = (uint)strlen(table_spec);
- TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT)
- + len, MYF(MY_WME));
- if (!e) return 1;
- e->db = (char*)e + sizeof(TABLE_RULE_ENT);
- e->tbl_name = e->db + (dot - table_spec) + 1;
- e->key_len = len;
- memcpy(e->db, table_spec, len);
- (void)my_hash_insert(h, (byte*)e);
- return 0;
-}
-
-
-/*
- Add table expression with wildcards to dynamic array
-*/
-
-int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec)
-{
- const char* dot = strchr(table_spec, '.');
- if (!dot) return 1;
- uint len = (uint)strlen(table_spec);
- TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT)
- + len, MYF(MY_WME));
- if (!e) return 1;
- e->db = (char*)e + sizeof(TABLE_RULE_ENT);
- e->tbl_name = e->db + (dot - table_spec) + 1;
- e->key_len = len;
- memcpy(e->db, table_spec, len);
- insert_dynamic(a, (gptr)&e);
- return 0;
-}
-
-
-static void free_string_array(DYNAMIC_ARRAY *a)
-{
- uint i;
- for (i = 0; i < a->elements; i++)
- {
- char* p;
- get_dynamic(a, (gptr) &p, i);
- my_free(p, MYF(MY_WME));
- }
- delete_dynamic(a);
-}
-
-
#ifdef NOT_USED_YET
static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/)
{
@@ -1083,14 +827,6 @@ void end_slave()
*/
terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
end_master_info(active_mi);
- if (do_table_inited)
- hash_free(&replicate_do_table);
- if (ignore_table_inited)
- hash_free(&replicate_ignore_table);
- if (wild_do_table_inited)
- free_string_array(&replicate_wild_do_table);
- if (wild_ignore_table_inited)
- free_string_array(&replicate_wild_ignore_table);
delete active_mi;
active_mi= 0;
}
@@ -1170,24 +906,6 @@ bool net_request_file(NET* net, const char* fname)
}
-const char *rewrite_db(const char* db, uint32 *new_len)
-{
- if (replicate_rewrite_db.is_empty() || !db)
- return db;
- I_List_iterator<i_string_pair> it(replicate_rewrite_db);
- i_string_pair* tmp;
-
- while ((tmp=it++))
- {
- if (!strcmp(tmp->key, db))
- {
- *new_len= (uint32)strlen(tmp->val);
- return tmp->val;
- }
- }
- return db;
-}
-
/*
From other comments and tests in code, it looks like
sometimes Query_log_event and Load_log_event can have db == 0
@@ -1200,60 +918,6 @@ const char *print_slave_db_safe(const char* db)
return (db ? db : "");
}
-/*
- Checks whether a db matches some do_db and ignore_db rules
- (for logging or replication)
-
- SYNOPSIS
- db_ok()
- db name of the db to check
- do_list either binlog_do_db or replicate_do_db
- ignore_list either binlog_ignore_db or replicate_ignore_db
-
- RETURN VALUES
- 0 should not be logged/replicated
- 1 should be logged/replicated
-*/
-
-int db_ok(const char* db, I_List<i_string> &do_list,
- I_List<i_string> &ignore_list )
-{
- if (do_list.is_empty() && ignore_list.is_empty())
- return 1; // ok to replicate if the user puts no constraints
-
- /*
- If the user has specified restrictions on which databases to replicate
- and db was not selected, do not replicate.
- */
- if (!db)
- return 0;
-
- if (!do_list.is_empty()) // if the do's are not empty
- {
- I_List_iterator<i_string> it(do_list);
- i_string* tmp;
-
- while ((tmp=it++))
- {
- if (!strcmp(tmp->ptr, db))
- return 1; // match
- }
- return 0;
- }
- else // there are some elements in the don't, otherwise we cannot get here
- {
- I_List_iterator<i_string> it(ignore_list);
- i_string* tmp;
-
- while ((tmp=it++))
- {
- if (!strcmp(tmp->ptr, db))
- return 0; // match
- }
- return 1;
- }
-}
-
static int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
const char *default_val)
@@ -2262,48 +1926,6 @@ int register_slave_on_master(MYSQL* mysql)
}
-/*
- Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other
- hash, as it assumes that the hash entries are TABLE_RULE_ENT.
-
- SYNOPSIS
- table_rule_ent_hash_to_str()
- s pointer to the String to fill
- h pointer to the HASH to read
-
- RETURN VALUES
- none
-*/
-
-void table_rule_ent_hash_to_str(String* s, HASH* h)
-{
- s->length(0);
- for (uint i=0 ; i < h->records ; i++)
- {
- TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i);
- if (s->length())
- s->append(',');
- s->append(e->db,e->key_len);
- }
-}
-
-/*
- Mostly the same thing as above
-*/
-
-void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a)
-{
- s->length(0);
- for (uint i=0 ; i < a->elements ; i++)
- {
- TABLE_RULE_ENT* e;
- get_dynamic(a, (gptr)&e, i);
- if (s->length())
- s->append(',');
- s->append(e->db,e->key_len);
- }
-}
-
bool show_master_info(THD* thd, MASTER_INFO* mi)
{
// TODO: fix this for multi-master
@@ -2399,23 +2021,18 @@ bool show_master_info(THD* thd, MASTER_INFO* mi)
protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ?
"Yes" : "No", &my_charset_bin);
protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin);
- protocol->store(&replicate_do_db);
- protocol->store(&replicate_ignore_db);
- /*
- We can't directly use some protocol->store for
- replicate_*_table,
- as Protocol doesn't know the TABLE_RULE_ENT struct.
- We first build Strings and then pass them to protocol->store.
- */
+ protocol->store(rpl_filter->get_do_db());
+ protocol->store(rpl_filter->get_ignore_db());
+
char buf[256];
String tmp(buf, sizeof(buf), &my_charset_bin);
- table_rule_ent_hash_to_str(&tmp, &replicate_do_table);
+ rpl_filter->get_do_table(&tmp);
protocol->store(&tmp);
- table_rule_ent_hash_to_str(&tmp, &replicate_ignore_table);
+ rpl_filter->get_ignore_table(&tmp);
protocol->store(&tmp);
- table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_do_table);
+ rpl_filter->get_wild_do_table(&tmp);
protocol->store(&tmp);
- table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_ignore_table);
+ rpl_filter->get_wild_ignore_table(&tmp);
protocol->store(&tmp);
protocol->store((uint32) mi->rli.last_slave_errno);
@@ -3888,10 +3505,8 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev)
if (unlikely(!cev->is_valid()))
DBUG_RETURN(1);
- /*
- TODO: fix to honor table rules, not only db rules
- */
- if (!db_ok(cev->db, replicate_do_db, replicate_ignore_db))
+
+ if (!rpl_filter->db_ok(cev->db))
{
skip_load_data_infile(net);
DBUG_RETURN(0);
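
The free functions deleted above survive as methods on the filter object.
A sketch of the equivalent slave-side decision after this patch (the
wrapper is hypothetical; is_on() and tables_ok() are declared in
rpl_filter.h):

    static bool statement_should_run(THD *thd, TABLE_LIST *tables)
    {
      if (!rpl_filter->is_on())            // no table rules configured
        return TRUE;
      /* Same precedence as the old tables_ok(): do_table, then
         ignore_table, then wild_do_table, then wild_ignore_table. */
      return rpl_filter->tables_ok(thd->db, tables);
    }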
diff --git a/sql/slave.h b/sql/slave.h
index c41234ab2ed..ead1aa87ce6 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -21,6 +21,8 @@
#include "mysql.h"
#include "my_list.h"
+#include "rpl_filter.h"
+
#define SLAVE_NET_TIMEOUT 3600
#define MAX_SLAVE_ERRMSG 1024
#define MAX_SLAVE_ERROR 2000
@@ -461,15 +463,6 @@ typedef struct st_master_info
int queue_event(MASTER_INFO* mi,const char* buf,ulong event_len);
-typedef struct st_table_rule_ent
-{
- char* db;
- char* tbl_name;
- uint key_len;
-} TABLE_RULE_ENT;
-
-#define TABLE_RULE_HASH_SIZE 16
-#define TABLE_RULE_ARR_SIZE 16
#define MAX_SLAVE_ERRMSG 1024
#define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\
@@ -523,27 +516,9 @@ int mysql_table_dump(THD* thd, const char* db,
int fetch_master_table(THD* thd, const char* db_name, const char* table_name,
MASTER_INFO* mi, MYSQL* mysql, bool overwrite);
-void table_rule_ent_hash_to_str(String* s, HASH* h);
-void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a);
bool show_master_info(THD* thd, MASTER_INFO* mi);
bool show_binlog_info(THD* thd);
-/* See if the query uses any tables that should not be replicated */
-bool tables_ok(THD* thd, TABLE_LIST* tables);
-
-/*
- Check to see if the database is ok to operate on with respect to the
- do and ignore lists - used in replication
-*/
-int db_ok(const char* db, I_List<i_string> &do_list,
- I_List<i_string> &ignore_list );
-int db_ok_with_wild_table(const char *db);
-
-int add_table_rule(HASH* h, const char* table_spec);
-int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec);
-void init_table_rule_hash(HASH* h, bool* h_inited);
-void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited);
-const char *rewrite_db(const char* db, uint32 *new_db_len);
const char *print_slave_db_safe(const char *db);
int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code);
void skip_load_data_infile(NET* net);
@@ -577,11 +552,7 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg);
extern bool volatile abort_loop;
extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */
extern LIST master_list;
-extern HASH replicate_do_table, replicate_ignore_table;
-extern DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table;
-extern bool do_table_inited, ignore_table_inited,
- wild_do_table_inited, wild_ignore_table_inited;
-extern bool table_rules_on, replicate_same_server_id;
+extern bool replicate_same_server_id;
extern int disconnect_slave_event_count, abort_slave_event_count ;
@@ -595,8 +566,6 @@ extern my_bool master_ssl;
extern my_string master_ssl_ca, master_ssl_capath, master_ssl_cert,
master_ssl_cipher, master_ssl_key;
-extern I_List<i_string> replicate_do_db, replicate_ignore_db;
-extern I_List<i_string_pair> replicate_rewrite_db;
extern I_List<THD> threads;
#endif
diff --git a/sql/sp.cc b/sql/sp.cc
index 016703662a5..d635d9ad728 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -808,7 +808,7 @@ db_show_routine_status(THD *thd, int type, const char *wild)
}
}
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
if ((res= table->file->index_first(table->record[0])))
{
res= (res == HA_ERR_END_OF_FILE) ? 0 : SP_INTERNAL_ERROR;
@@ -858,7 +858,7 @@ sp_drop_db_routines(THD *thd, char *db)
goto err;
ret= SP_OK;
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
if (! table->file->index_read(table->record[0],
key, keylen, HA_READ_KEY_EXACT))
{
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 4127129576b..c2413545b3a 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -27,9 +27,6 @@
#include "mysql_priv.h"
#include "hash_filo.h"
-#ifdef HAVE_REPLICATION
-#include "sql_repl.h" //for tables_ok()
-#endif
#include <m_ctype.h>
#include <stdarg.h>
#include "sp_head.h"
@@ -37,6 +34,8 @@
#ifndef NO_EMBEDDED_ACCESS_CHECKS
+#define FIRST_NON_YN_FIELD 26
+
class acl_entry :public hash_filo_element
{
public:
@@ -1399,7 +1398,7 @@ bool change_password(THD *thd, const char *host, const char *user,
GRANT and REVOKE are applied the slave in/exclusion rules as they are
some kind of updates to the mysql.% tables.
*/
- if (thd->slave_thread && table_rules_on)
+ if (thd->slave_thread && rpl_filter->is_on())
{
/*
The tables must be marked "updating" so that tables_ok() takes them into
@@ -1407,7 +1406,7 @@ bool change_password(THD *thd, const char *host, const char *user,
*/
tables.updating= 1;
/* Thanks to bzero, tables.next==0 */
- if (!tables_ok(thd, &tables))
+    if (!(thd->spcont || rpl_filter->tables_ok(0, &tables)))
DBUG_RETURN(0);
}
#endif
@@ -1594,7 +1593,7 @@ static bool update_user_table(THD *thd, TABLE *table,
key_copy((byte *) user_key, table->record[0], table->key_info,
table->key_info->key_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read_idx(table->record[0], 0,
(byte *) user_key, table->key_info->key_length,
HA_READ_KEY_EXACT))
@@ -1683,7 +1682,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
key_copy(user_key, table->record[0], table->key_info,
table->key_info->key_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read_idx(table->record[0], 0,
user_key, table->key_info->key_length,
HA_READ_KEY_EXACT))
@@ -1816,7 +1815,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
We should NEVER delete from the user table, as a uses can still
use mysqld even if he doesn't have any privileges in the user table!
*/
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (cmp_record(table,record[1]) &&
(error=table->file->update_row(table->record[1],table->record[0])))
{ // This should never happen
@@ -1898,7 +1897,7 @@ static int replace_db_table(TABLE *table, const char *db,
key_copy(user_key, table->record[0], table->key_info,
table->key_info->key_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read_idx(table->record[0],0,
user_key, table->key_info->key_length,
HA_READ_KEY_EXACT))
@@ -1934,7 +1933,7 @@ static int replace_db_table(TABLE *table, const char *db,
/* update old existing row */
if (rights)
{
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if ((error=table->file->update_row(table->record[1],table->record[0])))
goto table_error; /* purecov: deadcode */
}
@@ -2113,7 +2112,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len);
col_privs->field[4]->store("",0, &my_charset_latin1);
- col_privs->file->ha_index_init(0);
+ col_privs->file->ha_index_init(0, 1);
if (col_privs->file->index_read(col_privs->record[0],
(byte*) key,
key_prefix_len, HA_READ_KEY_EXACT))
@@ -2258,7 +2257,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
List_iterator <LEX_COLUMN> iter(columns);
class LEX_COLUMN *column;
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 1);
while ((column= iter++))
{
ulong privileges= column->rights;
@@ -2273,7 +2272,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
key_copy(user_key, table->record[0], table->key_info,
table->key_info->key_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read(table->record[0], user_key,
table->key_info->key_length,
HA_READ_KEY_EXACT))
@@ -2351,7 +2350,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
key_copy(user_key, table->record[0], table->key_info,
key_prefix_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read(table->record[0], user_key,
key_prefix_length,
HA_READ_KEY_EXACT))
@@ -2449,7 +2448,7 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
key_copy(user_key, table->record[0], table->key_info,
table->key_info->key_length);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (table->file->index_read_idx(table->record[0], 0,
user_key, table->key_info->key_length,
HA_READ_KEY_EXACT))
@@ -2766,14 +2765,14 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list,
GRANT and REVOKE are applied the slave in/exclusion rules as they are
some kind of updates to the mysql.% tables.
*/
- if (thd->slave_thread && table_rules_on)
+ if (thd->slave_thread && rpl_filter->is_on())
{
/*
The tables must be marked "updating" so that tables_ok() takes them into
account in tests.
*/
tables[0].updating= tables[1].updating= tables[2].updating= 1;
- if (!tables_ok(thd, tables))
+ if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
DBUG_RETURN(FALSE);
}
#endif
@@ -2973,14 +2973,14 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
GRANT and REVOKE are applied the slave in/exclusion rules as they are
some kind of updates to the mysql.% tables.
*/
- if (thd->slave_thread && table_rules_on)
+ if (thd->slave_thread && rpl_filter->is_on())
{
/*
The tables must be marked "updating" so that tables_ok() takes them into
account in tests.
*/
tables[0].updating= tables[1].updating= 1;
- if (!tables_ok(thd, tables))
+ if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
DBUG_RETURN(FALSE);
}
#endif
@@ -3104,14 +3104,14 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list,
GRANT and REVOKE are applied the slave in/exclusion rules as they are
some kind of updates to the mysql.% tables.
*/
- if (thd->slave_thread && table_rules_on)
+ if (thd->slave_thread && rpl_filter->is_on())
{
/*
The tables must be marked "updating" so that tables_ok() takes them into
account in tests.
*/
tables[0].updating= tables[1].updating= 1;
- if (!tables_ok(thd, tables))
+ if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
DBUG_RETURN(FALSE);
}
#endif
@@ -3254,8 +3254,8 @@ static my_bool grant_load(TABLE_LIST *tables)
t_table = tables[0].table; c_table = tables[1].table;
p_table= tables[2].table;
- t_table->file->ha_index_init(0);
- p_table->file->ha_index_init(0);
+ t_table->file->ha_index_init(0, 1);
+ p_table->file->ha_index_init(0, 1);
if (!t_table->file->index_first(t_table->record[0]))
{
memex_ptr= &memex;
@@ -4425,7 +4425,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables)
GRANT and REVOKE are applied the slave in/exclusion rules as they are
some kind of updates to the mysql.% tables.
*/
- if (thd->slave_thread && table_rules_on)
+ if (thd->slave_thread && rpl_filter->is_on())
{
/*
The tables must be marked "updating" so that tables_ok() takes them into
@@ -4433,7 +4433,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables)
*/
tables[0].updating=tables[1].updating=tables[2].updating=
tables[3].updating=tables[4].updating=1;
- if (!tables_ok(thd, tables))
+ if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
DBUG_RETURN(1);
tables[0].updating=tables[1].updating=tables[2].updating=
tables[3].updating=tables[4].updating=0;
@@ -4595,7 +4595,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
user_key, key_prefix_length,
HA_READ_KEY_EXACT)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
{
table->file->print_error(error, MYF(0));
result= -1;
diff --git a/sql/sql_acl.h b/sql/sql_acl.h
index 6e6f33e68c2..50aa35e8cc7 100644
--- a/sql/sql_acl.h
+++ b/sql/sql_acl.h
@@ -14,6 +14,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#include "slave.h" // for tables_ok(), rpl_filter
+
#define SELECT_ACL (1L << 0)
#define INSERT_ACL (1L << 1)
#define UPDATE_ACL (1L << 2)
@@ -50,7 +52,6 @@
*/
#define EXTRA_ACL (1L << 29)
#define NO_ACCESS (1L << 30)
-
#define DB_ACLS \
(UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \
GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 766840e667c..105cd6a6670 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -565,7 +565,7 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
else
{
// Free memory and reset for next loop
- table->file->reset();
+ table->file->ha_reset();
}
table->in_use=0;
if (unused_tables)
@@ -2566,15 +2566,20 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table)
{
if (thd->set_query_id)
{
+ table->file->ha_set_bit_in_rw_set(field->fieldnr,
+ (bool)(thd->set_query_id-1));
if (field->query_id != thd->query_id)
{
+ if (table->get_fields_in_item_tree)
+ field->flags|= GET_FIXED_FIELDS_FLAG;
field->query_id= thd->query_id;
table->used_fields++;
table->used_keys.intersect(field->part_of_key);
}
else
thd->dupp_field= field;
- }
+ } else if (table->get_fields_in_item_tree)
+ field->flags|= GET_FIXED_FIELDS_FLAG;
}
@@ -2977,6 +2982,42 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list,
/*
+  Find a field in a table with no side effects; the only purpose is to
+  check for the field in the table object and return a pointer to the
+  field if found.
+
+ SYNOPSIS
+ find_field_in_table_sef()
+
+ table table where to find
+ name Name of field searched for
+
+ RETURN
+ 0 field is not found
+ # pointer to field
+*/
+
+Field *find_field_in_table_sef(TABLE *table, const char *name)
+{
+ Field **field_ptr;
+ if (table->s->name_hash.records)
+ field_ptr= (Field**)hash_search(&table->s->name_hash,(byte*) name,
+ strlen(name));
+ else
+ {
+ if (!(field_ptr= table->field))
+ return (Field *)0;
+ for (; *field_ptr; ++field_ptr)
+ if (!my_strcasecmp(system_charset_info, (*field_ptr)->field_name, name))
+ break;
+ }
+ if (field_ptr)
+ return *field_ptr;
+ else
+ return (Field *)0;
+}
+
+
+/*
Find field in table list.
SYNOPSIS
@@ -3642,15 +3683,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2,
if (field_1)
{
+ TABLE *table_1= nj_col_1->table_ref->table;
/* Mark field_1 used for table cache. */
field_1->query_id= thd->query_id;
- nj_col_1->table_ref->table->used_keys.intersect(field_1->part_of_key);
+ table_1->file->ha_set_bit_in_read_set(field_1->fieldnr);
+ table_1->used_keys.intersect(field_1->part_of_key);
}
if (field_2)
{
+ TABLE *table_2= nj_col_2->table_ref->table;
/* Mark field_2 used for table cache. */
field_2->query_id= thd->query_id;
- nj_col_2->table_ref->table->used_keys.intersect(field_2->part_of_key);
+ table_2->file->ha_set_bit_in_read_set(field_2->fieldnr);
+ table_2->used_keys.intersect(field_2->part_of_key);
}
if (using_fields != NULL)
@@ -4130,11 +4175,11 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
****************************************************************************/
bool setup_fields(THD *thd, Item **ref_pointer_array,
- List<Item> &fields, bool set_query_id,
+ List<Item> &fields, ulong set_query_id,
List<Item> *sum_func_list, bool allow_sum_func)
{
reg2 Item *item;
- bool save_set_query_id= thd->set_query_id;
+ ulong save_set_query_id= thd->set_query_id;
List_iterator<Item> it(fields);
DBUG_ENTER("setup_fields");
@@ -4510,6 +4555,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
if (field->query_id == thd->query_id)
thd->dupp_field= field;
field->query_id= thd->query_id;
+ field->table->file->ha_set_bit_in_read_set(field->fieldnr);
if (table)
table->used_keys.intersect(field->part_of_key);
@@ -4550,7 +4596,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
For NATURAL joins, used_tables is updated in the IF above.
*/
if (table)
+ {
table->used_fields= table->s->fields;
+ table->file->ha_set_all_bits_in_read_set();
+ }
}
if (found)
DBUG_RETURN(FALSE);
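
A sketch of using the new side-effect-free lookup; unlike the other
find_field_in_table variants it leaves query_id and the read set
untouched, so it suits pure existence probes (the field name here is
hypothetical):

    Field *field= find_field_in_table_sef(table, "created_at");
    if (field)
    {
      /* The field exists; nothing has been marked as used. */
    }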
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index 0f5b6dcd35e..35c501ede56 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -25,7 +25,7 @@
template <uint default_width> class Bitmap
{
MY_BITMAP map;
- uchar buffer[(default_width+7)/8];
+ uint32 buffer[(default_width+31)/32];
public:
Bitmap() { init(); }
Bitmap(const Bitmap& from) { *this=from; }
@@ -48,14 +48,14 @@ public:
void intersect(ulonglong map2buff)
{
MY_BITMAP map2;
- bitmap_init(&map2, (uchar *)&map2buff, sizeof(ulonglong)*8, 0);
+ bitmap_init(&map2, (uint32 *)&map2buff, sizeof(ulonglong)*8, 0);
bitmap_intersect(&map, &map2);
}
/* Use highest bit for all bits above sizeof(ulonglong)*8. */
void intersect_extended(ulonglong map2buff)
{
intersect(map2buff);
- if (map.bitmap_size > sizeof(ulonglong))
+ if (map.n_bits > sizeof(ulonglong) * 8)
bitmap_set_above(&map, sizeof(ulonglong),
test(map2buff & (LL(1) << (sizeof(ulonglong) * 8 - 1))));
}
@@ -70,7 +70,7 @@ public:
char *print(char *buf) const
{
char *s=buf;
- const uchar *e=buffer, *b=e+sizeof(buffer)-1;
+ const uchar *e=(uchar *)buffer, *b=e+sizeof(buffer)-1;
while (!*b && b>e)
b--;
if ((*s=_dig_vec_upper[*b >> 4]) != '0')
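
The buffer change above follows from bitmap_init() now taking a uint32*
and working in 32-bit words: (default_width+31)/32 words reserve the same
bits as (default_width+7)/8 bytes, but word-aligned. A sketch of the
template in use (Bitmap<64> mirrors how key maps are sized; set_all() and
intersect() are members of this class):

    Bitmap<64> map;           // storage is two uint32 words, not 8 uchars
    map.set_all();            // start from all bits set
    map.intersect(0x0fULL);   // keep only the low four bits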
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index 04663c5b096..492d9b99d88 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -303,7 +303,7 @@ TODO list:
#ifndef MASTER
#include "../srclib/myisammrg/myrg_def.h"
#else
-#include "../myisammrg/myrg_def.h"
+#include "../storage/myisammrg/myrg_def.h"
#endif
#ifdef EMBEDDED_LIBRARY
diff --git a/sql/sql_class.h b/sql/sql_class.h
index a0c61944c6a..5ce2f7d8847 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -465,19 +465,20 @@ public:
class i_string: public ilink
{
public:
- char* ptr;
+ const char* ptr;
i_string():ptr(0) { }
- i_string(char* s) : ptr(s) {}
+ i_string(const char* s) : ptr(s) {}
};
/* needed for linked list of two strings for replicate-rewrite-db */
class i_string_pair: public ilink
{
public:
- char* key;
- char* val;
+ const char* key;
+ const char* val;
i_string_pair():key(0),val(0) { }
- i_string_pair(char* key_arg, char* val_arg) : key(key_arg),val(val_arg) {}
+ i_string_pair(const char* key_arg, const char* val_arg) :
+ key(key_arg),val(val_arg) {}
};
@@ -565,6 +566,7 @@ struct system_variables
my_bool ndb_use_exact_count;
my_bool ndb_use_transactions;
#endif /* HAVE_NDBCLUSTER_DB */
+ my_bool old_alter_table;
my_bool old_passwords;
/* Only charset part of these variables is sensible */
@@ -774,8 +776,15 @@ public:
/*
- if set_query_id=1, we set field->query_id for all fields. In that case
field list can not contain duplicates.
+    0: Means query_id is not set and no indication of fields used is
+       passed to the handler
+    1: Means query_id is set for the fields in the list and the bit in the
+       read set is set to inform the handler that the field is to be read
+    2: Means query_id is set for the fields in the list and the bit in the
+       write set is set to inform the handler that it needs to update this
+       field in write_row and update_row
*/
- bool set_query_id;
+ ulong set_query_id;
/*
This variable is used in post-parse stage to declare that sum-functions,
or functions which have sense only if GROUP BY is present, are allowed.
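
A sketch of the setup_fields() calling convention that follows from the
values above (the field lists are placeholders):

    /* Mark fields for reading: query_id plus read-set bits. */
    if (setup_fields(thd, 0, select_fields, 1, 0, 0))
      return TRUE;
    /* Mark fields for writing: query_id plus write-set bits. */
    if (setup_fields(thd, 0, update_fields, 2, 0, 0))
      return TRUE;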
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 7fb9f9eccdd..00d82bcdfda 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -30,7 +30,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
SQL_LIST *order, ha_rows limit, ulonglong options,
bool reset_auto_increment)
{
- int error;
+ bool will_batch;
+ int error, loc_error;
TABLE *table;
SQL_SELECT *select=0;
READ_RECORD info;
@@ -174,6 +175,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
deleted=0L;
init_ftfuncs(thd, select_lex, 1);
thd->proc_info="updating";
+ will_batch= !table->file->start_bulk_delete();
while (!(error=info.read_record(&info)) && !thd->killed &&
!thd->net.report_error)
{
@@ -189,7 +191,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
break;
}
- if (!(error=table->file->delete_row(table->record[0])))
+ if (!(error= table->file->delete_row(table->record[0])))
{
deleted++;
if (table->triggers &&
@@ -225,7 +227,13 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
}
if (thd->killed && !error)
error= 1; // Aborted
- thd->proc_info="end";
+ if (will_batch && (loc_error= table->file->end_bulk_delete()))
+ {
+ if (error != 1)
+ table->file->print_error(loc_error,MYF(0));
+ error=1;
+ }
+ thd->proc_info= "end";
end_read_record(&info);
free_io_cache(table); // Will not do any harm
if (options & OPTION_QUICK)
@@ -643,7 +651,8 @@ void multi_delete::send_error(uint errcode,const char *err)
int multi_delete::do_deletes()
{
- int local_error= 0, counter= 0;
+ int local_error= 0, counter= 0, error;
+ bool will_batch;
DBUG_ENTER("do_deletes");
DBUG_ASSERT(do_delete);
@@ -671,6 +680,7 @@ int multi_delete::do_deletes()
been deleted by foreign key handling
*/
info.ignore_not_found_rows= 1;
+ will_batch= !table->file->start_bulk_delete();
while (!(local_error=info.read_record(&info)) && !thd->killed)
{
if (table->triggers &&
@@ -694,6 +704,14 @@ int multi_delete::do_deletes()
break;
}
}
+ if (will_batch && (error= table->file->end_bulk_delete()))
+ {
+ if (!local_error)
+ {
+ local_error= error;
+ table->file->print_error(local_error,MYF(0));
+ }
+ }
end_read_record(&info);
if (thd->killed && !local_error)
local_error= 1;
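
A sketch of the bulk-delete handler protocol used in both hunks above.
Note the inverted flag: start_bulk_delete() returns non-zero when the
engine does not batch, so will_batch is its negation (the row loop is
reduced to a hypothetical placeholder):

    bool will_batch= !table->file->start_bulk_delete();
    while (rows_remain())                            // hypothetical loop
      error= table->file->delete_row(table->record[0]);
    if (will_batch && (loc_error= table->file->end_bulk_delete()))
      table->file->print_error(loc_error, MYF(0));   // flush failed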
diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc
index 169132e2185..fc7d7ac0012 100644
--- a/sql/sql_handler.cc
+++ b/sql/sql_handler.cc
@@ -461,7 +461,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
if (keyname)
{
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
error= table->file->index_first(table->record[0]);
}
else
@@ -483,7 +483,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
case RLAST:
DBUG_ASSERT(keyname != 0);
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
error= table->file->index_last(table->record[0]);
mode=RPREV;
break;
@@ -522,7 +522,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
if (!(key= (byte*) thd->calloc(ALIGN_SIZE(key_len))))
goto err;
table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
key_copy(key, table->record[0], table->key_info + keyno, key_len);
error= table->file->index_read(table->record[0],
key,key_len,ha_rkey_mode);
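
Every ha_index_init() call site in this patch gains a second argument and
passes 1; judging by the call sites it requests a sorted index scan (an
assumption based on usage, not a spec). A sketch of the adjusted pattern:

    table->file->ha_index_init(keyno, 1);            // 1 = sorted scan
    int error= table->file->index_first(table->record[0]);
    while (!error)
      error= table->file->index_next(table->record[0]);
    table->file->ha_index_or_rnd_end();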
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index 799758f7d1e..e3a2602713f 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -286,8 +286,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
rtopic_id= find_fields[help_relation_help_topic_id].field;
rkey_id= find_fields[help_relation_help_keyword_id].field;
- topics->file->ha_index_init(iindex_topic);
- relations->file->ha_index_init(iindex_relations);
+ topics->file->ha_index_init(iindex_topic,1);
+ relations->file->ha_index_init(iindex_relations,1);
rkey_id->store((longlong) key_id);
rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 9421cc4bb6b..77741d0ed3c 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -103,6 +103,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
#endif
clear_timestamp_auto_bits(table->timestamp_field_type,
TIMESTAMP_AUTO_SET_ON_INSERT);
+ /*
+      No field list is provided, so all fields must be given in the
+      VALUES part. Thus we set all bits in the write set.
+ */
+ table->file->ha_set_all_bits_in_write_set();
}
else
{ // Part field list
@@ -140,7 +145,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
*/
table_list->next_local= 0;
context->resolve_in_table_list_only(table_list);
- res= setup_fields(thd, 0, fields, 1, 0, 0);
+ /*
+      Indicate that the fields in the list are to be updated by setting
+      the set_query_id parameter to 2. This sets the bit in the write set
+      for each field.
+ */
+ res= setup_fields(thd, 0, fields, 2, 0, 0);
/* Restore the current context. */
table_list->next_local= save_next_local;
@@ -239,9 +248,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
/*
Check the fields we are going to modify. This will set the query_id
- of all used fields to the threads query_id.
+    of all used fields to the thread's query_id. It will also set the
+    bit in the write set of this table for each of those fields.
*/
- if (setup_fields(thd, 0, update_fields, 1, 0, 0))
+ if (setup_fields(thd, 0, update_fields, 2, 0, 0))
return -1;
if (table->timestamp_field)
@@ -251,7 +261,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
clear_timestamp_auto_bits(table->timestamp_field_type,
TIMESTAMP_AUTO_SET_ON_UPDATE);
else
+ {
table->timestamp_field->query_id= timestamp_query_id;
+ table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
+ }
}
return 0;
@@ -687,7 +700,7 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view)
Field_translator *trans;
Field **field_ptr= table->field;
uint used_fields_buff_size= (table->s->fields + 7) / 8;
- uchar *used_fields_buff= (uchar*)thd->alloc(used_fields_buff_size);
+ uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size);
MY_BITMAP used_fields;
DBUG_ENTER("check_key_in_view");
@@ -927,7 +940,7 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
select_lex->first_execution= 0;
}
if (duplic == DUP_UPDATE || duplic == DUP_REPLACE)
- table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY);
+ table->file->ha_retrieve_all_pk();
DBUG_RETURN(FALSE);
}
@@ -2300,7 +2313,7 @@ select_insert::~select_insert()
if (table)
{
table->next_number_field=0;
- table->file->reset();
+ table->file->ha_reset();
}
thd->count_cuted_fields= CHECK_FIELD_IGNORE;
thd->abort_on_warning= 0;
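
A sketch of the two write-set paths added above: with no column list every
column may be written, so all bits are set wholesale; with a column list,
setup_fields(..., 2, ...) sets one write-set bit per listed column:

    if (fields.elements == 0)
      table->file->ha_set_all_bits_in_write_set();   // INSERT without list
    else
      res= setup_fields(thd, 0, fields, 2, 0, 0);    // per-column bits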
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index e579ee9f8bd..3f1e237b653 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -159,6 +159,7 @@ void lex_start(THD *thd, uchar *buf,uint length)
lex->yylineno = 1;
lex->in_comment=0;
lex->length=0;
+ lex->part_info= 0;
lex->select_lex.in_sum_expr=0;
lex->select_lex.expr_list.empty();
lex->select_lex.ftfunc_list_alloc.empty();
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 6c91045189c..e5567d0d7c4 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -25,6 +25,7 @@ class sp_head;
class sp_name;
class sp_instr;
class sp_pcontext;
+class partition_info;
/*
The following hack is needed because mysql_yacc.cc does not define
@@ -655,6 +656,11 @@ typedef class st_select_lex SELECT_LEX;
#define ALTER_KEYS_ONOFF 512
#define ALTER_CONVERT 1024
#define ALTER_FORCE 2048
+#define ALTER_RECREATE 4096
+#define ALTER_ADD_PARTITION 8192
+#define ALTER_DROP_PARTITION 16384
+#define ALTER_COALESCE_PARTITION 32768
+#define ALTER_REORGANISE_PARTITION 65536
typedef struct st_alter_info
{
@@ -663,9 +669,17 @@ typedef struct st_alter_info
uint flags;
enum enum_enable_or_disable keys_onoff;
enum tablespace_op_type tablespace_op;
+ List<char> partition_names;
+ uint no_parts;
st_alter_info(){clear();}
- void clear(){keys_onoff= LEAVE_AS_IS;tablespace_op= NO_TABLESPACE_OP;}
+ void clear()
+ {
+ keys_onoff= LEAVE_AS_IS;
+ tablespace_op= NO_TABLESPACE_OP;
+ no_parts= 0;
+ partition_names.empty();
+ }
void reset(){drop_list.empty();alter_list.empty();clear();}
} ALTER_INFO;
@@ -733,6 +747,8 @@ typedef struct st_lex
TABLE_LIST **query_tables_last;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
+ /* Partition info structure filled in by PARTITION BY parse part */
+ partition_info *part_info;
List<key_part_spec> col_list;
List<key_part_spec> ref_list;
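
A sketch of how the new ALTER_* flags are read (cf. is_partition_management()
in sql_partition.cc below): for these commands the parser stores a single
flag value, hence the equality tests rather than bit masking (the helper is
hypothetical):

    static bool adds_partitions(LEX *lex)
    {
      return lex->sql_command == SQLCOM_ALTER_TABLE &&
             lex->alter_info.flags == ALTER_ADD_PARTITION;
    }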
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index e1684f9bb11..8eec970b4df 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -172,7 +172,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
The main thing to fix to remove this restriction is to ensure that the
table is marked to be 'used for insert' in which case we should never
- mark this table as as 'const table' (ie, one that has only one row).
+ mark this table as 'const table' (ie, one that has only one row).
*/
if (unique_table(table_list, table_list->next_global))
{
@@ -188,6 +188,10 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
Field **field;
for (field=table->field; *field ; field++)
fields_vars.push_back(new Item_field(*field));
+ /*
+      Since all fields are used, we set all bits in the write set.
+ */
+ table->file->ha_set_all_bits_in_write_set();
table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
/*
Let us also prepare SET clause, altough it is probably empty
@@ -200,8 +204,15 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
else
{ // Part field list
/* TODO: use this conds for 'WITH CHECK OPTIONS' */
- if (setup_fields(thd, 0, fields_vars, 1, 0, 0) ||
- setup_fields(thd, 0, set_fields, 1, 0, 0) ||
+ /*
+      Indicate that both the variables in the field list and the fields in
+      the update list are to be included in the write set of the table. We
+      nevertheless set all bits in the write set, since it is not allowed
+      to specify NULLs in LOAD DATA.
+ */
+ table->file->ha_set_all_bits_in_write_set();
+ if (setup_fields(thd, 0, fields_vars, 2, 0, 0) ||
+ setup_fields(thd, 0, set_fields, 2, 0, 0) ||
check_that_all_fields_are_given_values(thd, table, table_list))
DBUG_RETURN(TRUE);
/*
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 514b1acd2b4..f4cab56e97c 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -16,6 +16,7 @@
#include "mysql_priv.h"
#include "sql_repl.h"
+#include "rpl_filter.h"
#include "repl_failsafe.h"
#include <m_ctype.h>
#include <myisam.h>
@@ -179,10 +180,13 @@ static bool begin_trans(THD *thd)
*/
inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables)
{
- return (table_rules_on && tables && !tables_ok(thd,tables) &&
+ return (rpl_filter->is_on() && tables &&
+ !(thd->spcont || rpl_filter->tables_ok(thd->db, tables)) &&
((thd->lex->sql_command != SQLCOM_DELETE_MULTI) ||
- !tables_ok(thd,
- (TABLE_LIST *)thd->lex->auxilliary_table_list.first)));
+ !(thd->spcont ||
+ rpl_filter->tables_ok(thd->db,
+ (TABLE_LIST *)
+ thd->lex->auxilliary_table_list.first))));
}
#endif
@@ -3511,9 +3515,9 @@ end_with_restore_list:
above was not called. So we have to check rules again here.
*/
#ifdef HAVE_REPLICATION
- if (thd->slave_thread &&
- (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) ||
- !db_ok_with_wild_table(lex->name)))
+ if (thd->slave_thread &&
+ (!rpl_filter->db_ok(lex->name) ||
+ !rpl_filter->db_ok_with_wild_table(lex->name)))
{
my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0));
break;
@@ -3546,8 +3550,8 @@ end_with_restore_list:
*/
#ifdef HAVE_REPLICATION
if (thd->slave_thread &&
- (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) ||
- !db_ok_with_wild_table(lex->name)))
+ (!rpl_filter->db_ok(lex->name) ||
+ !rpl_filter->db_ok_with_wild_table(lex->name)))
{
my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0));
break;
@@ -3586,8 +3590,8 @@ end_with_restore_list:
*/
#ifdef HAVE_REPLICATION
if (thd->slave_thread &&
- (!db_ok(db, replicate_do_db, replicate_ignore_db) ||
- !db_ok_with_wild_table(db)))
+        (!rpl_filter->db_ok(db) ||
+         !rpl_filter->db_ok_with_wild_table(db)))
{
my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0));
break;
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
new file mode 100644
index 00000000000..1f24806dc5e
--- /dev/null
+++ b/sql/sql_partition.cc
@@ -0,0 +1,3219 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This file was introduced as a container for general functionality related
+ to partitioning introduced in MySQL version 5.1. It contains functionality
+ used by all handlers that support partitioning, which in the first version
+ is the partitioning handler itself and the NDB handler.
+
+ The first version was written by Mikael Ronström.
+
+ This version supports RANGE partitioning, LIST partitioning, HASH
+ partitioning and composite partitioning (hereafter called subpartitioning)
+  where each RANGE/LIST partition is HASH partitioned. The hash function
+  can either be supplied by the user or be derived from a list of fields
+  (also called KEY partitioning), in which case the MySQL server uses an
+  internal hash function.
+ There are quite a few defaults that can be used as well.
+*/
+
+/* Some general useful functions */
+
+#include "mysql_priv.h"
+#include <errno.h>
+#include <m_ctype.h>
+#include "md5.h"
+
+#ifdef HAVE_PARTITION_DB
+/*
+  Declarations of partition-related functions and some static constants.
+*/
+static char *hash_str= "HASH";
+static char *range_str= "RANGE";
+static char *list_str= "LIST";
+static char *part_str= "PARTITION";
+static char *sub_str= "SUB";
+static char *by_str= "BY";
+static char *key_str= "KEY";
+static char *space_str= " ";
+static char *equal_str= "=";
+static char *end_paren_str= ")";
+static char *begin_paren_str= "(";
+static char *comma_str= ",";
+static char buff[22];
+
+bool get_partition_id_list(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_hash_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_key_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_linear_hash_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_linear_key_nosub(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_key(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id);
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+ uint32 *part_id);
+uint32 get_partition_id_hash_sub(partition_info *part_info);
+uint32 get_partition_id_key_sub(partition_info *part_info);
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info);
+uint32 get_partition_id_linear_key_sub(partition_info *part_info);
+#endif
+
+
+/*
+ A routine used by the parser to decide whether we are specifying a full
+ partitioning scheme or only partitions to add or to split.
+ SYNOPSIS
+ is_partition_management()
+ lex Reference to the lex object
+ RETURN VALUE
+ TRUE Yes, it is part of a management partition command
+ FALSE No, not a management partition command
+ DESCRIPTION
+ This needs to be outside of HAVE_PARTITION_DB since it is used from the
+ SQL parser, which doesn't have any #ifdef's.
+*/
+
+my_bool is_partition_management(LEX *lex)
+{
+ return (lex->sql_command == SQLCOM_ALTER_TABLE &&
+ (lex->alter_info.flags == ALTER_ADD_PARTITION ||
+ lex->alter_info.flags == ALTER_REORGANISE_PARTITION));
+}
+
+#ifdef HAVE_PARTITION_DB
+/*
+ A support function to check if a partition name is in a list of strings
+ SYNOPSIS
+ is_partition_in_list()
+ part_name String searched for
+ list_part_names A list of names searched in
+ RETURN VALUES
+ TRUE String found
+ FALSE String not found
+*/
+
+bool is_partition_in_list(char *part_name,
+ List<char> list_part_names)
+{
+ List_iterator<char> part_names_it(list_part_names);
+ uint no_names= list_part_names.elements;
+ uint i= 0;
+ do
+ {
+ char *list_name= part_names_it++;
+ if (!(my_strcasecmp(system_charset_info, part_name, list_name)))
+ return TRUE;
+ } while (++i < no_names);
+ return FALSE;
+}
+
+
+/*
+ A support function to check partition names for duplication in a
+ partitioned table
+ SYNOPSIS
+ is_partitions_in_table()
+ new_part_info New partition info
+ old_part_info Old partition info
+ RETURN VALUES
+ TRUE Duplicate names found
+ FALSE Duplicate names not found
+ DESCRIPTION
+ Handles the case where the new and old partition info are the same, in
+ which case it checks that the list of partition names doesn't contain
+ any duplicates.
+*/
+
+bool is_partitions_in_table(partition_info *new_part_info,
+ partition_info *old_part_info)
+{
+ uint no_new_parts= new_part_info->partitions.elements, new_count;
+ uint no_old_parts= old_part_info->partitions.elements, old_count;
+ List_iterator<partition_element> new_parts_it(new_part_info->partitions);
+ bool same_part_info= (new_part_info == old_part_info);
+ DBUG_ENTER("is_partitions_in_table");
+
+ new_count= 0;
+ do
+ {
+ List_iterator<partition_element> old_parts_it(old_part_info->partitions);
+ char *new_name= (new_parts_it++)->partition_name;
+ new_count++;
+ old_count= 0;
+ do
+ {
+ char *old_name= (old_parts_it++)->partition_name;
+ old_count++;
+ if (same_part_info && old_count == new_count)
+ break;
+ if (!(my_strcasecmp(system_charset_info, old_name, new_name)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ } while (old_count < no_old_parts);
+ } while (new_count < no_new_parts);
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ A useful routine used by update_row for partition handlers to calculate
+ the partition ids of the old and the new record.
+ SYNOPSIS
+ get_parts_for_update()
+ old_data Buffer of old record
+ new_data Buffer of new record
+ rec0 Reference to table->record[0]
+ part_info Reference to partition information
+ part_field_array A NULL-terminated array of fields for partition
+ function
+ old_part_id The returned partition id of old record
+ new_part_id The returned partition id of new record
+ RETURN VALUE
+ 0 Success
+ > 0 Error code
+ DESCRIPTION
+ Depending on whether the buffers point at record[0], we may need to
+ prepare the fields. Then we call the function pointer get_partition_id
+ to calculate the partition ids.
+*/
+
+int get_parts_for_update(const byte *old_data, byte *new_data,
+ const byte *rec0, partition_info *part_info,
+ uint32 *old_part_id, uint32 *new_part_id)
+{
+ Field **part_field_array= part_info->full_part_field_array;
+ int error;
+ DBUG_ENTER("get_parts_for_update");
+ DBUG_ASSERT(new_data == rec0);
+
+ set_field_ptr(part_field_array, old_data, rec0);
+ error= part_info->get_partition_id(part_info, old_part_id);
+ set_field_ptr(part_field_array, rec0, old_data);
+ if (unlikely(error)) // Should never happen
+ {
+ DBUG_ASSERT(0);
+ DBUG_RETURN(error);
+ }
+#ifdef NOT_NEEDED
+ if (new_data == rec0)
+#endif
+ {
+ if (unlikely(error= part_info->get_partition_id(part_info,new_part_id)))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+#ifdef NOT_NEEDED
+ else
+ {
+ /*
+ This branch should never execute but it is written anyways for
+ future use. It will be tested by ensuring that the above
+ condition is false in one test situation before pushing the code.
+ */
+ set_field_ptr(part_field_array, new_data, rec0);
+ error= part_info->get_partition_id(part_info, new_part_id);
+ set_field_ptr(part_field_array, rec0, new_data);
+ if (unlikely(error))
+ {
+ DBUG_RETURN(error);
+ }
+ }
+#endif
+ DBUG_RETURN(0);
+}
+
+
+/*
+ A useful routine used by delete_row for partition handlers to calculate
+ the partition id.
+ SYNOPSIS
+ get_part_for_delete()
+ buf Buffer of old record
+ rec0 Reference to table->record[0]
+ part_info Reference to partition information
+ part_field_array A NULL-terminated array of fields for partition
+ function
+ part_id The returned partition id to delete from
+ RETURN VALUE
+ 0 Success
+ > 0 Error code
+ DESCRIPTION
+ Depending on whether buf points at record[0], we may need to prepare
+ the fields. Then we call the function pointer get_partition_id to
+ calculate the partition id.
+*/
+
+int get_part_for_delete(const byte *buf, const byte *rec0,
+ partition_info *part_info, uint32 *part_id)
+{
+ int error;
+ DBUG_ENTER("get_part_for_delete");
+
+ if (likely(buf == rec0))
+ {
+ if (unlikely((error= part_info->get_partition_id(part_info, part_id))))
+ {
+ DBUG_RETURN(error);
+ }
+ DBUG_PRINT("info", ("Delete from partition %d", *part_id));
+ }
+ else
+ {
+ Field **part_field_array= part_info->full_part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ error= part_info->get_partition_id(part_info, part_id);
+ set_field_ptr(part_field_array, rec0, buf);
+ if (unlikely(error))
+ {
+ DBUG_RETURN(error);
+ }
+ DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id));
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This routine allocates an array for all range constants to achieve a fast
+ check of which partition a certain value belongs to. At the same time it
+ also checks that the range constants are defined in increasing order and
+ that the expressions are constant integer expressions.
+ SYNOPSIS
+ check_range_constants()
+ part_info Reference to partitioning data structure
+ RETURN VALUE
+ TRUE An error occurred during creation of range constants
+ FALSE Successful creation of range constant mapping
+ DESCRIPTION
+ This routine is called from check_partition_info to get a quick error
+ before we get too far into the CREATE TABLE process. It is also called
+ from fix_partition_func every time we open the .frm file. It is only
+ called for RANGE PARTITIONed tables.
+*/
+
+static bool check_range_constants(partition_info *part_info)
+{
+ partition_element* part_def;
+ longlong current_largest_int= LONGLONG_MIN, part_range_value_int;
+ uint no_parts= part_info->no_parts, i;
+ List_iterator<partition_element> it(part_info->partitions);
+ bool result= TRUE;
+ DBUG_ENTER("check_range_constants");
+ DBUG_PRINT("enter", ("INT_RESULT with %d parts", no_parts));
+
+ part_info->part_result_type= INT_RESULT;
+ part_info->range_int_array=
+ (longlong*)sql_alloc(no_parts * sizeof(longlong));
+ if (unlikely(part_info->range_int_array == NULL))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_parts*sizeof(longlong));
+ goto end;
+ }
+ i= 0;
+ do
+ {
+ part_def= it++;
+ if ((i != (no_parts - 1)) || !part_info->defined_max_value)
+ part_range_value_int= part_def->range_value;
+ else
+ part_range_value_int= LONGLONG_MAX;
+ if (likely(current_largest_int < part_range_value_int))
+ {
+ current_largest_int= part_range_value_int;
+ part_info->range_int_array[i]= part_range_value_int;
+ }
+ else
+ {
+ my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0));
+ goto end;
+ }
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
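+
+/*
+  A hypothetical example of the mapping built above: for partitions
+  defined with VALUES LESS THAN (10), (20) and MAXVALUE the routine
+  fills range_int_array with {10, 20, LONGLONG_MAX}, so a search over
+  this ordered array maps any value to its partition id.
+*/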
+
+
+/*
+ A support routine for check_list_constants used by qsort to sort the
+ constant list expressions.
+ SYNOPSIS
+ list_part_cmp()
+ a First list constant to compare with
+ b Second list constant to compare with
+ RETURN VALUE
+ +1 a > b
+ 0 a == b
+ -1 a < b
+*/
+
+static int list_part_cmp(const void* a, const void* b)
+{
+ longlong a1, b1;
+ a1= ((LIST_PART_ENTRY*)a)->list_value;
+ b1= ((LIST_PART_ENTRY*)b)->list_value;
+ if (a1 < b1)
+ return -1;
+ else if (a1 > b1)
+ return +1;
+ else
+ return 0;
+}
+
+
+/*
+ This routine allocates an array for all list constants to achieve a fast
+ check of which partition a certain value belongs to. At the same time it
+ also checks that there are no duplicates among the list constants and
+ that the list expressions are constant integer expressions.
+ SYNOPSIS
+ check_list_constants()
+ part_info Reference to partitioning data structure
+ RETURN VALUE
+ TRUE An error occurred during creation of list constants
+ FALSE Successful creation of list constant mapping
+ DESCRIPTION
+ This routine is called from check_partition_info to get a quick error
+ before we get too far into the CREATE TABLE process. It is also called
+ from fix_partition_func every time we open the .frm file. It is only
+ called for LIST PARTITIONed tables.
+*/
+
+static bool check_list_constants(partition_info *part_info)
+{
+ uint i, no_list_values= 0, no_parts, list_index= 0;
+ longlong *list_value;
+ bool not_first, result= TRUE;
+ longlong curr_value, prev_value;
+ partition_element* part_def;
+ List_iterator<partition_element> list_func_it(part_info->partitions);
+ DBUG_ENTER("check_list_constants");
+
+ part_info->part_result_type= INT_RESULT;
+
+ /*
+ We begin by calculating the number of list values that have been
+ defined in the first step.
+
+ We use this number to allocate a properly sized array of structs
+ to keep the partition id and the value to use in that partition.
+ In the second traversal we assign them values in the struct array.
+
+ Finally we sort the array of structs in order of values to enable
+ a quick binary search for the proper value to discover the
+ partition id.
+ After sorting the array we check that there are no duplicates in the
+ list.
+ */
+
+ no_parts= part_info->no_parts;
+ i= 0;
+ do
+ {
+ part_def= list_func_it++;
+ List_iterator<longlong> list_val_it1(part_def->list_val_list);
+ while (list_val_it1++)
+ no_list_values++;
+ } while (++i < no_parts);
+ list_func_it.rewind();
+ part_info->no_list_values= no_list_values;
+ part_info->list_array=
+ (LIST_PART_ENTRY*)sql_alloc(no_list_values*sizeof(LIST_PART_ENTRY));
+ if (unlikely(part_info->list_array == NULL))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_list_values*sizeof(LIST_PART_ENTRY));
+ goto end;
+ }
+
+ i= 0;
+ do
+ {
+ part_def= list_func_it++;
+ List_iterator<longlong> list_val_it2(part_def->list_val_list);
+ while ((list_value= list_val_it2++))
+ {
+ part_info->list_array[list_index].list_value= *list_value;
+ part_info->list_array[list_index++].partition_id= i;
+ }
+ } while (++i < no_parts);
+
+ qsort((void*)part_info->list_array, no_list_values,
+ sizeof(LIST_PART_ENTRY), &list_part_cmp);
+
+ not_first= FALSE;
+ i= prev_value= 0; //prev_value initialised to quiet compiler
+ do
+ {
+ curr_value= part_info->list_array[i].list_value;
+ if (likely(!not_first || prev_value != curr_value))
+ {
+ prev_value= curr_value;
+ not_first= TRUE;
+ }
+ else
+ {
+ my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0));
+ goto end;
+ }
+ } while (++i < no_list_values);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
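+
+/*
+  A hypothetical example of the mapping built above: for partitions
+  p0 VALUES IN (4,7) and p1 VALUES IN (5) the sorted list_array becomes
+  {(4,0), (5,1), (7,0)}, i.e. (list_value, partition_id) pairs ordered
+  by value, ready for binary search.
+*/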
+
+
+/*
+ Create a memory area where default partition names are stored and fill it
+ up with the names.
+ SYNOPSIS
+ create_default_partition_names()
+ no_parts Number of partitions
+ start_no Starting partition number
+ subpart TRUE if subpartition names are generated
+ RETURN VALUE
+ A pointer to the memory area of the default partition names
+ DESCRIPTION
+ A support routine for the partition code where default values are
+ generated.
+ The external routine needing this code is check_partition_info
+*/
+
+#define MAX_PART_NAME_SIZE 8
+
+static char *create_default_partition_names(uint no_parts, uint start_no,
+ bool subpart)
+{
+ char *ptr= sql_calloc(no_parts*MAX_PART_NAME_SIZE);
+ char *move_ptr= ptr;
+ uint i= 0;
+ DBUG_ENTER("create_default_partition_names");
+ if (likely(ptr != 0))
+ {
+ do
+ {
+ if (subpart)
+ my_sprintf(move_ptr, (move_ptr,"sp%u", (start_no + i)));
+ else
+ my_sprintf(move_ptr, (move_ptr,"p%u", (start_no + i)));
+ move_ptr+=MAX_PART_NAME_SIZE;
+ } while (++i < no_parts);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), no_parts*MAX_PART_NAME_SIZE);
+ }
+ DBUG_RETURN(ptr);
+}
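+
+/*
+  A hypothetical example: with no_parts= 3, start_no= 0 and
+  subpart= FALSE the returned area holds the names "p0", "p1" and "p2",
+  each stored in its own slot of MAX_PART_NAME_SIZE bytes.
+*/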
+
+
+/*
+ Set up all the default partitions not set-up by the user in the SQL
+ statement. Also perform a number of checks that the user hasn't tried
+ to use default values where no defaults exist.
+ SYNOPSIS
+ set_up_default_partitions()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ The routine uses the underlying handler of the partitioning to define
+ the default number of partitions. For some handlers this requires
+ knowledge of the maximum number of rows to be stored in the table.
+ This routine only accepts HASH and KEY partitioning and thus there is
+ no subpartitioning if this routine is successful.
+ The external routine needing this code is check_partition_info
+*/
+
+static bool set_up_default_partitions(partition_info *part_info,
+ handler *file, ulonglong max_rows,
+ uint start_no)
+{
+ uint no_parts, i;
+ char *default_name;
+ bool result= TRUE;
+ DBUG_ENTER("set_up_default_partitions");
+
+ if (part_info->part_type != HASH_PARTITION)
+ {
+ char *error_string;
+ if (part_info->part_type == RANGE_PARTITION)
+ error_string= range_str;
+ else
+ error_string= list_str;
+ my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string);
+ goto end;
+ }
+ if (part_info->no_parts == 0)
+ part_info->no_parts= file->get_default_no_partitions(max_rows);
+ no_parts= part_info->no_parts;
+ part_info->use_default_partitions= FALSE;
+ if (unlikely(no_parts > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely((!(default_name= create_default_partition_names(no_parts,
+ start_no,
+ FALSE)))))
+ goto end;
+ i= 0;
+ do
+ {
+ partition_element *part_elem= new partition_element();
+ if (likely(part_elem != 0))
+ {
+ part_elem->engine_type= DB_TYPE_UNKNOWN;
+ part_elem->partition_name= default_name;
+ default_name+=MAX_PART_NAME_SIZE;
+ part_info->partitions.push_back(part_elem);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ goto end;
+ }
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Set up all the default subpartitions not set-up by the user in the SQL
+ statement. Also perform a number of checks that the default partitioning
+ becomes an allowed partitioning scheme.
+ SYNOPSIS
+ set_up_default_subpartitions()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ The routine uses the underlying handler of the partitioning to define
+ the default number of partitions. For some handlers this requires
+ knowledge of the maximum number of rows to be stored in the table.
+ This routine is only called for RANGE or LIST partitioning; those must
+ be specified explicitly, so only the subpartitions get default values.
+ The external routine needing this code is check_partition_info
+*/
+
+static bool set_up_default_subpartitions(partition_info *part_info,
+ handler *file, ulonglong max_rows)
+{
+ uint i, j, no_parts, no_subparts;
+ char *default_name, *name_ptr;
+ bool result= TRUE;
+ partition_element *part_elem;
+ List_iterator<partition_element> part_it(part_info->partitions);
+ DBUG_ENTER("set_up_default_subpartitions");
+
+ if (part_info->no_subparts == 0)
+ part_info->no_subparts= file->get_default_no_partitions(max_rows);
+ no_parts= part_info->no_parts;
+ no_subparts= part_info->no_subparts;
+ part_info->use_default_subpartitions= FALSE;
+ if (unlikely((no_parts * no_subparts) > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely((!(default_name=
+ create_default_partition_names(no_subparts, (uint)0, TRUE)))))
+ goto end;
+ i= 0;
+ do
+ {
+ part_elem= part_it++;
+ j= 0;
+ name_ptr= default_name;
+ do
+ {
+ partition_element *subpart_elem= new partition_element();
+ if (likely(subpart_elem != 0))
+ {
+ subpart_elem->engine_type= DB_TYPE_UNKNOWN;
+ subpart_elem->partition_name= name_ptr;
+ name_ptr+= MAX_PART_NAME_SIZE;
+ part_elem->subpartitions.push_back(subpart_elem);
+ }
+ else
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ goto end;
+ }
+ } while (++j < no_subparts);
+ } while (++i < no_parts);
+ result= FALSE;
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Set up defaults for partitions or subpartitions (defaults cannot be
+ set up for both at the same time).
+ SYNOPSIS
+ set_up_defaults_for_partitioning()
+ part_info The reference to all partition information
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, attempted default values not possible
+ FALSE Ok, default partitions set-up
+ DESCRIPTION
+ Support routine for check_partition_info
+*/
+
+bool set_up_defaults_for_partitioning(partition_info *part_info,
+ handler *file,
+ ulonglong max_rows, uint start_no)
+{
+ DBUG_ENTER("set_up_defaults_for_partitioning");
+
+ if (part_info->use_default_partitions)
+ DBUG_RETURN(set_up_default_partitions(part_info, file, max_rows,
+ start_no));
+ if (is_sub_partitioned(part_info) && part_info->use_default_subpartitions)
+ DBUG_RETURN(set_up_default_subpartitions(part_info, file, max_rows));
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ Check that all partitions use the same storage engine.
+ This is currently a limitation in this version.
+ SYNOPSIS
+ check_engine_mix()
+ engine_array An array of engine identifiers
+ no_parts Total number of partitions
+ RETURN VALUE
+ TRUE Error, mixed engines
+ FALSE Ok, no mixed engines
+*/
+
+static bool check_engine_mix(u_char *engine_array, uint no_parts)
+{
+ /*
+ Current check verifies only that all handlers are the same.
+ Later this check will be more sophisticated.
+ */
+ uint i= 0;
+ bool result= FALSE;
+ DBUG_ENTER("check_engine_mix");
+
+ do
+ {
+ if (engine_array[i] != engine_array[0])
+ {
+ result= TRUE;
+ break;
+ }
+ } while (++i < no_parts);
+ DBUG_RETURN(result);
+}
+
+
+/*
+ We will check that the partition info requested is possible to set up in
+ this version. One could say this routine is an extension of the parser.
+ If defaults were used we will generate default data structures for all
+ partitions.
+ SYNOPSIS
+ check_partition_info()
+ part_info The reference to all partition information
+ db_type Default storage engine if no engine specified per
+ partition.
+ file A reference to a handler of the table
+ max_rows Maximum number of rows stored in the table
+ RETURN VALUE
+ TRUE Error, something went wrong
+ FALSE Ok, full partition data structures are now generated
+ DESCRIPTION
+ This code is used early in the CREATE TABLE and ALTER TABLE process.
+*/
+
+bool check_partition_info(partition_info *part_info,enum db_type eng_type,
+ handler *file, ulonglong max_rows)
+{
+ u_char *engine_array= NULL;
+ uint part_count= 0, i, no_parts, tot_partitions;
+ bool result= TRUE;
+ List_iterator<partition_element> part_it(part_info->partitions);
+ DBUG_ENTER("check_partition_info");
+
+ if (unlikely(is_sub_partitioned(part_info) &&
+ (!(part_info->part_type == RANGE_PARTITION ||
+ part_info->part_type == LIST_PARTITION))))
+ {
+ /* Only RANGE and LIST partitioning can be subpartitioned */
+ my_error(ER_SUBPARTITION_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely(set_up_defaults_for_partitioning(part_info, file,
+ max_rows, (uint)0)))
+ goto end;
+ tot_partitions= get_tot_partitions(part_info);
+ if (unlikely(tot_partitions > MAX_PARTITIONS))
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely(is_partitions_in_table(part_info, part_info)))
+ {
+ my_error(ER_SAME_NAME_PARTITION, MYF(0));
+ goto end;
+ }
+ engine_array= (u_char*)my_malloc(tot_partitions, MYF(MY_WME));
+ if (unlikely(!engine_array))
+ goto end;
+ i= 0;
+ no_parts= part_info->no_parts;
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (!is_sub_partitioned(part_info))
+ {
+ if (part_elem->engine_type == DB_TYPE_UNKNOWN)
+ part_elem->engine_type= eng_type;
+ DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type));
+ engine_array[part_count++]= (u_char)part_elem->engine_type;
+ }
+ else
+ {
+ uint j= 0, no_subparts= part_info->no_subparts;
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ do
+ {
+ part_elem= sub_it++;
+ if (part_elem->engine_type == DB_TYPE_UNKNOWN)
+ part_elem->engine_type= eng_type;
+ DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type));
+ engine_array[part_count++]= (u_char)part_elem->engine_type;
+ } while (++j < no_subparts);
+ }
+ } while (++i < part_info->no_parts);
+ if (unlikely(check_engine_mix(engine_array, part_count)))
+ {
+ my_error(ER_MIX_HANDLER_ERROR, MYF(0));
+ goto end;
+ }
+
+ /*
+ We need to check all constant expressions that they are of the correct
+ type and that they are increasing for ranges and not overlapping for
+ list constants.
+ */
+
+ if (unlikely((part_info->part_type == RANGE_PARTITION &&
+ check_range_constants(part_info)) ||
+ (part_info->part_type == LIST_PARTITION &&
+ check_list_constants(part_info))))
+ goto end;
+ result= FALSE;
+end:
+ my_free((char*)engine_array,MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(result);
+}
+
+
+/*
+ A great number of the functions below are part of the fix_partition_func
+ method. It is used to set up the partition structures for execution from
+ openfrm. It is called at the end of openfrm when the table struct has
+ been set up apart from the partition information.
+ It involves:
+ 1) Setting arrays of fields for the partition functions.
+ 2) Setting up binary search array for LIST partitioning
+ 3) Setting up array for binary search for RANGE partitioning
+ 4) Setting up key_map's to assist in quick evaluation whether one
+ can deduce anything from a given index of what partition to use
+ 5) Checking whether a set of partitions can be derived from a range on
+ a field in the partition function.
+ As part of doing this there are also a great number of error checks.
+ This is actually the place where most of the checks of the partition
+ information are done when creating a table.
+ Things that are checked include:
+ 1) No NULLable fields in partition function
+ 2) All fields of the partition function are part of primary keys and
+ unique indexes (unless the handler supports this)
+ 3) No fields in partition function that are BLOB's or VARCHAR with a
+ collation other than the binary collation.
+
+
+
+ Create an array of partition fields (NULL terminated). Before this method
+ is called fix_fields or find_table_in_sef has been called to set
+ GET_FIXED_FIELDS_FLAG on all fields that are part of the partition
+ function.
+ SYNOPSIS
+ set_up_field_array()
+ table TABLE object for which partition fields are set-up
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE Error, some field didn't meet requirements
+ FALSE Ok, partition field array set-up
+ DESCRIPTION
+ This method is used to set-up both partition and subpartitioning
+ field array and used for all types of partitioning.
+ It is part of the logic around fix_partition_func.
+*/
+static bool set_up_field_array(TABLE *table,
+ bool sub_part)
+{
+ Field **ptr, *field, **field_array;
+ uint no_fields= 0, size_field_array, i= 0;
+ partition_info *part_info= table->s->part_info;
+ int result= FALSE;
+ DBUG_ENTER("set_up_field_array");
+
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & GET_FIXED_FIELDS_FLAG)
+ no_fields++;
+ }
+ size_field_array= (no_fields+1)*sizeof(Field*);
+ field_array= (Field**)sql_alloc(size_field_array);
+ if (unlikely(!field_array))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+ result= TRUE;
+ }
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & GET_FIXED_FIELDS_FLAG)
+ {
+ field->flags&= ~GET_FIXED_FIELDS_FLAG;
+ field->flags|= FIELD_IN_PART_FUNC_FLAG;
+ if (likely(!result))
+ {
+ field_array[i++]= field;
+
+ /*
+ We check that the fields are proper. It is required for each
+ field in a partition function to:
+ 1) Not be a BLOB of any type
+ A BLOB takes too long to evaluate so we don't want it for
+ performance reasons.
+ 2) Not be a VARCHAR other than VARCHAR with a binary collation
+ A VARCHAR with a character set can have several values that
+ compare as equal with different numbers of spaces or NULL's.
+ This is not a good basis for a safe and exact partition function.
+ Thus it is not allowed in partition functions.
+ */
+
+ if (unlikely(field->flags & BLOB_FLAG))
+ {
+ my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0));
+ result= TRUE;
+ }
+ else if (unlikely(!(field->flags & BINARY_FLAG) &&
+ field->real_type() == MYSQL_TYPE_VARCHAR))
+ {
+ my_error(ER_CHAR_SET_IN_PART_FIELD_ERROR, MYF(0));
+ result= TRUE;
+ }
+ }
+ }
+ }
+ if (likely(!result)) // field_array may be NULL after failed allocation
+ field_array[no_fields]= 0;
+ if (!sub_part)
+ {
+ part_info->part_field_array= field_array;
+ part_info->no_part_fields= no_fields;
+ }
+ else
+ {
+ part_info->subpart_field_array= field_array;
+ part_info->no_subpart_fields= no_fields;
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Create a field array including all fields of both the partitioning and the
+ subpartitioning functions.
+ SYNOPSIS
+ create_full_part_field_array()
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ RETURN VALUE
+ TRUE Memory allocation of field array failed
+ FALSE Ok
+ DESCRIPTION
+ If there is no subpartitioning then the same array is used as for the
+ partitioning. Otherwise a new array is built up using the flag
+ FIELD_IN_PART_FUNC in the field object.
+ This function is called from fix_partition_func
+*/
+
+static bool create_full_part_field_array(TABLE *table,
+ partition_info *part_info)
+{
+ bool result= FALSE;
+ DBUG_ENTER("create_full_part_field_array");
+
+ if (!is_sub_partitioned(part_info))
+ {
+ part_info->full_part_field_array= part_info->part_field_array;
+ part_info->no_full_part_fields= part_info->no_part_fields;
+ }
+ else
+ {
+ Field **ptr, *field, **field_array;
+ uint no_part_fields=0, size_field_array;
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+ no_part_fields++;
+ }
+ size_field_array= (no_part_fields+1)*sizeof(Field*);
+ field_array= (Field**)sql_alloc(size_field_array);
+ if (unlikely(!field_array))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+ result= TRUE;
+ goto end;
+ }
+ no_part_fields= 0;
+ ptr= table->field;
+ while ((field= *(ptr++)))
+ {
+ if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+ field_array[no_part_fields++]= field;
+ }
+ field_array[no_part_fields]=0;
+ part_info->full_part_field_array= field_array;
+ part_info->no_full_part_fields= no_part_fields;
+ }
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ These support routines are used to set/reset an indicator on all fields
+ in a certain key. They are used in conjunction with another support
+ routine that traverses all fields in the PF to find whether all or some
+ fields in the PF are part of the key. This is used to check that primary
+ keys and unique keys involve all fields in the PF (unless supported) and
+ to derive the key_map's used to quickly decide whether the index can be
+ used to derive which partitions need to be scanned.
+
+
+
+ Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by
+ set_indicator_in_key_fields (always used in pairs).
+ SYNOPSIS
+ clear_indicator_in_key_fields()
+ key_info Reference to find the key fields
+*/
+
+static void clear_indicator_in_key_fields(KEY *key_info)
+{
+ KEY_PART_INFO *key_part;
+ uint key_parts= key_info->key_parts, i;
+ for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++)
+ key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG);
+}
+
+
+/*
+ Set flag GET_FIXED_FIELDS_FLAG in all fields of a key.
+ SYNOPSIS
+ set_indicator_in_key_fields
+ key_info Reference to find the key fields
+*/
+
+static void set_indicator_in_key_fields(KEY *key_info)
+{
+ KEY_PART_INFO *key_part;
+ uint key_parts= key_info->key_parts, i;
+ for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++)
+ key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+}
+
+
+/*
+ Check if all or some fields in the partition field array are part of a
+ key whose fields were previously tagged.
+ SYNOPSIS
+ check_fields_in_PF()
+ ptr Partition field array
+ all_fields Is all fields of partition field array used in key
+ some_fields Is some fields of partition field array used in key
+ RETURN VALUE
+ all_fields, some_fields
+*/
+
+static void check_fields_in_PF(Field **ptr, bool *all_fields,
+ bool *some_fields)
+{
+ DBUG_ENTER("check_fields_in_PF");
+ *all_fields= TRUE;
+ *some_fields= FALSE;
+ do
+ {
+ /* Check if the field of the PF is part of the current key investigated */
+ if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG)
+ *some_fields= TRUE;
+ else
+ *all_fields= FALSE;
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table.
+ This routine is used for error handling purposes.
+ SYNOPSIS
+ clear_field_flag()
+ table TABLE object for which partition fields are set-up
+*/
+
+static void clear_field_flag(TABLE *table)
+{
+ Field **ptr;
+ DBUG_ENTER("clear_field_flag");
+
+ for (ptr= table->field; *ptr; ptr++)
+ (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ This routine sets up the partition field array for KEY partitioning. It
+ also verifies that all fields in the list of fields are actually part of
+ the table.
+ SYNOPSIS
+ handle_list_of_fields()
+ it A list of field names for the partition function
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE Fields in list of fields not part of table
+ FALSE All fields ok and array created
+ DESCRIPTION
+ find_field_in_table_sef finds the field given its name. All fields get
+ GET_FIXED_FIELDS_FLAG set.
+*/
+
+static bool handle_list_of_fields(List_iterator<char> it,
+ TABLE *table,
+ partition_info *part_info,
+ bool sub_part)
+{
+ Field *field;
+ bool result;
+ char *field_name;
+ DBUG_ENTER("handle_list_of_fields");
+
+ while ((field_name= it++))
+ {
+ field= find_field_in_table_sef(table, field_name);
+ if (likely(field != 0))
+ field->flags|= GET_FIXED_FIELDS_FLAG;
+ else
+ {
+ my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0));
+ clear_field_flag(table);
+ result= TRUE;
+ goto end;
+ }
+ }
+ result= set_up_field_array(table, sub_part);
+end:
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function is used to build an array of partition fields for the
+ partitioning function and subpartitioning function. The partitioning
+ function is an item tree that must reference at least one field in the
+ table. The parser first checks that the function doesn't contain
+ non-cacheable parts (like a random function), and here we check
+ that the function isn't a constant function.
+ SYNOPSIS
+ fix_fields_part_func()
+ thd The thread object
+ tables A list of one table, the partitioned table
+ func_expr The item tree reference of the partition function
+ part_info Reference to partitioning data structure
+ sub_part Is the table subpartitioned as well
+ RETURN VALUE
+ TRUE An error occurred, something was wrong with the
+ partition function.
+ FALSE Ok, a partition field array was created
+ DESCRIPTION
+ The function uses a new feature in fix_fields where the flag
+ GET_FIXED_FIELDS_FLAG is set for all fields in the item tree.
+ This field must always be reset before returning from the function
+ since it is used for other purposes as well.
+*/
+
+static bool fix_fields_part_func(THD *thd, TABLE_LIST *tables,
+ Item* func_expr, partition_info *part_info,
+ bool sub_part)
+{
+ /*
+ Calculate the number of fields in the partition function.
+ Use it to allocate memory for the array of Field pointers.
+ Initialise the array of field pointers. Use information set when
+ calling fix_fields and reset it immediately after.
+ The get_fields_in_item_tree flag activates the setting of a bit in
+ the flags of each field object in the item tree.
+ */
+
+ bool result= TRUE;
+ TABLE *table= tables->table;
+ TABLE_LIST *save_table_list, *save_first_table, *save_last_table;
+ int error;
+ Name_resolution_context *context;
+ DBUG_ENTER("fix_fields_part_func");
+
+ context= thd->lex->current_context();
+ table->map= 1; //To ensure correct calculation of const item
+ table->get_fields_in_item_tree= TRUE;
+ save_table_list= context->table_list;
+ save_first_table= context->first_name_resolution_table;
+ save_last_table= context->last_name_resolution_table;
+ context->table_list= tables;
+ context->first_name_resolution_table= tables;
+ context->last_name_resolution_table= NULL;
+ func_expr->walk(&Item::change_context_processor, (byte*) context);
+ thd->where= "partition function";
+ error= func_expr->fix_fields(thd, (Item**)0);
+ context->table_list= save_table_list;
+ context->first_name_resolution_table= save_first_table;
+ context->last_name_resolution_table= save_last_table;
+ if (unlikely(error))
+ {
+ DBUG_PRINT("info", ("Field in partition function not part of table"));
+ clear_field_flag(table);
+ goto end;
+ }
+ if (unlikely(func_expr->const_item()))
+ {
+ my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0));
+ clear_field_flag(table);
+ goto end;
+ }
+ result= set_up_field_array(table, sub_part);
+end:
+ table->get_fields_in_item_tree= FALSE;
+ table->map= 0; //Restore old value
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function verifies that the primary key, if there is one, contains
+ all the fields of the partition function.
+ This is a temporary limitation that will hopefully be removed after a
+ while.
+ SYNOPSIS
+ check_primary_key()
+ table TABLE object for which partition fields are set-up
+ RETURN VALUES
+ TRUE Not all fields in partitioning function were part
+ of primary key
+ FALSE Ok, all fields of partitioning function were part
+ of primary key
+*/
+
+static bool check_primary_key(TABLE *table)
+{
+ uint primary_key= table->s->primary_key;
+ bool all_fields, some_fields, result= FALSE;
+ DBUG_ENTER("check_primary_key");
+
+ if (primary_key < MAX_KEY)
+ {
+ set_indicator_in_key_fields(table->key_info+primary_key);
+ check_fields_in_PF(table->s->part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ clear_indicator_in_key_fields(table->key_info+primary_key);
+ if (unlikely(!all_fields))
+ {
+ my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY");
+ result= TRUE;
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ This function verifies that each unique index contains all the fields
+ of the partition function.
+ This is a temporary limitation that will hopefully be removed after a
+ while.
+ SYNOPSIS
+ check_unique_keys()
+ table TABLE object for which partition fields are set-up
+ RETURN VALUES
+ TRUE Not all fields in partitioning function were part
+ of all unique keys
+ FALSE Ok, all fields of partitioning function were part
+ of unique keys
+*/
+
+static bool check_unique_keys(TABLE *table)
+{
+ bool all_fields, some_fields, result= FALSE;
+ uint keys= table->s->keys, i;
+ DBUG_ENTER("check_unique_keys");
+ for (i= 0; i < keys; i++)
+ {
+ if (table->key_info[i].flags & HA_NOSAME) //Unique index
+ {
+ set_indicator_in_key_fields(table->key_info+i);
+ check_fields_in_PF(table->s->part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ clear_indicator_in_key_fields(table->key_info+i);
+ if (unlikely(!all_fields))
+ {
+ my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX");
+ result= TRUE;
+ break;
+ }
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ An important optimisation is whether a range on a field can select a subset
+ of the partitions.
+ A prerequisite for this to happen is that the PF is a growing function OR
+ a shrinking function.
+ This can never happen for a multi-dimensional PF. Thus this can only happen
+ with PF with at most one field involved in the PF.
+ The idea is that if the function is a growing function and you know that
+ the field of the PF is 4 <= A <= 6 then we can convert this to a range
+ in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the
+ case of RANGE PARTITIONING and LIST PARTITIONING this can be used to
+ calculate a set of partitions rather than scanning all of them.
+ Thus the following prerequisites are there to check if sets of partitions
+ can be found.
+ 1) Only possible for RANGE and LIST partitioning (not for subpartitioning)
+ 2) Only possible if PF only contains 1 field
+ 3) Possible if PF is a growing function of the field
+ 4) Possible if PF is a shrinking function of the field
+ OBSERVATION:
+ 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN
+ f1(A) + f2(A) is a growing function
+ f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0
+ 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN
+ f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0
+ 3) IF A is a growing function then a function f(A) that removes the
+ least significant portion of A is a growing function
+ E.g. DATE(datetime) is a growing function
+ MONTH(datetime) is not a growing/shrinking function
+ 4) IF f1(A) is a growing function and f2(A) is a growing function THEN
+ f1(f2(A)) and f2(f1(A)) are also growing functions
+ 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN
+ f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function
+ 6) f1(A) = A is a growing function
+ 7) f1(A) = A*a + b (where a and b are constants) is a growing function
+
+ By analysing the item tree of the PF we can use these deductions to
+ derive whether the PF is a growing function, a shrinking function or
+ neither of them.
+
+ If the PF is range capable then a flag is set on the table object
+ indicating that ranges on the field of the PF can also be used to
+ deduce a set of partitions when the fields of the PF are not all
+ fully bound.
+ SYNOPSIS
+ check_range_capable_PF()
+ table TABLE object for which partition fields are set-up
+ DESCRIPTION
+ Support for this is not implemented yet.
+*/
+
+void check_range_capable_PF(TABLE *table)
+{
+ DBUG_ENTER("check_range_capable_PF");
+ DBUG_VOID_RETURN;
+}
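+
+/*
+  A hypothetical example of the reasoning above: with PF(A) = A * 10
+  (growing by observation 7) the bound 4 <= A <= 6 translates to
+  40 <= PF(A) <= 60, so for RANGE partitioning only partitions whose
+  intervals intersect [40, 60] would need to be scanned.
+*/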
+
+
+/*
+ Set up partition key maps
+ SYNOPSIS
+ set_up_partition_key_maps()
+ table TABLE object for which partition fields are set-up
+ part_info Reference to partitioning data structure
+ RETURN VALUES
+ None
+ DESCRIPTION
+ This function sets up a couple of key maps to be able to quickly check
+ if an index can ever be used to deduce the partition fields or even
+ a part of the fields of the partition function.
+ We set up the following key_map's.
+ PF = Partition Function
+ 1) All fields of the PF are set even by equality on the first fields
+ in the key
+ 2) All fields of the PF are set if all fields of the key are set
+ 3) At least one field in the PF is set if all fields of the key are set
+ 4) At least one field in the PF is part of the key
+*/
+
+static void set_up_partition_key_maps(TABLE *table,
+ partition_info *part_info)
+{
+ uint keys= table->s->keys, i;
+ bool all_fields, some_fields;
+ DBUG_ENTER("set_up_partition_key_maps");
+
+ part_info->all_fields_in_PF.clear_all();
+ part_info->all_fields_in_PPF.clear_all();
+ part_info->all_fields_in_SPF.clear_all();
+ part_info->some_fields_in_PF.clear_all();
+ for (i= 0; i < keys; i++)
+ {
+ set_indicator_in_key_fields(table->key_info+i);
+ check_fields_in_PF(part_info->full_part_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_PF.set_bit(i);
+ if (some_fields)
+ part_info->some_fields_in_PF.set_bit(i);
+ if (is_sub_partitioned(part_info))
+ {
+ check_fields_in_PF(part_info->part_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_PPF.set_bit(i);
+ check_fields_in_PF(part_info->subpart_field_array,
+ &all_fields, &some_fields);
+ if (all_fields)
+ part_info->all_fields_in_SPF.set_bit(i);
+ }
+ clear_indicator_in_key_fields(table->key_info+i);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set-up all function pointers for calculation of partition id,
+ subpartition id and the upper part in subpartitioning. This is to speed up
+ execution of get_partition_id, which is executed once for every record
+ to be written or deleted and twice for updates.
+ SYNOPSIS
+ set_up_partition_func_pointers()
+ part_info Reference to partitioning data structure
+*/
+
+static void set_up_partition_func_pointers(partition_info *part_info)
+{
+ if (is_sub_partitioned(part_info))
+ {
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ part_info->get_part_partition_id= get_partition_id_range;
+ if (part_info->list_of_subpart_fields)
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_linear_key;
+ part_info->get_subpartition_id= get_partition_id_linear_key_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_key;
+ part_info->get_subpartition_id= get_partition_id_key_sub;
+ }
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_linear_hash;
+ part_info->get_subpartition_id= get_partition_id_linear_hash_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_range_sub_hash;
+ part_info->get_subpartition_id= get_partition_id_hash_sub;
+ }
+ }
+ }
+ else //LIST Partitioning
+ {
+ part_info->get_part_partition_id= get_partition_id_list;
+ if (part_info->list_of_subpart_fields)
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_linear_key;
+ part_info->get_subpartition_id= get_partition_id_linear_key_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_key;
+ part_info->get_subpartition_id= get_partition_id_key_sub;
+ }
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_linear_hash;
+ part_info->get_subpartition_id= get_partition_id_linear_hash_sub;
+ }
+ else
+ {
+ part_info->get_partition_id= get_partition_id_list_sub_hash;
+ part_info->get_subpartition_id= get_partition_id_hash_sub;
+ }
+ }
+ }
+ }
+ else //No subpartitioning
+ {
+ part_info->get_part_partition_id= NULL;
+ part_info->get_subpartition_id= NULL;
+ if (part_info->part_type == RANGE_PARTITION)
+ part_info->get_partition_id= get_partition_id_range;
+ else if (part_info->part_type == LIST_PARTITION)
+ part_info->get_partition_id= get_partition_id_list;
+ else //HASH partitioning
+ {
+ if (part_info->list_of_part_fields)
+ {
+ if (part_info->linear_hash_ind)
+ part_info->get_partition_id= get_partition_id_linear_key_nosub;
+ else
+ part_info->get_partition_id= get_partition_id_key_nosub;
+ }
+ else
+ {
+ if (part_info->linear_hash_ind)
+ part_info->get_partition_id= get_partition_id_linear_hash_nosub;
+ else
+ part_info->get_partition_id= get_partition_id_hash_nosub;
+ }
+ }
+ }
+}
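+
+/*
+  A hypothetical example of the dispatch above: a table created with
+  PARTITION BY RANGE ... SUBPARTITION BY LINEAR KEY gets
+  get_partition_id= get_partition_id_range_sub_linear_key and
+  get_subpartition_id= get_partition_id_linear_key_sub.
+*/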
+
+
+/*
+ For linear hashing we need a mask which is on the form 2**n - 1 where
+ 2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7.
+ SYNOPSIS
+ set_linear_hash_mask()
+ part_info Reference to partitioning data structure
+ no_parts Number of parts in linear hash partitioning
+*/
+
+static void set_linear_hash_mask(partition_info *part_info, uint no_parts)
+{
+ uint mask;
+ for (mask= 1; mask < no_parts; mask<<=1)
+ ;
+ part_info->linear_hash_mask= mask - 1;
+}
+
+
+/*
+ This function calculates the partition id provided the result of the hash
+ function using linear hashing parameters, mask and number of partitions.
+ SYNOPSIS
+ get_part_id_from_linear_hash()
+ hash_value Hash value calculated by HASH function or KEY function
+ mask Mask calculated previously by set_linear_hash_mask
+ no_parts Number of partitions in HASH partitioned part
+ RETURN VALUE
+ part_id The calculated partition identity (starting at 0)
+ DESCRIPTION
+ The partition is calculated according to the theory of linear hashing.
+ See e.g. Linear hashing: a new tool for file and table addressing,
+ Reprinted from VLDB-80 in Readings in Database Systems, 2nd ed., M. Stonebraker
+ (ed.), Morgan Kaufmann 1994.
+*/
+
+static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
+ uint no_parts)
+{
+ uint32 part_id= (uint32)(hash_value & mask);
+ if (part_id >= no_parts)
+ {
+ uint new_mask= ((mask + 1) >> 1) - 1;
+ part_id= hash_value & new_mask;
+ }
+ return part_id;
+}
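+
+/*
+  A worked example of the calculation above: with no_parts= 6,
+  set_linear_hash_mask gives mask= 7. A hash value of 5 yields
+  5 & 7 = 5 < 6, so partition 5 is used directly. A hash value of 6
+  yields 6 & 7 = 6 >= 6, so the smaller mask ((7 + 1) >> 1) - 1 = 3
+  is applied and the partition becomes 6 & 3 = 2.
+*/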
+
+/*
+ This function is called as part of opening the table by opening the .frm
+ file. Doing this is also part of CREATE TABLE, so it is quite permissible
+ that errors due to erroneous syntax aren't found until we come here.
+ A user referencing a non-existent field of the table is one such example
+ of an error that is not discovered until this point.
+ SYNOPSIS
+ fix_partition_func()
+ thd The thread object
+ name The name of the partitioned table
+ table TABLE object for which partition fields are set-up
+ RETURN VALUE
+ TRUE Error
+ FALSE Success
+ DESCRIPTION
+ The name parameter contains the full table name and is used to get the
+ database name of the table which is used to set-up a correct
+ TABLE_LIST object for use in fix_fields.
+*/
+
+bool fix_partition_func(THD *thd, const char* name, TABLE *table)
+{
+ bool result= TRUE;
+ uint dir_length, home_dir_length;
+ TABLE_LIST tables;
+ TABLE_SHARE *share= table->s;
+ char db_name_string[FN_REFLEN];
+ char* db_name;
+ partition_info *part_info= share->part_info;
+ ulong save_set_query_id= thd->set_query_id;
+ DBUG_ENTER("fix_partition_func");
+
+ thd->set_query_id= 0;
+ /*
+ Set up the TABLE_LIST object to be a list with a single table. Zero
+ the object to create NULL pointers, set alias and real name to the
+ table name, and get the database name from the file name.
+ */
+
+ bzero((void*)&tables, sizeof(TABLE_LIST));
+ tables.alias= tables.table_name= (char*)share->table_name;
+ tables.table= table;
+ tables.next_local= 0;
+ tables.next_name_resolution_table= 0;
+ strmov(db_name_string, name);
+ dir_length= dirname_length(db_name_string);
+ db_name_string[dir_length - 1]= 0;
+ home_dir_length= dirname_length(db_name_string);
+ db_name= &db_name_string[home_dir_length];
+ tables.db= db_name;
+
+ if (is_sub_partitioned(part_info))
+ {
+ DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION);
+ /*
+ Subpartition is defined. We need to verify that subpartitioning
+ function is correct.
+ */
+ if (part_info->linear_hash_ind)
+ set_linear_hash_mask(part_info, part_info->no_subparts);
+ if (part_info->list_of_subpart_fields)
+ {
+ List_iterator<char> it(part_info->subpart_field_list);
+ if (unlikely(handle_list_of_fields(it, table, part_info, TRUE)))
+ goto end;
+ }
+ else
+ {
+ if (unlikely(fix_fields_part_func(thd, &tables,
+ part_info->subpart_expr, part_info, TRUE)))
+ goto end;
+ if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0),
+ "SUBPARTITION");
+ goto end;
+ }
+ }
+ }
+ DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION);
+ /*
+ Partition is defined. We need to verify that partitioning
+ function is correct.
+ */
+ if (part_info->part_type == HASH_PARTITION)
+ {
+ if (part_info->linear_hash_ind)
+ set_linear_hash_mask(part_info, part_info->no_parts);
+ if (part_info->list_of_part_fields)
+ {
+ List_iterator<char> it(part_info->part_field_list);
+ if (unlikely(handle_list_of_fields(it, table, part_info, FALSE)))
+ goto end;
+ }
+ else
+ {
+ if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+ part_info, FALSE)))
+ goto end;
+ if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+ goto end;
+ }
+ part_info->part_result_type= INT_RESULT;
+ }
+ }
+ else
+ {
+ char *error_str;
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ error_str= range_str;
+ if (unlikely(check_range_constants(part_info)))
+ goto end;
+ }
+ else if (part_info->part_type == LIST_PARTITION)
+ {
+ error_str= list_str;
+ if (unlikely(check_list_constants(part_info)))
+ goto end;
+ }
+ else
+ {
+ DBUG_ASSERT(0);
+ my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0));
+ goto end;
+ }
+ if (unlikely(part_info->no_parts < 1))
+ {
+ my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str);
+ goto end;
+ }
+ if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+ part_info, FALSE)))
+ goto end;
+ if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+ {
+ my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+ goto end;
+ }
+ }
+ if (unlikely(create_full_part_field_array(table, part_info)))
+ goto end;
+ if (unlikely(check_primary_key(table)))
+ goto end;
+ if (unlikely((!table->file->partition_flags() & HA_CAN_PARTITION_UNIQUE) &&
+ check_unique_keys(table)))
+ goto end;
+ check_range_capable_PF(table);
+ set_up_partition_key_maps(table, part_info);
+ set_up_partition_func_pointers(part_info);
+ result= FALSE;
+end:
+ thd->set_query_id= save_set_query_id;
+ DBUG_RETURN(result);
+}
+
+
+/*
+ The code below is support routines for the reverse parsing of the
+ partitioning syntax. This feature is very useful to generate syntax for
+ all default values to avoid all default checking when opening the frm
+ file. It is also used when altering the partitioning by use of various
+ ALTER TABLE commands. Finally it is used for SHOW CREATE TABLE.
+*/
+
+static int add_write(File fptr, const char *buf, uint len)
+{
+ uint len_written= my_write(fptr, buf, len, MYF(0));
+ if (likely(len == len_written))
+ return 0;
+ else
+ return 1;
+}
+
+static int add_string(File fptr, const char *string)
+{
+ return add_write(fptr, string, strlen(string));
+}
+
+static int add_string_len(File fptr, const char *string, uint len)
+{
+ return add_write(fptr, string, len);
+}
+
+static int add_space(File fptr)
+{
+ return add_string(fptr, space_str);
+}
+
+static int add_comma(File fptr)
+{
+ return add_string(fptr, comma_str);
+}
+
+static int add_equal(File fptr)
+{
+ return add_string(fptr, equal_str);
+}
+
+static int add_end_parenthesis(File fptr)
+{
+ return add_string(fptr, end_paren_str);
+}
+
+static int add_begin_parenthesis(File fptr)
+{
+ return add_string(fptr, begin_paren_str);
+}
+
+static int add_part_key_word(File fptr, const char *key_string)
+{
+ int err= add_string(fptr, key_string);
+ err+= add_space(fptr);
+ return err + add_begin_parenthesis(fptr);
+}
+
+static int add_hash(File fptr)
+{
+ return add_part_key_word(fptr, hash_str);
+}
+
+static int add_partition(File fptr)
+{
+ strxmov(buff, part_str, space_str, NullS);
+ return add_string(fptr, buff);
+}
+
+static int add_subpartition(File fptr)
+{
+ int err= add_string(fptr, sub_str);
+ return err + add_partition(fptr);
+}
+
+static int add_partition_by(File fptr)
+{
+ strxmov(buff, part_str, space_str, by_str, space_str, NullS);
+ return add_string(fptr, buff);
+}
+
+static int add_subpartition_by(File fptr)
+{
+ int err= add_string(fptr, sub_str);
+ return err + add_partition_by(fptr);
+}
+
+static int add_key_partition(File fptr, List<char> field_list)
+{
+ uint i, no_fields;
+ int err;
+ List_iterator<char> part_it(field_list);
+ err= add_part_key_word(fptr, key_str);
+ no_fields= field_list.elements;
+ i= 0;
+ do
+ {
+ const char *field_str= part_it++;
+ err+= add_string(fptr, field_str);
+ if (i != (no_fields-1))
+ err+= add_comma(fptr);
+ } while (++i < no_fields);
+ return err;
+}
+
+static int add_int(File fptr, longlong number)
+{
+ llstr(number, buff);
+ return add_string(fptr, buff);
+}
+
+static int add_keyword_string(File fptr, const char *keyword,
+ const char *keystr)
+{
+ int err= add_string(fptr, keyword);
+ err+= add_space(fptr);
+ err+= add_equal(fptr);
+ err+= add_space(fptr);
+ err+= add_string(fptr, keystr);
+ return err + add_space(fptr);
+}
+
+static int add_keyword_int(File fptr, const char *keyword, longlong num)
+{
+ int err= add_string(fptr, keyword);
+ err+= add_space(fptr);
+ err+= add_equal(fptr);
+ err+= add_space(fptr);
+ err+= add_int(fptr, num);
+ return err + add_space(fptr);
+}
+
+static int add_engine(File fptr, enum db_type engine_type)
+{
+ const char *engine_str= ha_get_storage_engine(engine_type);
+ int err= add_string(fptr, "ENGINE = ");
+ return err + add_string(fptr, engine_str);
+}
+
+static int add_partition_options(File fptr, partition_element *p_elem)
+{
+ int err= 0;
+ if (p_elem->tablespace_name)
+ err+= add_keyword_string(fptr,"TABLESPACE",p_elem->tablespace_name);
+ if (p_elem->nodegroup_id != UNDEF_NODEGROUP)
+ err+= add_keyword_int(fptr,"NODEGROUP",(longlong)p_elem->nodegroup_id);
+ if (p_elem->part_max_rows)
+ err+= add_keyword_int(fptr,"MAX_ROWS",(longlong)p_elem->part_max_rows);
+ if (p_elem->part_min_rows)
+ err+= add_keyword_int(fptr,"MIN_ROWS",(longlong)p_elem->part_min_rows);
+ if (p_elem->data_file_name)
+ err+= add_keyword_string(fptr,"DATA DIRECTORY",p_elem->data_file_name);
+ if (p_elem->index_file_name)
+ err+= add_keyword_string(fptr,"INDEX DIRECTORY",p_elem->index_file_name);
+ if (p_elem->part_comment)
+ err+= add_keyword_string(fptr, "COMMENT",p_elem->part_comment);
+ return err + add_engine(fptr,p_elem->engine_type);
+}
+
+static int add_partition_values(File fptr, partition_info *part_info,
+ partition_element *p_elem)
+{
+ int err= 0;
+ if (part_info->part_type == RANGE_PARTITION)
+ {
+ err+= add_string(fptr, "VALUES LESS THAN ");
+ if (p_elem->range_value != LONGLONG_MAX)
+ {
+ err+= add_begin_parenthesis(fptr);
+ err+= add_int(fptr, p_elem->range_value);
+ err+= add_end_parenthesis(fptr);
+ }
+ else
+ err+= add_string(fptr, "MAXVALUE");
+ }
+ else if (part_info->part_type == LIST_PARTITION)
+ {
+ uint i;
+ List_iterator<longlong> list_val_it(p_elem->list_val_list);
+ err+= add_string(fptr, "VALUES IN ");
+ uint no_items= p_elem->list_val_list.elements;
+ err+= add_begin_parenthesis(fptr);
+ i= 0;
+ do
+ {
+ longlong *list_value= list_val_it++;
+ err+= add_int(fptr, *list_value);
+ if (i != (no_items-1))
+ err+= add_comma(fptr);
+ } while (++i < no_items);
+ err+= add_end_parenthesis(fptr);
+ }
+ return err + add_space(fptr);
+}
+
+/*
+ Generate the partition syntax from the partition data structure.
+ Useful for supporting generation of defaults, SHOW CREATE TABLE
+ and easy partition management.
+ SYNOPSIS
+ generate_partition_syntax()
+ part_info The partitioning data structure
+ buf_length A pointer to the returned buffer length
+ use_sql_alloc Allocate buffer from sql_alloc if true
+ otherwise use my_malloc
+ RETURN VALUES
+ NULL error
+ buf, buf_length Buffer and its length
+ DESCRIPTION
+ Here we will generate the full syntax for the given command where all
+ defaults have been expanded. By doing so it is also possible to
+ make lots of correctness checks along the way.
+ This code is also reused for SHOW CREATE TABLE and for all types of
+ ALTER TABLE commands focusing on changing the PARTITION structure
+ in any fashion.
+
+ The implementation writes the syntax to a temporary file (essentially
+ an abstraction of a dynamic array) and if all writes go well it
+ allocates a buffer, writes the syntax into it and returns it.
+
+    As a security precaution the file is deleted before anything is written
+    into it. This means that no other process on the machine can open and
+    read the file while this processing is ongoing.
+
+ The code is optimised for minimal code size since it is not used in any
+ common queries.
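+
+    As an example, a table created with PARTITION BY RANGE (a) and two
+    partitions could be expanded to something like (modulo whitespace):
+
+     PARTITION BY RANGE (a)
+    (PARTITION p0 VALUES LESS THAN (10) ENGINE = MyISAM,
+     PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = MyISAM)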
+*/
+
+char *generate_partition_syntax(partition_info *part_info,
+ uint *buf_length,
+ bool use_sql_alloc)
+{
+ uint i,j, no_parts, no_subparts;
+ partition_element *part_elem;
+ ulonglong buffer_length;
+ char path[FN_REFLEN];
+ int err= 0;
+ DBUG_ENTER("generate_partition_syntax");
+ File fptr;
+ char *buf= NULL; //Return buffer
+ const char *file_name;
+ sprintf(path, "%s_%lx_%lx", "part_syntax", current_pid,
+ current_thd->thread_id);
+ fn_format(path,path,mysql_tmpdir,".psy", MY_REPLACE_EXT);
+ file_name= &path[0];
+ DBUG_PRINT("info", ("File name = %s", file_name));
+ if (unlikely(((fptr= my_open(file_name,O_CREAT|O_RDWR, MYF(MY_WME))) == -1)))
+ DBUG_RETURN(NULL);
+#if !defined(MSDOS) && !defined(__WIN__) && !defined(__EMX__) && !defined(OS2)
+  my_delete(file_name, MYF(0));
+#endif
+ err+= add_space(fptr);
+ err+= add_partition_by(fptr);
+ switch (part_info->part_type)
+ {
+ case RANGE_PARTITION:
+ err+= add_part_key_word(fptr, range_str);
+ break;
+ case LIST_PARTITION:
+ err+= add_part_key_word(fptr, list_str);
+ break;
+ case HASH_PARTITION:
+ if (part_info->linear_hash_ind)
+ err+= add_string(fptr, "LINEAR ");
+ if (part_info->list_of_part_fields)
+ err+= add_key_partition(fptr, part_info->part_field_list);
+ else
+ err+= add_hash(fptr);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ /* We really shouldn't get here, no use in continuing from here */
+ current_thd->fatal_error();
+ DBUG_RETURN(NULL);
+ }
+ if (part_info->part_expr)
+ err+= add_string_len(fptr, part_info->part_func_string,
+ part_info->part_func_len);
+ err+= add_end_parenthesis(fptr);
+ err+= add_space(fptr);
+ if (is_sub_partitioned(part_info))
+ {
+ err+= add_subpartition_by(fptr);
+ /* Must be hash partitioning for subpartitioning */
+ if (part_info->list_of_subpart_fields)
+ err+= add_key_partition(fptr, part_info->subpart_field_list);
+ else
+ err+= add_hash(fptr);
+ if (part_info->subpart_expr)
+ err+= add_string_len(fptr, part_info->subpart_func_string,
+ part_info->subpart_func_len);
+ err+= add_end_parenthesis(fptr);
+ err+= add_space(fptr);
+ }
+ err+= add_begin_parenthesis(fptr);
+ List_iterator<partition_element> part_it(part_info->partitions);
+ no_parts= part_info->no_parts;
+ no_subparts= part_info->no_subparts;
+ i= 0;
+ do
+ {
+ part_elem= part_it++;
+ err+= add_partition(fptr);
+ err+= add_string(fptr, part_elem->partition_name);
+ err+= add_space(fptr);
+ err+= add_partition_values(fptr, part_info, part_elem);
+ if (!is_sub_partitioned(part_info))
+ err+= add_partition_options(fptr, part_elem);
+ if (is_sub_partitioned(part_info))
+ {
+ err+= add_space(fptr);
+ err+= add_begin_parenthesis(fptr);
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ j= 0;
+ do
+ {
+ part_elem= sub_it++;
+ err+= add_subpartition(fptr);
+ err+= add_string(fptr, part_elem->partition_name);
+ err+= add_space(fptr);
+ err+= add_partition_options(fptr, part_elem);
+ if (j != (no_subparts-1))
+ {
+ err+= add_comma(fptr);
+ err+= add_space(fptr);
+ }
+ else
+ err+= add_end_parenthesis(fptr);
+ } while (++j < no_subparts);
+ }
+ if (i != (no_parts-1))
+ {
+ err+= add_comma(fptr);
+ err+= add_space(fptr);
+ }
+ else
+ err+= add_end_parenthesis(fptr);
+ } while (++i < no_parts);
+ if (err)
+ goto close_file;
+ buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0));
+ if (unlikely(buffer_length == MY_FILEPOS_ERROR))
+ goto close_file;
+ if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR))
+ goto close_file;
+ *buf_length= (uint)buffer_length;
+ if (use_sql_alloc)
+ buf= sql_alloc(*buf_length+1);
+ else
+ buf= my_malloc(*buf_length+1, MYF(MY_WME));
+ if (!buf)
+ goto close_file;
+
+ if (unlikely(my_read(fptr, buf, *buf_length, MYF(MY_FNABP))))
+ {
+ if (!use_sql_alloc)
+ my_free(buf, MYF(0));
+ else
+ buf= NULL;
+ }
+ else
+ buf[*buf_length]= 0;
+
+close_file:
+  /*
+    Delete the file before closing to ensure the file doesn't get synched
+    to disk unnecessarily. We only used the file system as a dynamic array
+    implementation so we are not really interested in having the file
+    present on disk.
+    This is not possible on Windows, so there it has to be done after
+    closing the file. On Unix we instead delete immediately after opening,
+    to ensure that no other process can read the information written into
+    the file.
+  */
+ my_close(fptr, MYF(0));
+#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2)
+ my_delete(file_name, MYF(0));
+#endif
+ DBUG_RETURN(buf);
+}
+
+
+/*
+ Check if partition key fields are modified and if it can be handled by the
+ underlying storage engine.
+ SYNOPSIS
+ partition_key_modified
+ table TABLE object for which partition fields are set-up
+    fields                A list of the fields to be modified
+ RETURN VALUES
+ TRUE Need special handling of UPDATE
+ FALSE Normal UPDATE handling is ok
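+  DESCRIPTION
+    For example, for a table using PARTITION BY HASH (a), an
+    UPDATE t SET a= ... returns TRUE unless the handler reports
+    HA_CAN_UPDATE_PARTITION_KEY, since changing a may move the row to
+    another partition.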
+*/
+
+bool partition_key_modified(TABLE *table, List<Item> &fields)
+{
+ List_iterator_fast<Item> f(fields);
+ partition_info *part_info= table->s->part_info;
+ Item_field *item_field;
+ DBUG_ENTER("partition_key_modified");
+ if (!part_info)
+ DBUG_RETURN(FALSE);
+ if (table->file->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY)
+ DBUG_RETURN(FALSE);
+ f.rewind();
+ while ((item_field=(Item_field*) f++))
+ if (item_field->field->flags & FIELD_IN_PART_FUNC_FLAG)
+ DBUG_RETURN(TRUE);
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+  The next set of functions are used to calculate the partition identity.
+  A handler sets up a variable that corresponds to one of these functions
+  so that it can quickly be called whenever the partition id needs to be
+  calculated based on the record in table->record[0] (or set up to fake
+  that). There are 4 functions for hash partitioning and 2 for RANGE/LIST
+  partitions. In addition there are 4 variants for RANGE subpartitioning
+  and 4 variants for LIST subpartitioning, thus in total there are 14
+  variants of this function.
+
+  We have a set of support functions for these 14 variants. There are 4
+  variants of hash functions, with one support function for each. The KEY
+  partitioning variants use the function calculate_key_value to calculate
+  the hash value based on an array of fields. The linear hash variants use
+  get_part_id_from_linear_hash to get the partition id from the hash value
+  and some parameters calculated from the number of partitions.
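+
+  As a rough sketch of what get_part_id_from_linear_hash computes, assuming
+  linear_hash_mask is the smallest 2^n - 1 covering the number of
+  partitions:
+
+    part_id= hash_value & mask;
+    if (part_id >= no_parts)
+      part_id= hash_value & (mask >> 1);
+
+  This gives the linear hashing property that adding partitions only moves
+  rows out of the partitions that are split.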
+*/
+
+/*
+ Calculate hash value for KEY partitioning using an array of fields.
+ SYNOPSIS
+ calculate_key_value()
+ field_array An array of the fields in KEY partitioning
+ RETURN VALUE
+ hash_value calculated
+ DESCRIPTION
+ Uses the hash function on the character set of the field. Integer and
+ floating point fields use the binary character set by default.
+*/
+
+static uint32 calculate_key_value(Field **field_array)
+{
+ uint32 hashnr= 0;
+ ulong nr2= 4;
+ do
+ {
+ Field *field= *field_array;
+ if (field->is_null())
+ {
+ hashnr^= (hashnr << 1) | 1;
+ }
+ else
+ {
+ uint len= field->pack_length();
+ ulong nr1= 1;
+ CHARSET_INFO *cs= field->charset();
+ cs->coll->hash_sort(cs, (uchar*)field->ptr, len, &nr1, &nr2);
+ hashnr^= (uint32)nr1;
+ }
+ } while (*(++field_array));
+ return hashnr;
+}
+
+
+/*
+ A simple support function to calculate part_id given local part and
+ sub part.
+ SYNOPSIS
+ get_part_id_for_sub()
+ loc_part_id Local partition id
+ sub_part_id Subpartition id
+ no_subparts Number of subparts
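+  DESCRIPTION
+    For example, with no_subparts= 4, loc_part_id= 2 and sub_part_id= 1
+    the global partition id becomes 2 * 4 + 1 = 9.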
+*/
+
+inline
+static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
+ uint no_subparts)
+{
+ return (uint32)((loc_part_id * no_subparts) + sub_part_id);
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY HASH
+ SYNOPSIS
+ get_part_id_hash()
+ no_parts Number of hash partitions
+ part_expr Item tree of hash function
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_hash(uint no_parts,
+ Item *part_expr)
+{
+ DBUG_ENTER("get_part_id_hash");
+ DBUG_RETURN((uint32)(part_expr->val_int() % no_parts));
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY LINEAR HASH
+ SYNOPSIS
+ get_part_id_linear_hash()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ no_parts Number of hash partitions
+ part_expr Item tree of hash function
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_hash(partition_info *part_info,
+ uint no_parts,
+ Item *part_expr)
+{
+ DBUG_ENTER("get_part_id_linear_hash");
+ DBUG_RETURN(get_part_id_from_linear_hash(part_expr->val_int(),
+ part_info->linear_hash_mask,
+ no_parts));
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY KEY
+ SYNOPSIS
+ get_part_id_key()
+    field_array  Array of fields for PARTITION KEY
+ no_parts Number of KEY partitions
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_key(Field **field_array,
+ uint no_parts)
+{
+ DBUG_ENTER("get_part_id_key");
+  DBUG_RETURN(calculate_key_value(field_array) % no_parts);
+}
+
+
+/*
+ Calculate part_id for (SUB)PARTITION BY LINEAR KEY
+ SYNOPSIS
+ get_part_id_linear_key()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+    field_array  Array of fields for PARTITION KEY
+ no_parts Number of KEY partitions
+ RETURN VALUE
+ Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_key(partition_info *part_info,
+ Field **field_array,
+ uint no_parts)
+{
+  DBUG_ENTER("get_part_id_linear_key");
+ DBUG_RETURN(get_part_id_from_linear_hash(calculate_key_value(field_array),
+ part_info->linear_hash_mask,
+ no_parts));
+}
+
+/*
+ This function is used to calculate the partition id where all partition
+ fields have been prepared to point to a record where the partition field
+ values are bound.
+ SYNOPSIS
+ get_partition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ part_id The partition id is returned through this pointer
+ RETURN VALUE
+ part_id
+ return TRUE means that the fields of the partition function didn't fit
+ into any partition and thus the values of the PF-fields are not allowed.
+ DESCRIPTION
+ A routine used from write_row, update_row and delete_row from any
+ handler supporting partitioning. It is also a support routine for
+ get_partition_set used to find the set of partitions needed to scan
+ for a certain index scan or full table scan.
+
+    There are actually 14 different variants of this function, which are
+    called through a function pointer.
+
+ get_partition_id_list
+ get_partition_id_range
+ get_partition_id_hash_nosub
+ get_partition_id_key_nosub
+ get_partition_id_linear_hash_nosub
+ get_partition_id_linear_key_nosub
+ get_partition_id_range_sub_hash
+ get_partition_id_range_sub_key
+ get_partition_id_range_sub_linear_hash
+ get_partition_id_range_sub_linear_key
+ get_partition_id_list_sub_hash
+ get_partition_id_list_sub_key
+ get_partition_id_list_sub_linear_hash
+ get_partition_id_list_sub_linear_key
+*/
+
+/*
+  This function is used to calculate the main partition to use in the case
+  of subpartitioning, when we don't know enough to compute the complete
+  partition identity.
+ SYNOPSIS
+ get_part_partition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ part_id The partition id is returned through this pointer
+ RETURN VALUE
+ part_id
+ return TRUE means that the fields of the partition function didn't fit
+ into any partition and thus the values of the PF-fields are not allowed.
+ DESCRIPTION
+
+    There are actually 6 different variants of this function, which are
+    called through a function pointer.
+
+ get_partition_id_list
+ get_partition_id_range
+ get_partition_id_hash_nosub
+ get_partition_id_key_nosub
+ get_partition_id_linear_hash_nosub
+ get_partition_id_linear_key_nosub
+*/
+
+
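+/*
+  Find the partition id for a LIST-partitioned table by binary search in
+  the sorted list_array. As a worked example, with list values (4, 11, 17)
+  a part_func_value of 11 converges on list_index 1 and that entry's
+  partition id is returned, while a value of 12 exhausts the search
+  interval and TRUE (no matching partition) is returned.
+*/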
+bool get_partition_id_list(partition_info *part_info,
+ uint32 *part_id)
+{
+ DBUG_ENTER("get_partition_id_list");
+ LIST_PART_ENTRY *list_array= part_info->list_array;
+ uint list_index;
+ longlong list_value;
+ uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
+ longlong part_func_value= part_info->part_expr->val_int();
+ while (max_list_index >= min_list_index)
+ {
+ list_index= (max_list_index + min_list_index) >> 1;
+ list_value= list_array[list_index].list_value;
+ if (list_value < part_func_value)
+ min_list_index= list_index + 1;
+ else if (list_value > part_func_value)
+ max_list_index= list_index - 1;
+    else
+    {
+      *part_id= (uint32)list_array[list_index].partition_id;
+      DBUG_RETURN(FALSE);
+    }
+ }
+ *part_id= 0;
+ DBUG_RETURN(TRUE);
+}
+
+
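+/*
+  Find the partition id for a RANGE-partitioned table by binary search in
+  range_int_array, which holds the ascending VALUES LESS THAN bounds. As a
+  worked example, with bounds (10, 20, MAXVALUE) a part_func_value of 9
+  maps to partition 0 and 15 maps to partition 1, while the last partition
+  accepts any remaining value since its bound is MAXVALUE.
+*/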
+bool get_partition_id_range(partition_info *part_info,
+ uint32 *part_id)
+{
+  DBUG_ENTER("get_partition_id_range");
+ longlong *range_array= part_info->range_int_array;
+ uint max_partition= part_info->no_parts - 1;
+ uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
+ longlong part_func_value= part_info->part_expr->val_int();
+ while (max_part_id > min_part_id)
+ {
+ loc_part_id= (max_part_id + min_part_id + 1) >> 1;
+ if (range_array[loc_part_id] < part_func_value)
+ min_part_id= loc_part_id + 1;
+ else
+ max_part_id= loc_part_id - 1;
+ }
+ loc_part_id= max_part_id;
+  if (part_func_value >= range_array[loc_part_id] &&
+      loc_part_id != max_partition)
+    loc_part_id++;
+  *part_id= (uint32)loc_part_id;
+  if (loc_part_id == max_partition &&
+      range_array[loc_part_id] != LONGLONG_MAX &&
+      part_func_value >= range_array[loc_part_id])
+    DBUG_RETURN(TRUE);
+ DBUG_RETURN(FALSE);
+}
+
+bool get_partition_id_hash_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr);
+ return FALSE;
+}
+
+
+bool get_partition_id_linear_hash_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_linear_hash(part_info, part_info->no_parts,
+ part_info->part_expr);
+ return FALSE;
+}
+
+
+bool get_partition_id_key_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_key(part_info->part_field_array, part_info->no_parts);
+ return FALSE;
+}
+
+
+bool get_partition_id_linear_key_nosub(partition_info *part_info,
+ uint32 *part_id)
+{
+ *part_id= get_part_id_linear_key(part_info,
+ part_info->part_field_array,
+ part_info->no_parts);
+ return FALSE;
+}
+
+
+bool get_partition_id_range_sub_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_hash");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_linear_hash");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+ part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_key");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_range_sub_linear_key");
+ if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_hash");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_linear_hash");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+                                       part_info->subpart_expr);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_key");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+ uint32 *part_id)
+{
+ uint32 loc_part_id, sub_part_id;
+ uint no_subparts;
+ DBUG_ENTER("get_partition_id_list_sub_linear_key");
+ if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ no_subparts= part_info->no_subparts;
+ sub_part_id= get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ no_subparts);
+ *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
+ This function is used to calculate the subpartition id
+ SYNOPSIS
+ get_subpartition_id()
+ part_info A reference to the partition_info struct where all the
+ desired information is given
+ RETURN VALUE
+ part_id
+ The subpartition identity
+ DESCRIPTION
+    A routine used in some SELECTs when only partial knowledge of the
+    partition id is available.
+
+    There are actually 4 different variants of this function, which are
+    called through a function pointer.
+
+ get_partition_id_hash_sub
+ get_partition_id_key_sub
+ get_partition_id_linear_hash_sub
+ get_partition_id_linear_key_sub
+*/
+
+uint32 get_partition_id_hash_sub(partition_info *part_info)
+{
+ return get_part_id_hash(part_info->no_subparts, part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
+{
+ return get_part_id_linear_hash(part_info, part_info->no_subparts,
+ part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_key_sub(partition_info *part_info)
+{
+ return get_part_id_key(part_info->subpart_field_array,
+ part_info->no_subparts);
+}
+
+
+uint32 get_partition_id_linear_key_sub(partition_info *part_info)
+{
+ return get_part_id_linear_key(part_info,
+ part_info->subpart_field_array,
+ part_info->no_subparts);
+}
+
+
+/*
+ Set an indicator on all partition fields that are set by the key
+ SYNOPSIS
+ set_PF_fields_in_key()
+ key_info Information about the index
+ key_length Length of key
+ RETURN VALUE
+ TRUE Found partition field set by key
+ FALSE No partition field set by key
+*/
+
+static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
+{
+ KEY_PART_INFO *key_part;
+ bool found_part_field= FALSE;
+ DBUG_ENTER("set_PF_fields_in_key");
+
+ for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
+ {
+ if (key_part->null_bit)
+ key_length--;
+ if (key_part->type == HA_KEYTYPE_BIT)
+ {
+ if (((Field_bit*)key_part->field)->bit_len)
+ key_length--;
+ }
+    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ key_length-= HA_KEY_BLOB_LENGTH;
+ }
+ if (key_length < key_part->length)
+ break;
+ key_length-= key_part->length;
+ if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
+ {
+ found_part_field= TRUE;
+ key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+ }
+ }
+ DBUG_RETURN(found_part_field);
+}
+
+
+/*
+ We have found that at least one partition field was set by a key, now
+ check if a partition function has all its fields bound or not.
+ SYNOPSIS
+ check_part_func_bound()
+ ptr Array of fields NULL terminated (partition fields)
+ RETURN VALUE
+ TRUE All fields in partition function are set
+ FALSE Not all fields in partition function are set
+*/
+
+static bool check_part_func_bound(Field **ptr)
+{
+ bool result= TRUE;
+ DBUG_ENTER("check_part_func_bound");
+
+ for (; *ptr; ptr++)
+ {
+ if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
+ {
+ result= FALSE;
+ break;
+ }
+ }
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Get the id of the subpartitioning part by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_sub_part_id_from_key()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ RETURN VALUES
+ part_id Subpartition id to use
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers and
+ get the partition identity and restore field pointers afterwards.
+*/
+
+static uint32 get_sub_part_id_from_key(const TABLE *table,byte *buf,
+ KEY *key_info,
+ const key_range *key_spec)
+{
+ byte *rec0= table->record[0];
+ partition_info *part_info= table->s->part_info;
+ uint32 part_id;
+ DBUG_ENTER("get_sub_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ part_id= part_info->get_subpartition_id(part_info);
+ else
+ {
+ Field **part_field_array= part_info->subpart_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ part_id= part_info->get_subpartition_id(part_info);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ DBUG_RETURN(part_id);
+}
+
+/*
+ Get the id of the partitioning part by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_part_id_from_key()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ part_id Partition to use
+ RETURN VALUES
+ TRUE Partition to use not found
+ FALSE Ok, part_id indicates partition to use
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers and
+ get the partition identity and restore field pointers afterwards.
+*/
+bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info,
+ const key_range *key_spec, uint32 *part_id)
+{
+ bool result;
+ byte *rec0= table->record[0];
+ partition_info *part_info= table->s->part_info;
+ DBUG_ENTER("get_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ result= part_info->get_part_partition_id(part_info, part_id);
+ else
+ {
+ Field **part_field_array= part_info->part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ result= part_info->get_part_partition_id(part_info, part_id);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ DBUG_RETURN(result);
+}
+
+/*
+ Get the partitioning id of the full PF by using the key buffer of the
+ index scan.
+ SYNOPSIS
+ get_full_part_id_from_key()
+ table The table object
+ buf A buffer that is used to evaluate the partition function
+ key_info The index object
+ key_spec A key_range containing key and key length
+ part_spec A partition id containing start part and end part
+ RETURN VALUES
+ part_spec
+    No partitions to scan is indicated by start_part > end_part when returning
+ DESCRIPTION
+ Use key buffer to set-up record in buf, move field pointers if needed and
+ get the partition identity and restore field pointers afterwards.
+*/
+
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+ KEY *key_info,
+ const key_range *key_spec,
+ part_id_range *part_spec)
+{
+ bool result;
+ partition_info *part_info= table->s->part_info;
+ byte *rec0= table->record[0];
+ DBUG_ENTER("get_full_part_id_from_key");
+
+ key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+ if (likely(rec0 == buf))
+ result= part_info->get_partition_id(part_info, &part_spec->start_part);
+ else
+ {
+ Field **part_field_array= part_info->full_part_field_array;
+ set_field_ptr(part_field_array, buf, rec0);
+ result= part_info->get_partition_id(part_info, &part_spec->start_part);
+ set_field_ptr(part_field_array, rec0, buf);
+ }
+ part_spec->end_part= part_spec->start_part;
+ if (unlikely(result))
+ part_spec->start_part++;
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Get the set of partitions to use in query.
+ SYNOPSIS
+ get_partition_set()
+ table The table object
+ buf A buffer that can be used to evaluate the partition function
+ index The index of the key used, if MAX_KEY no index used
+ key_spec A key_range containing key and key length
+ part_spec Contains start part, end part and indicator if bitmap is
+ used for which partitions to scan
+ DESCRIPTION
+ This function is called to discover which partitions to use in an index
+ scan or a full table scan.
+ It returns a range of partitions to scan. If there are holes in this
+ range with partitions that are not needed to scan a bit array is used
+ to signal which partitions to use and which not to use.
+ If start_part > end_part at return it means no partition needs to be
+ scanned. If start_part == end_part it always means a single partition
+ needs to be scanned.
+ RETURN VALUE
+ part_spec
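+
+    A sketched use from a handler, following the conventions above:
+
+      part_id_range part_spec;
+      get_partition_set(table, table->record[0], active_index,
+                        &key_spec, &part_spec);
+      if (part_spec.start_part > part_spec.end_part)
+        return HA_ERR_END_OF_FILE; // no partition can contain the key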
+*/
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+ const key_range *key_spec, part_id_range *part_spec)
+{
+ partition_info *part_info= table->s->part_info;
+ uint no_parts= get_tot_partitions(part_info), i, part_id;
+ uint sub_part= no_parts, part_part= no_parts;
+ KEY *key_info= NULL;
+ bool found_part_field= FALSE;
+ DBUG_ENTER("get_partition_set");
+
+ part_spec->use_bit_array= FALSE;
+ part_spec->start_part= 0;
+ part_spec->end_part= no_parts - 1;
+ if ((index < MAX_KEY) &&
+ key_spec->flag == (uint)HA_READ_KEY_EXACT &&
+ part_info->some_fields_in_PF.is_set(index))
+ {
+ key_info= table->key_info+index;
+ /*
+ The index can potentially provide at least one PF-field (field in the
+ partition function). Thus it is interesting to continue our probe.
+ */
+ if (key_spec->length == key_info->key_length)
+ {
+ /*
+ The entire key is set so we can check whether we can immediately
+ derive either the complete PF or if we can derive either
+ the top PF or the subpartitioning PF. This can be established by
+ checking precalculated bits on each index.
+ */
+ if (part_info->all_fields_in_PF.is_set(index))
+ {
+ /*
+ We can derive the exact partition to use, no more than this one
+ is needed.
+ */
+ get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+ DBUG_VOID_RETURN;
+ }
+ else if (is_sub_partitioned(part_info))
+ {
+ if (part_info->all_fields_in_SPF.is_set(index))
+ sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+ else if (part_info->all_fields_in_PPF.is_set(index))
+ {
+ if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
+ {
+ /*
+ The value of the RANGE or LIST partitioning was outside of
+ allowed values. Thus it is certain that the result of this
+ scan will be empty.
+ */
+ part_spec->start_part= no_parts;
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+ }
+ else
+ {
+ /*
+ Set an indicator on all partition fields that are bound.
+ If at least one PF-field was bound it pays off to check whether
+ the PF or PPF or SPF has been bound.
+ (PF = Partition Function, SPF = Subpartition Function and
+ PPF = Partition Function part of subpartitioning)
+ */
+ if ((found_part_field= set_PF_fields_in_key(key_info,
+ key_spec->length)))
+ {
+ if (check_part_func_bound(part_info->full_part_field_array))
+ {
+ /*
+ We were able to bind all fields in the partition function even
+ by using only a part of the key. Calculate the partition to use.
+ */
+ get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+ }
+ else if (check_part_func_bound(part_info->part_field_array))
+ sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+ else if (check_part_func_bound(part_info->subpart_field_array))
+ {
+ if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
+ {
+ part_spec->start_part= no_parts;
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+ }
+ }
+ {
+ /*
+ The next step is to analyse the table condition to see whether any
+ information about which partitions to scan can be derived from there.
+ Currently not implemented.
+ */
+ }
+ /*
+    If we get here, either we have discovered nothing or we have discovered
+    a range of partitions with possible holes in it. We may need a
+    bitvector to take the work further.
+ */
+ if (!(part_part == no_parts && sub_part == no_parts))
+ {
+ /*
+ We can only arrive here if we are using subpartitioning.
+ */
+ if (part_part != no_parts)
+ {
+ /*
+ We know the top partition and need to scan all underlying
+ subpartitions. This is a range without holes.
+ */
+ DBUG_ASSERT(sub_part == no_parts);
+      part_spec->start_part= part_part * part_info->no_subparts;
+ part_spec->end_part= part_spec->start_part+part_info->no_subparts - 1;
+ }
+ else
+ {
+ DBUG_ASSERT(sub_part != no_parts);
+ part_spec->use_bit_array= TRUE;
+ part_spec->start_part= sub_part;
+ part_spec->end_part=sub_part+
+ (part_info->no_subparts*(part_info->no_parts-1));
+ for (i= 0, part_id= sub_part; i < part_info->no_parts;
+ i++, part_id+= part_info->no_subparts)
+        ; // TODO: set bit part_id in the bit array (not yet implemented)
+ }
+ }
+ if (found_part_field)
+ clear_indicator_in_key_fields(key_info);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+  If the table is partitioned we will read the partition info from the
+  .frm file here.
+ -------------------------------
+ | Fileinfo 64 bytes |
+ -------------------------------
+ | Formnames 7 bytes |
+ -------------------------------
+ | Not used 4021 bytes |
+ -------------------------------
+ | Keyinfo + record |
+ -------------------------------
+ | Padded to next multiple |
+ | of IO_SIZE |
+ -------------------------------
+ | Forminfo 288 bytes |
+ -------------------------------
+ | Screen buffer, to make |
+ | field names readable |
+ -------------------------------
+ | Packed field info |
+ | 17 + 1 + strlen(field_name) |
+ | + 1 end of file character |
+ -------------------------------
+ | Partition info |
+ -------------------------------
+  The length of the partition info is stored in Fileinfo[55-58].
+
+ Read the partition syntax from the frm file and parse it to get the
+ data structures of the partitioning.
+ SYNOPSIS
+ mysql_unpack_partition()
+ file File reference of frm file
+ thd Thread object
+ part_info_len Length of partition syntax
+ table Table object of partitioned table
+ RETURN VALUE
+ TRUE Error
+    FALSE Success
+ DESCRIPTION
+ Read the partition syntax from the current position in the frm file.
+ Initiate a LEX object, save the list of item tree objects to free after
+ the query is done. Set-up partition info object such that parser knows
+ it is called from internally. Call parser to create data structures
+ (best possible recreation of item trees and so forth since there is no
+ serialisation of these objects other than in parseable text format).
+ We need to save the text of the partition functions since it is not
+ possible to retrace this given an item tree.
+*/
+
+bool mysql_unpack_partition(File file, THD *thd, uint part_info_len,
+ TABLE* table)
+{
+ Item *thd_free_list= thd->free_list;
+ bool result= TRUE;
+ uchar* part_buf= NULL;
+ partition_info *part_info;
+ LEX *old_lex= thd->lex, lex;
+ DBUG_ENTER("mysql_unpack_partition");
+ if (read_string(file, (gptr*)&part_buf, part_info_len))
+ DBUG_RETURN(result);
+ thd->lex= &lex;
+ lex_start(thd, part_buf, part_info_len);
+ /*
+ We need to use the current SELECT_LEX since I need to keep the
+ Name_resolution_context object which is referenced from the
+ Item_field objects.
+ This is not a nice solution since if the parser uses current_select
+ for anything else it will corrupt the current LEX object.
+ */
+ thd->lex->current_select= old_lex->current_select;
+ /*
+    All Items created are put into a free list on the THD object. This list
+ is used to free all Item objects after completing a query. We don't
+ want that to happen with the Item tree created as part of the partition
+ info. This should be attached to the table object and remain so until
+ the table object is released.
+    Thus we temporarily move the current list away and start a new list
+    that we then save in the partition info structure.
+ */
+ thd->free_list= NULL;
+ lex.part_info= (partition_info*)1; //Indicate yyparse from this place
+ if (yyparse((void*)thd) || thd->is_fatal_error)
+ {
+ free_items(thd->free_list);
+ goto end;
+ }
+ part_info= lex.part_info;
+ table->s->part_info= part_info;
+ part_info->item_free_list= thd->free_list;
+
+ {
+ /*
+      This code part allocates memory for the serialised item information for
+      the partition functions. In most cases this is not needed, but if the
+      table is used for SHOW CREATE TABLE or an ALTER TABLE that modifies
+      partition information it is needed, and the info is lost if we don't
+      save it here. So unfortunately we have to do it here even though in
+      most cases it is not needed. This is a consequence of item trees not
+      being serialisable.
+ */
+ uint part_func_len= part_info->part_func_len;
+ uint subpart_func_len= part_info->subpart_func_len;
+ char *part_func_string, *subpart_func_string= NULL;
+ if (!((part_func_string= sql_alloc(part_func_len))) ||
+ (subpart_func_len &&
+ !((subpart_func_string= sql_alloc(subpart_func_len)))))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), part_func_len);
+ free_items(thd->free_list);
+ part_info->item_free_list= 0;
+ goto end;
+ }
+ memcpy(part_func_string, part_info->part_func_string, part_func_len);
+ if (subpart_func_len)
+ memcpy(subpart_func_string, part_info->subpart_func_string,
+ subpart_func_len);
+ part_info->part_func_string= part_func_string;
+ part_info->subpart_func_string= subpart_func_string;
+ }
+
+ result= FALSE;
+end:
+ thd->free_list= thd_free_list;
+ x_free((gptr)part_buf);
+ thd->lex= old_lex;
+ DBUG_RETURN(result);
+}
+#endif
+
+/*
+ Prepare for calling val_int on partition function by setting fields to
+ point to the record where the values of the PF-fields are stored.
+ SYNOPSIS
+ set_field_ptr()
+ ptr Array of fields to change ptr
+ new_buf New record pointer
+ old_buf Old record pointer
+ DESCRIPTION
+ Set ptr in field objects of field array to refer to new_buf record
+ instead of previously old_buf. Used before calling val_int and after
+ it is used to restore pointers to table->record[0].
+ This routine is placed outside of partition code since it can be useful
+ also for other programs.
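+
+    Sketched usage, evaluating the partition function on another record
+    buffer (new_rec here is a placeholder) and restoring the pointers
+    afterwards:
+
+      set_field_ptr(part_info->full_part_field_array, new_rec,
+                    table->record[0]);
+      error= part_info->get_partition_id(part_info, &part_id);
+      set_field_ptr(part_info->full_part_field_array, table->record[0],
+                    new_rec);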
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf,
+ const byte *old_buf)
+{
+ my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_field_ptr");
+
+ do
+ {
+ (*ptr)->move_field(diff);
+ } while (*(++ptr));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Prepare for calling val_int on partition function by setting fields to
+ point to the record where the values of the PF-fields are stored.
+ This variant works on a key_part reference.
+ It is not required that all fields are NOT NULL fields.
+ SYNOPSIS
+ set_key_field_ptr()
+ key_part key part with a set of fields to change ptr
+ new_buf New record pointer
+ old_buf Old record pointer
+ DESCRIPTION
+ Set ptr in field objects of field array to refer to new_buf record
+ instead of previously old_buf. Used before calling val_int and after
+ it is used to restore pointers to table->record[0].
+ This routine is placed outside of partition code since it can be useful
+ also for other programs.
+*/
+
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+ const byte *old_buf)
+{
+ KEY_PART_INFO *key_part= key_info->key_part;
+ uint key_parts= key_info->key_parts, i= 0;
+ my_ptrdiff_t diff= (new_buf - old_buf);
+ DBUG_ENTER("set_key_field_ptr");
+
+ do
+ {
+ key_part->field->move_field(diff);
+ key_part++;
+ } while (++i < key_parts);
+ DBUG_VOID_RETURN;
+}
+
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 879ea626494..73bbce647bd 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -2686,11 +2686,11 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len)
old_stmt_arena= thd->stmt_arena;
thd->stmt_arena= this;
lex_start(thd, (uchar*) thd->query, thd->query_length);
- lex->safe_to_cache_query= FALSE;
lex->stmt_prepare_mode= TRUE;
rc= yyparse((void *)thd) || thd->is_fatal_error ||
thd->net.report_error || init_param_array(this);
+ lex->safe_to_cache_query= FALSE;
/*
While doing context analysis of the query (in check_prepared_statement)
we allocate a lot of additional memory: for open tables, JOINs, derived
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 32a8378d41d..d376423e990 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -19,6 +19,7 @@
#include "sql_repl.h"
#include "log_event.h"
+#include "rpl_filter.h"
#include <my_dir.h>
int max_binlog_dump_events = 0; // unlimited
@@ -1455,8 +1456,8 @@ bool show_binlog_info(THD* thd)
int dir_len = dirname_length(li.log_file_name);
protocol->store(li.log_file_name + dir_len, &my_charset_bin);
protocol->store((ulonglong) li.pos);
- protocol->store(&binlog_do_db);
- protocol->store(&binlog_ignore_db);
+ protocol->store(binlog_filter->get_do_db());
+ protocol->store(binlog_filter->get_ignore_db());
if (protocol->write())
DBUG_RETURN(TRUE);
}
diff --git a/sql/sql_repl.h b/sql/sql_repl.h
index 9eb6456ee20..ba64e626adc 100644
--- a/sql/sql_repl.h
+++ b/sql/sql_repl.h
@@ -31,7 +31,6 @@ typedef struct st_slave_info
extern my_bool opt_show_slave_auth_info;
extern char *master_host, *master_info_file;
extern bool server_id_supplied;
-extern I_List<i_string> binlog_do_db, binlog_ignore_db;
extern int max_binlog_dump_events;
extern my_bool opt_sporadic_binlog_dump_fail;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 04969b37012..0fc43345c04 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -941,23 +941,19 @@ JOIN::optimize()
}
/*
- Need to tell Innobase that to play it safe, it should fetch all
- columns of the tables: this is because MySQL may build row
- pointers for the rows, and for all columns of the primary key the
- field->query_id has not necessarily been set to thd->query_id by
- MySQL.
+ Need to tell handlers that to play it safe, it should fetch all
+ columns of the primary key of the tables: this is because MySQL may
+ build row pointers for the rows, and for all columns of the primary key
+ the read set has not necessarily been set by the server code.
*/
-
-#ifdef HAVE_INNOBASE_DB
if (need_tmp || select_distinct || group_list || order)
{
for (uint i_h = const_tables; i_h < tables; i_h++)
{
TABLE* table_h = join_tab[i_h].table;
- table_h->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY);
+ table_h->file->ha_retrieve_all_pk();
}
}
-#endif
DBUG_EXECUTE("info",TEST_join(this););
@@ -1301,6 +1297,9 @@ JOIN::exec()
/* Copy data to the temporary table */
thd->proc_info= "Copying to tmp table";
DBUG_PRINT("info", ("%s", thd->proc_info));
+ if (!curr_join->sort_and_group &&
+ curr_join->const_tables != curr_join->tables)
+ curr_join->join_tab[curr_join->const_tables].sorted= 0;
if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0)))
{
error= tmp_error;
@@ -1447,6 +1446,9 @@ JOIN::exec()
1, TRUE))
DBUG_VOID_RETURN;
curr_join->group_list= 0;
+ if (!curr_join->sort_and_group &&
+ curr_join->const_tables != curr_join->tables)
+ curr_join->join_tab[curr_join->const_tables].sorted= 0;
if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
(tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
0)))
@@ -1633,6 +1635,16 @@ JOIN::exec()
(select_options & OPTION_FOUND_ROWS ?
HA_POS_ERROR : unit->select_limit_cnt)))
DBUG_VOID_RETURN;
+ if (curr_join->const_tables != curr_join->tables &&
+ !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache)
+ {
+ /*
+ If no IO cache exists for the first table then we are using an
+ INDEX SCAN and no filesort. Thus we should not remove the sorted
+ attribute on the INDEX SCAN.
+ */
+ skip_sort_order= 1;
+ }
}
}
/* XXX: When can we have here thd->net.report_error not zero? */
@@ -5728,6 +5740,7 @@ make_join_readinfo(JOIN *join, uint options)
uint i;
bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
+ bool sorted= 1;
DBUG_ENTER("make_join_readinfo");
for (i=join->const_tables ; i < join->tables ; i++)
@@ -5737,6 +5750,8 @@ make_join_readinfo(JOIN *join, uint options)
tab->read_record.table= table;
tab->read_record.file=table->file;
tab->next_select=sub_select; /* normal select */
+ tab->sorted= sorted;
+ sorted= 0; // only first must be sorted
switch (tab->type) {
case JT_SYSTEM: // Only happens with left join
table->status=STATUS_NO_RECORD;
@@ -8143,7 +8158,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
uint hidden_null_count, hidden_null_pack_length, hidden_field_count;
uint blob_count,group_null_items, string_count;
uint temp_pool_slot=MY_BIT_NONE;
- ulong reclength, string_total_length;
+ ulong reclength, string_total_length, fieldnr= 0;
bool using_unique_constraint= 0;
bool use_packed_rows= 0;
bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS);
@@ -8166,7 +8181,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
if (use_temp_pool)
- temp_pool_slot = bitmap_set_next(&temp_pool);
+ temp_pool_slot = bitmap_lock_set_next(&temp_pool);
if (temp_pool_slot != MY_BIT_NONE) // we got a slot
sprintf(path, "%s_%lx_%i", tmp_file_prefix,
@@ -8218,12 +8233,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
param->group_length : 0,
NullS))
{
- bitmap_clear_bit(&temp_pool, temp_pool_slot);
+ bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
DBUG_RETURN(NULL); /* purecov: inspected */
}
if (!(param->copy_field=copy=new Copy_field[field_count]))
{
- bitmap_clear_bit(&temp_pool, temp_pool_slot);
+ bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
my_free((gptr) table,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NULL); /* purecov: inspected */
}
@@ -8254,6 +8269,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
table->s->tmp_table= TMP_TABLE;
table->s->db_low_byte_first=1; // True for HEAP and MyISAM
table->s->table_charset= param->table_charset;
+ table->s->primary_key= MAX_KEY; //Indicate no primary key
table->s->keys_for_keyread.init();
table->s->keys_in_use.init();
/* For easier error reporting */
@@ -8329,6 +8345,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
(*argp)->maybe_null=1;
}
new_field->query_id= thd->query_id;
+ new_field->fieldnr= ++fieldnr;
}
}
}
@@ -8376,6 +8393,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
new_field->flags|= GROUP_FLAG;
}
new_field->query_id= thd->query_id;
+ new_field->fieldnr= ++fieldnr;
new_field->field_index= (uint) (reg_field - table->field);
*(reg_field++) =new_field;
}
@@ -8385,6 +8403,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
DBUG_ASSERT(field_count >= (uint) (reg_field - table->field));
field_count= (uint) (reg_field - table->field);
*blob_field= 0; // End marker
+ table->s->fields= field_count;
/* If result table is small; use a heap */
if (blob_count || using_unique_constraint ||
@@ -8401,7 +8420,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{
table->file=get_new_handler(table,table->s->db_type= DB_TYPE_HEAP);
}
-
+ if (table->s->fields)
+ {
+ table->file->ha_set_all_bits_in_read_set();
+ table->file->ha_set_all_bits_in_write_set();
+ }
if (!using_unique_constraint)
reclength+= group_null_items; // null flag is stored separately
@@ -8427,7 +8450,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS))
use_packed_rows= 1;
- table->s->fields= field_count;
table->s->reclength= reclength;
{
uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1);
@@ -8672,7 +8694,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
err:
free_tmp_table(thd,table); /* purecov: inspected */
- bitmap_clear_bit(&temp_pool, temp_pool_slot);
+ bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
DBUG_RETURN(NULL); /* purecov: inspected */
}
@@ -8946,7 +8968,7 @@ free_tmp_table(THD *thd, TABLE *entry)
my_free((gptr) entry->record[0],MYF(0));
free_io_cache(entry);
- bitmap_clear_bit(&temp_pool, entry->temp_pool_slot);
+ bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
my_free((gptr) entry,MYF(0));
thd->proc_info=save_proc_info;
@@ -9007,7 +9029,12 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
new_table.file->extra(HA_EXTRA_WRITE_CACHE);
#endif
- /* copy all old rows */
+ /*
+ copy all old rows from heap table to MyISAM table
+ This is the only code that uses record[1] to read/write but this
+ is safe as this is a temporary MyISAM table without timestamp/autoincrement
+ or partitioning.
+ */
while (!table->file->rnd_next(new_table.record[1]))
{
if ((write_err=new_table.file->write_row(new_table.record[1])))
@@ -9042,8 +9069,8 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
(void) new_table.file->close();
err1:
new_table.file->delete_table(new_table.s->table_name);
- delete new_table.file;
err2:
+ delete new_table.file;
thd->proc_info=save_proc_info;
DBUG_RETURN(1);
}
@@ -9138,7 +9165,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
empty_record(table);
if (table->group && join->tmp_table_param.sum_func_count &&
table->s->keys && !table->file->inited)
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 0);
}
/* Set up select_end */
join->join_tab[join->tables-1].next_select= setup_end_select_func(join);
@@ -9837,7 +9864,7 @@ join_read_const(JOIN_TAB *tab)
table->status= STATUS_NOT_FOUND;
mark_as_null_row(tab->table);
empty_record(table);
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1;
}
@@ -9860,7 +9887,9 @@ join_read_key(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ {
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
+ }
if (cmp_buffer_with_ref(tab) ||
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
{
@@ -9872,7 +9901,7 @@ join_read_key(JOIN_TAB *tab)
error=table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT);
- if (error && error != HA_ERR_KEY_NOT_FOUND)
+ if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
}
table->null_row=0;
@@ -9892,14 +9921,16 @@ join_read_always_key(JOIN_TAB *tab)
return -1;
}
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ {
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
+ }
if (cp_buffer_from_ref(tab->join->thd, &tab->ref))
return -1;
if ((error=table->file->index_read(table->record[0],
tab->ref.key_buff,
tab->ref.key_length,HA_READ_KEY_EXACT)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1; /* purecov: inspected */
}
@@ -9919,14 +9950,14 @@ join_read_last_key(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, tab->sorted);
if (cp_buffer_from_ref(tab->join->thd, &tab->ref))
return -1;
if ((error=table->file->index_read_last(table->record[0],
tab->ref.key_buff,
tab->ref.key_length)))
{
- if (error != HA_ERR_KEY_NOT_FOUND)
+ if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
return report_error(table, error);
return -1; /* purecov: inspected */
}
@@ -10029,7 +10060,7 @@ join_read_first(JOIN_TAB *tab)
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
if (!table->file->inited)
- table->file->ha_index_init(tab->index);
+ table->file->ha_index_init(tab->index, tab->sorted);
if ((error=tab->table->file->index_first(tab->table->record[0])))
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
@@ -10068,7 +10099,7 @@ join_read_last(JOIN_TAB *tab)
tab->read_record.index=tab->index;
tab->read_record.record=table->record[0];
if (!table->file->inited)
- table->file->ha_index_init(tab->index);
+ table->file->ha_index_init(tab->index, 1);
if ((error= tab->table->file->index_last(tab->table->record[0])))
return report_error(table, error);
return 0;
@@ -10092,7 +10123,7 @@ join_ft_read_first(JOIN_TAB *tab)
TABLE *table= tab->table;
if (!table->file->inited)
- table->file->ha_index_init(tab->ref.key);
+ table->file->ha_index_init(tab->ref.key, 1);
#if NOT_USED_YET
if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) // as ft-key doesn't use store_key's
return -1; // see also FT_SELECT::init()
@@ -10487,7 +10518,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
error, 0))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
/* Change method to update rows */
- table->file->ha_index_init(0);
+ table->file->ha_index_init(0, 0);
join->join_tab[join->tables-1].next_select=end_unique_update;
}
join->send_records++;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 47906c2697e..ce40f657a8e 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -133,6 +133,7 @@ typedef struct st_join_table {
uint used_fields,used_fieldlength,used_blobs;
enum join_type type;
bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
+ bool sorted;
TABLE_REF ref;
JOIN_CACHE cache;
JOIN *join;
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 51330a6109b..c346f4cc291 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -971,11 +971,16 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
packet->append("\n)", 2);
if (!(thd->variables.sql_mode & MODE_NO_TABLE_OPTIONS) && !foreign_db_mode)
{
- if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
- packet->append(" TYPE=", 6);
- else
- packet->append(" ENGINE=", 8);
- packet->append(file->table_type());
+#ifdef HAVE_PARTITION_DB
+ if (!table->s->part_info)
+#endif
+ {
+ if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
+ packet->append(" TYPE=", 6);
+ else
+ packet->append(" ENGINE=", 8);
+ packet->append(file->table_type());
+ }
if (share->table_charset &&
!(thd->variables.sql_mode & MODE_MYSQL323) &&
@@ -1042,6 +1047,23 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
append_directory(thd, packet, "DATA", create_info.data_file_name);
append_directory(thd, packet, "INDEX", create_info.index_file_name);
}
+#ifdef HAVE_PARTITION_DB
+ {
+ /*
+ Partition syntax for CREATE TABLE is at the end of the syntax.
+ */
+ uint part_syntax_len;
+ char *part_syntax;
+ if (table->s->part_info &&
+ ((part_syntax= generate_partition_syntax(table->s->part_info,
+ &part_syntax_len,
+ FALSE))))
+ {
+ packet->append(part_syntax, part_syntax_len);
+ my_free(part_syntax, MYF(0));
+ }
+ }
+#endif
DBUG_RETURN(0);
}
@@ -2798,7 +2820,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
{
DBUG_RETURN(1);
}
- proc_table->file->ha_index_init(0);
+ proc_table->file->ha_index_init(0, 1);
if ((res= proc_table->file->index_first(proc_table->record[0])))
{
res= (res == HA_ERR_END_OF_FILE) ? 0 : 1;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 611ab0f16aa..6f60419ff94 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -30,6 +30,7 @@
#include <io.h>
#endif
+
const char *primary_key_name="PRIMARY";
static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end);
@@ -44,6 +45,64 @@ static bool prepare_blob_field(THD *thd, create_field *sql_field);
static bool check_engine(THD *thd, const char *table_name,
enum db_type *new_engine);
+/*
+ SYNOPSIS
+ write_bin_log()
+ thd Thread object
+    clear_error  Whether thd->clear_error() should be called before writing
+ RETURN VALUES
+ NONE
+ DESCRIPTION
+    Write the query to the binlog if it is open; a routine used in multiple
+    places in this file.
+*/
+
+static void write_bin_log(THD *thd, bool clear_error)
+{
+ if (mysql_bin_log.is_open())
+ {
+ if (clear_error)
+ thd->clear_error();
+ Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
+ mysql_bin_log.write(&qinfo);
+ }
+}
+
+/*
+ SYNOPSIS
+ abort_and_upgrade_lock()
+ thd Thread object
+ table Table object
+ db Database name
+ table_name Table name
+ old_lock_level Old lock level
+ RETURN VALUES
+ TRUE Failure
+ FALSE Success
+  DESCRIPTION
+    Remember the old lock level (for a possible downgrade later on), abort
+    all waiting threads and ensure that all currently held locks are
+    released, such that we own the lock exclusively and no other
+    interaction is ongoing.
+*/
+
+static bool abort_and_upgrade_lock(THD *thd, TABLE *table, const char *db,
+ const char *table_name,
+ uint *old_lock_level)
+{
+ uint flags= RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG;
+  DBUG_ENTER("abort_and_upgrade_lock");
+
+ *old_lock_level= table->reginfo.lock_type;
+ mysql_lock_abort(thd, table);
+ VOID(remove_table_from_cache(thd, db, table_name, flags));
+ if (thd->killed)
+ {
+ thd->no_warnings_for_error= 0;
+ DBUG_RETURN(TRUE);
+ }
+ DBUG_RETURN(FALSE);
+}
/*
Build the path to a file for a table (or the base path that can
@@ -1347,6 +1406,34 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,
/*
+ Set table default charset, if not set
+
+ SYNOPSIS
+ set_table_default_charset()
+ create_info Table create information
+
+ DESCRIPTION
+ If the table character set was not given explicitely,
+ let's fetch the database default character set and
+ apply it to the table.
+*/
+
+static void set_table_default_charset(THD *thd,
+ HA_CREATE_INFO *create_info, char *db)
+{
+ if (!create_info->default_table_charset)
+ {
+ HA_CREATE_INFO db_info;
+ char path[FN_REFLEN];
+ /* Abuse build_table_path() to build the path to the db.opt file */
+ build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, "");
+ load_db_opt(thd, path, &db_info);
+ create_info->default_table_charset= db_info.default_table_charset;
+ }
+}
+
+
+/*
Extend long VARCHAR fields to blob & prepare field if it's a blob
SYNOPSIS
@@ -1503,7 +1590,66 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
if (create_info->row_type == ROW_TYPE_DYNAMIC)
db_options|=HA_OPTION_PACK_RECORD;
alias= table_case_name(create_info, table_name);
- file=get_new_handler((TABLE*) 0, create_info->db_type);
+ if (!(file=get_new_handler((TABLE*) 0, create_info->db_type)))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), 128);//128 bytes invented
+ DBUG_RETURN(TRUE);
+ }
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info= thd->lex->part_info;
+ if (part_info)
+ {
+ /*
+ The table has been specified as a partitioned table.
+ If this is part of an ALTER TABLE the handler will be the partition
+ handler but we need to specify the default handler to use for
+ partitions also in the call to check_partition_info. We transport
+ this information in the default_db_type variable, it is either
+ DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command.
+ */
+ enum db_type part_engine_type= create_info->db_type;
+ char *part_syntax_buf;
+ uint syntax_len;
+ if (part_engine_type == DB_TYPE_PARTITION_DB)
+ {
+ /*
+ This only happens at ALTER TABLE.
+ default_engine_type was assigned from the engine set in the ALTER
+ TABLE command.
+ */
+ part_engine_type= ha_checktype(thd,
+ part_info->default_engine_type, 0, 0);
+ }
+ if (check_partition_info(part_info, part_engine_type,
+ file, create_info->max_rows))
+ DBUG_RETURN(TRUE);
+ /*
+ We reverse the partitioning parser and generate a standard format
+ for syntax stored in frm file.
+ */
+ if (!(part_syntax_buf= generate_partition_syntax(part_info,
+ &syntax_len,
+ TRUE)))
+ DBUG_RETURN(TRUE);
+ part_info->part_info_string= part_syntax_buf;
+ part_info->part_info_len= syntax_len;
+ if ((!(file->partition_flags() & HA_CAN_PARTITION)) ||
+ create_info->db_type == DB_TYPE_PARTITION_DB)
+ {
+ /*
+ The handler assigned to the table cannot handle partitioning.
+ Assign the partition handler as the handler of the table.
+ */
+ DBUG_PRINT("info", ("db_type= %d, part_flag= %d", create_info->db_type,file->partition_flags()));
+ delete file;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ if (!(file= get_ha_partition(part_info)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+#endif
#ifdef NOT_USED
/*
@@ -1517,30 +1663,17 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
(file->table_flags() & HA_NO_TEMP_TABLES))
{
my_error(ER_ILLEGAL_HA, MYF(0), table_name);
- DBUG_RETURN(TRUE);
+ goto err;
}
#endif
- /*
- If the table character set was not given explicitely,
- let's fetch the database default character set and
- apply it to the table.
- */
- if (!create_info->default_table_charset)
- {
- HA_CREATE_INFO db_info;
- char path[FN_REFLEN];
- /* Abuse build_table_path() to build the path to the db.opt file */
- build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, "");
- load_db_opt(thd, path, &db_info);
- create_info->default_table_charset= db_info.default_table_charset;
- }
+ set_table_default_charset(thd, create_info, (char*) db);
if (mysql_prepare_table(thd, create_info, &fields,
&keys, internal_tmp_table, &db_options, file,
&key_info_buffer, &key_count,
select_field_count))
- DBUG_RETURN(TRUE);
+ goto err;
/* Check if table exists */
if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
@@ -1565,13 +1698,13 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR),
alias);
- DBUG_RETURN(FALSE);
+ goto no_err;
}
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias);
- DBUG_RETURN(TRUE);
+ goto err;
}
if (wait_if_global_read_lock(thd, 0, 1))
- DBUG_RETURN(error);
+ goto err;
VOID(pthread_mutex_lock(&LOCK_open));
if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
{
@@ -1617,7 +1750,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
if (rea_create_table(thd, path, db, table_name,
create_info, fields, key_count,
- key_info_buffer))
+ key_info_buffer, file))
goto end;
if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
{
@@ -1648,8 +1781,16 @@ warn:
end:
VOID(pthread_mutex_unlock(&LOCK_open));
start_waiting_global_read_lock(thd);
+ delete file;
thd->proc_info="After create";
DBUG_RETURN(error);
+
+err:
+ delete file;
+ DBUG_RETURN(TRUE);
+no_err:
+ delete file;
+ DBUG_RETURN(FALSE);
}
/*
@@ -2709,12 +2850,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table,
}
// Must be written before unlock
- if (mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, TRUE);
res= FALSE;
goto err;
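
The repeated inline binlog blocks in this file are folded into a new
write_bin_log() helper. Its plausible shape, inferred purely from the inline
code it replaces here (the real definition lives elsewhere in this patch):

    static void write_bin_log(THD *thd, bool clear_error)
    {
      if (mysql_bin_log.is_open())
      {
        if (clear_error)
          thd->clear_error();
        Query_log_event qinfo(thd, thd->query, thd->query_length,
                              FALSE, FALSE);
        mysql_bin_log.write(&qinfo);
      }
    }
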
@@ -2820,11 +2956,7 @@ mysql_discard_or_import_tablespace(THD *thd,
error=1;
if (error)
goto err;
- if (mysql_bin_log.is_open())
- {
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, FALSE);
err:
close_thread_tables(thd);
thd->tablespace_op=FALSE;
@@ -3043,6 +3175,166 @@ int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list,
#endif /* NOT_USED */
+
+#define ALTER_TABLE_DATA_CHANGED 1
+#define ALTER_TABLE_INDEX_CHANGED 2
+
+/*
+ SYNOPSIS
+ compare_tables()
+ table original table
+ create_list fields in new table
+ key_list keys in new table
+ create_info create options in new table
+
+ DESCRIPTION
+ 'table' (first argument) contains information about the original
+ table, whose counterparts for the new table are passed in the
+ create_list, key_list and create_info arguments.
+
+ By comparing the changes between the original and new table
+ we can determine how much it has changed after ALTER TABLE
+ and whether we need to make a copy of the table, or just change
+ the .frm file.
+
+ RETURN VALUES
+ 0 No copy needed
+ 1 Data changes, copy needed
+ 2 Index changes, copy needed
+*/
+
+uint compare_tables(TABLE *table, List<create_field> *create_list,
+ List<Key> *key_list, HA_CREATE_INFO *create_info,
+ ALTER_INFO *alter_info, uint order_num)
+{
+ Field **f_ptr, *field;
+ uint changes= 0, tmp;
+ List_iterator_fast<create_field> new_field_it(*create_list);
+ create_field *new_field;
+
+ /*
+ Some very basic checks. If the number of fields changes, or the
+ handler changes, we need to run a full ALTER TABLE. In the future
+ new fields may be added and old ones dropped without a copy, but
+ not yet.
+
+ Also test whether an engine was given in the ALTER TABLE; if so, we
+ are forced to run a regular (copying) alter table,
+ e.g. ALTER TABLE tbl_name ENGINE=MyISAM.
+
+ For the following we also want to run a regular alter table:
+ ALTER TABLE tbl_name ORDER BY ..
+ ALTER TABLE tbl_name CONVERT TO CHARACTER SET ..
+
+ At the moment we can't handle altering temporary tables without a copy.
+ We also test whether OPTIMIZE TABLE was given and was mapped to alter
+ table. In that case we always do a full copy.
+ */
+ if (table->s->fields != create_list->elements ||
+ table->s->db_type != create_info->db_type ||
+ table->s->tmp_table ||
+ create_info->used_fields & HA_CREATE_USED_ENGINE ||
+ create_info->used_fields & HA_CREATE_USED_CHARSET ||
+ create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET ||
+ (alter_info->flags & ALTER_RECREATE) ||
+ order_num)
+ return ALTER_TABLE_DATA_CHANGED;
+
+ /*
+ Go through fields and check if the original ones are compatible
+ with new table.
+ */
+ for (f_ptr= table->field, new_field= new_field_it++;
+ (field= *f_ptr); f_ptr++, new_field= new_field_it++)
+ {
+ /* Make sure we have at least the default charset in use. */
+ if (!new_field->charset)
+ new_field->charset= create_info->default_table_charset;
+
+ /* Check that NULL behavior is the same for old and new fields */
+ if ((new_field->flags & NOT_NULL_FLAG) !=
+ (uint) (field->flags & NOT_NULL_FLAG))
+ return ALTER_TABLE_DATA_CHANGED;
+
+ /* Don't pack rows in old tables if the user has requested this. */
+ if (create_info->row_type == ROW_TYPE_DYNAMIC ||
+ (new_field->flags & BLOB_FLAG) ||
+ (new_field->sql_type == MYSQL_TYPE_VARCHAR &&
+ create_info->row_type != ROW_TYPE_FIXED))
+ create_info->table_options|= HA_OPTION_PACK_RECORD;
+
+ /* Evaluate changes bitmap and send to check_if_incompatible_data() */
+ if (!(tmp= field->is_equal(new_field)))
+ return ALTER_TABLE_DATA_CHANGED;
+
+ changes|= tmp;
+ }
+ /* Check if changes are compatible with current handler without a copy */
+ if (table->file->check_if_incompatible_data(create_info, changes))
+ return ALTER_TABLE_DATA_CHANGED;
+
+ /*
+ Go through keys and check if the original ones are compatible
+ with new table.
+ */
+ KEY *table_key_info= table->key_info;
+ List_iterator_fast<Key> key_it(*key_list);
+ Key *key= key_it++;
+
+ /* Check if the number of key elements has changed */
+ if (table->s->keys != key_list->elements)
+ return ALTER_TABLE_INDEX_CHANGED;
+
+ for (uint i= 0; i < table->s->keys; i++, table_key_info++, key= key_it++)
+ {
+ /*
+ Check that the key types are compatible between old and new tables.
+ */
+ if (table_key_info->algorithm != key->algorithm ||
+ ((key->type == Key::PRIMARY || key->type == Key::UNIQUE) &&
+ !(table_key_info->flags & HA_NOSAME)) ||
+ (!(key->type == Key::PRIMARY || key->type == Key::UNIQUE) &&
+ (table_key_info->flags & HA_NOSAME)) ||
+ ((key->type == Key::SPATIAL) &&
+ !(table_key_info->flags & HA_SPATIAL)) ||
+ (!(key->type == Key::SPATIAL) &&
+ (table_key_info->flags & HA_SPATIAL)) ||
+ ((key->type == Key::FULLTEXT) &&
+ !(table_key_info->flags & HA_FULLTEXT)) ||
+ (!(key->type == Key::FULLTEXT) &&
+ (table_key_info->flags & HA_FULLTEXT)))
+ return ALTER_TABLE_INDEX_CHANGED;
+
+ if (table_key_info->key_parts != key->columns.elements)
+ return ALTER_TABLE_INDEX_CHANGED;
+
+ /*
+ Check that the key parts remain compatible between the old and
+ new tables.
+ */
+ KEY_PART_INFO *table_key_part= table_key_info->key_part;
+ List_iterator_fast<key_part_spec> key_part_it(key->columns);
+ key_part_spec *key_part= key_part_it++;
+ for (uint j= 0; j < table_key_info->key_parts; j++,
+ table_key_part++, key_part= key_part_it++)
+ {
+ /*
+ Key definition has changed if we are using a different field or
+ if the used key length is different
+ (If key_part->length == 0 it means we are using the whole field)
+ */
+ if (strcmp(key_part->field_name, table_key_part->field->field_name) ||
+ (key_part->length && key_part->length != table_key_part->length) ||
+ (key_part->length == 0 && table_key_part->length !=
+ table_key_part->field->pack_length()))
+ return ALTER_TABLE_INDEX_CHANGED;
+ }
+ }
+
+ return 0; // Tables are compatible
+}
+
+
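
How the compare_tables() return value is consumed further down in
mysql_alter_table (a condensed sketch of code appearing later in this diff):

    uint need_copy_table;
    if (thd->variables.old_alter_table)        /* user forced old behaviour */
      need_copy_table= 1;
    else
      need_copy_table= compare_tables(table, &create_list, &key_list,
                                      create_info, alter_info, order_num);
    if (!need_copy_table)
      create_info->frm_only= 1;                /* fast path: rewrite frm only */
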
/*
Alter table
*/
@@ -3064,7 +3356,13 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
ulonglong next_insert_id;
uint db_create_options, used_fields;
enum db_type old_db_type,new_db_type;
- bool need_copy_table;
+ uint need_copy_table= 0;
+#ifdef HAVE_PARTITION_DB
+ bool online_add_empty_partition= FALSE;
+ bool online_drop_partition= FALSE;
+ bool partition_changed= FALSE;
+ enum db_type default_engine_type;
+#endif
DBUG_ENTER("mysql_alter_table");
thd->proc_info="init";
@@ -3140,6 +3438,423 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
old_db_type= table->s->db_type;
if (create_info->db_type == DB_TYPE_DEFAULT)
create_info->db_type= old_db_type;
+
+#ifdef HAVE_PARTITION_DB
+ /*
+ We need to handle both partition management command such as Add Partition
+ and others here as well as an ALTER TABLE that completely changes the
+ partitioning and yet others that don't change anything at all. We start
+ by checking the partition management variants and then check the general
+ change patterns.
+ */
+ if (alter_info->flags & (ALTER_ADD_PARTITION +
+ ALTER_DROP_PARTITION + ALTER_COALESCE_PARTITION +
+ ALTER_REORGANISE_PARTITION))
+ {
+ partition_info *tab_part_info= table->s->part_info;
+ if (!tab_part_info)
+ {
+ my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ {
+ List_iterator<partition_element> t_it(tab_part_info->partitions);
+ partition_element *t_part_elem= t_it++;
+ if (is_sub_partitioned(tab_part_info))
+ {
+ List_iterator<partition_element> s_it(t_part_elem->subpartitions);
+ t_part_elem= s_it++;
+ }
+ default_engine_type= t_part_elem->engine_type;
+ }
+ /*
+ We are going to manipulate the partition info on the table object
+ so we need to ensure that the data structure of the table object
+ is freed by setting version to 0.
+ */
+ table->s->version= 0L;
+ if (alter_info->flags == ALTER_ADD_PARTITION)
+ {
+ /*
+ We start by moving the new partitions to the list of temporary
+ partitions. We will then check that the new partitions fit in the
+ partitioning scheme as currently set up.
+ Partitions are always added at the end in ADD PARTITION.
+ */
+ partition_info *alt_part_info= thd->lex->part_info;
+ uint no_new_partitions= alt_part_info->no_parts;
+ uint no_orig_partitions= tab_part_info->no_parts;
+ uint check_total_partitions= no_new_partitions + no_orig_partitions;
+ uint new_total_partitions= check_total_partitions;
+ /*
+ We allow quite a lot of values to be supplied by defaults, however we
+ must know the number of new partitions in this case.
+ */
+ if (no_new_partitions == 0)
+ {
+ my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (is_sub_partitioned(tab_part_info))
+ {
+ if (alt_part_info->no_subparts == 0)
+ alt_part_info->no_subparts= tab_part_info->no_subparts;
+ else if (alt_part_info->no_subparts != tab_part_info->no_subparts)
+ {
+ my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ check_total_partitions= new_total_partitions*
+ alt_part_info->no_subparts;
+ }
+ if (check_total_partitions > MAX_PARTITIONS)
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ alt_part_info->part_type= tab_part_info->part_type;
+ if (set_up_defaults_for_partitioning(alt_part_info,
+ table->file,
+ (ulonglong)0ULL,
+ tab_part_info->no_parts))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ /*
+ Need to concatenate the lists here to make it possible to check the
+ partition info for correctness using check_partition_info
+ */
+ {
+ List_iterator<partition_element> alt_it(alt_part_info->partitions);
+ uint part_count= 0;
+ do
+ {
+ partition_element *part_elem= alt_it++;
+ tab_part_info->partitions.push_back(part_elem);
+ tab_part_info->temp_partitions.push_back(part_elem);
+ } while (++part_count < no_new_partitions);
+ tab_part_info->no_parts+= no_new_partitions;
+ }
+ {
+ List_iterator<partition_element> tab_it(tab_part_info->partitions);
+ partition_element *part_elem= tab_it++;
+ if (is_sub_partitioned(tab_part_info))
+ {
+ List_iterator<partition_element> sub_it(part_elem->subpartitions);
+ part_elem= sub_it++;
+ }
+ if (check_partition_info(tab_part_info, part_elem->engine_type,
+ table->file, (ulonglong)0ULL))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ }
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ thd->lex->part_info= tab_part_info;
+ if (table->file->alter_table_flags() & HA_ONLINE_ADD_EMPTY_PARTITION &&
+ (tab_part_info->part_type == RANGE_PARTITION ||
+ tab_part_info->part_type == LIST_PARTITION))
+ {
+ /*
+ For range and list partitioning, ADD PARTITION simply adds a new
+ empty partition to the table. If the handler supports this we will
+ use that simple method. In this case we need to break the new
+ partitions out of the list again and keep them only in the
+ temporary list. Added partitions are always added at the end.
+ */
+ {
+ List_iterator<partition_element> tab_it(tab_part_info->partitions);
+ uint part_count= 0;
+ do
+ {
+ tab_it++;
+ } while (++part_count < no_orig_partitions);
+ do
+ {
+ tab_it++;
+ tab_it.remove();
+ } while (++part_count < new_total_partitions);
+ }
+ tab_part_info->no_parts-= no_new_partitions;
+ online_add_empty_partition= TRUE;
+ }
+ else
+ {
+ tab_part_info->temp_partitions.empty();
+ }
+ }
+ else if (alter_info->flags == ALTER_DROP_PARTITION)
+ {
+ /*
+ Dropping a partition from RANGE or LIST partitioning is always
+ safe and can be made more or less immediate. It is however
+ necessary to ensure that the partition to be removed is removed
+ safely, and that REPAIR TABLE can remove the partition if for some
+ reason the command to drop the partition failed in the middle.
+ */
+ uint part_count= 0;
+ uint no_parts_dropped= alter_info->partition_names.elements;
+ uint no_parts_found= 0;
+ List_iterator<partition_element> part_it(tab_part_info->partitions);
+ if (!(tab_part_info->part_type == RANGE_PARTITION ||
+ tab_part_info->part_type == LIST_PARTITION))
+ {
+ my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP");
+ DBUG_RETURN(TRUE);
+ }
+ if (no_parts_dropped >= tab_part_info->no_parts)
+ {
+ my_error(ER_DROP_LAST_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (is_partition_in_list(part_elem->partition_name,
+ alter_info->partition_names))
+ {
+ /*
+ Mark the partition as dropped by setting its state; the actual
+ removal from the partition list happens later.
+ */
+ no_parts_found++;
+ part_elem->part_state= PART_IS_DROPPED;
+ }
+ } while (++part_count < tab_part_info->no_parts);
+ if (no_parts_found != no_parts_dropped)
+ {
+ my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (!(table->file->alter_table_flags() & HA_ONLINE_DROP_PARTITION))
+ {
+ my_error(ER_DROP_PARTITION_FAILURE, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (table->file->is_fk_defined_on_table_or_index(MAX_KEY))
+ {
+ my_error(ER_DROP_PARTITION_WHEN_FK_DEFINED, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ /*
+ This code needs structures set up by mysql_create_table before it
+ can run, so here we only set a boolean flag that is checked further
+ down in the code, once all needed data structures have been
+ prepared.
+ */
+ online_drop_partition= TRUE;
+ }
+ else if (alter_info->flags == ALTER_COALESCE_PARTITION)
+ {
+ /*
+ In this version COALESCE PARTITION is implemented by simply removing
+ a partition from the table, using the normal ALTER TABLE code
+ and ensuring that a copy to a new table occurs. Later on we can
+ optimise this function for LINEAR HASH partitions, where we can avoid
+ reorganising the entire table. For normal hash partitions it will
+ be a complete reorganisation anyway, so it can only be made on-line
+ if it still uses a copy table.
+ */
+ uint part_count= 0;
+ uint no_parts_coalesced= alter_info->no_parts;
+ uint no_parts_remain= tab_part_info->no_parts - no_parts_coalesced;
+ List_iterator<partition_element> part_it(tab_part_info->partitions);
+ if (tab_part_info->part_type != HASH_PARTITION)
+ {
+ my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (no_parts_coalesced == 0)
+ {
+ my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (no_parts_coalesced >= tab_part_info->no_parts)
+ {
+ my_error(ER_DROP_LAST_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ do
+ {
+ part_it++;
+ if (++part_count > no_parts_remain)
+ part_it.remove();
+ } while (part_count < tab_part_info->no_parts);
+ tab_part_info->no_parts= no_parts_remain;
+ }
+ else if (alter_info->flags == ALTER_REORGANISE_PARTITION)
+ {
+ /*
+ REORGANISE PARTITION takes a number of partitions that are next
+ to each other (at least for RANGE partitions) and uses them
+ to create a set of new partitions, copying the data from the old
+ partitions into the new set. The new partitions may have more or
+ fewer values in their LIST value specifications; both are allowed.
+ The ranges can differ, but since a set of consecutive partitions
+ is being changed, the new partitions must cover the same total
+ range as the ones they replace.
+ This command can be used on RANGE and LIST partitions.
+ */
+ uint no_parts_reorged= alter_info->partition_names.elements;
+ uint no_parts_new= thd->lex->part_info->partitions.elements;
+ partition_info *alt_part_info= thd->lex->part_info;
+ uint check_total_partitions;
+ if (no_parts_reorged > tab_part_info->no_parts)
+ {
+ my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (!(tab_part_info->part_type == RANGE_PARTITION ||
+ tab_part_info->part_type == LIST_PARTITION))
+ {
+ my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "REORGANISE");
+ DBUG_RETURN(TRUE);
+ }
+ if (is_partitions_in_table(alt_part_info, tab_part_info))
+ {
+ my_error(ER_SAME_NAME_PARTITION, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ check_total_partitions= tab_part_info->no_parts + no_parts_new;
+ check_total_partitions-= no_parts_reorged;
+ if (check_total_partitions > MAX_PARTITIONS)
+ {
+ my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ {
+ List_iterator<partition_element> tab_it(tab_part_info->partitions);
+ uint part_count= 0;
+ bool found_first= FALSE, found_last= FALSE;
+ uint drop_count= 0;
+ longlong tab_max_range= 0, alt_max_range= 0;
+ do
+ {
+ partition_element *part_elem= tab_it++;
+ if (is_partition_in_list(part_elem->partition_name,
+ alter_info->partition_names))
+ {
+ drop_count++;
+ tab_max_range= part_elem->range_value;
+ if (!found_first)
+ {
+ uint alt_part_count= 0;
+ found_first= TRUE;
+ List_iterator<partition_element> alt_it(alt_part_info->partitions);
+ do
+ {
+ partition_element *alt_part_elem= alt_it++;
+ alt_max_range= alt_part_elem->range_value;
+ if (alt_part_count == 0)
+ tab_it.replace(alt_part_elem);
+ else
+ tab_it.after(alt_part_elem);
+ } while (++alt_part_count < no_parts_new);
+ }
+ else if (found_last)
+ {
+ my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ else
+ tab_it.remove();
+ }
+ else
+ {
+ if (found_first)
+ found_last= TRUE;
+ }
+ } while (++part_count < tab_part_info->no_parts);
+ if (drop_count != no_parts_reorged)
+ {
+ my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ if (tab_part_info->part_type == RANGE_PARTITION &&
+ alt_max_range > tab_max_range)
+ {
+ my_error(ER_REORG_OUTSIDE_RANGE, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+ partition_changed= TRUE;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ thd->lex->part_info= tab_part_info;
+ if (alter_info->flags == ALTER_ADD_PARTITION ||
+ alter_info->flags == ALTER_REORGANISE_PARTITION)
+ {
+ if (check_partition_info(tab_part_info, default_engine_type,
+ table->file, (ulonglong)0ULL))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+ else
+ {
+ /*
+ When thd->lex->part_info has a reference to a partition_info the
+ ALTER TABLE contained a definition of a partitioning.
+
+ Case I:
+ There was a partitioning before and a new one is defined.
+ We use the new partitioning. The new partitioning is already
+ defined in the correct variable, so no work is needed to
+ accomplish this.
+ We do however need to set partition_changed to ensure that more
+ than just the frm file is changed by the ALTER TABLE command.
+
+ Case IIa:
+ There was a partitioning before and there is no new one defined.
+ Also the user has not specified an explicit engine to use.
+
+ We use the old partitioning also for the new table. We do this
+ by assigning the partition_info from the table loaded in
+ open_ltable to the partition_info struct used by mysql_create_table
+ later in this method.
+
+ Case IIb:
+ There was a partitioning before and there is no new one defined.
+ The user has specified an explicit engine to use.
+
+ Since the user has specified an explicit engine to use we override
+ the old partitioning info and create a new table using the specified
+ engine. This is the reason for the extra check whether the old and
+ new engines are equal.
+ In this case the partitioning is also changed.
+
+ Case III:
+ There was no partitioning before altering the table, there is
+ partitioning defined in the altered table. Use the new partitioning.
+ No work needed since the partitioning info is already in the
+ correct variable.
+ Also here partition has changed and thus a new table must be
+ created.
+
+ Case IV:
+ There was no partitioning before and no partitioning defined.
+ Obviously no work needed.
+ */
+ if (table->s->part_info)
+ {
+ if (!thd->lex->part_info &&
+ create_info->db_type == old_db_type)
+ thd->lex->part_info= table->s->part_info;
+ }
+ if (thd->lex->part_info)
+ {
+ /*
+ Need to cater for engine types that can handle partitioning
+ without using the partition handler.
+ */
+ if (thd->lex->part_info != table->s->part_info)
+ partition_changed= TRUE;
+ thd->lex->part_info->default_engine_type= create_info->db_type;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ }
+ }
+#endif
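
The Case I-IV discussion above collapses to a small decision table; a sketch
of the effective outcome (not the literal control flow; old_pi and new_pi
stand for table->s->part_info and thd->lex->part_info):

    if (old_pi && !new_pi && create_info->db_type == old_db_type)
      new_pi= old_pi;                /* Case IIa: inherit old partitioning */
    if (new_pi)                      /* Cases I, IIa and III */
    {
      if (new_pi != old_pi)
        partition_changed= TRUE;     /* Cases I and III: more than frm work */
      new_pi->default_engine_type= create_info->db_type;
      create_info->db_type= DB_TYPE_PARTITION_DB;
    }
    /* Case IIb (explicit engine given) and Case IV leave new_pi NULL: the
       table is recreated unpartitioned / left alone respectively */
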
if (check_engine(thd, new_name, &create_info->db_type))
DBUG_RETURN(TRUE);
new_db_type= create_info->db_type;
@@ -3203,12 +3918,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
}
if (!error)
{
- if (mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, TRUE);
if (do_send_ok)
send_ok(thd);
}
@@ -3289,8 +3999,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
def_it.remove();
}
}
- else
- { // Use old field value
+ else // This field was not dropped and not changed, add it to the list
+ { // for the new table.
create_list.push_back(def=new create_field(field,field));
alter_it.rewind(); // Change default if ALTER
Alter_column *alter;
@@ -3503,17 +4213,122 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
if (table->s->tmp_table)
create_info->options|=HA_LEX_CREATE_TMP_TABLE;
+ set_table_default_charset(thd, create_info, db);
+
+#ifdef HAVE_PARTITION_DB
+ if (thd->variables.old_alter_table || partition_changed)
+#else
+ if (thd->variables.old_alter_table)
+#endif
+ need_copy_table= 1;
+ else
+ need_copy_table= compare_tables(table, &create_list, &key_list,
+ create_info, alter_info, order_num);
+
/*
better have a negative test here, instead of positive, like
alter_info->flags & ALTER_ADD_COLUMN|ALTER_ADD_INDEX|...
so that ALTER TABLE won't break when somebody adds a new flag
*/
- need_copy_table= (alter_info->flags &
- ~(ALTER_CHANGE_COLUMN_DEFAULT|ALTER_OPTIONS) ||
- (create_info->used_fields &
- ~(HA_CREATE_USED_COMMENT|HA_CREATE_USED_PASSWORD)) ||
- table->s->tmp_table);
- create_info->frm_only= !need_copy_table;
+
+ if (!need_copy_table)
+ create_info->frm_only= 1;
+
+#ifdef HAVE_PARTITION_DB
+ if (partition_changed)
+ {
+ if (online_drop_partition)
+ {
+ /*
+ Now after all checks and setting state on dropped partitions we can
+ start the actual dropping of the partitions.
+ 1) Lock table in TL_WRITE_ONLY to ensure all other accesses on table
+ are completed and no new ones are started until we have changed
+ the frm file.
+ 2) Write the new frm file where state of dropped partitions is
+ changed to PART_IS_DROPPED
+ 3) Perform the actual drop of the partition using the handler of the
+ table.
+ 4) Write a new frm file of the table where the partitions are dropped
+ from the table.
+
+ */
+ uint old_lock_type;
+ partition_info *part_info= table->s->part_info;
+ char path[FN_REFLEN+1];
+ uint db_options= 0, key_count, syntax_len;
+ KEY *key_info_buffer;
+ char *part_syntax_buf;
+
+ VOID(pthread_mutex_lock(&LOCK_open));
+ if (abort_and_upgrade_lock(thd, table, db, table_name, &old_lock_type))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ VOID(pthread_mutex_unlock(&LOCK_open));
+ mysql_prepare_table(thd, create_info, &create_list,
+ &key_list, /*tmp_table*/ 0, &db_options,
+ table->file, &key_info_buffer, &key_count,
+ /*select_field_count*/ 0);
+ if (!(part_syntax_buf= generate_partition_syntax(part_info,
+ &syntax_len,
+ TRUE)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ part_info->part_info_string= part_syntax_buf;
+ part_info->part_info_len= syntax_len;
+ build_table_path(path, sizeof(path), db, table_name, reg_ext);
+ if (mysql_create_frm(thd, path, db, table_name, create_info,
+ create_list, key_count, key_info_buffer,
+ table->file))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ thd->lex->part_info= part_info;
+ build_table_path(path, sizeof(path), db, table_name, "");
+ if (table->file->drop_partitions(path))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ {
+ List_iterator<partition_element> part_it(part_info->partitions);
+ uint i= 0, remove_count= 0;
+ do
+ {
+ partition_element *part_elem= part_it++;
+ if (is_partition_in_list(part_elem->partition_name,
+ alter_info->partition_names))
+ {
+ part_it.remove();
+ remove_count++;
+ }
+ } while (++i < part_info->no_parts);
+ part_info->no_parts-= remove_count;
+ }
+ if (!(part_syntax_buf= generate_partition_syntax(part_info,
+ &syntax_len,
+ TRUE)))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ part_info->part_info_string= part_syntax_buf;
+ part_info->part_info_len= syntax_len;
+ build_table_path(path, sizeof(path), db, table_name, reg_ext);
+ if (mysql_create_frm(thd, path, db, table_name, create_info,
+ create_list, key_count, key_info_buffer,
+ table->file) ||
+ table->file->create_handler_files(path))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ thd->proc_info="end";
+ write_bin_log(thd, FALSE);
+ send_ok(thd);
+ DBUG_RETURN(FALSE);
+ }
+ }
+#endif
/*
Handling of symlinked tables:
@@ -3640,12 +4455,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
my_free((gptr) new_table,MYF(0));
goto err;
}
- if (mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, TRUE);
goto end_temporary;
}
@@ -3776,15 +4586,14 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
goto err;
}
thd->proc_info="end";
- if (mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, TRUE);
VOID(pthread_cond_broadcast(&COND_refresh));
VOID(pthread_mutex_unlock(&LOCK_open));
#ifdef HAVE_BERKELEY_DB
+ /*
+ TODO RONM: This problem needs to be handled for Berkeley DB
+ partitions as well
+ */
if (old_db_type == DB_TYPE_BERKELEY_DB)
{
/*
@@ -3821,7 +4630,7 @@ end_temporary:
err:
DBUG_RETURN(TRUE);
}
-
+/* mysql_alter_table */
static int
copy_data_between_tables(TABLE *from,TABLE *to,
@@ -3927,7 +4736,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
this function does not set field->query_id in the columns to the
current query id
*/
- from->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ to->file->ha_set_all_bits_in_write_set();
+ from->file->ha_retrieve_all_cols();
init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1,1);
if (ignore ||
handle_duplicates == DUP_REPLACE)
@@ -4032,7 +4842,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list,
create_info.row_type=ROW_TYPE_NOT_USED;
create_info.default_table_charset=default_charset_info;
/* Force alter table to recreate table */
- lex->alter_info.flags= ALTER_CHANGE_COLUMN;
+ lex->alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE);
DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info,
table_list, lex->create_list,
lex->key_list, 0, (ORDER *) 0,
@@ -4090,10 +4900,11 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt)
/* calculating table's checksum */
ha_checksum crc= 0;
- /* InnoDB must be told explicitly to retrieve all columns, because
- this function does not set field->query_id in the columns to the
- current query id */
- t->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ /*
+ Set all bits in read set and inform InnoDB that we are reading all
+ fields
+ */
+ t->file->ha_retrieve_all_cols();
if (t->file->ha_rnd_init(1))
protocol->store_null();
diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc
index e0c3034a58a..453b9324e88 100644
--- a/sql/sql_udf.cc
+++ b/sql/sql_udf.cc
@@ -528,7 +528,7 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name)
if (!(table = open_ltable(thd,&tables,TL_WRITE)))
goto err;
table->field[0]->store(udf_name->str, udf_name->length, system_charset_info);
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (!table->file->index_read_idx(table->record[0], 0,
(byte*) table->field[0]->ptr,
table->key_info[0].key_length,
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 42c06d478be..61185c5d710 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -119,10 +119,10 @@ int mysql_update(THD *thd,
{
bool using_limit= limit != HA_POS_ERROR;
bool safe_update= thd->options & OPTION_SAFE_UPDATES;
- bool used_key_is_modified, transactional_table;
+ bool used_key_is_modified, transactional_table, will_batch;
int res;
- int error=0;
- uint used_index;
+ int error=0, loc_error;
+ uint used_index, dup_key_found;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
uint want_privilege;
#endif
@@ -148,7 +148,7 @@ int mysql_update(THD *thd,
/* pass counter value */
thd->lex->table_count= table_count;
/* convert to multiupdate */
- return 2;
+ DBUG_RETURN(2);
}
if (lock_tables(thd, table_list, table_count) ||
@@ -187,7 +187,11 @@ int mysql_update(THD *thd,
#ifndef NO_EMBEDDED_ACCESS_CHECKS
table_list->grant.want_privilege= table->grant.want_privilege= want_privilege;
#endif
- if (setup_fields_with_no_wrap(thd, 0, fields, 1, 0, 0))
+ /*
+ Indicate that the set of fields is to be updated by passing 2 for
+ set_query_id.
+ */
+ if (setup_fields_with_no_wrap(thd, 0, fields, 2, 0, 0))
DBUG_RETURN(1); /* purecov: inspected */
if (table_list->view && check_fields(thd, fields))
{
@@ -204,7 +208,10 @@ int mysql_update(THD *thd,
if (table->timestamp_field->query_id == thd->query_id)
table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
else
+ {
table->timestamp_field->query_id=timestamp_query_id;
+ table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
+ }
}
#ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -258,13 +265,18 @@ int mysql_update(THD *thd,
else
used_key_is_modified=0;
+#ifdef HAVE_PARTITION_DB
+ if (used_key_is_modified || order ||
+ partition_key_modified(table, fields))
+#else
if (used_key_is_modified || order)
+#endif
{
/*
We can't update table directly; We must first search after all
matching rows before updating the table!
*/
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
if (used_index < MAX_KEY && old_used_keys.is_set(used_index))
{
table->key_read=1;
@@ -390,7 +402,7 @@ int mysql_update(THD *thd,
(thd->variables.sql_mode &
(MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES)));
-
+ will_batch= !table->file->start_bulk_update();
while (!(error=info.read_record(&info)) && !thd->killed)
{
if (!(select && select->skip_record()))
@@ -417,8 +429,47 @@ int mysql_update(THD *thd,
break;
}
}
- if (!(error=table->file->update_row((byte*) table->record[1],
- (byte*) table->record[0])))
+ if (will_batch)
+ {
+ /*
+ Typically a batched handler executes the batched jobs:
+ 1) When specifically told to do so
+ 2) When it is no longer a good idea to batch
+ 3) When it is necessary to send the batch for other reasons
+ (one such reason is when READs must be performed)
+
+ 1) is covered by exec_bulk_update calls.
+ 2) and 3) are handled by the bulk_update_row method.
+
+ bulk_update_row can execute the pending updates either including
+ or excluding the row passed in the call. This is up to the handler
+ implementation and can vary from call to call.
+
+ dup_key_found reports the number of duplicate keys found in those
+ updates that were actually executed. It only reports them if the
+ extra call with HA_EXTRA_IGNORE_DUP_KEY has been issued; if it
+ hasn't, the handler returns an error code instead and this number
+ can be ignored. Thus any handler that implements batching for
+ UPDATE IGNORE must also handle this extra call properly.
+
+ If a duplicate key is found on the record included in this call,
+ it should be included in the count of dup_key_found and error
+ should be set to 0 (only if these errors are ignored).
+ */
+ error= table->file->bulk_update_row(table->record[1],
+ table->record[0],
+ &dup_key_found);
+ limit+= dup_key_found;
+ updated-= dup_key_found;
+ }
+ else
+ {
+ /* Non-batched update */
+ error= table->file->update_row((byte*) table->record[1],
+ (byte*) table->record[0]);
+ }
+ if (!error)
{
updated++;
thd->no_trans_update= !transactional_table;
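
A self-contained toy illustrating the batching contract described in the
comment above, independent of the real handler class (method names are taken
from this patch; the exact signatures in handler.h may differ):

    #include <vector>

    struct ToyBatchEngine
    {
      std::vector<int> queued;                   // stand-in for queued rows
      /* Returns false when batching is supported, matching
         will_batch= !file->start_bulk_update() in mysql_update(). */
      bool start_bulk_update() { return false; }
      int bulk_update_row(int row_id, unsigned *dup_key_found)
      {
        queued.push_back(row_id);                // a real engine may flush here
        *dup_key_found= 0;                       // nothing flushed, no dups yet
        return 0;
      }
      int exec_bulk_update(unsigned *dup_key_found)
      {
        unsigned dups= 0;
        for (unsigned i= 0; i < queued.size(); i++)
          if (queued[i] % 2)                     // pretend odd ids hit dup keys
            dups++;
        queued.clear();
        *dup_key_found= dups;                    // caller adjusts limit/updated
        return 0;                                // ignored dups are not errors
      }
      void end_bulk_update() { queued.clear(); }
    };
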
@@ -442,20 +493,74 @@ int mysql_update(THD *thd,
if (!--limit && using_limit)
{
- error= -1; // Simulate end of file
- break;
+ /*
+ We have reached end-of-file in the common situations: no batching
+ has occurred, batching was supposed to occur but no updates were
+ made, or the batch execution completed without error and without
+ finding any duplicate keys.
+ If the batched updates were performed with errors we need to
+ report them; if there was no error but duplicate keys were found
+ we must continue, since those rows are not counted against limit.
+ */
+ if (will_batch &&
+ ((error= table->file->exec_bulk_update(&dup_key_found)) ||
+ !dup_key_found))
+ {
+ if (error)
+ {
+ /*
+ The handler must not report duplicate-key errors when they are
+ ignored. This is a requirement on batching handlers.
+ */
+ table->file->print_error(error,MYF(0));
+ error= 1;
+ break;
+ }
+ /*
+ Either an error was found and we are ignoring errors or there
+ were duplicate keys found. In both cases we need to correct
+ the counters and continue the loop.
+ */
+ limit= dup_key_found; // limit is 0 here, so add back the ignored rows
+ updated-= dup_key_found;
+ }
+ else
+ {
+ error= -1; // Simulate end of file
+ break;
+ }
}
}
else
table->file->unlock_row();
thd->row_count++;
}
+ dup_key_found= 0;
if (thd->killed && !error)
error= 1; // Aborted
+ else if (will_batch &&
+ (loc_error= table->file->exec_bulk_update(&dup_key_found)))
+ /*
+ An error has occurred when the batched update was executed and
+ returned an error indication. It cannot be an allowed duplicate-key
+ error, since we require the batching handler to treat those as
+ normal behaviour.
+
+ Otherwise we simply subtract the number of duplicate-key records
+ found in the batched update.
+ */
+ {
+ thd->fatal_error();
+ table->file->print_error(loc_error,MYF(0));
+ error= 1;
+ }
+ else
+ updated-= dup_key_found;
+ if (will_batch)
+ table->file->end_bulk_update();
end_read_record(&info);
free_io_cache(table); // If ORDER BY
delete select;
- thd->proc_info="end";
+ thd->proc_info= "end";
VOID(table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY));
/*
@@ -652,7 +757,7 @@ bool mysql_multi_update_prepare(THD *thd)
&lex->select_lex.leaf_tables, FALSE))
DBUG_RETURN(TRUE);
- if (setup_fields_with_no_wrap(thd, 0, *fields, 1, 0, 0))
+ if (setup_fields_with_no_wrap(thd, 0, *fields, 2, 0, 0))
DBUG_RETURN(TRUE);
for (tl= table_list; tl ; tl= tl->next_local)
@@ -769,7 +874,7 @@ bool mysql_multi_update_prepare(THD *thd)
&lex->select_lex.top_join_list,
table_list, &lex->select_lex.where,
&lex->select_lex.leaf_tables, FALSE) ||
- setup_fields_with_no_wrap(thd, 0, *fields, 1, 0, 0))
+ setup_fields_with_no_wrap(thd, 0, *fields, 2, 0, 0))
DBUG_RETURN(TRUE);
}
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 520b6190410..b87fc9ffa36 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -73,6 +73,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2)
int num;
ulong ulong_num;
ulonglong ulonglong_number;
+ longlong longlong_number;
LEX_STRING lex_str;
LEX_STRING *lex_str_ptr;
LEX_SYMBOL symbol;
@@ -357,13 +358,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token LEAVES
%token LEAVE_SYM
%token LEFT
+%token LESS_SYM
%token LEVEL_SYM
%token LEX_HOSTNAME
%token LIKE
%token LIMIT
+%token LINEAR_SYM
%token LINEFROMTEXT
%token LINES
%token LINESTRING
+%token LIST_SYM
%token LOAD
%token LOCAL_SYM
%token LOCATE
@@ -403,6 +407,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token MAX_SYM
%token MAX_UPDATES_PER_HOUR
%token MAX_USER_CONNECTIONS_SYM
+%token MAX_VALUE_SYM
%token MEDIUMBLOB
%token MEDIUMINT
%token MEDIUMTEXT
@@ -437,6 +442,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token NE
%token NEW_SYM
%token NEXT_SYM
+%token NODEGROUP_SYM
%token NONE_SYM
%token NOT2_SYM
%token NOT_SYM
@@ -465,6 +471,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token OUT_SYM
%token PACK_KEYS_SYM
%token PARTIAL
+%token PARTITION_SYM
+%token PARTITIONS_SYM
%token PASSWORD
%token PARAM_MARKER
%token PHASE_SYM
@@ -491,6 +499,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token RAID_STRIPED_SYM
%token RAID_TYPE
%token RAND
+%token RANGE_SYM
%token READS_SYM
%token READ_SYM
%token REAL
@@ -504,6 +513,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token RELEASE_SYM
%token RELOAD
%token RENAME
+%token REORGANISE_SYM
%token REPAIR
%token REPEATABLE_SYM
%token REPEAT_SYM
@@ -576,6 +586,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token STRING_SYM
%token SUBDATE_SYM
%token SUBJECT_SYM
+%token SUBPARTITION_SYM
+%token SUBPARTITIONS_SYM
%token SUBSTRING
%token SUBSTRING_INDEX
%token SUM_SYM
@@ -597,6 +609,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token TINYBLOB
%token TINYINT
%token TINYTEXT
+%token THAN_SYM
%token TO_SYM
%token TRAILING
%token TRANSACTION_SYM
@@ -621,11 +634,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token UNIX_TIMESTAMP
%token UNKNOWN_SYM
%token UNLOCK_SYM
-%token UNLOCK_SYM
%token UNSIGNED
%token UNTIL_SYM
-%token UNTIL_SYM
-%token UPDATE_SYM
%token UPDATE_SYM
%token USAGE
%token USER
@@ -713,6 +723,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%type <ulonglong_number>
ulonglong_num
+%type <longlong_number>
+ part_bit_expr
+
%type <lock_type>
replace_lock_option opt_low_priority insert_lock_option load_data_lock
@@ -729,6 +742,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
sp_opt_default
simple_ident_nospvar simple_ident_q
field_or_var limit_option
+ part_func_expr
%type <item_num>
NUM_literal
@@ -830,6 +844,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
statement sp_suid opt_view_list view_list or_replace algorithm
sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa
load_data opt_field_or_var_spec fields_or_vars opt_load_data_set_spec
+ partition_entry
END_OF_INPUT
%type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt
@@ -895,6 +910,7 @@ statement:
| lock
| optimize
| keycache
+ | partition_entry
| preload
| prepare
| purge
@@ -2535,7 +2551,9 @@ trg_event:
create2:
'(' create2a {}
- | opt_create_table_options create3 {}
+ | opt_create_table_options
+ opt_partitioning {}
+ create3 {}
| LIKE table_ident
{
LEX *lex=Lex;
@@ -2551,8 +2569,12 @@ create2:
;
create2a:
- field_list ')' opt_create_table_options create3 {}
- | create_select ')' { Select->set_braces(1);} union_opt {}
+ field_list ')' opt_create_table_options
+ opt_partitioning {}
+ create3 {}
+ | opt_partitioning {}
+ create_select ')'
+ { Select->set_braces(1);} union_opt {}
;
create3:
@@ -2563,6 +2585,480 @@ create3:
{ Select->set_braces(1);} union_opt {}
;
+/*
+ This part of the parser is about handling of the partition information.
+
+ Its first version was written by Mikael Ronström, with many answers to
+ questions provided by Antony Curtis.
+
+ The partition grammar can be called from three places.
+ 1) CREATE TABLE ... PARTITION ..
+ 2) ALTER TABLE table_name PARTITION ...
+ 3) PARTITION ...
+
+ The first place is called when a new table is created from a MySQL client.
+ The second place is called when a table is altered with the ALTER TABLE
+ command from a MySQL client.
+ The third place is called when opening an frm file and finding partition
+ info in the .frm file. It is necessary to prevent PARTITION from being a
+ valid entry point for SQL client queries; this is arranged by setting
+ some state variables before arriving here.
+
+ To be able to handle errors we only set the error code in this code and
+ handle the error condition in the function calling the parser. This
+ is necessary to ensure we can also handle errors when calling the parser
+ from the openfrm function.
+*/
+opt_partitioning:
+ /* empty */ {}
+ | partitioning
+ ;
+
+partitioning:
+ PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ lex->part_info= new partition_info();
+ if (!lex->part_info)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info));
+ YYABORT;
+ }
+ }
+ partition
+ ;
+
+partition_entry:
+ PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ if (lex->part_info)
+ {
+ /*
+ We enter here when opening the frm file to translate
+ partition info string into part_info data structure.
+ */
+ lex->part_info= new partition_info();
+ if (!lex->part_info)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info));
+ YYABORT;
+ }
+ }
+ else
+ {
+ yyerror(ER(ER_PARTITION_ENTRY_ERROR));
+ YYABORT;
+ }
+ }
+ partition {}
+ ;
+
+partition:
+ BY part_type_def opt_no_parts {} opt_sub_part {} part_defs
+ ;
+
+part_type_def:
+ opt_linear KEY_SYM '(' part_field_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->list_of_part_fields= TRUE;
+ lex->part_info->part_type= HASH_PARTITION;
+ }
+ | opt_linear HASH_SYM
+ { Lex->part_info->part_type= HASH_PARTITION; }
+ part_func {}
+ | RANGE_SYM
+ { Lex->part_info->part_type= RANGE_PARTITION; }
+ part_func {}
+ | LIST_SYM
+ { Lex->part_info->part_type= LIST_PARTITION; }
+ part_func {}
+ ;
+
+opt_linear:
+ /* empty */ {}
+ | LINEAR_SYM
+ { Lex->part_info->linear_hash_ind= TRUE;}
+ ;
+
+part_field_list:
+ part_field_item {}
+ | part_field_list ',' part_field_item {}
+ ;
+
+part_field_item:
+ ident
+ {
+ Lex->part_info->part_field_list.push_back($1.str);
+ }
+ ;
+
+part_func:
+ '(' remember_name part_func_expr remember_end ')'
+ {
+ LEX *lex= Lex;
+ uint expr_len= (uint)($4 - $2) - 1;
+ lex->part_info->list_of_part_fields= FALSE;
+ lex->part_info->part_expr= $3;
+ lex->part_info->part_func_string= $2+1;
+ lex->part_info->part_func_len= expr_len;
+ }
+ ;
+
+sub_part_func:
+ '(' remember_name part_func_expr remember_end ')'
+ {
+ LEX *lex= Lex;
+ uint expr_len= (uint)($4 - $2) - 1;
+ lex->part_info->list_of_subpart_fields= FALSE;
+ lex->part_info->subpart_expr= $3;
+ lex->part_info->subpart_func_string= $2+1;
+ lex->part_info->subpart_func_len= expr_len;
+ }
+ ;
+
+
+opt_no_parts:
+ /* empty */ {}
+ | PARTITIONS_SYM ulong_num
+ {
+ uint no_parts= $2;
+ if (no_parts == 0)
+ {
+ my_error(ER_NO_PARTS_ERROR, MYF(0), "partitions");
+ YYABORT;
+ }
+ Lex->part_info->no_parts= no_parts;
+ }
+ ;
+
+opt_sub_part:
+ /* empty */ {}
+ | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func
+ { Lex->part_info->subpart_type= HASH_PARTITION; }
+ opt_no_subparts {}
+ | SUBPARTITION_SYM BY opt_linear KEY_SYM
+ '(' sub_part_field_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->subpart_type= HASH_PARTITION;
+ lex->part_info->list_of_subpart_fields= TRUE;
+ }
+ opt_no_subparts {}
+ ;
+
+sub_part_field_list:
+ sub_part_field_item {}
+ | sub_part_field_list ',' sub_part_field_item {}
+ ;
+
+sub_part_field_item:
+ ident
+ { Lex->part_info->subpart_field_list.push_back($1.str); }
+ ;
+
+part_func_expr:
+ bit_expr
+ {
+ LEX *lex= Lex;
+ bool not_corr_func;
+ not_corr_func= !lex->safe_to_cache_query;
+ lex->safe_to_cache_query= 1;
+ if (not_corr_func)
+ {
+ yyerror(ER(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR));
+ YYABORT;
+ }
+ $$=$1;
+ }
+ ;
+
+opt_no_subparts:
+ /* empty */ {}
+ | SUBPARTITIONS_SYM ulong_num
+ {
+ uint no_parts= $2;
+ if (no_parts == 0)
+ {
+ my_error(ER_NO_PARTS_ERROR, MYF(0), "subpartitions");
+ YYABORT;
+ }
+ Lex->part_info->no_subparts= no_parts;
+ }
+ ;
+
+part_defs:
+ /* empty */
+ {}
+ | '(' part_def_list ')'
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ if (part_info->no_parts != 0)
+ {
+ if (part_info->no_parts !=
+ part_info->count_curr_parts)
+ {
+ yyerror(ER(ER_PARTITION_WRONG_NO_PART_ERROR));
+ YYABORT;
+ }
+ }
+ else if (part_info->count_curr_parts > 0)
+ {
+ part_info->no_parts= part_info->count_curr_parts;
+ }
+ part_info->count_curr_subparts= 0;
+ part_info->count_curr_parts= 0;
+ }
+ ;
+
+part_def_list:
+ part_definition {}
+ | part_def_list ',' part_definition {}
+ ;
+
+part_definition:
+ PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ partition_element *p_elem= new partition_element();
+ if (!p_elem)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ YYABORT;
+ }
+ part_info->curr_part_elem= p_elem;
+ part_info->current_partition= p_elem;
+ part_info->use_default_partitions= FALSE;
+ part_info->partitions.push_back(p_elem);
+ p_elem->engine_type= DB_TYPE_UNKNOWN;
+ part_info->count_curr_parts++;
+ }
+ part_name {}
+ opt_part_values {}
+ opt_part_options {}
+ opt_sub_partition {}
+ ;
+
+part_name:
+ ident_or_text
+ { Lex->part_info->curr_part_elem->partition_name= $1.str; }
+ ;
+
+opt_part_values:
+ /* empty */
+ {
+ LEX *lex= Lex;
+ if (!is_partition_management(lex))
+ {
+ if (lex->part_info->part_type == RANGE_PARTITION)
+ {
+ my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+ "RANGE", "LESS THAN");
+ YYABORT;
+ }
+ if (lex->part_info->part_type == LIST_PARTITION)
+ {
+ my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+ "LIST", "IN");
+ YYABORT;
+ }
+ }
+ }
+ | VALUES LESS_SYM THAN_SYM part_func_max
+ {
+ LEX *lex= Lex;
+ if (!is_partition_management(lex))
+ {
+ if (Lex->part_info->part_type != RANGE_PARTITION)
+ {
+ my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+ "RANGE", "LESS THAN");
+ YYABORT;
+ }
+ }
+ }
+ | VALUES IN_SYM '(' part_list_func ')'
+ {
+ LEX *lex= Lex;
+ if (!is_partition_management(lex))
+ {
+ if (Lex->part_info->part_type != LIST_PARTITION)
+ {
+ my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+ "LIST", "IN");
+ YYABORT;
+ }
+ }
+ }
+ ;
+
+part_func_max:
+ MAX_VALUE_SYM
+ {
+ LEX *lex= Lex;
+ if (lex->part_info->defined_max_value)
+ {
+ yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+ YYABORT;
+ }
+ lex->part_info->defined_max_value= TRUE;
+ lex->part_info->curr_part_elem->range_value= LONGLONG_MAX;
+ }
+ | part_range_func
+ {
+ if (Lex->part_info->defined_max_value)
+ {
+ yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+ YYABORT;
+ }
+ }
+ ;
+
+part_range_func:
+ '(' part_bit_expr ')'
+ {
+ Lex->part_info->curr_part_elem->range_value= $2;
+ }
+ ;
+
+part_list_func:
+ part_list_item {}
+ | part_list_func ',' part_list_item {}
+ ;
+
+part_list_item:
+ part_bit_expr
+ {
+ longlong *value_ptr;
+ if (!(value_ptr= (longlong*)sql_alloc(sizeof(longlong))))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(longlong));
+ YYABORT;
+ }
+ *value_ptr= $1;
+ Lex->part_info->curr_part_elem->list_val_list.push_back(value_ptr);
+ }
+ ;
+
+part_bit_expr:
+ bit_expr
+ {
+ Item *part_expr= $1;
+ bool not_corr_func;
+ LEX *lex= Lex;
+ longlong item_value;
+ Name_resolution_context *context= &lex->current_select->context;
+ TABLE_LIST *save_list= context->table_list;
+
+ context->table_list= 0;
+ part_expr->fix_fields(YYTHD, (Item**)0);
+ context->table_list= save_list;
+ not_corr_func= !part_expr->const_item() ||
+ !lex->safe_to_cache_query;
+ if (not_corr_func)
+ {
+ yyerror(ER(ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR));
+ YYABORT;
+ }
+ if (part_expr->result_type() != INT_RESULT)
+ {
+ yyerror(ER(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR));
+ YYABORT;
+ }
+ item_value= part_expr->val_int();
+ $$= item_value;
+ }
+ ;
+
+opt_sub_partition:
+ /* empty */ {}
+ | '(' sub_part_list ')'
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ if (part_info->no_subparts != 0)
+ {
+ if (part_info->no_subparts !=
+ part_info->count_curr_subparts)
+ {
+ yyerror(ER(ER_PARTITION_WRONG_NO_SUBPART_ERROR));
+ YYABORT;
+ }
+ }
+ else if (part_info->count_curr_subparts > 0)
+ {
+ part_info->no_subparts= part_info->count_curr_subparts;
+ }
+ part_info->count_curr_subparts= 0;
+ }
+ ;
+
+sub_part_list:
+ sub_part_definition {}
+ | sub_part_list ',' sub_part_definition {}
+ ;
+
+sub_part_definition:
+ SUBPARTITION_SYM
+ {
+ LEX *lex= Lex;
+ partition_info *part_info= lex->part_info;
+ partition_element *p_elem= new partition_element();
+ if (!p_elem)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element));
+ YYABORT;
+ }
+ part_info->curr_part_elem= p_elem;
+ part_info->current_partition->subpartitions.push_back(p_elem);
+ part_info->use_default_subpartitions= FALSE;
+ part_info->count_curr_subparts++;
+ p_elem->engine_type= DB_TYPE_UNKNOWN;
+ }
+ sub_name opt_part_options {}
+ ;
+
+sub_name:
+ ident_or_text
+ { Lex->part_info->curr_part_elem->partition_name= $1.str; }
+ ;
+
+opt_part_options:
+ /* empty */ {}
+ | opt_part_option_list {}
+ ;
+
+opt_part_option_list:
+ opt_part_option_list opt_part_option {}
+ | opt_part_option {}
+ ;
+
+opt_part_option:
+ TABLESPACE opt_equal ident_or_text
+ { Lex->part_info->curr_part_elem->tablespace_name= $3.str; }
+ | opt_storage ENGINE_SYM opt_equal storage_engines
+ { Lex->part_info->curr_part_elem->engine_type= $4; }
+ | NODEGROUP_SYM opt_equal ulong_num
+ { Lex->part_info->curr_part_elem->nodegroup_id= $3; }
+ | MAX_ROWS opt_equal ulonglong_num
+ { Lex->part_info->curr_part_elem->part_max_rows= $3; }
+ | MIN_ROWS opt_equal ulonglong_num
+ { Lex->part_info->curr_part_elem->part_min_rows= $3; }
+ | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->data_file_name= $4.str; }
+ | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->index_file_name= $4.str; }
+ | COMMENT_SYM opt_equal TEXT_STRING_sys
+ { Lex->part_info->curr_part_elem->part_comment= $3.str; }
+ ;
+
+/*
+ End of partition parser part
+*/
+
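
For orientation, DDL that the partitioning grammar above accepts, gathered
here as C++ string literals (hypothetical table and partition names; the
semantic checks elsewhere in the patch must still pass):

    static const char *partition_ddl_examples[]=
    {
      /* RANGE with an explicit partition count and MAXVALUE catch-all */
      "CREATE TABLE t1 (a INT, b DATE)"
      " PARTITION BY RANGE (YEAR(b)) PARTITIONS 2"
      " (PARTITION p0 VALUES LESS THAN (2000),"
      "  PARTITION p1 VALUES LESS THAN MAXVALUE)",

      /* LIST with per-partition options */
      "CREATE TABLE t2 (a INT)"
      " PARTITION BY LIST (a)"
      " (PARTITION p0 VALUES IN (1, 3, 5) NODEGROUP 1,"
      "  PARTITION p1 VALUES IN (2, 4, 6) COMMENT = 'even')",

      /* RANGE subpartitioned by LINEAR HASH */
      "CREATE TABLE t3 (a INT, b DATE)"
      " PARTITION BY RANGE (YEAR(b))"
      " SUBPARTITION BY LINEAR HASH (a) SUBPARTITIONS 2"
      " (PARTITION p0 VALUES LESS THAN (2000),"
      "  PARTITION p1 VALUES LESS THAN MAXVALUE)"
    };
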
create_select:
SELECT_SYM
{
@@ -3372,7 +3868,7 @@ alter:
lex->alter_info.reset();
lex->alter_info.flags= 0;
}
- alter_list
+ alter_commands
{}
| ALTER DATABASE ident_or_empty
{
@@ -3438,11 +3934,102 @@ ident_or_empty:
/* empty */ { $$= 0; }
| ident { $$= $1.str; };
-alter_list:
+alter_commands:
| DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; }
| IMPORT TABLESPACE { Lex->alter_info.tablespace_op= IMPORT_TABLESPACE; }
- | alter_list_item
- | alter_list ',' alter_list_item;
+ | alter_list
+ opt_partitioning
+ | partitioning
+/*
+ This part was added for release 5.1 by Mikael Ronström.
+ From here we insert a number of commands to manage the partitions of
+ a partitioned table, such as adding partitions, dropping partitions
+ and reorganising partitions in various manners. In future releases
+ the list will grow longer and also include moving partitions to a
+ new table and so forth.
+*/
+ | add_partition_rule
+ | DROP PARTITION_SYM alt_part_name_list
+ {
+ Lex->alter_info.flags|= ALTER_DROP_PARTITION;
+ }
+ | COALESCE PARTITION_SYM ulong_num
+ {
+ LEX *lex= Lex;
+ lex->alter_info.flags|= ALTER_COALESCE_PARTITION;
+ lex->alter_info.no_parts= $3;
+ }
+ | reorg_partition_rule
+ ;
+
+add_partition_rule:
+ ADD PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ lex->part_info= new partition_info();
+ if (!lex->part_info)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info));
+ YYABORT;
+ }
+ lex->alter_info.flags|= ALTER_ADD_PARTITION;
+ }
+ add_part_extra
+ {}
+ ;
+
+add_part_extra:
+ | '(' part_def_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->no_parts= lex->part_info->count_curr_parts;
+ }
+ | PARTITIONS_SYM ulong_num
+ {
+ LEX *lex= Lex;
+ lex->part_info->no_parts= $2;
+ }
+ ;
+
+reorg_partition_rule:
+ REORGANISE_SYM PARTITION_SYM
+ {
+ LEX *lex= Lex;
+ lex->part_info= new partition_info();
+ if (!lex->part_info)
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info));
+ YYABORT;
+ }
+ lex->alter_info.flags|= ALTER_REORGANISE_PARTITION;
+ }
+ alt_part_name_list INTO '(' part_def_list ')'
+ {
+ LEX *lex= Lex;
+ lex->part_info->no_parts= lex->part_info->count_curr_parts;
+ }
+ ;
+
+alt_part_name_list:
+ alt_part_name_item {}
+ | alt_part_name_list ',' alt_part_name_item {}
+ ;
+
+alt_part_name_item:
+ ident
+ {
+ Lex->alter_info.partition_names.push_back($1.str);
+ }
+ ;
+
+/*
+ End of management of partition commands
+*/
+
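
Likewise, the partition-management commands the alter_commands rule now
accepts (hypothetical names; COALESCE applies to HASH/KEY partitioning and
REORGANISE to RANGE/LIST, per the checks in mysql_alter_table):

    static const char *partition_alter_examples[]=
    {
      "ALTER TABLE t1 ADD PARTITION (PARTITION p2 VALUES LESS THAN (3000))",
      "ALTER TABLE t1 DROP PARTITION p0",
      "ALTER TABLE th COALESCE PARTITION 2",
      "ALTER TABLE t1 REORGANISE PARTITION p1, p2 INTO"
      " (PARTITION pmax VALUES LESS THAN MAXVALUE)"
    };
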
+alter_list:
+ alter_list_item
+ | alter_list ',' alter_list_item
+ ;
add_column:
ADD opt_column
@@ -4076,7 +4663,7 @@ select_options:
/* empty*/
| select_option_list
{
- if (test_all_bits(Select->options, SELECT_ALL | SELECT_DISTINCT))
+ if (Select->options & SELECT_DISTINCT && Select->options & SELECT_ALL)
{
my_error(ER_WRONG_USAGE, MYF(0), "ALL", "DISTINCT");
YYABORT;
@@ -7493,6 +8080,7 @@ keyword:
| LANGUAGE_SYM {}
| NO_SYM {}
| OPEN_SYM {}
+ | PARTITION_SYM {}
| PREPARE_SYM {}
| REPAIR {}
| RESET_SYM {}
@@ -7537,6 +8125,7 @@ keyword_sp:
| CHANGED {}
| CIPHER_SYM {}
| CLIENT_SYM {}
+ | COALESCE {}
| COLLATION_SYM {}
| COLUMNS {}
| COMMITTED_SYM {}
@@ -7593,8 +8182,10 @@ keyword_sp:
| RELAY_THREAD {}
| LAST_SYM {}
| LEAVES {}
+ | LESS_SYM {}
| LEVEL_SYM {}
| LINESTRING {}
+ | LIST_SYM {}
| LOCAL_SYM {}
| LOCKS_SYM {}
| LOGS_SYM {}
@@ -7618,6 +8209,7 @@ keyword_sp:
| MAX_QUERIES_PER_HOUR {}
| MAX_UPDATES_PER_HOUR {}
| MAX_USER_CONNECTIONS_SYM {}
+ | MAX_VALUE_SYM {}
| MEDIUM_SYM {}
| MERGE_SYM {}
| MICROSECOND_SYM {}
@@ -7638,6 +8230,7 @@ keyword_sp:
| NDBCLUSTER_SYM {}
| NEXT_SYM {}
| NEW_SYM {}
+ | NODEGROUP_SYM {}
| NONE_SYM {}
| NVARCHAR_SYM {}
| OFFSET_SYM {}
@@ -7646,6 +8239,7 @@ keyword_sp:
| ONE_SYM {}
| PACK_KEYS_SYM {}
| PARTIAL {}
+ | PARTITIONS_SYM {}
| PASSWORD {}
| PHASE_SYM {}
| POINT_SYM {}
@@ -7667,6 +8261,7 @@ keyword_sp:
| RELAY_LOG_FILE_SYM {}
| RELAY_LOG_POS_SYM {}
| RELOAD {}
+ | REORGANISE_SYM {}
| REPEATABLE_SYM {}
| REPLICATION {}
| RESOURCES {}
@@ -7696,6 +8291,8 @@ keyword_sp:
| STRING_SYM {}
| SUBDATE_SYM {}
| SUBJECT_SYM {}
+ | SUBPARTITION_SYM {}
+ | SUBPARTITIONS_SYM {}
| SUPER_SYM {}
| SUSPEND_SYM {}
| TABLES {}
@@ -7703,6 +8300,7 @@ keyword_sp:
| TEMPORARY {}
| TEMPTABLE_SYM {}
| TEXT_SYM {}
+ | THAN_SYM {}
| TRANSACTION_SYM {}
| TRIGGERS_SYM {}
| TIMESTAMP {}
diff --git a/sql/table.cc b/sql/table.cc
index 9d681141b1b..ce22a09cf6c 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -70,7 +70,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
int j,error, errarg= 0;
uint rec_buff_length,n_length,int_length,records,key_parts,keys,
interval_count,interval_parts,read_length,db_create_options;
- uint key_info_length, com_length;
+ uint key_info_length, com_length, part_info_len, extra_rec_buf_length;
ulong pos;
char index_file[FN_REFLEN], *names, *keynames, *comment_pos;
uchar head[288],*disk_buff,new_field_pack_flag;
@@ -153,6 +153,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
goto err; /* purecov: inspected */
*fn_ext(index_file)='\0'; // Remove .frm extension
+ part_info_len= uint4korr(head+55);
share->frm_version= head[2];
/*
Check if .frm file created by MySQL 5.0. In this case we want to
@@ -300,10 +301,6 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
}
#endif
- /* Allocate handler */
- if (!(outparam->file= get_new_handler(outparam, share->db_type)))
- goto err;
-
error=4;
outparam->reginfo.lock_type= TL_UNLOCK;
outparam->current_lock=F_UNLCK;
@@ -314,8 +311,9 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (prgflag & (READ_ALL+EXTRA_RECORD))
records++;
/* QQ: TODO, remove the +1 from below */
+ extra_rec_buf_length= uint2korr(head+59);
rec_buff_length= ALIGN_SIZE(share->reclength + 1 +
- outparam->file->extra_rec_buf_length());
+ extra_rec_buf_length);
share->rec_buff_length= rec_buff_length;
if (!(record= (char *) alloc_root(&outparam->mem_root,
rec_buff_length * records)))
@@ -435,9 +433,22 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (keynames)
fix_type_pointers(&int_array, &share->keynames, 1, &keynames);
+ if (part_info_len > 0)
+ {
+#ifdef HAVE_PARTITION_DB
+ if (mysql_unpack_partition(file, thd, part_info_len, outparam))
+ goto err;
+#else
+ goto err;
+#endif
+ }
VOID(my_close(file,MYF(MY_WME)));
file= -1;
+ /* Allocate handler */
+ if (!(outparam->file= get_new_handler(outparam, share->db_type)))
+ goto err;
+
record= (char*) outparam->record[0]-1; /* Fieldstart = 1 */
if (null_field_first)
{
@@ -594,6 +605,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
goto err; /* purecov: inspected */
}
+    reg_field->fieldnr= i+1;      // Set field number (1-based)
reg_field->field_index= i;
reg_field->comment=comment;
if (field_type == FIELD_TYPE_BIT && !f_bit_as_char(pack_flag))
@@ -854,7 +866,16 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
(*save++)= i;
}
}
+ if (outparam->file->ha_allocate_read_write_set(share->fields))
+ goto err;
+  /* Fix the partition functions and ensure they are not constant functions */
+ if (part_info_len > 0)
+#ifdef HAVE_PARTITION_DB
+ if (fix_partition_func(thd,name,outparam))
+#endif
+ goto err;
+
/* The table struct is now initialized; Open the table */
error=2;
if (db_stat)
@@ -912,6 +933,13 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat,
if (! error_reported)
frm_error(error,outparam,name,ME_ERROR+ME_WAITTANG, errarg);
delete outparam->file;
+#ifdef HAVE_PARTITION_DB
+ if (outparam->s->part_info)
+ {
+ free_items(outparam->s->part_info->item_free_list);
+ outparam->s->part_info->item_free_list= 0;
+ }
+#endif
outparam->file=0; // For easier errorchecking
outparam->db_stat=0;
hash_free(&share->name_hash);
@@ -938,6 +966,13 @@ int closefrm(register TABLE *table)
table->field= 0;
}
delete table->file;
+#ifdef HAVE_PARTITION_DB
+ if (table->s->part_info)
+ {
+ free_items(table->s->part_info->item_free_list);
+ table->s->part_info->item_free_list= 0;
+ }
+#endif
table->file= 0; /* For easier errorchecking */
hash_free(&table->s->name_hash);
free_root(&table->mem_root, MYF(0));
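
The key detail of the openfrm() changes: the partition info length
(bytes 55-58 of the .frm header) and the handler's extra record buffer
length (bytes 59-60, written by mysql_create_frm in unireg.cc below) are
now read straight from the header, so the record buffer can be sized
before any handler object exists; handler allocation correspondingly
moves to after the partition info is unpacked. A minimal sketch of the
decoding, assuming uint4korr()/uint2korr() are the usual little-endian
readers:

    #include <stdint.h>

    /* Little-endian reads, equivalent to uint4korr()/uint2korr(). */
    static uint32_t read_le32(const unsigned char *p)
    {
      return (uint32_t) p[0] | ((uint32_t) p[1] << 8) |
             ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
    }

    static uint16_t read_le16(const unsigned char *p)
    {
      return (uint16_t) (p[0] | (p[1] << 8));
    }

    /* Applied to the header buffer read above:
         part_info_len        = read_le32(head + 55);
         extra_rec_buf_length = read_le16(head + 59);  */
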
diff --git a/sql/table.h b/sql/table.h
index d7c14e1938a..09b64398fd9 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -21,6 +21,7 @@ class Item; /* Needed by ORDER */
class GRANT_TABLE;
class st_select_lex_unit;
class st_select_lex;
+class partition_info;
class COND_EQUAL;
/* Order clause list element */
@@ -100,6 +101,9 @@ class Table_triggers_list;
typedef struct st_table_share
{
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info; /* Partition related information */
+#endif
/* hash of field names (contains pointers to elements of field array) */
HASH name_hash; /* hash of field names */
MEM_ROOT mem_root;
@@ -207,6 +211,8 @@ struct st_table {
ORDER *group;
const char *alias; /* alias or table name */
uchar *null_flags;
+ MY_BITMAP *read_set;
+ MY_BITMAP *write_set;
query_id_t query_id;
ha_rows quick_rows[MAX_KEY];
@@ -260,6 +266,7 @@ struct st_table {
my_bool auto_increment_field_not_null;
my_bool insert_or_update; /* Can be used by the handler */
my_bool alias_name_used; /* true if table_name is alias */
+ my_bool get_fields_in_item_tree; /* Signal to fix_field */
REGINFO reginfo; /* field connections */
MEM_ROOT mem_root;
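
st_table gains read_set/write_set bitmap pointers, sized in openfrm() by
the new ha_allocate_read_write_set(share->fields) call. They give the
handler layer a per-field record of which columns a statement reads or
writes, so an engine -- the partition handler in particular -- can skip
work on untouched columns. A toy sketch of the concept only; the real
type is MY_BITMAP from include/my_bitmap.h:

    #include <cstddef>
    #include <vector>

    /* Conceptual stand-in for MY_BITMAP: one bit per table field. */
    struct field_set
    {
      std::vector<bool> bits;
      explicit field_set(std::size_t fields) : bits(fields, false) {}
      void set(std::size_t fieldnr)          { bits[fieldnr]= true; }
      bool is_set(std::size_t fieldnr) const { return bits[fieldnr]; }
    };

    /* An engine consults is_set() before doing per-field work. */
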
diff --git a/sql/tztime.cc b/sql/tztime.cc
index 5a907f0d170..99a009968a7 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -1623,7 +1623,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
mysql.time_zone* tables are MyISAM and these operations always succeed
for MyISAM.
*/
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
tz_leapcnt= 0;
res= table->file->index_first(table->record[0]);
@@ -1800,7 +1800,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
mysql.time_zone* tables are MyISAM and these operations always succeed
for MyISAM.
*/
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
0, HA_READ_KEY_EXACT))
@@ -1827,7 +1827,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
table= tz_tables->table;
tz_tables= tz_tables->next_local;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
0, HA_READ_KEY_EXACT))
@@ -1854,7 +1854,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
table= tz_tables->table;
tz_tables= tz_tables->next_local;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
// FIXME Is there any better approach than explicitly specifying 4 ???
res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
@@ -1926,7 +1926,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
*/
table= tz_tables->table;
table->field[0]->store((longlong)tzid);
- (void)table->file->ha_index_init(0);
+ (void)table->file->ha_index_init(0, 1);
// FIXME Is there any better approach than explicitly specifying 4 ???
res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
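
The tztime.cc changes are mechanical: every ha_index_init() call gains a
second argument. Judging from these call sites, it tells the handler
whether the scan must return rows in index order; the time-zone loader
iterates with index_read()/index_first() and therefore passes 1. An
illustrative signature only -- handler.h in this patch is authoritative:

    class example_handler
    {
    public:
      int ha_index_init(unsigned int index, bool sorted)
      {
        /* 'sorted' lets an engine skip ordering work when the caller
           does not need it; a partitioned handler, which may have to
           merge several sub-scans, presumably benefits most. */
        (void) index; (void) sorted;
        return 0;
      }
    };
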
diff --git a/sql/unireg.cc b/sql/unireg.cc
index a89d89426a6..d423e1bee4b 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -46,7 +46,8 @@ static bool pack_fields(File file, List<create_field> &create_fields,
static bool make_empty_rec(THD *thd, int file, enum db_type table_type,
uint table_options,
List<create_field> &create_fields,
- uint reclength, ulong data_offset);
+ uint reclength, ulong data_offset,
+ handler *handler);
/*
Create a frm (table definition) file
@@ -82,13 +83,18 @@ bool mysql_create_frm(THD *thd, my_string file_name,
uchar fileinfo[64],forminfo[288],*keybuff;
TYPELIB formnames;
uchar *screen_buff;
+#ifdef HAVE_PARTITION_DB
+ partition_info *part_info= thd->lex->part_info;
+#endif
DBUG_ENTER("mysql_create_frm");
+#ifdef HAVE_PARTITION_DB
+ thd->lex->part_info= NULL;
+#endif
formnames.type_names=0;
if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0)))
DBUG_RETURN(1);
- if (db_file == NULL)
- db_file= get_new_handler((TABLE*) 0, create_info->db_type);
+ DBUG_ASSERT(db_file != NULL);
/* If fixed row records, we need one bit to check for deleted rows */
if (!(create_info->table_options & HA_OPTION_PACK_RECORD))
@@ -139,6 +145,13 @@ bool mysql_create_frm(THD *thd, my_string file_name,
60);
forminfo[46]=(uchar) strlen((char*)forminfo+47); // Length of comment
+#ifdef HAVE_PARTITION_DB
+ if (part_info)
+ {
+ int4store(fileinfo+55,part_info->part_info_len);
+ }
+#endif
+ int2store(fileinfo+59,db_file->extra_rec_buf_length());
if (my_pwrite(file,(byte*) fileinfo,64,0L,MYF_RW) ||
my_pwrite(file,(byte*) keybuff,key_info_length,
(ulong) uint2korr(fileinfo+6),MYF_RW))
@@ -147,7 +160,7 @@ bool mysql_create_frm(THD *thd, my_string file_name,
(ulong) uint2korr(fileinfo+6)+ (ulong) key_buff_length,
MY_SEEK_SET,MYF(0)));
if (make_empty_rec(thd,file,create_info->db_type,create_info->table_options,
- create_fields,reclength, data_offset))
+ create_fields,reclength, data_offset, db_file))
goto err;
VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
@@ -156,6 +169,14 @@ bool mysql_create_frm(THD *thd, my_string file_name,
pack_fields(file, create_fields, data_offset))
goto err;
+#ifdef HAVE_PARTITION_DB
+ if (part_info)
+ {
+ if (my_write(file, (byte*) part_info->part_info_string,
+ part_info->part_info_len, MYF_RW))
+ goto err;
+ }
+#endif
#ifdef HAVE_CRYPTED_FRM
if (create_info->password)
{
@@ -223,8 +244,7 @@ err3:
create_fields Fields to create
keys number of keys to create
key_info Keys to create
- db_file Handler to use. May be zero, in which case we use
- create_info->db_type
+ file Handler to use.
RETURN
0 ok
1 error
@@ -234,19 +254,21 @@ int rea_create_table(THD *thd, my_string file_name,
const char *db, const char *table,
HA_CREATE_INFO *create_info,
List<create_field> &create_fields,
- uint keys, KEY *key_info)
+ uint keys, KEY *key_info, handler *file)
{
DBUG_ENTER("rea_create_table");
if (mysql_create_frm(thd, file_name, db, table, create_info,
- create_fields, keys, key_info, NULL))
+ create_fields, keys, key_info, file))
DBUG_RETURN(1);
+ if (file->create_handler_files(file_name))
+ goto err_handler;
if (!create_info->frm_only && ha_create_table(file_name,create_info,0))
- {
- my_delete(file_name,MYF(0));
- DBUG_RETURN(1);
- }
+ goto err_handler;
DBUG_RETURN(0);
+err_handler:
+ my_delete(file_name, MYF(0));
+ DBUG_RETURN(1);
} /* rea_create_table */
@@ -670,7 +692,8 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
uint table_options,
List<create_field> &create_fields,
uint reclength,
- ulong data_offset)
+ ulong data_offset,
+ handler *handler)
{
int error;
Field::utype type;
@@ -678,19 +701,15 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
uchar *buff,*null_pos;
TABLE table;
create_field *field;
- handler *handler;
enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
DBUG_ENTER("make_empty_rec");
/* We need a table to generate columns for default values */
bzero((char*) &table,sizeof(table));
table.s= &table.share_not_to_be_used;
- handler= get_new_handler((TABLE*) 0, table_type);
- if (!handler ||
- !(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
+ if (!(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
{
- delete handler;
DBUG_RETURN(1);
}
@@ -747,6 +766,7 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
{
my_error(ER_INVALID_DEFAULT, MYF(0), regfield->field_name);
error= 1;
+        delete regfield; // To avoid a memory leak
goto err;
}
}
@@ -776,7 +796,6 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
err:
my_free((gptr) buff,MYF(MY_FAE));
- delete handler;
thd->count_cuted_fields= old_count_cuted_fields;
DBUG_RETURN(error);
} /* make_empty_rec */
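
The unireg.cc changes make the handler's lifetime explicit: the caller
now creates it once and threads it through mysql_create_frm(),
make_empty_rec() and rea_create_table() (DBUG_ASSERT(db_file != NULL)
replaces the old lazy get_new_handler() calls), and rea_create_table()
folds its failure paths into a single err_handler: label so the
half-written .frm file is removed however late a step fails. A minimal
sketch of that error-path shape, with hypothetical step names:

    #include <cstdio>

    /* Hypothetical stand-ins for the creation steps. */
    static int write_frm(const char *path)             { (void) path; return 0; }
    static int create_handler_files_(const char *path) { (void) path; return 0; }
    static int create_storage(const char *path)        { (void) path; return 0; }

    static int create_table_files(const char *frm_path)
    {
      if (write_frm(frm_path))
        return 1;                       /* nothing on disk yet */
      if (create_handler_files_(frm_path) || create_storage(frm_path))
        goto err_handler;
      return 0;
    err_handler:
      std::remove(frm_path);            /* one cleanup path for all failures */
      return 1;
    }
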
diff --git a/sql/unireg.h b/sql/unireg.h
index 6afefa579e8..2f1d3c2082d 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -84,6 +84,7 @@
#define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
RAND_TABLE_BIT)
#define MAX_FIELDS 4096 /* Limit in the .frm file */
+#define MAX_PARTITIONS 1024
#define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
#define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)
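
MAX_PARTITIONS joins MAX_FIELDS as a hard .frm-level cap. A small
validation sketch (the check itself is illustrative; where the server
enforces the limit is not shown in this diff):

    #define MAX_PARTITIONS 1024   /* repeated here so the sketch stands alone */

    /* Reject partition counts the on-disk format cannot represent. */
    static bool partition_count_ok(unsigned int no_parts)
    {
      return no_parts >= 1 && no_parts <= MAX_PARTITIONS;
    }
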