summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--storage/archive/archive_reader.c20
-rw-r--r--storage/archive/ha_archive.cc24
-rw-r--r--storage/csv/CMakeLists.txt3
-rw-r--r--storage/example/CMakeLists.txt1
-rw-r--r--storage/example/ha_example.cc69
-rw-r--r--storage/federated/CMakeLists.txt2
-rw-r--r--storage/federated/ha_federated.cc2
-rw-r--r--storage/federatedx/ha_federatedx.cc16
-rw-r--r--storage/heap/CMakeLists.txt3
-rw-r--r--storage/heap/ha_heap.cc46
-rw-r--r--storage/heap/ha_heap.h4
-rw-r--r--storage/heap/hp_clear.c2
-rw-r--r--storage/heap/hp_create.c18
-rw-r--r--storage/heap/hp_delete.c1
-rw-r--r--storage/heap/hp_hash.c58
-rw-r--r--storage/heap/hp_rfirst.c16
-rw-r--r--storage/heap/hp_rkey.c5
-rw-r--r--storage/heap/hp_rlast.c8
-rw-r--r--storage/heap/hp_rnext.c16
-rw-r--r--storage/heap/hp_rprev.c16
-rw-r--r--storage/heap/hp_rsame.c2
-rw-r--r--storage/heap/hp_scan.c2
-rw-r--r--storage/heap/hp_test2.c9
-rw-r--r--storage/heap/hp_update.c4
-rw-r--r--storage/heap/hp_write.c1
-rw-r--r--storage/innobase/handler/ha_innodb.cc358
-rw-r--r--storage/innobase/handler/ha_innodb.h15
-rw-r--r--storage/innobase/handler/i_s.cc2
-rw-r--r--storage/innobase/include/trx0trx.h2
-rw-r--r--storage/innobase/row/row0upd.c2
-rw-r--r--storage/innobase/trx/trx0i_s.c34
-rw-r--r--storage/innobase/trx/trx0sys.c2
-rw-r--r--storage/innobase/trx/trx0trx.c2
-rw-r--r--storage/maria/CMakeLists.txt6
-rw-r--r--storage/maria/compat_aliases.cc245
-rw-r--r--storage/maria/compat_aliases.h27
-rw-r--r--storage/maria/ha_maria.cc251
-rw-r--r--storage/maria/ha_maria.h7
-rw-r--r--storage/maria/lockman.c6
-rw-r--r--storage/maria/ma_bitmap.c412
-rw-r--r--storage/maria/ma_blockrec.c222
-rw-r--r--storage/maria/ma_blockrec.h19
-rw-r--r--storage/maria/ma_cache.c11
-rw-r--r--storage/maria/ma_check.c409
-rw-r--r--storage/maria/ma_check_standalone.h7
-rw-r--r--storage/maria/ma_checkpoint.c44
-rw-r--r--storage/maria/ma_close.c34
-rw-r--r--storage/maria/ma_create.c41
-rw-r--r--storage/maria/ma_delete.c49
-rw-r--r--storage/maria/ma_delete_all.c11
-rw-r--r--storage/maria/ma_delete_table.c20
-rw-r--r--storage/maria/ma_dynrec.c79
-rw-r--r--storage/maria/ma_extra.c77
-rw-r--r--storage/maria/ma_ft_boolean_search.c6
-rw-r--r--storage/maria/ma_init.c2
-rw-r--r--storage/maria/ma_key.c31
-rw-r--r--storage/maria/ma_key_recover.c4
-rw-r--r--storage/maria/ma_keycache.c4
-rw-r--r--storage/maria/ma_locking.c103
-rw-r--r--storage/maria/ma_loghandler.c298
-rw-r--r--storage/maria/ma_loghandler.h27
-rw-r--r--storage/maria/ma_norec.c66
-rw-r--r--storage/maria/ma_open.c142
-rw-r--r--storage/maria/ma_packrec.c17
-rw-r--r--storage/maria/ma_page.c10
-rw-r--r--storage/maria/ma_pagecache.c796
-rw-r--r--storage/maria/ma_pagecache.h2
-rw-r--r--storage/maria/ma_pagecrc.c10
-rw-r--r--storage/maria/ma_panic.c6
-rw-r--r--storage/maria/ma_recovery.c100
-rw-r--r--storage/maria/ma_recovery.h3
-rw-r--r--storage/maria/ma_recovery_util.c12
-rw-r--r--storage/maria/ma_rkey.c76
-rw-r--r--storage/maria/ma_rnext.c42
-rw-r--r--storage/maria/ma_rnext_same.c23
-rw-r--r--storage/maria/ma_rprev.c26
-rw-r--r--storage/maria/ma_rsame.c10
-rw-r--r--storage/maria/ma_rt_index.c1
-rw-r--r--storage/maria/ma_rt_split.c3
-rw-r--r--storage/maria/ma_rt_test.c7
-rw-r--r--storage/maria/ma_search.c55
-rw-r--r--storage/maria/ma_sort.c39
-rw-r--r--storage/maria/ma_static.c9
-rw-r--r--storage/maria/ma_statrec.c2
-rw-r--r--storage/maria/ma_test1.c26
-rw-r--r--storage/maria/ma_test2.c10
-rw-r--r--storage/maria/ma_unique.c31
-rw-r--r--storage/maria/ma_update.c20
-rw-r--r--storage/maria/ma_write.c38
-rw-r--r--storage/maria/maria_chk.c187
-rw-r--r--storage/maria/maria_def.h78
-rw-r--r--storage/maria/maria_dump_log.c192
-rw-r--r--storage/maria/maria_pack.c6
-rw-r--r--storage/maria/maria_read_log.c89
-rw-r--r--storage/maria/tablockman.c8
-rw-r--r--storage/maria/unittest/CMakeLists.txt1
-rw-r--r--storage/maria/unittest/ma_control_file-t.c26
-rw-r--r--storage/maria/unittest/ma_loghandler_examples.c3
-rw-r--r--storage/maria/unittest/ma_maria_log_cleanup.c24
-rw-r--r--storage/maria/unittest/ma_pagecache_consist.c31
-rw-r--r--storage/maria/unittest/ma_pagecache_rwconsist.c30
-rw-r--r--storage/maria/unittest/ma_pagecache_rwconsist2.c29
-rw-r--r--storage/maria/unittest/ma_pagecache_single.c32
-rwxr-xr-xstorage/maria/unittest/ma_test_all-t176
-rw-r--r--storage/maria/unittest/ma_test_loghandler-t.c16
-rw-r--r--storage/maria/unittest/ma_test_loghandler_first_lsn-t.c23
-rw-r--r--storage/maria/unittest/ma_test_loghandler_max_lsn-t.c13
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multigroup-t.c23
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multithread-t.c26
-rw-r--r--storage/maria/unittest/ma_test_loghandler_noflush-t.c16
-rw-r--r--storage/maria/unittest/ma_test_loghandler_nologs-t.c14
-rw-r--r--storage/maria/unittest/ma_test_loghandler_pagecache-t.c30
-rw-r--r--storage/maria/unittest/ma_test_loghandler_purge-t.c12
-rwxr-xr-xstorage/maria/unittest/ma_test_recovery.pl8
-rw-r--r--storage/maria/unittest/trnman-t.c12
-rw-r--r--storage/myisam/ft_nlq_search.c1
-rw-r--r--storage/myisam/ha_myisam.cc98
-rw-r--r--storage/myisam/ha_myisam.h13
-rw-r--r--storage/myisam/mi_check.c122
-rw-r--r--storage/myisam/mi_close.c2
-rw-r--r--storage/myisam/mi_create.c4
-rw-r--r--storage/myisam/mi_extra.c5
-rw-r--r--storage/myisam/mi_key.c19
-rw-r--r--storage/myisam/mi_locking.c15
-rw-r--r--storage/myisam/mi_open.c11
-rw-r--r--storage/myisam/mi_panic.c2
-rw-r--r--storage/myisam/mi_rkey.c98
-rw-r--r--storage/myisam/mi_rnext.c26
-rw-r--r--storage/myisam/mi_rnext_same.c23
-rw-r--r--storage/myisam/mi_rprev.c31
-rw-r--r--storage/myisam/mi_search.c5
-rw-r--r--storage/myisam/mi_static.c1
-rw-r--r--storage/myisam/myisamchk.c47
-rw-r--r--storage/myisam/myisamdef.h27
-rw-r--r--storage/myisam/myisampack.c2
-rw-r--r--storage/myisam/rt_index.c2
-rw-r--r--storage/myisammrg/ha_myisammrg.cc4
-rw-r--r--storage/ndb/include/ndbapi/NdbError.hpp2
-rw-r--r--storage/ndb/include/util/File.hpp2
-rw-r--r--storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp2
-rw-r--r--storage/oqgraph/CMakeLists.txt49
-rw-r--r--storage/oqgraph/ha_oqgraph.cc10
-rw-r--r--storage/pbxt/src/cache_xt.cc5
-rw-r--r--storage/pbxt/src/filesys_xt.cc3
-rw-r--r--storage/pbxt/src/ha_pbxt.cc41
-rw-r--r--storage/pbxt/src/heap_xt.cc1
-rw-r--r--storage/pbxt/src/lock_xt.cc11
-rw-r--r--storage/pbxt/src/lock_xt.h1
-rw-r--r--storage/pbxt/src/memory_xt.h17
-rwxr-xr-xstorage/pbxt/src/pthread_xt.cc80
-rw-r--r--storage/pbxt/src/table_xt.cc21
-rw-r--r--storage/pbxt/src/thread_xt.cc16
-rw-r--r--storage/pbxt/src/thread_xt.h3
-rw-r--r--storage/pbxt/src/xaction_xt.cc150
-rw-r--r--storage/pbxt/src/xaction_xt.h2
-rw-r--r--storage/sphinx/ha_sphinx.cc2
-rw-r--r--storage/xtradb/CMakeLists.txt7
-rw-r--r--storage/xtradb/dict/dict0load.c2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc322
-rw-r--r--storage/xtradb/handler/ha_innodb.h24
-rw-r--r--storage/xtradb/handler/i_s.cc2
-rw-r--r--storage/xtradb/include/db0err.h3
-rw-r--r--storage/xtradb/include/fsp0types.h2
-rw-r--r--storage/xtradb/include/log0log.h17
-rw-r--r--storage/xtradb/include/os0file.h4
-rw-r--r--storage/xtradb/include/row0mysql.h11
-rw-r--r--storage/xtradb/include/srv0srv.h8
-rw-r--r--storage/xtradb/include/sync0sync.h2
-rw-r--r--storage/xtradb/include/trx0trx.h6
-rw-r--r--storage/xtradb/include/univ.i1
-rw-r--r--storage/xtradb/log/log0log.c100
-rw-r--r--storage/xtradb/log/log0recv.c3
-rw-r--r--storage/xtradb/os/os0file.c498
-rw-r--r--storage/xtradb/row/row0sel.c26
-rw-r--r--storage/xtradb/row/row0upd.c2
-rw-r--r--storage/xtradb/srv/srv0srv.c41
-rw-r--r--storage/xtradb/srv/srv0start.c10
-rw-r--r--storage/xtradb/trx/trx0sys.c2
-rw-r--r--storage/xtradb/trx/trx0trx.c2
-rw-r--r--storage/xtradb/ut/ut0ut.c6
180 files changed, 5063 insertions, 2918 deletions
diff --git a/storage/archive/archive_reader.c b/storage/archive/archive_reader.c
index ce4be92a521..ab54164dcc0 100644
--- a/storage/archive/archive_reader.c
+++ b/storage/archive/archive_reader.c
@@ -93,12 +93,16 @@ int main(int argc, char *argv[])
printf("\tFRM length %u\n", reader_handle.frm_length);
if (reader_handle.comment_start_pos)
{
- char *comment =
- (char *) malloc(sizeof(char) * reader_handle.comment_length);
- azread_comment(&reader_handle, comment);
- printf("\tComment length %u\n\t\t%.*s\n", reader_handle.comment_length,
- reader_handle.comment_length, comment);
- free(comment);
+ char *comment = (char *) my_malloc(reader_handle.comment_length,
+ MYF(MY_WME));
+ if (comment)
+ {
+ azread_comment(&reader_handle, comment);
+ printf("\tComment length %u\n\t\t%.*s\n",
+ reader_handle.comment_length,
+ reader_handle.comment_length, comment);
+ my_free(comment,MYF(0));
+ }
}
}
else
@@ -180,7 +184,7 @@ int main(int argc, char *argv[])
azio_stream writer_handle;
- buffer= (char *)malloc(reader_handle.longest_row);
+ buffer= (char *) my_malloc(reader_handle.longest_row, MYF(0));
if (buffer == NULL)
{
printf("Could not allocate memory for row %llu\n", row_count);
@@ -251,7 +255,7 @@ int main(int argc, char *argv[])
break;
}
- free(buffer);
+ my_free(buffer, MYF(0));
azclose(&writer_handle);
}
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index a4631f2fbc8..4112279d3c6 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -388,6 +388,7 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, int *rc)
{
*rc= my_errno ? my_errno : -1;
mysql_mutex_unlock(&archive_mutex);
+ mysql_mutex_destroy(&share->mutex);
my_free(share);
DBUG_RETURN(NULL);
}
@@ -742,11 +743,11 @@ int ha_archive::create(const char *name, TABLE *table_arg,
{
if (!mysql_file_fstat(frm_file, &file_stat, MYF(MY_WME)))
{
- frm_ptr= (uchar *)my_malloc(sizeof(uchar) * file_stat.st_size, MYF(0));
+ frm_ptr= (uchar *)my_malloc(sizeof(uchar) * (size_t)file_stat.st_size, MYF(0));
if (frm_ptr)
{
- my_read(frm_file, frm_ptr, file_stat.st_size, MYF(0));
- azwrite_frm(&create_stream, (char *)frm_ptr, file_stat.st_size);
+ my_read(frm_file, frm_ptr, (size_t)file_stat.st_size, MYF(0));
+ azwrite_frm(&create_stream, (char *)frm_ptr, (size_t)file_stat.st_size);
my_free(frm_ptr);
}
}
@@ -893,7 +894,7 @@ int ha_archive::write_row(uchar *buf)
if (!share->archive_write_open)
if (init_archive_writer())
- DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
+ DBUG_RETURN(errno);
if (table->next_number_field && record == table->record[0])
@@ -1083,7 +1084,8 @@ int ha_archive::rnd_init(bool scan)
if (share->crashed)
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
- init_archive_reader();
+ if (init_archive_reader())
+ DBUG_RETURN(errno);
/* We rewind the file so that we can read from the beginning if scan */
if (scan)
@@ -1389,7 +1391,8 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt)
char writer_filename[FN_REFLEN];
DBUG_ENTER("ha_archive::optimize");
- init_archive_reader();
+ if (init_archive_reader())
+ DBUG_RETURN(errno);
// now we close both our writer and our reader for the rename
if (share->archive_write_open)
@@ -1514,12 +1517,13 @@ THR_LOCK_DATA **ha_archive::store_lock(THD *thd,
/*
Here is where we get into the guts of a row level lock.
If TL_UNLOCK is set
- If we are not doing a LOCK TABLE or DISCARD/IMPORT
+ If we are not doing a LOCK TABLE, DELAYED LOCK or DISCARD/IMPORT
TABLESPACE, then allow multiple writers
*/
if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
- lock_type <= TL_WRITE) && !thd_in_lock_tables(thd)
+ lock_type <= TL_WRITE) && delayed_insert == FALSE &&
+ !thd_in_lock_tables(thd)
&& !thd_tablespace_op(thd))
lock_type = TL_WRITE_ALLOW_WRITE;
@@ -1618,7 +1622,9 @@ int ha_archive::info(uint flag)
if (flag & HA_STATUS_AUTO)
{
- init_archive_reader();
+ if (init_archive_reader())
+ DBUG_RETURN(errno);
+
mysql_mutex_lock(&share->mutex);
azflush(&archive, Z_SYNC_FLUSH);
mysql_mutex_unlock(&share->mutex);
diff --git a/storage/csv/CMakeLists.txt b/storage/csv/CMakeLists.txt
index 910942325b9..31e41b6d3f8 100644
--- a/storage/csv/CMakeLists.txt
+++ b/storage/csv/CMakeLists.txt
@@ -13,8 +13,5 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(CSV_PLUGIN_STATIC "csv")
-SET(CSV_PLUGIN_MANDATORY TRUE)
-
SET(CSV_SOURCES ha_tina.cc ha_tina.h transparent_file.cc transparent_file.h)
MYSQL_ADD_PLUGIN(csv ${CSV_SOURCES} STORAGE_ENGINE MANDATORY)
diff --git a/storage/example/CMakeLists.txt b/storage/example/CMakeLists.txt
index 3d94d09f075..53f5d6619a0 100644
--- a/storage/example/CMakeLists.txt
+++ b/storage/example/CMakeLists.txt
@@ -13,6 +13,5 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(EXAMPLE_PLUGIN_DYNAMIC "ha_example")
SET(EXAMPLE_SOURCES ha_example.cc)
MYSQL_ADD_PLUGIN(example ${EXAMPLE_SOURCES} STORAGE_ENGINE MODULE_ONLY)
diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc
index f8bb67a5665..bea8b4fb9c4 100644
--- a/storage/example/ha_example.cc
+++ b/storage/example/ha_example.cc
@@ -114,13 +114,14 @@ mysql_mutex_t example_mutex;
/**
- structure for CREATE TABLE options (table options)
+ Structure for CREATE TABLE options (table options).
+ It needs to be called ha_table_option_struct.
- These can be specified in the CREATE TABLE:
- CREATE TABLE ( ... ) {...here...}
+ The option values can be specified in the CREATE TABLE at the end:
+ CREATE TABLE ( ... ) *here*
*/
-struct example_table_options_struct
+struct ha_table_option_struct
{
const char *strparam;
ulonglong ullparam;
@@ -130,19 +131,26 @@ struct example_table_options_struct
/**
- structure for CREATE TABLE options (field options)
+ Structure for CREATE TABLE options (field options).
+ It needs to be called ha_field_option_struct.
- These can be specified in the CREATE TABLE per field:
- CREATE TABLE ( field ... {...here...}, ... )
+ The option values can be specified in the CREATE TABLE per field:
+ CREATE TABLE ( field ... *here*, ... )
*/
-struct example_field_options_struct
+struct ha_field_option_struct
{
- const char *compex_param_to_parse_it_in_engine;
+ const char *complex_param_to_parse_it_in_engine;
};
-/* HA_TOPTION_* macros expect the structure called ha_table_option_struct */
-#define ha_table_option_struct example_table_options_struct
+/*
+ No example here, but index options can be declared similarly
+ using the ha_index_option_struct structure.
+
+ Their values can be specified in the CREATE TABLE per index:
+ CREATE TABLE ( field ..., ..., INDEX ... *here*, ... )
+*/
+
ha_create_table_option example_table_option_list[]=
{
/*
@@ -168,8 +176,6 @@ ha_create_table_option example_table_option_list[]=
HA_TOPTION_END
};
-/* HA_FOPTION_* macros expect the structure called ha_field_option_struct */
-#define ha_field_option_struct example_field_options_struct
ha_create_table_option example_field_option_list[]=
{
/*
@@ -177,7 +183,7 @@ ha_create_table_option example_field_option_list[]=
or boolean - for example a list - it needs to specify the option
as a string and parse it internally.
*/
- HA_FOPTION_STRING("COMPLEX", compex_param_to_parse_it_in_engine),
+ HA_FOPTION_STRING("COMPLEX", complex_param_to_parse_it_in_engine),
HA_FOPTION_END
};
@@ -394,8 +400,7 @@ int ha_example::open(const char *name, int mode, uint test_if_locked)
thr_lock_data_init(&share->lock,&lock,NULL);
#ifndef DBUG_OFF
- example_table_options_struct *options=
- (example_table_options_struct *)table->s->option_struct;
+ ha_table_option_struct *options= table->s->option_struct;
DBUG_ASSERT(options);
DBUG_PRINT("info", ("strparam: '%-.64s' ullparam: %llu enumparam: %u "\
@@ -945,8 +950,7 @@ int ha_example::create(const char *name, TABLE *table_arg,
HA_CREATE_INFO *create_info)
{
#ifndef DBUG_OFF
- example_table_options_struct *options=
- (example_table_options_struct *)table_arg->s->option_struct;
+ ha_table_option_struct *options= table_arg->s->option_struct;
DBUG_ENTER("ha_example::create");
/*
This example shows how to support custom engine specific table and field
@@ -959,13 +963,12 @@ int ha_example::create(const char *name, TABLE *table_arg,
options->ullparam, options->enumparam, options->boolparam));
for (Field **field= table_arg->s->field; *field; field++)
{
- example_field_options_struct *field_options=
- (example_field_options_struct *)(*field)->option_struct;
+ ha_field_option_struct *field_options= (*field)->option_struct;
DBUG_ASSERT(field_options);
DBUG_PRINT("info", ("field: %s complex: '%-.64s'",
(*field)->field_name,
- (field_options->compex_param_to_parse_it_in_engine ?
- field_options->compex_param_to_parse_it_in_engine :
+ (field_options->complex_param_to_parse_it_in_engine ?
+ field_options->complex_param_to_parse_it_in_engine :
"<NULL>")));
}
#endif
@@ -987,21 +990,21 @@ int ha_example::create(const char *name, TABLE *table_arg,
bool ha_example::check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes)
{
- example_table_options_struct *param_old, *param_new;
+ ha_table_option_struct *param_old, *param_new;
uint i;
DBUG_ENTER("ha_example::check_if_incompatible_data");
/*
This example shows how custom engine specific table and field
options can be accessed from this function to be compared.
*/
- param_new= (example_table_options_struct *)info->option_struct;
+ param_new= info->option_struct;
DBUG_PRINT("info", ("new strparam: '%-.64s' ullparam: %llu enumparam: %u "
"boolparam: %u",
(param_new->strparam ? param_new->strparam : "<NULL>"),
param_new->ullparam, param_new->enumparam,
param_new->boolparam));
- param_old= (example_table_options_struct *)table->s->option_struct;
+ param_old= table->s->option_struct;
DBUG_PRINT("info", ("old strparam: '%-.64s' ullparam: %llu enumparam: %u "
"boolparam: %u",
(param_old->strparam ? param_old->strparam : "<NULL>"),
@@ -1020,19 +1023,19 @@ bool ha_example::check_if_incompatible_data(HA_CREATE_INFO *info,
for (i= 0; i < table->s->fields; i++)
{
- example_field_options_struct *f_old, *f_new;
- f_old= (example_field_options_struct *)table->s->field[i]->option_struct;
+ ha_field_option_struct *f_old, *f_new;
+ f_old= table->s->field[i]->option_struct;
DBUG_ASSERT(f_old);
DBUG_PRINT("info", ("old field: %u old complex: '%-.64s'", i,
- (f_old->compex_param_to_parse_it_in_engine ?
- f_old->compex_param_to_parse_it_in_engine :
+ (f_old->complex_param_to_parse_it_in_engine ?
+ f_old->complex_param_to_parse_it_in_engine :
"<NULL>")));
- if (info->fileds_option_struct[i])
+ if (info->fields_option_struct[i])
{
- f_new= (example_field_options_struct *)info->fileds_option_struct[i];
+ f_new= info->fields_option_struct[i];
DBUG_PRINT("info", ("old field: %u new complex: '%-.64s'", i,
- (f_new->compex_param_to_parse_it_in_engine ?
- f_new->compex_param_to_parse_it_in_engine :
+ (f_new->complex_param_to_parse_it_in_engine ?
+ f_new->complex_param_to_parse_it_in_engine :
"<NULL>")));
}
else
diff --git a/storage/federated/CMakeLists.txt b/storage/federated/CMakeLists.txt
index a0c601ab01a..9bd5b6b45d7 100644
--- a/storage/federated/CMakeLists.txt
+++ b/storage/federated/CMakeLists.txt
@@ -13,8 +13,6 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(FEDERATED_PLUGIN_STATIC "federated")
-SET(FEDERATED_PLUGIN_DYNAMIC "ha_federated")
SET(FEDERATED_SOURCES ha_federated.cc)
IF(NOT WITH_FEDERATED AND NOT WITH_FEDERATED_STORAGE_ENGINE)
# Bug#45488- federated uses symbols that are not used anywhere in
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index f175b4c2ced..0756b77180a 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -2490,7 +2490,6 @@ int ha_federated::index_init(uint keynr, bool sorted)
{
DBUG_ENTER("ha_federated::index_init");
DBUG_PRINT("info", ("table: '%s' key: %u", table->s->table_name.str, keynr));
- active_index= keynr;
DBUG_RETURN(0);
}
@@ -2631,7 +2630,6 @@ int ha_federated::index_end(void)
{
DBUG_ENTER("ha_federated::index_end");
free_result();
- active_index= MAX_KEY;
DBUG_RETURN(0);
}
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index f79756c16ce..c07ec92eac8 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -427,7 +427,7 @@ int federatedx_db_init(void *p)
federatedx_hton->commit= ha_federatedx::commit;
federatedx_hton->rollback= ha_federatedx::rollback;
federatedx_hton->create= federatedx_create_handler;
- federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED | HTON_NO_PARTITION;
+ federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED;
if (mysql_mutex_init(fe_key_mutex_federatedx,
&federatedx_mutex, MY_MUTEX_INIT_FAST))
@@ -1476,9 +1476,11 @@ static void fill_server(MEM_ROOT *mem_root, FEDERATEDX_SERVER *server,
key.q_append('\0');
server->password= (const char *) (intptr) key.length();
key.append(password);
-
+ key.c_ptr_safe(); // Ensure we have end \0
+
server->key_length= key.length();
- server->key= (uchar *) memdup_root(mem_root, key.ptr(), key.length()+1);
+ /* Copy and add end \0 */
+ server->key= (uchar *) strmake_root(mem_root, key.ptr(), key.length());
/* pointer magic */
server->scheme+= (intptr) server->key;
@@ -1607,7 +1609,8 @@ static FEDERATEDX_SHARE *get_share(const char *table_name, TABLE *table)
tmp_share.table_name_length, ident_quote_char);
if (!(share= (FEDERATEDX_SHARE *) memdup_root(&mem_root, (char*)&tmp_share, sizeof(*share))) ||
- !(share->select_query= (char*) strmake_root(&mem_root, query.ptr(), query.length() + 1)))
+ !(share->share_key= (char*) memdup_root(&mem_root, tmp_share.share_key, tmp_share.share_key_length+1)) ||
+ !(share->select_query= (char*) strmake_root(&mem_root, query.ptr(), query.length())))
goto error;
share->mem_root= mem_root;
@@ -1746,6 +1749,7 @@ int ha_federatedx::disconnect(handlerton *hton, MYSQL_THD thd)
{
federatedx_txn *txn= (federatedx_txn *) thd_get_ha_data(thd, hton);
delete txn;
+ *((federatedx_txn **) thd_ha_data(thd, hton))= 0;
return 0;
}
@@ -3453,11 +3457,13 @@ bool ha_federatedx::get_error_message(int error, String* buf)
buf->qs_append(remote_error_number);
buf->append(STRING_WITH_LEN(": "));
buf->append(remote_error_buf);
+ /* Ensure string ends with \0 */
+ (void) buf->c_ptr_safe();
remote_error_number= 0;
remote_error_buf[0]= '\0';
}
- DBUG_PRINT("exit", ("message: %s", buf->ptr()));
+ DBUG_PRINT("exit", ("message: %s", buf->c_ptr_safe()));
DBUG_RETURN(FALSE);
}
diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt
index 74f6ce8d333..4d8dc2bdd3e 100644
--- a/storage/heap/CMakeLists.txt
+++ b/storage/heap/CMakeLists.txt
@@ -13,9 +13,6 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(HEAP_PLUGIN_STATIC "heap")
-SET(HEAP_PLUGIN_MANDATORY TRUE)
-
SET(HEAP_SOURCES _check.c _rectest.c hp_block.c hp_clear.c hp_close.c hp_create.c
ha_heap.cc
hp_delete.c hp_extra.c hp_hash.c hp_info.c hp_open.c hp_panic.c
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 9701bc40499..7c5fc133679 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -197,6 +197,19 @@ void ha_heap::set_keys_for_scanning(void)
}
+int ha_heap::can_continue_handler_scan()
+{
+ int error= 0;
+ if ((file->key_version != file->s->key_version && inited == INDEX) ||
+ (file->file_version != file->s->file_version && inited == RND))
+ {
+ /* Data changed, not safe to do index or rnd scan */
+ error= HA_ERR_RECORD_CHANGED;
+ }
+ return error;
+}
+
+
void ha_heap::update_key_stats()
{
for (uint i= 0; i < table->s->keys; i++)
@@ -227,7 +240,6 @@ void ha_heap::update_key_stats()
int ha_heap::write_row(uchar * buf)
{
int res;
- ha_statistic_increment(&SSV::ha_write_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
table->timestamp_field->set_time();
if (table->next_number_field && buf == table->record[0])
@@ -251,7 +263,6 @@ int ha_heap::write_row(uchar * buf)
int ha_heap::update_row(const uchar * old_data, uchar * new_data)
{
int res;
- ha_statistic_increment(&SSV::ha_update_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
table->timestamp_field->set_time();
res= heap_update(file,old_data,new_data);
@@ -270,7 +281,6 @@ int ha_heap::update_row(const uchar * old_data, uchar * new_data)
int ha_heap::delete_row(const uchar * buf)
{
int res;
- ha_statistic_increment(&SSV::ha_delete_count);
res= heap_delete(file,buf);
if (!res && table->s->tmp_table == NO_TMP_TABLE &&
++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records)
@@ -289,7 +299,6 @@ int ha_heap::index_read_map(uchar *buf, const uchar *key,
enum ha_rkey_function find_flag)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
return error;
@@ -299,7 +308,6 @@ int ha_heap::index_read_last_map(uchar *buf, const uchar *key,
key_part_map keypart_map)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error= heap_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status= error ? STATUS_NOT_FOUND : 0;
@@ -310,7 +318,6 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file, buf, index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
return error;
@@ -319,7 +326,6 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key,
int ha_heap::index_next(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_next_count);
int error=heap_rnext(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -328,7 +334,6 @@ int ha_heap::index_next(uchar * buf)
int ha_heap::index_prev(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_prev_count);
int error=heap_rprev(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -337,7 +342,6 @@ int ha_heap::index_prev(uchar * buf)
int ha_heap::index_first(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_first_count);
int error=heap_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -346,7 +350,6 @@ int ha_heap::index_first(uchar * buf)
int ha_heap::index_last(uchar * buf)
{
DBUG_ASSERT(inited==INDEX);
- ha_statistic_increment(&SSV::ha_read_last_count);
int error=heap_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -359,7 +362,6 @@ int ha_heap::rnd_init(bool scan)
int ha_heap::rnd_next(uchar *buf)
{
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=heap_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
@@ -369,7 +371,6 @@ int ha_heap::rnd_pos(uchar * buf, uchar *pos)
{
int error;
HEAP_PTR heap_position;
- ha_statistic_increment(&SSV::ha_read_rnd_count);
memcpy(&heap_position, pos, sizeof(HEAP_PTR));
error=heap_rrnd(file, buf, heap_position);
table->status=error ? STATUS_NOT_FOUND: 0;
@@ -384,6 +385,10 @@ void ha_heap::position(const uchar *record)
int ha_heap::info(uint flag)
{
HEAPINFO hp_info;
+
+ if (!table)
+ return 1;
+
(void) heap_info(file,&hp_info,flag);
errkey= hp_info.errkey;
@@ -579,7 +584,7 @@ int ha_heap::delete_table(const char *name)
void ha_heap::drop_table(const char *name)
{
file->s->delete_on_close= 1;
- close();
+ ha_close();
}
@@ -670,7 +675,8 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
seg->type != HA_KEYTYPE_VARTEXT1 &&
seg->type != HA_KEYTYPE_VARTEXT2 &&
seg->type != HA_KEYTYPE_VARBINARY1 &&
- seg->type != HA_KEYTYPE_VARBINARY2)
+ seg->type != HA_KEYTYPE_VARBINARY2 &&
+ seg->type != HA_KEYTYPE_BIT)
seg->type= HA_KEYTYPE_BINARY;
}
seg->start= (uint) key_part->offset;
@@ -702,6 +708,18 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
auto_key= key+ 1;
auto_key_type= field->key_type();
}
+ if (seg->type == HA_KEYTYPE_BIT)
+ {
+ seg->bit_length= ((Field_bit *) field)->bit_len;
+ seg->bit_start= ((Field_bit *) field)->bit_ofs;
+ seg->bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ seg->bit_length= seg->bit_start= 0;
+ seg->bit_pos= 0;
+ }
}
}
mem_per_row+= MY_ALIGN(share->reclength + 1, sizeof(char*));
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index c8652d6db69..42c41c69ee0 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2006 MySQL AB
+/* Copyright (C) 2000-2006 MySQL AB, 2009-2011 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -53,6 +53,7 @@ public:
{
return (HA_FAST_KEY_READ | HA_NO_BLOBS | HA_NULL_IN_KEY |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
+ HA_CAN_SQL_HANDLER |
HA_REC_NOT_IN_SEQ | HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS |
HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT);
}
@@ -94,6 +95,7 @@ public:
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
void position(const uchar *record);
+ int can_continue_handler_scan();
int info(uint);
int extra(enum ha_extra_function operation);
int reset();
diff --git a/storage/heap/hp_clear.c b/storage/heap/hp_clear.c
index 9c04684e269..254e5d1a8ec 100644
--- a/storage/heap/hp_clear.c
+++ b/storage/heap/hp_clear.c
@@ -40,6 +40,8 @@ void hp_clear(HP_SHARE *info)
info->blength=1;
info->changed=0;
info->del_link=0;
+ info->key_version++;
+ info->file_version++;
DBUG_VOID_RETURN;
}
diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c
index 67fe0b3136f..adc507aa28e 100644
--- a/storage/heap/hp_create.c
+++ b/storage/heap/hp_create.c
@@ -43,6 +43,10 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info,
hp_free(share);
share= 0;
}
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Creating internal (no named) temporary table"));
}
*created_new_share= (share == NULL);
@@ -110,6 +114,14 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info,
*/
keyinfo->seg[j].type= HA_KEYTYPE_VARTEXT1;
break;
+ case HA_KEYTYPE_BIT:
+ /*
+ The odd bits, which are stored separately (if they are present:
+ bit_pos, bit_length), are already included in seg[j].length as an
+ additional byte.
+ See field.h, function key_length()
+ */
+ break;
default:
break;
}
@@ -256,10 +268,15 @@ static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
static inline void heap_try_free(HP_SHARE *share)
{
+ DBUG_ENTER("heap_try_free");
if (share->open_count == 0)
hp_free(share);
else
+ {
+ DBUG_PRINT("info", ("Table is still in use. Will be freed on close"));
share->delete_on_close= 1;
+ }
+ DBUG_VOID_RETURN;
}
@@ -278,6 +295,7 @@ int heap_delete_table(const char *name)
else
{
result= my_errno=ENOENT;
+ DBUG_PRINT("error", ("Could not find table '%s'", name));
}
mysql_mutex_unlock(&THR_LOCK_heap);
DBUG_RETURN(result);
diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c
index ceba0fcf12e..db2c0df6128 100644
--- a/storage/heap/hp_delete.c
+++ b/storage/heap/hp_delete.c
@@ -47,6 +47,7 @@ int heap_delete(HP_INFO *info, const uchar *record)
share->del_link=pos;
pos[share->reclength]=0; /* Record deleted */
share->deleted++;
+ share->key_version++;
info->current_hash_ptr=0;
#if !defined(DBUG_OFF) && defined(EXTRA_HEAP_DEBUG)
DBUG_EXECUTE("check_heap",heap_check_heap(info, 0););
diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c
index aaaa0fe833f..fb9ea44a424 100644
--- a/storage/heap/hp_hash.c
+++ b/storage/heap/hp_hash.c
@@ -349,6 +349,15 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
}
else
{
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) bits))+ (nr << 8);
+ nr2+=3;
+ end--;
+ }
+
for (; pos < end ; pos++)
{
nr^=(ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
@@ -465,6 +474,14 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec)
else
{
uchar *end= pos+seg->length;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ nr *=16777619;
+ nr ^=(uint) bits;
+ end--;
+ }
for ( ; pos < end ; pos++)
{
nr *=16777619;
@@ -577,7 +594,18 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2,
}
else
{
- if (bcmp(rec1+seg->start,rec2+seg->start,seg->length))
+ uint dec= 0;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits1= get_rec_bits(rec1 + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ uchar bits2= get_rec_bits(rec2 + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ if (bits1 != bits2)
+ return 1;
+ dec= 1;
+ }
+ if (bcmp(rec1 + seg->start, rec2 + seg->start, seg->length - dec))
return 1;
}
}
@@ -660,7 +688,18 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key)
}
else
{
- if (bcmp(rec+seg->start,key,seg->length))
+ uint dec= 0;
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ uchar bits= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ if (bits != (*key))
+ return 1;
+ dec= 1;
+ key++;
+ }
+
+ if (bcmp(rec + seg->start, key, seg->length - dec))
return 1;
}
}
@@ -689,6 +728,12 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec)
}
if (seg->type == HA_KEYTYPE_VARTEXT1)
char_length+= seg->bit_start; /* Copy also length */
+ else if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ *key++= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ char_length--;
+ }
memcpy(key,rec+seg->start,(size_t) char_length);
key+= char_length;
}
@@ -720,7 +765,8 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
{
uint length= seg->length;
uchar *pos= (uchar*) rec + seg->start;
-
+ DBUG_ASSERT(seg->type != HA_KEYTYPE_BIT);
+
#ifdef HAVE_ISNAN
if (seg->type == HA_KEYTYPE_FLOAT)
{
@@ -784,6 +830,12 @@ uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
seg->charset->cset->fill(seg->charset, (char*) key + char_length,
seg->length - char_length, ' ');
}
+ if (seg->type == HA_KEYTYPE_BIT && seg->bit_length)
+ {
+ *key++= get_rec_bits(rec + seg->bit_pos,
+ seg->bit_start, seg->bit_length);
+ char_length--;
+ }
memcpy(key, rec + seg->start, (size_t) char_length);
key+= seg->length;
}
diff --git a/storage/heap/hp_rfirst.c b/storage/heap/hp_rfirst.c
index d0d2ec9b506..e45af4a219f 100644
--- a/storage/heap/hp_rfirst.c
+++ b/storage/heap/hp_rfirst.c
@@ -24,6 +24,8 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
DBUG_ENTER("heap_rfirst");
info->lastinx= inx;
+ info->key_version= info->s->key_version;
+
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
uchar *pos;
@@ -50,6 +52,7 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
}
else
{
+ info->update= HA_STATE_NO_KEY;
my_errno = HA_ERR_END_OF_FILE;
DBUG_RETURN(my_errno);
}
@@ -57,15 +60,8 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx)
}
else
{
- if (!(info->s->records))
- {
- my_errno=HA_ERR_END_OF_FILE;
- DBUG_RETURN(my_errno);
- }
- DBUG_ASSERT(0); /* TODO fix it */
- info->current_record=0;
- info->current_hash_ptr=0;
- info->update=HA_STATE_PREV_FOUND;
- DBUG_RETURN(heap_rnext(info,record));
+ /* We can't scan a hash index without an existing key value */
+ my_errno= HA_ERR_WRONG_COMMAND;
+ DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_rkey.c b/storage/heap/hp_rkey.c
index 27d1114770e..166ed28aed0 100644
--- a/storage/heap/hp_rkey.c
+++ b/storage/heap/hp_rkey.c
@@ -30,6 +30,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
}
info->lastinx= inx;
info->current_record= (ulong) ~0L; /* For heap_rrnd() */
+ info->key_version= info->s->key_version;
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
@@ -50,7 +51,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
if (!(pos= tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, find_flag, &custom_arg)))
{
- info->update= 0;
+ info->update= HA_STATE_NO_KEY;
DBUG_RETURN(my_errno= HA_ERR_KEY_NOT_FOUND);
}
memcpy(&pos, pos + (*keyinfo->get_key_length)(keyinfo, pos), sizeof(uchar*));
@@ -60,7 +61,7 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key,
{
if (!(pos= hp_search(info, share->keydef + inx, key, 0)))
{
- info->update= 0;
+ info->update= HA_STATE_NO_KEY;
DBUG_RETURN(my_errno);
}
if (!(keyinfo->flag & HA_NOSAME))
diff --git a/storage/heap/hp_rlast.c b/storage/heap/hp_rlast.c
index 45ad7c21f49..0710401e5a5 100644
--- a/storage/heap/hp_rlast.c
+++ b/storage/heap/hp_rlast.c
@@ -25,6 +25,7 @@ int heap_rlast(HP_INFO *info, uchar *record, int inx)
DBUG_ENTER("heap_rlast");
info->lastinx= inx;
+ info->key_version= info->s->key_version;
if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
{
uchar *pos;
@@ -47,9 +48,8 @@ int heap_rlast(HP_INFO *info, uchar *record, int inx)
}
else
{
- info->current_ptr=0;
- info->current_hash_ptr=0;
- info->update=HA_STATE_NEXT_FOUND;
- DBUG_RETURN(heap_rprev(info,record));
+ /* We can't scan a hash index without an existing key value */
+ my_errno= HA_ERR_WRONG_COMMAND;
+ DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_rnext.c b/storage/heap/hp_rnext.c
index 3d715f4e6d3..7a759e70972 100644
--- a/storage/heap/hp_rnext.c
+++ b/storage/heap/hp_rnext.c
@@ -32,7 +32,20 @@ int heap_rnext(HP_INFO *info, uchar *record)
{
heap_rb_param custom_arg;
- if (info->last_pos)
+ /* If no active record and last was not deleted */
+ if (!(info->update & (HA_STATE_AKTIV | HA_STATE_NO_KEY |
+ HA_STATE_DELETED)))
+ {
+ if (info->update & HA_STATE_NEXT_FOUND)
+ pos= 0; /* Can't search after last row */
+ else
+ {
+ /* Last was 'prev' before first record; restart from the first record */
+ pos= tree_search_edge(&keyinfo->rb_tree, info->parents,
+ &info->last_pos, offsetof(TREE_ELEMENT, left));
+ }
+ }
+ else if (info->last_pos)
{
/*
We enter this branch for non-DELETE queries after heap_rkey()
@@ -70,6 +83,7 @@ int heap_rnext(HP_INFO *info, uchar *record)
custom_arg.keyseg = keyinfo->seg;
custom_arg.key_length = info->lastkey_len;
custom_arg.search_flag = SEARCH_SAME | SEARCH_FIND;
+ info->last_find_flag= HA_READ_KEY_OR_NEXT;
pos = tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, info->last_find_flag, &custom_arg);
}
diff --git a/storage/heap/hp_rprev.c b/storage/heap/hp_rprev.c
index 63bfffffba9..8a50444bb5f 100644
--- a/storage/heap/hp_rprev.c
+++ b/storage/heap/hp_rprev.c
@@ -32,7 +32,20 @@ int heap_rprev(HP_INFO *info, uchar *record)
{
heap_rb_param custom_arg;
- if (info->last_pos)
+ /* If no active record and last was not deleted */
+ if (!(info->update & (HA_STATE_AKTIV | HA_STATE_NO_KEY |
+ HA_STATE_DELETED)))
+ {
+ if (info->update & HA_STATE_PREV_FOUND)
+ pos= 0; /* Can't search before first row */
+ else
+ {
+ /* Last was 'next' after last record; restart from the last record */
+ pos= tree_search_edge(&keyinfo->rb_tree, info->parents,
+ &info->last_pos, offsetof(TREE_ELEMENT, right));
+ }
+ }
+ else if (info->last_pos)
pos = tree_search_next(&keyinfo->rb_tree, &info->last_pos,
offsetof(TREE_ELEMENT, right),
offsetof(TREE_ELEMENT, left));
@@ -41,6 +54,7 @@ int heap_rprev(HP_INFO *info, uchar *record)
custom_arg.keyseg = keyinfo->seg;
custom_arg.key_length = keyinfo->length;
custom_arg.search_flag = SEARCH_SAME;
+ info->last_find_flag= HA_READ_KEY_OR_PREV;
pos = tree_search_key(&keyinfo->rb_tree, info->lastkey, info->parents,
&info->last_pos, info->last_find_flag, &custom_arg);
}
diff --git a/storage/heap/hp_rsame.c b/storage/heap/hp_rsame.c
index 1a3724672b6..f93a443aa48 100644
--- a/storage/heap/hp_rsame.c
+++ b/storage/heap/hp_rsame.c
@@ -43,7 +43,7 @@ int heap_rsame(register HP_INFO *info, uchar *record, int inx)
hp_make_key(share->keydef + inx, info->lastkey, record);
if (!hp_search(info, share->keydef + inx, info->lastkey, 3))
{
- info->update=0;
+ info->update= 0;
DBUG_RETURN(my_errno);
}
}
diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c
index e8913e92c86..397dd8b54d4 100644
--- a/storage/heap/hp_scan.c
+++ b/storage/heap/hp_scan.c
@@ -31,6 +31,8 @@ int heap_scan_init(register HP_INFO *info)
info->current_record= (ulong) ~0L; /* No current record */
info->update=0;
info->next_block=0;
+ info->key_version= info->s->key_version;
+ info->file_version= info->s->file_version;
DBUG_RETURN(0);
}
diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c
index af388867c3c..5dab8443f53 100644
--- a/storage/heap/hp_test2.c
+++ b/storage/heap/hp_test2.c
@@ -1,5 +1,5 @@
-/* Copyright (C) 2000, 2011, Oracle and/or its affiliates. All rights
- reserved
+/* Copyright (C) 2000, 2011, Oracle and/or its affiliates.
+ Copyright (c) Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -310,7 +310,8 @@ int main(int argc, char *argv[])
if (!silent)
printf("- Read last key - delete - prev - prev - opt_delete - prev -> first\n");
- if (heap_rlast(file,record3,0)) goto err;
+ if (heap_rprev(file,record))
+ goto err;
if (heap_delete(file,record3)) goto err;
key_check-=atoi((char*) record3);
key1[atoi((char*) record+keyinfo[0].seg[0].start)]--;
@@ -517,7 +518,7 @@ int main(int argc, char *argv[])
}
ant=0;
- for (error=heap_rlast(file,record,0) ;
+ for (error=heap_rprev(file,record) ;
! error ;
error=heap_rprev(file,record))
{
diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c
index 7f469af3c96..ab831382325 100644
--- a/storage/heap/hp_update.c
+++ b/storage/heap/hp_update.c
@@ -21,7 +21,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
{
HP_KEYDEF *keydef, *end, *p_lastinx;
uchar *pos;
- my_bool auto_key_changed= 0;
+ my_bool auto_key_changed= 0, key_changed= 0;
HP_SHARE *share= info->s;
DBUG_ENTER("heap_update");
@@ -54,6 +54,8 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new)
#endif
if (auto_key_changed)
heap_update_auto_increment(info, heap_new);
+ if (key_changed)
+ share->key_version++;
DBUG_RETURN(0);
err:
diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c
index 4e8fa7e3580..bf27503de9b 100644
--- a/storage/heap/hp_write.c
+++ b/storage/heap/hp_write.c
@@ -56,6 +56,7 @@ int heap_write(HP_INFO *info, const uchar *record)
pos[share->reclength]=1; /* Mark record as not deleted */
if (++share->records == share->blength)
share->blength+= share->blength;
+ info->s->key_version++;
info->current_ptr=pos;
info->current_hash_ptr=0;
info->update|=HA_STATE_AKTIV;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 3561d824e81..b26a1522cf5 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -53,6 +53,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <mysql/psi/psi.h>
#include <my_sys.h>
+#ifdef _WIN32
+#include <io.h>
+#endif
/** @file ha_innodb.cc */
/* Include necessary InnoDB headers */
@@ -98,7 +101,6 @@ extern "C" {
/** to protect innobase_open_files */
static mysql_mutex_t innobase_share_mutex;
/** to force correct commit order in binlog */
-static mysql_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static mysql_mutex_t commit_threads_m;
static mysql_cond_t commit_cond;
@@ -216,7 +218,6 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
static mysql_pfs_key_t innobase_share_mutex_key;
-static mysql_pfs_key_t prepare_commit_mutex_key;
static mysql_pfs_key_t commit_threads_m_key;
static mysql_pfs_key_t commit_cond_mutex_key;
static mysql_pfs_key_t commit_cond_key;
@@ -224,8 +225,7 @@ static mysql_pfs_key_t commit_cond_key;
static PSI_mutex_info all_pthread_mutexes[] = {
{&commit_threads_m_key, "commit_threads_m", 0},
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&prepare_commit_mutex_key, "prepare_commit_mutex", 0}
+ {&innobase_share_mutex_key, "innobase_share_mutex", 0}
};
static PSI_cond_info all_innodb_conds[] = {
@@ -338,6 +338,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1521,7 +1522,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1570,8 +1570,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1597,15 +1595,15 @@ trx_is_registered_for_2pc(
}
/*********************************************************************//**
-Note that a transaction owns the prepare_commit_mutex. */
+Note that innobase_commit_ordered() was run. */
static inline
void
-trx_owns_prepare_commit_mutex_set(
+trx_set_active_commit_ordered(
/*==============================*/
trx_t* trx) /* in: transaction */
{
ut_a(trx_is_registered_for_2pc(trx));
- trx->owns_prepare_mutex = 1;
+ trx->active_commit_ordered = 1;
}
/*********************************************************************//**
@@ -1617,7 +1615,7 @@ trx_register_for_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 1;
- ut_ad(trx->owns_prepare_mutex == 0);
+ ut_ad(trx->active_commit_ordered == 0);
}
/*********************************************************************//**
@@ -1629,19 +1627,18 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->owns_prepare_mutex = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
-Check whether atransaction owns the prepare_commit_mutex.
-@return true if transaction owns the prepare commit mutex */
+Check whether a transaction has active_commit_ordered set */
static inline
bool
-trx_has_prepare_commit_mutex(
+trx_is_active_commit_ordered(
/*=========================*/
const trx_t* trx) /* in: transaction */
{
- return(trx->owns_prepare_mutex == 1);
+ return(trx->active_commit_ordered == 1);
}
/*********************************************************************//**
@@ -2196,6 +2193,8 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->prepare_ordered=NULL;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2537,8 +2536,6 @@ innobase_change_buffering_inited_ok:
mysql_mutex_init(innobase_share_mutex_key,
&innobase_share_mutex,
MY_MUTEX_INIT_FAST);
- mysql_mutex_init(prepare_commit_mutex_key,
- &prepare_commit_mutex, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_threads_m_key,
&commit_threads_m, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_cond_mutex_key,
@@ -2589,7 +2586,6 @@ innobase_end(
srv_free_paths_and_sizes();
my_free(internal_innobase_data_file_path);
mysql_mutex_destroy(&innobase_share_mutex);
- mysql_mutex_destroy(&prepare_commit_mutex);
mysql_mutex_destroy(&commit_threads_m);
mysql_mutex_destroy(&commit_cond_m);
mysql_cond_destroy(&commit_cond);
@@ -2693,6 +2689,109 @@ innobase_start_trx_and_assign_read_view(
DBUG_RETURN(0);
}
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in the same sequence as writing to the binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ mysql_cond_wait(&commit_cond,
+ &commit_cond_m);
+ mysql_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ mysql_cond_signal(&commit_cond);
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx_set_active_commit_ordered(trx);
+
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -2718,7 +2817,7 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch && !trx_is_active_commit_ordered(trx)) {
trx_search_latch_release_if_reserved(trx);
}
@@ -2736,68 +2835,19 @@ innobase_commit(
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- mysql_cond_wait(&commit_cond,
- &commit_cond_m);
- mysql_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- mysql_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads--;
- mysql_cond_signal(&commit_cond);
- mysql_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if (!trx_is_active_commit_ordered(trx)) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx_has_prepare_commit_mutex(trx)) {
-
- mysql_mutex_unlock(&prepare_commit_mutex);
- }
-
- trx_deregister_from_2pc(trx);
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered();
+ now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
+ trx_deregister_from_2pc(trx);
+
} else {
/* We just mark the SQL statement ended and do not do a
transaction commit */
@@ -3173,12 +3223,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
- uint,
+ uint index,
uint,
bool)
const
{
- return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ ulong extra_flag= 0;
+ if (table && index == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY);
}
@@ -4158,90 +4211,64 @@ get_innobase_type_from_mysql_type(
8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
the type */
- DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_VAR_STRING < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DOUBLE < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_FLOAT < 256);
+ compile_time_assert((ulint)MYSQL_TYPE_DECIMAL < 256);
- if (field->flags & UNSIGNED_FLAG) {
+ *unsigned_flag = 0;
+ switch (field->key_type()) {
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_ULONGLONG:
*unsigned_flag = DATA_UNSIGNED;
- } else {
- *unsigned_flag = 0;
- }
-
- if (field->real_type() == MYSQL_TYPE_ENUM
- || field->real_type() == MYSQL_TYPE_SET) {
-
- /* MySQL has field->type() a string type for these, but the
- data is actually internally stored as an unsigned integer
- code! */
-
- *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
- flag set to zero, even though
- internally this is an unsigned
- integer type */
+ /* fall through */
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_INT8:
+ case HA_KEYTYPE_LONGLONG:
return(DATA_INT);
- }
-
- switch (field->type()) {
- /* NOTE that we only allow string types in DATA_MYSQL and
- DATA_VARMYSQL */
- case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
- case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
- if (field->binary()) {
- return(DATA_BINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_FLOAT:
+ return(DATA_FLOAT);
+ case HA_KEYTYPE_DOUBLE:
+ return(DATA_DOUBLE);
+ case HA_KEYTYPE_BINARY:
+ if (field->type() == MYSQL_TYPE_TINY)
+ { // compatibility workaround
+ *unsigned_flag= DATA_UNSIGNED;
+ return DATA_INT;
+ }
+ return(DATA_FIXBINARY);
+ case HA_KEYTYPE_VARBINARY2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARBINARY1:
+ return(DATA_BINARY);
+ case HA_KEYTYPE_VARTEXT2:
+ if (field->type() != MYSQL_TYPE_VARCHAR)
+ return(DATA_BLOB);
+ /* fall through */
+ case HA_KEYTYPE_VARTEXT1:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_VARCHAR);
} else {
return(DATA_VARMYSQL);
}
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING: if (field->binary()) {
-
- return(DATA_FIXBINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
+ case HA_KEYTYPE_TEXT:
+ if (field->charset() == &my_charset_latin1) {
return(DATA_CHAR);
} else {
return(DATA_MYSQL);
}
- case MYSQL_TYPE_NEWDECIMAL:
- return(DATA_FIXBINARY);
- case MYSQL_TYPE_LONG:
- case MYSQL_TYPE_LONGLONG:
- case MYSQL_TYPE_TINY:
- case MYSQL_TYPE_SHORT:
- case MYSQL_TYPE_INT24:
- case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
- case MYSQL_TYPE_YEAR:
- case MYSQL_TYPE_NEWDATE:
- case MYSQL_TYPE_TIME:
- case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
- case MYSQL_TYPE_FLOAT:
- return(DATA_FLOAT);
- case MYSQL_TYPE_DOUBLE:
- return(DATA_DOUBLE);
- case MYSQL_TYPE_DECIMAL:
+ case HA_KEYTYPE_NUM:
return(DATA_DECIMAL);
- case MYSQL_TYPE_GEOMETRY:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- return(DATA_BLOB);
- case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
- default:
+ case HA_KEYTYPE_BIT:
+ case HA_KEYTYPE_END:
ut_error;
}
@@ -7998,6 +8025,8 @@ ha_innobase::info_low(
}
stats.check_time = 0;
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
+
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -10211,33 +10240,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
- && (all
- || !thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- mysql_mutex_lock(&prepare_commit_mutex);
- trx_owns_prepare_commit_mutex_set(trx);
- }
-
return(error);
}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 7ab91a12e81..e6c9e955827 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -238,16 +238,6 @@ extern "C" {
struct charset_info_st *thd_charset(MYSQL_THD thd);
LEX_STRING *thd_query_string(MYSQL_THD thd);
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -294,6 +284,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index de5cc682078..ad413c3cacd 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -157,7 +157,7 @@ field_store_time_t(
my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
#endif
- return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+ return(field->store_time(&my_time));
}
/*******************************************************************//**
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 588ddd65e88..7572c766301 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -490,7 +490,7 @@ struct trx_struct{
transaction has been registered with
the coordinator using the XA API, and
is set to 0 after commit or rollback. */
- unsigned owns_prepare_mutex:1;/* 1 if owns prepare mutex, if
+ unsigned active_commit_ordered:1;/* 1 if owns prepare mutex, if
this is set to 1 then registered should
also be set to 1. This is used in the
XA code */
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index 04b3dcb3a4a..053b3513f1d 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1265,7 +1265,7 @@ row_upd_changes_ord_field_binary_func(
const upd_field_t* upd_field;
const dfield_t* dfield;
dfield_t dfield_ext;
- ulint dfield_len;
+ ulint dfield_len= 0;
const byte* buf;
ind_field = dict_index_get_nth_field(index, i);
diff --git a/storage/innobase/trx/trx0i_s.c b/storage/innobase/trx/trx0i_s.c
index c18b747da6d..aa0a9c797f2 100644
--- a/storage/innobase/trx/trx0i_s.c
+++ b/storage/innobase/trx/trx0i_s.c
@@ -155,10 +155,6 @@ struct trx_i_s_cache_struct {
ullint last_read; /*!< last time the cache was read;
measured in microseconds since
epoch */
- mutex_t last_read_mutex;/*!< mutex protecting the
- last_read member - it is updated
- inside a shared lock of the
- rw_lock member */
i_s_table_cache_t innodb_trx; /*!< innodb_trx table */
i_s_table_cache_t innodb_locks; /*!< innodb_locks table */
i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */
@@ -1225,13 +1221,6 @@ can_cache_be_updated(
{
ullint now;
- /* Here we read cache->last_read without acquiring its mutex
- because last_read is only updated when a shared rw lock on the
- whole cache is being held (see trx_i_s_cache_end_read()) and
- we are currently holding an exclusive rw lock on the cache.
- So it is not possible for last_read to be updated while we are
- reading it. */
-
#ifdef UNIV_SYNC_DEBUG
ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
#endif
@@ -1329,6 +1318,12 @@ trx_i_s_possibly_fetch_data_into_cache(
/*===================================*/
trx_i_s_cache_t* cache) /*!< in/out: cache */
{
+ ullint now;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
+#endif
+
if (!can_cache_be_updated(cache)) {
return(1);
@@ -1341,6 +1336,10 @@ trx_i_s_possibly_fetch_data_into_cache(
mutex_exit(&kernel_mutex);
+ /* update cache last read time */
+ now = ut_time_us(NULL);
+ cache->last_read = now;
+
return(0);
}
@@ -1371,8 +1370,6 @@ trx_i_s_cache_init(
release kernel_mutex
release trx_i_s_cache_t::rw_lock
acquire trx_i_s_cache_t::rw_lock, S
- acquire trx_i_s_cache_t::last_read_mutex
- release trx_i_s_cache_t::last_read_mutex
release trx_i_s_cache_t::rw_lock */
rw_lock_create(trx_i_s_cache_lock_key, &cache->rw_lock,
@@ -1380,9 +1377,6 @@ trx_i_s_cache_init(
cache->last_read = 0;
- mutex_create(cache_last_read_mutex_key,
- &cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ);
-
table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t));
table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t));
table_cache_init(&cache->innodb_lock_waits,
@@ -1433,18 +1427,10 @@ trx_i_s_cache_end_read(
/*===================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
- ullint now;
-
#ifdef UNIV_SYNC_DEBUG
ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED));
#endif
- /* update cache last read time */
- now = ut_time_us(NULL);
- mutex_enter(&cache->last_read_mutex);
- cache->last_read = now;
- mutex_exit(&cache->last_read_mutex);
-
rw_lock_s_unlock(&cache->rw_lock);
}
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 8e595353024..da9075816f7 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -1412,7 +1412,7 @@ trx_sys_print_mysql_binlog_offset_from_page(
/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
- (This code duplicaton should be fixed at some point!)
+ (This code duplication should be fixed at some point!)
*/
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 7b99b86c732..ab7677b5b35 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -110,7 +110,7 @@ trx_create(
trx->conc_state = TRX_NOT_STARTED;
trx->is_registered = 0;
- trx->owns_prepare_mutex = 0;
+ trx->active_commit_ordered = 0;
trx->start_time = ut_time();
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt
index 545c8cb9318..3fbddaf4580 100644
--- a/storage/maria/CMakeLists.txt
+++ b/storage/maria/CMakeLists.txt
@@ -35,14 +35,14 @@ SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c
ha_maria.cc trnman.c lockman.c
ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c
ma_sp_key.c ma_control_file.c ma_loghandler.c
- ma_pagecache.c ma_pagecaches.c compat_aliases.cc compat_aliases.h
+ ma_pagecache.c ma_pagecaches.c
ma_checkpoint.c ma_recovery.c ma_commit.c ma_pagecrc.c
ha_maria.h maria_def.h ma_recovery_util.c ma_servicethread.c
+ ma_norec.c
)
MYSQL_ADD_PLUGIN(aria ${ARIA_SOURCES}
STORAGE_ENGINE
- MANDATORY
RECOMPILE_FOR_EMBEDDED)
TARGET_LINK_LIBRARIES(aria myisam)
@@ -56,7 +56,7 @@ TARGET_LINK_LIBRARIES(aria_chk aria)
MYSQL_ADD_EXECUTABLE(aria_read_log maria_read_log.c)
TARGET_LINK_LIBRARIES(aria_read_log aria)
-MYSQL_ADD_EXECUTABLE(aria_dump_log ma_loghandler.c unittest/ma_loghandler_examples.c)
+MYSQL_ADD_EXECUTABLE(aria_dump_log maria_dump_log.c unittest/ma_loghandler_examples.c)
TARGET_LINK_LIBRARIES(aria_dump_log aria)
SET_TARGET_PROPERTIES(aria_dump_log PROPERTIES COMPILE_FLAGS "-DMARIA_DUMP_LOG")
diff --git a/storage/maria/compat_aliases.cc b/storage/maria/compat_aliases.cc
deleted file mode 100644
index 2d3c67d69a7..00000000000
--- a/storage/maria/compat_aliases.cc
+++ /dev/null
@@ -1,245 +0,0 @@
-/* Copyright (C) 2010 Monty Program Ab
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-/*
- compatibility aliases for system and static variables
-*/
-#include <my_global.h>
-#include <maria.h>
-#include <mysql/plugin.h>
-#include "ma_loghandler.h"
-#include "compat_aliases.h"
-
-ulong block_size_alias;
-static MYSQL_SYSVAR_ULONG(block_size, block_size_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-block-size instead", 0, 0,
- MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
- MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
-
-ulong checkpoint_interval_alias;
-static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-checkpoint-interval instead",
- NULL, NULL, 30, 0, UINT_MAX, 1);
-
-ulong force_start_after_recovery_failures_alias;
-static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures, force_start_after_recovery_failures_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-force-start-after-recovery-failures instead",
- NULL, NULL, 0, 0, UINT_MAX8, 1);
-
-my_bool page_checksum_alias;
-static MYSQL_SYSVAR_BOOL(page_checksum, page_checksum_alias, 0,
- "Deprecated, use --aria-page-checksum instead", 0, 0, 1);
-
-char *log_dir_path_alias;
-static MYSQL_SYSVAR_STR(log_dir_path, log_dir_path_alias,
- PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-log-dir-path instead",
- NULL, NULL, mysql_real_data_home);
-
-ulong log_file_size_alias;
-static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-log-file-size instead",
- NULL, NULL, TRANSLOG_FILE_SIZE,
- TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);
-
-ulong group_commit_alias;
-static MYSQL_SYSVAR_ENUM(group_commit, group_commit_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-group-commit instead",
- NULL, NULL,
- TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);
-
-ulong group_commit_interval_alias;
-static MYSQL_SYSVAR_ULONG(group_commit_interval, group_commit_interval_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-group-commit-interval instead",
- NULL, NULL, 0, 0, UINT_MAX, 1);
-
-ulong log_purge_type_alias;
-static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-log-purge-type instead",
- NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
- &maria_translog_purge_type_typelib);
-
-ulonglong max_sort_file_size_alias;
-static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, max_sort_file_size_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-max-temp-length instead",
- 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024);
-
-ulong pagecache_age_threshold_alias;
-static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-pagecache-age-threshold instead",
- 0, 0, 300, 100, ~0L, 100);
-
-ulonglong pagecache_buffer_size_alias;
-static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size_alias,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated, use --aria-pagecache-buffer-size instead",
- 0, 0, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~0UL, IO_SIZE);
-
-ulong pagecache_division_limit_alias;
-static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-pagecache-division-limit instead",
- 0, 0, 100, 1, 100, 1);
-
-ulong recover_alias;
-static MYSQL_SYSVAR_ENUM(recover, recover_alias, PLUGIN_VAR_OPCMDARG,
- "Deprecated, use --aria-recover instead",
- NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib);
-
-ulong repair_threads_alias;
-static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-repair-threads instead",
- 0, 0, 1, 1, ~0L, 1);
-
-ulong sort_buffer_size_alias;
-static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-sort-buffer-size instead",
- 0, 0, 128L*1024L*1024L, 4, ~0L, 1);
-
-ulong stats_method_alias;
-static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-stats-method instead",
- 0, 0, 0, &maria_stats_method_typelib);
-
-ulong sync_log_dir_alias;
-static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir_alias,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use --aria-sync-log-dir instead",
- NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
- &maria_sync_log_dir_typelib);
-
-my_bool used_for_temp_tables_alias= 1;
-static MYSQL_SYSVAR_BOOL(used_for_temp_tables,
- used_for_temp_tables_alias, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
- NULL, 0, 0, 1);
-
-static struct st_mysql_show_var status_variables_aliases[]= {
- {"Maria", (char*) &status_variables, SHOW_ARRAY},
- {NullS, NullS, SHOW_LONG}
-};
-
-/*
- There is one problem with aliases for command-line options.
- Plugin initialization works like this
-
- for all plugins:
- prepare command-line options
- initialize command-line option variables to the default values
- parse command line, assign values as necessary
-
- for all plugins:
- call the plugin initialization function
-
- it means, we cannot have maria* and aria* command-line options to use
- the same underlying variables - because after assigning maria* values,
- MySQL will put there default values again preparing for parsing aria*
- values. So, maria* values will be lost.
-
- So, we create separate set of variables for maria* options,
- and take both values into account in ha_maria_init().
-
- When the command line was parsed, we patch maria* options
- to use the same variables as aria* options so that
- set @@maria_some_var would have the same value as @@aria_some_var
- without forcing us to copy the values around all the time.
-*/
-
-static struct st_mysql_sys_var* system_variables_aliases[]= {
- MYSQL_SYSVAR(block_size),
- MYSQL_SYSVAR(checkpoint_interval),
- MYSQL_SYSVAR(force_start_after_recovery_failures),
- MYSQL_SYSVAR(group_commit),
- MYSQL_SYSVAR(group_commit_interval),
- MYSQL_SYSVAR(log_dir_path),
- MYSQL_SYSVAR(log_file_size),
- MYSQL_SYSVAR(log_purge_type),
- MYSQL_SYSVAR(max_sort_file_size),
- MYSQL_SYSVAR(page_checksum),
- MYSQL_SYSVAR(pagecache_age_threshold),
- MYSQL_SYSVAR(pagecache_buffer_size),
- MYSQL_SYSVAR(pagecache_division_limit),
- MYSQL_SYSVAR(recover),
- MYSQL_SYSVAR(repair_threads),
- MYSQL_SYSVAR(sort_buffer_size),
- MYSQL_SYSVAR(stats_method),
- MYSQL_SYSVAR(sync_log_dir),
- MYSQL_SYSVAR(used_for_temp_tables),
- NULL
-};
-
-#define COPY_SYSVAR(name) \
- memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \
- sizeof(MYSQL_SYSVAR_NAME(name))); \
- if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \
- *MYSQL_SYSVAR_NAME(name).value == MYSQL_SYSVAR_NAME(name).def_val) \
- *MYSQL_SYSVAR_NAME(name).value= name ## _alias;
-
-#define COPY_THDVAR(name) \
- name ## _alias= THDVAR(0, name); \
- memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \
- sizeof(MYSQL_SYSVAR_NAME(name))); \
- if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \
- THDVAR(0, name) == MYSQL_SYSVAR_NAME(name).def_val) \
- THDVAR(0, name)= name ## _alias;
-
-void copy_variable_aliases()
-{
- int i= 0;
- COPY_SYSVAR(block_size);
- COPY_SYSVAR(checkpoint_interval);
- COPY_SYSVAR(force_start_after_recovery_failures);
- COPY_SYSVAR(group_commit);
- COPY_SYSVAR(group_commit_interval);
- COPY_SYSVAR(log_dir_path);
- COPY_SYSVAR(log_file_size);
- COPY_SYSVAR(log_purge_type);
- COPY_SYSVAR(max_sort_file_size);
- COPY_SYSVAR(page_checksum);
- COPY_SYSVAR(pagecache_age_threshold);
- COPY_SYSVAR(pagecache_buffer_size);
- COPY_SYSVAR(pagecache_division_limit);
- COPY_SYSVAR(recover);
- COPY_THDVAR(repair_threads);
- COPY_THDVAR(sort_buffer_size);
- COPY_THDVAR(stats_method);
- COPY_SYSVAR(sync_log_dir);
- COPY_SYSVAR(used_for_temp_tables);
-}
-
-struct st_maria_plugin compat_aliases= {
- MYSQL_DAEMON_PLUGIN,
- &maria_storage_engine,
- "Maria",
- "Monty Program Ab",
- "Compatibility aliases for the Aria engine",
- PLUGIN_LICENSE_GPL,
- NULL,
- NULL,
- 0x0105,
- status_variables_aliases,
- system_variables_aliases,
- "1.5",
- MariaDB_PLUGIN_MATURITY_GAMMA
-};
-
diff --git a/storage/maria/compat_aliases.h b/storage/maria/compat_aliases.h
deleted file mode 100644
index 46a4da74eec..00000000000
--- a/storage/maria/compat_aliases.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2010 Monty Program Ab
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-extern struct st_maria_plugin compat_aliases;
-extern char mysql_real_data_home[FN_REFLEN];
-extern TYPELIB maria_recover_typelib;
-extern TYPELIB maria_stats_method_typelib;
-extern TYPELIB maria_translog_purge_type_typelib;
-extern TYPELIB maria_sync_log_dir_typelib;
-extern TYPELIB maria_group_commit_typelib;
-extern struct st_mysql_storage_engine maria_storage_engine;
-extern my_bool use_maria_for_temp_tables;
-extern struct st_mysql_sys_var* system_variables[];
-extern st_mysql_show_var status_variables[];
-void copy_variable_aliases();
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index b582ad6577e..50b7bf9f5d1 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -28,7 +28,6 @@
#include "ha_maria.h"
#include "trnman_public.h"
#include "trnman.h"
-#include "compat_aliases.h"
C_MODE_START
#include "maria_def.h"
@@ -219,7 +218,8 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
"Don't use the fast sort index method to created index if the "
"temporary file would get bigger than this.",
- 0, 0, MAX_FILE_SIZE & ~(1*MB-1), 0, MAX_FILE_SIZE, 1*MB);
+ 0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
+ 0, MAX_FILE_SIZE, 1*MB);
static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
@@ -234,7 +234,7 @@ static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
"The size of the buffer used for index blocks for Aria tables. "
"Increase this to get better index handling (for all reads and "
"multiple writes) to as much as you can afford.", 0, 0,
- KEY_CACHE_SIZE, 0, ~(ulong) 0, 1);
+ KEY_CACHE_SIZE, 8192*16L, ~(ulong) 0, 1);
static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
PLUGIN_VAR_RQCMDARG,
@@ -593,6 +593,8 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
if (found->flags & BLOB_FLAG)
recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_TIMESTAMP)
+ recinfo_pos->type= FIELD_NORMAL;
else if (found->type() == MYSQL_TYPE_VARCHAR)
recinfo_pos->type= FIELD_VARCHAR;
else if (!(options & HA_OPTION_PACK_RECORD) ||
@@ -802,6 +804,34 @@ int _ma_killed_ptr(HA_CHECK *param)
}
+/*
+ Report progress to mysqld
+
+ This is a bit more complex than what a progress report
+ function normally is.
+
+ The reason is that this is called by enable_index/repair which
+ is one stage in ALTER TABLE and we can't use the external
+ stage/max_stage for this.
+
+ thd_progress_init/thd_progress_next_stage is to be called by
+ high level commands like CHECK TABLE or REPAIR TABLE, not
+ by sub commands like enable_index().
+
+ In ma_check.c it's easier to work with stages than with a total
+ progress, so we use internal stage/max_stage here to keep the
+ code simple.
+*/
+
+void _ma_report_progress(HA_CHECK *param, ulonglong progress,
+ ulonglong max_progress)
+{
+ thd_progress_report((THD*)param->thd,
+ progress + max_progress * param->stage,
+ max_progress * param->max_stage);
+}
+
+
void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
{
va_list args;
@@ -851,7 +881,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
static int maria_create_trn_for_mysql(MARIA_HA *info)
{
- THD *thd= (THD*) info->external_ptr;
+ THD *thd= ((TABLE*) info->external_ref)->in_use;
TRN *trn= THD_TRN;
DBUG_ENTER("maria_create_trn_for_mysql");
@@ -890,6 +920,11 @@ static int maria_create_trn_for_mysql(MARIA_HA *info)
DBUG_RETURN(0);
}
+my_bool ma_killed_in_mariadb(MARIA_HA *info)
+{
+ return (((TABLE*) (info->external_ref))->in_use->killed != 0);
+}
+
} /* extern "C" */
/**
@@ -915,6 +950,7 @@ int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
HA_FILE_BASED | HA_CAN_GEOMETRY | CANNOT_ROLLBACK_FLAG |
HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_CAN_REPAIR |
+ HA_CAN_VIRTUAL_COLUMNS |
HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT),
can_enable_indexes(1), bulk_insert_single_undo(BULK_INSERT_NONE)
{}
@@ -967,7 +1003,7 @@ double ha_maria::scan_time()
}
/*
- We need to be able to store at least two keys on an index page as the
+ We need to be able to store at least 2 keys on an index page as the
splitting algorithms depends on this. (With only one key on a page
we also can't use any compression, which may make the index file much
larger)
@@ -1107,6 +1143,8 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked)
return (my_errno ? my_errno : -1);
file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
+ /* Set external_ref, mainly for temporary tables */
+ file->external_ref= (void*) table; // For ma_killed()
if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0);
@@ -1130,6 +1168,16 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked)
if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
int_table_flags |= HA_HAS_NEW_CHECKSUM;
+ /*
+ For static size rows, tell MariaDB that we will access all bytes
+ in the record when writing it. This signals MariaDB to initialize
+ the full row to ensure we don't get any errors from valgrind and
+ that all bytes in the row are properly reset.
+ */
+ if (file->s->data_file_type == STATIC_RECORD &&
+ (file->s->has_varchar_fields | file->s->has_null_fields))
+ int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
+
for (i= 0; i < table->s->keys; i++)
{
plugin_ref parser= table->key_info[i].parser;
@@ -1155,8 +1203,6 @@ int ha_maria::close(void)
int ha_maria::write_row(uchar * buf)
{
- ha_statistic_increment(&SSV::ha_write_count);
-
/* If we have a timestamp column, update it to the current time */
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
table->timestamp_field->set_time();
@@ -1180,7 +1226,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
int error;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
MARIA_SHARE *share= file->s;
- const char *old_proc_info= thd_proc_info(thd, "Checking table");
+ const char *old_proc_info;
TRN *old_trn= file->trn;
if (!file || !&param) return HA_ADMIN_INTERNAL_ERROR;
@@ -1189,7 +1235,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
param.thd= thd;
param.op_name= "check";
param.db_name= table->s->db.str;
- param.table_name= table->alias;
+ param.table_name= table->alias.c_ptr();
param.testflag= check_opt->flags | T_CHECK | T_SILENT;
param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
@@ -1199,8 +1245,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
if (!maria_is_crashed(file) &&
(((param.testflag & T_CHECK_ONLY_CHANGED) &&
- !(share->state.changed & (STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR |
+ !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
STATE_IN_REPAIR)) &&
share->state.open_count == 0) ||
((param.testflag & T_FAST) && (share->state.open_count ==
@@ -1209,12 +1254,18 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
return HA_ADMIN_ALREADY_DONE;
maria_chk_init_for_check(&param, file);
+ old_proc_info= thd_proc_info(thd, "Checking status");
+ thd_progress_init(thd, 3);
(void) maria_chk_status(&param, file); // Not fatal
error= maria_chk_size(&param, file);
if (!error)
error|= maria_chk_del(&param, file, param.testflag);
+ thd_proc_info(thd, "Checking keys");
+ thd_progress_next_stage(thd);
if (!error)
error= maria_chk_key(&param, file);
+ thd_proc_info(thd, "Checking data");
+ thd_progress_next_stage(thd);
if (!error)
{
if ((!(param.testflag & T_QUICK) &&
@@ -1238,15 +1289,15 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
if (!error)
{
if ((share->state.changed & (STATE_CHANGED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR |
- STATE_CRASHED | STATE_NOT_ANALYZED)) ||
+ STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
(param.testflag & T_STATISTICS) || maria_is_crashed(file))
{
file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
mysql_mutex_lock(&share->intern_lock);
DBUG_PRINT("info", ("Reseting crashed state"));
- share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR);
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR);
if (!(table->db_stat & HA_READ_ONLY))
error= maria_update_state_info(&param, file,
UPDATE_TIME | UPDATE_OPEN_COUNT |
@@ -1265,6 +1316,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
/* Reset trn, that may have been set by repair */
_ma_set_trn_for_table(file, old_trn);
thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1280,6 +1332,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
int error= 0;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
MARIA_SHARE *share= file->s;
+ const char *old_proc_info;
if (!&param)
return HA_ADMIN_INTERNAL_ERROR;
@@ -1288,7 +1341,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
param.thd= thd;
param.op_name= "analyze";
param.db_name= table->s->db.str;
- param.table_name= table->alias;
+ param.table_name= table->alias.c_ptr();
param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
T_DONT_CHECK_CHECKSUM);
param.using_global_keycache= 1;
@@ -1297,6 +1350,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
+ old_proc_info= thd_proc_info(thd, "Scanning");
+ thd_progress_init(thd, 1);
error= maria_chk_key(&param, file);
if (!error)
{
@@ -1306,6 +1361,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
}
else if (!maria_is_crashed(file) && !thd->killed)
maria_mark_crashed(file);
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
}
@@ -1314,6 +1371,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
int error;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
ha_rows start_records;
+ const char *old_proc_info;
if (!file || !&param)
return HA_ADMIN_INTERNAL_ERROR;
@@ -1325,7 +1383,10 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
(check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
+ param.backup_time= check_opt->start_time;
start_records= file->state->records;
+ old_proc_info= thd_proc_info(thd, "Checking table");
+ thd_progress_init(thd, 1);
while ((error= repair(thd, &param, 0)) && param.retry_repair)
{
param.retry_repair= 0;
@@ -1361,6 +1422,8 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
llstr(start_records, llbuff2),
table->s->path.str);
}
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
return error;
}
@@ -1408,14 +1471,15 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
+ thd_progress_init(thd, 1);
if ((error= repair(thd, &param, 1)) && param.retry_repair)
{
sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
my_errno, param.db_name, param.table_name);
param.testflag &= ~T_REP_BY_SORT;
- error= repair(thd, &param, 1);
+ error= repair(thd, &param, 0);
}
-
+ thd_progress_end(thd);
return error;
}
@@ -1457,7 +1521,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
_ma_copy_nontrans_state_information(file);
param->db_name= table->s->db.str;
- param->table_name= table->alias;
+ param->table_name= table->alias.c_ptr();
param->tmpfile_createflag= O_RDWR | O_TRUNC;
param->using_global_keycache= 1;
param->thd= thd;
@@ -1552,8 +1616,8 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
{
DBUG_PRINT("info", ("Reseting crashed state"));
- share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR);
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR);
file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
}
/*
@@ -1577,7 +1641,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
llstr(rows, llbuff),
llstr(file->state->records, llbuff2));
/* Abort if warning was converted to error */
- if (current_thd->is_error())
+ if (table->in_use->is_error())
error= 1;
}
}
@@ -1589,6 +1653,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
}
mysql_mutex_unlock(&share->intern_lock);
thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd); // Mark done
if (!thd->locked_tables_mode)
maria_lock_database(file, F_UNLCK);
@@ -1812,7 +1877,7 @@ int ha_maria::enable_indexes(uint mode)
}
else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
{
- THD *thd= current_thd;
+ THD *thd= table->in_use;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
if (!&param)
return HA_ADMIN_INTERNAL_ERROR;
@@ -1914,16 +1979,28 @@ int ha_maria::indexes_are_disabled(void)
void ha_maria::start_bulk_insert(ha_rows rows)
{
DBUG_ENTER("ha_maria::start_bulk_insert");
- THD *thd= current_thd;
- ulong size= min(thd->variables.read_buff_size,
- (ulong) (table->s->avg_row_length * rows));
+ THD *thd= table->in_use;
MARIA_SHARE *share= file->s;
- DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
- (ulong) rows, size));
+ DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));
/* don't enable row cache if too few rows */
if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
- maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
+ {
+ ulonglong size= thd->variables.read_buff_size, tmp;
+ if (rows)
+ {
+ if (file->state->records)
+ {
+ MARIA_INFO maria_info;
+ maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
+ set_if_smaller(size, maria_info.mean_reclength * rows);
+ }
+ else if (table->s->avg_row_length)
+ set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
+ }
+ tmp= (ulong) size; // Safe because of limits
+ maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp);
+ }
can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
share->base.keys));
@@ -1938,25 +2015,34 @@ void ha_maria::start_bulk_insert(ha_rows rows)
we don't want to update the key statistics based of only a few rows.
Index file rebuild requires an exclusive lock, so if versioning is on
don't do it (see how ha_maria::store_lock() tries to predict repair).
- We can repair index only if we have an exclusive (TL_WRITE) lock. To
- see if table is empty, we shouldn't rely on the old records' count from
- our transaction's start (if that old count is 0 but now there are
- records in the table, we would wrongly destroy them).
- So we need to look at share->state.state.records.
- As a safety net for now, we don't remove the test of
- file->state->records, because there is uncertainty on what will happen
- during repair if the two states disagree.
+ We can repair index only if we have an exclusive (TL_WRITE) lock or
+ if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK.
+
+ To see if table is empty, we shouldn't rely on the old record
+ count from our transaction's start (if that old count is 0 but
+ now there are records in the table, we would wrongly destroy
+ them). So we need to look at share->state.state.records. As a
+ safety net for now, we don't remove the test of
+ file->state->records, because there is uncertainty on what will
+ happen during repair if the two states disagree.
*/
if ((file->state->records == 0) &&
(share->state.state.records == 0) && can_enable_indexes &&
(!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
- (file->lock.type == TL_WRITE))
+ (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK))
{
/**
@todo for a single-row INSERT SELECT, we will go into repair, which
is more costly (flushes, syncs) than a row write.
*/
- maria_disable_non_unique_index(file, rows);
+ if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
+ {
+ /* Internal table; If we get a duplicate something is very wrong */
+ file->update|= HA_STATE_CHANGED;
+ maria_clear_all_keys_active(file->s->state.key_map);
+ }
+ else
+ maria_disable_non_unique_index(file, rows);
if (share->now_transactional)
{
bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
@@ -2030,10 +2116,10 @@ bool ha_maria::check_and_repair(THD *thd)
DBUG_ENTER("ha_maria::check_and_repair");
check_opt.init();
+ check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
error= 1;
- if ((file->s->state.changed &
- (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_MOVED)) ==
+ if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
STATE_MOVED)
{
sql_print_information("Zerofilling moved table: '%s'",
@@ -2050,7 +2136,6 @@ bool ha_maria::check_and_repair(THD *thd)
DBUG_RETURN(error);
error= 0;
- check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
// Don't use quick if deleted rows
if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
check_opt.flags |= T_QUICK;
@@ -2081,7 +2166,7 @@ bool ha_maria::check_and_repair(THD *thd)
bool ha_maria::is_crashed() const
{
- return (file->s->state.changed & (STATE_CRASHED | STATE_MOVED) ||
+ return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) ||
(my_disable_locking && file->s->state.open_count));
}
@@ -2097,7 +2182,6 @@ bool ha_maria::is_crashed() const
int ha_maria::update_row(const uchar * old_data, uchar * new_data)
{
CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
- ha_statistic_increment(&SSV::ha_update_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
table->timestamp_field->set_time();
return maria_update(file, old_data, new_data);
@@ -2107,7 +2191,6 @@ int ha_maria::update_row(const uchar * old_data, uchar * new_data)
int ha_maria::delete_row(const uchar * buf)
{
CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
- ha_statistic_increment(&SSV::ha_delete_count);
return maria_delete(file, buf);
}
@@ -2131,7 +2214,6 @@ int ha_maria::index_read_map(uchar * buf, const uchar * key,
enum ha_rkey_function find_flag)
{
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2142,8 +2224,15 @@ int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- ha_statistic_increment(&SSV::ha_read_key_count);
- int error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+ int error;
+ /* Use the pushed index condition if it matches the index we're scanning */
+ end_range= NULL;
+ if (index == pushed_idx_cond_keyno)
+ ma_set_index_cond_func(file, index_cond_func_maria, this);
+
+ error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+
+ ma_set_index_cond_func(file, NULL, 0);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
}
@@ -2154,7 +2243,6 @@ int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
{
DBUG_ENTER("ha_maria::index_read_last_map");
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_key_count);
int error= maria_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status= error ? STATUS_NOT_FOUND : 0;
@@ -2165,7 +2253,6 @@ int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
int ha_maria::index_next(uchar * buf)
{
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_next_count);
int error= maria_rnext(file, buf, active_index);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2175,7 +2262,6 @@ int ha_maria::index_next(uchar * buf)
int ha_maria::index_prev(uchar * buf)
{
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_prev_count);
int error= maria_rprev(file, buf, active_index);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2185,7 +2271,6 @@ int ha_maria::index_prev(uchar * buf)
int ha_maria::index_first(uchar * buf)
{
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_first_count);
int error= maria_rfirst(file, buf, active_index);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2195,7 +2280,6 @@ int ha_maria::index_first(uchar * buf)
int ha_maria::index_last(uchar * buf)
{
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_last_count);
int error= maria_rlast(file, buf, active_index);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2208,7 +2292,6 @@ int ha_maria::index_next_same(uchar * buf,
{
int error;
DBUG_ASSERT(inited == INDEX);
- ha_statistic_increment(&SSV::ha_read_next_count);
/*
TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
happens
@@ -2260,7 +2343,6 @@ int ha_maria::rnd_end()
int ha_maria::rnd_next(uchar *buf)
{
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error= maria_scan(file, buf);
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2282,7 +2364,6 @@ int ha_maria::restart_rnd_next(uchar *buf)
int ha_maria::rnd_pos(uchar *buf, uchar *pos)
{
- ha_statistic_increment(&SSV::ha_read_rnd_count);
int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
table->status= error ? STATUS_NOT_FOUND : 0;
return error;
@@ -2401,6 +2482,7 @@ int ha_maria::reset(void)
{
pushed_idx_cond= NULL;
pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= FALSE;
ma_set_index_cond_func(file, NULL, 0);
ds_mrr.dsmrr_close();
if (file->trn)
@@ -2424,7 +2506,7 @@ int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
int ha_maria::delete_all_rows()
{
- THD *thd= current_thd;
+ THD *thd= table->in_use;
(void) translog_log_debug_info(file->trn, LOGREC_DEBUG_INFO_QUERY,
(uchar*) thd->query(), thd->query_length());
if (file->s->now_transactional &&
@@ -2454,14 +2536,16 @@ int ha_maria::delete_table(const char *name)
void ha_maria::drop_table(const char *name)
{
- (void) close();
- (void) maria_delete_table(name);
+ DBUG_ASSERT(file->s->temporary);
+ (void) ha_close();
+ (void) maria_delete_table_files(name, 0);
}
int ha_maria::external_lock(THD *thd, int lock_type)
{
DBUG_ENTER("ha_maria::external_lock");
+ file->external_ref= (void*) table; // For ma_killed()
/*
We don't test now_transactional because it may vary between lock/unlock
and thus confuse our reference counting.
@@ -2480,8 +2564,6 @@ int ha_maria::external_lock(THD *thd, int lock_type)
/* Transactional table */
if (lock_type != F_UNLCK)
{
- file->external_ptr= thd; // For maria_register_trn()
-
if (!file->s->lock_key_trees) // If we don't use versioning
{
/*
@@ -2549,6 +2631,7 @@ int ha_maria::external_lock(THD *thd, int lock_type)
{
DBUG_PRINT("info",
("locked_tables: %u", trnman_has_locked_tables(trn)));
+ DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
if (trnman_has_locked_tables(trn) &&
!trnman_decrement_locked_tables(trn))
{
@@ -2678,12 +2761,12 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn)
statement assuming they have a trn (see ha_maria::start_stmt()).
*/
trn= trnman_new_trn(& thd->transaction.wt);
- /* This is just a commit, tables stay locked if they were: */
- trnman_reset_locked_tables(trn, locked_tables);
THD_TRN= trn;
if (unlikely(trn == NULL))
+ {
error= HA_ERR_OUT_OF_MEM;
-
+ goto end;
+ }
/*
Move all locked tables to the new transaction
We must do it here as otherwise file->thd and file->state may be
@@ -2708,6 +2791,8 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn)
}
}
}
+ /* This is just a commit, tables stay locked if they were: */
+ trnman_reset_locked_tables(trn, locked_tables);
}
end:
DBUG_RETURN(error);
@@ -2844,7 +2929,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg,
ha_create_info->row_type != ROW_TYPE_PAGE &&
ha_create_info->row_type != ROW_TYPE_NOT_USED &&
ha_create_info->row_type != ROW_TYPE_DEFAULT)
- push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
ER_ILLEGAL_HA_CREATE_OPTION,
"Row format set to PAGE because of TRANSACTIONAL=1 option");
@@ -3112,6 +3197,14 @@ bool maria_flush_logs(handlerton *hton)
}
+int maria_checkpoint_state(handlerton *hton, bool disabled)
+{
+ maria_checkpoint_disabled= (my_bool) disabled;
+ return 0;
+}
+
+
+
#define SHOW_MSG_LEN (FN_REFLEN + 20)
/**
@brief show status handler
@@ -3291,7 +3384,6 @@ bool ha_maria::is_changed() const
static int ha_maria_init(void *p)
{
int res;
- copy_variable_aliases();
const char *log_dir= maria_data_root;
#ifdef HAVE_PSI_INTERFACE
@@ -3305,6 +3397,10 @@ static int ha_maria_init(void *p)
maria_hton->panic= maria_hton_panic;
maria_hton->commit= maria_commit;
maria_hton->rollback= maria_rollback;
+ maria_hton->checkpoint_state= maria_checkpoint_state;
+#ifdef MARIA_CANNOT_ROLLBACK
+ maria_hton->commit= 0;
+#endif
maria_hton->flush_logs= maria_flush_logs;
maria_hton->show_status= maria_show_status;
/* TODO: decide if we support Maria being used for log tables */
@@ -3329,6 +3425,8 @@ static int ha_maria_init(void *p)
ma_checkpoint_init(checkpoint_interval);
maria_multi_threaded= maria_in_ha_maria= TRUE;
maria_create_trn_hook= maria_create_trn_for_mysql;
+ maria_pagecache->extra_debug= 1;
+ maria_assert_if_crashed_table= debug_assert_if_crashed_table;
#if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH)
/* We can only test for sub paths if my_symlink.c is using realpath */
@@ -3336,6 +3434,9 @@ static int ha_maria_init(void *p)
#endif
if (res)
maria_hton= 0;
+
+ ma_killed= ma_killed_in_mariadb;
+
return res ? HA_ERR_INITIALIZATION : 0;
}
@@ -3573,13 +3674,13 @@ static struct st_mysql_show_var aria_status_variables[]= {
***************************************************************************/
int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int ha_maria::multi_range_read_next(char **range_info)
+int ha_maria::multi_range_read_next(range_id_t *range_info)
{
return ds_mrr.dsmrr_next(range_info);
}
@@ -3600,13 +3701,18 @@ ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
}
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
}
+int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
/* MyISAM MRR implementation ends */
@@ -3630,7 +3736,6 @@ struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
maria_declare_plugin(aria)
-compat_aliases,
{
MYSQL_STORAGE_ENGINE_PLUGIN,
&maria_storage_engine,
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
index 53df1d2cfa6..39c23c8d1b6 100644
--- a/storage/maria/ha_maria.h
+++ b/storage/maria/ha_maria.h
@@ -158,7 +158,6 @@ public:
int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt);
int preload_keys(THD * thd, HA_CHECK_OPT * check_opt);
bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
- bool check_if_supported_virtual_columns(void) { return TRUE;}
#ifdef HAVE_REPLICATION
int dump(THD * thd, int fd);
int net_read_dump(NET * net);
@@ -180,13 +179,15 @@ public:
*/
int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- int multi_range_read_next(char **range_info);
+ int multi_range_read_next(range_id_t *range_info);
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c
index 56d2e261da4..ae9e83e982a 100644
--- a/storage/maria/lockman.c
+++ b/storage/maria/lockman.c
@@ -690,12 +690,12 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
}
/* yuck. waiting */
- deadline= my_getsystime() + lm->lock_timeout * 10000;
- set_timespec_nsec(timeout,lm->lock_timeout * 1000000);
+ deadline= my_hrtime().val*1000 + lm->lock_timeout * 1000000;
+ set_timespec_time_nsec(timeout, deadline);
do
{
pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout);
- } while (!DELETED(blocker->link) && my_getsystime() < deadline);
+ } while (!DELETED(blocker->link) && my_hrtime().val < deadline/1000);
pthread_mutex_unlock(wait_for_lo->mutex);
lf_rwlock_by_pins(pins);
if (!DELETED(blocker->link))
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index fb12ecdbe3b..c2cf7d32d48 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -104,10 +104,11 @@
- On checkpoint
(Ie: When we do a checkpoint, we have to ensure that all bitmaps are
put on disk even if they are not in the page cache).
- - When explicitely requested (for example on backup or after recvoery,
+ - When explicitely requested (for example on backup or after recovery,
to simplify things)
The flow of writing a row is that:
+ - Mark the bitmap not flushable (_ma_bitmap_flushable(X, 1))
- Lock the bitmap
- Decide which data pages we will write to
- Mark them full in the bitmap page so that other threads do not try to
@@ -119,6 +120,7 @@
pages (that is, we marked pages full but when we are done we realize
we didn't fill them)
- Unlock the bitmap.
+ - Mark the bitmap flushable (_ma_bitmap_flushable(X, -1))
*/
#include "maria_def.h"
@@ -127,6 +129,12 @@
#define FULL_HEAD_PAGE 4
#define FULL_TAIL_PAGE 7
+const char *bits_to_txt[]=
+{
+ "empty", "00-30% full", "30-60% full", "60-90% full", "full",
+ "tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
+};
+
/*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/
#undef WRONG_BITMAP_FLUSH
@@ -136,12 +144,15 @@ static my_bool _ma_read_bitmap_page(MARIA_HA *info,
static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
MARIA_FILE_BITMAP *bitmap,
pgcache_page_no_t page);
+static void _ma_bitmap_unpin_all(MARIA_SHARE *share);
+
/* Write bitmap page to key cache */
static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
MARIA_FILE_BITMAP *bitmap)
{
+ my_bool res;
DBUG_ENTER("write_changed_bitmap");
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
DBUG_ASSERT(bitmap->file.write_callback != 0);
@@ -159,18 +170,28 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
#endif
)
{
- my_bool res= pagecache_write(share->pagecache,
+ res= pagecache_write(share->pagecache,
&bitmap->file, bitmap->page, 0,
bitmap->map, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED,
PAGECACHE_PIN_LEFT_UNPINNED,
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
+ DBUG_ASSERT(!res);
DBUG_RETURN(res);
}
else
{
+ /*
+ bitmap->non_flushable means that someone has changed the bitmap,
+ but it's not yet complete so it can't yet be written to disk.
+ In this case we write the changed bitmap to the disk cache,
+ but keep it pinned until the change is completed. The page will
+ be unpinned later by _ma_bitmap_unpin_all() as soon as non_flushable
+ is set back to 0.
+ */
MARIA_PINNED_PAGE page_link;
- int res= pagecache_write(share->pagecache,
+ DBUG_PRINT("info", ("Writing pinned bitmap page"));
+ res= pagecache_write(share->pagecache,
&bitmap->file, bitmap->page, 0,
bitmap->map, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN,
@@ -178,7 +199,8 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
LSN_IMPOSSIBLE);
page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED;
page_link.changed= 1;
- push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
+ push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link);
+ DBUG_ASSERT(!res);
DBUG_RETURN(res);
}
}
@@ -189,7 +211,10 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
SYNOPSIS
_ma_bitmap_init()
share Share handler
- file data file handler
+ file Data file handler
+ last_page Pointer to last page (max_file_size) that needs to be
+ mapped by the bitmap. This is adjusted to bitmap
+ alignment.
NOTES
This is called the first time a file is opened.
@@ -199,12 +224,14 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
1 error
*/
-my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file,
+ pgcache_page_no_t *last_page)
{
uint aligned_bit_blocks;
uint max_page_size;
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
uint size= share->block_size;
+ pgcache_page_no_t first_bitmap_with_space;
#ifndef DBUG_OFF
/* We want to have a copy of the bitmap to be able to print differences */
size*= 2;
@@ -221,13 +248,14 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
/* Size needs to be aligned on 6 */
aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6;
- bitmap->total_size= aligned_bit_blocks * 6;
+ bitmap->max_total_size= bitmap->total_size= aligned_bit_blocks * 6;
/*
In each 6 bytes, we have 6*8/3 = 16 pages covered
The +1 is to add the bitmap page, as this doesn't have to be covered
*/
bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
- bitmap->flush_all_requested= 0;
+ bitmap->flush_all_requested= bitmap->waiting_for_flush_all_requested=
+ bitmap->waiting_for_non_flushable= 0;
bitmap->non_flushable= 0;
/* Update size for bits */
@@ -247,13 +275,35 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
mysql_cond_init(key_SHARE_BITMAP_cond,
&share->bitmap.bitmap_cond, 0);
+ first_bitmap_with_space= share->state.first_bitmap_with_space;
_ma_bitmap_reset_cache(share);
- if (share->state.first_bitmap_with_space == ~(pgcache_page_no_t) 0)
+ /*
+ The bitmaps used to map the file are aligned on 6 bytes. We now
+ calculate the max file size that can be mapped by the bitmaps. This
+ is needed to make ma_info() report a true file size so that the user can
+ estimate if there is still free space for records in the file.
+ */
{
- /* Start scanning for free space from start of file */
- share->state.first_bitmap_with_space = 0;
+ pgcache_page_no_t last_bitmap_page;
+ ulong blocks, bytes;
+
+ last_bitmap_page= *last_page - *last_page % bitmap->pages_covered;
+ blocks= *last_page - last_bitmap_page;
+ bytes= (blocks * 3) / 8; /* 3 bits per page / 8 bits per byte */
+ /* Size needs to be aligned on 6 */
+ bytes/= 6;
+ bytes*= 6;
+ bitmap->last_bitmap_page= last_bitmap_page;
+ bitmap->last_total_size= bytes;
+ *last_page= ((last_bitmap_page + bytes*8/3));
}
+
+ /* Restore first_bitmap_with_space if it's reasonable */
+ if (first_bitmap_with_space <= (share->state.state.data_file_length /
+ share->block_size))
+ share->state.first_bitmap_with_space= first_bitmap_with_space;
+
return 0;
}
@@ -268,16 +318,63 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
my_bool _ma_bitmap_end(MARIA_SHARE *share)
{
- my_bool res= _ma_bitmap_flush(share);
+ my_bool res;
mysql_mutex_assert_owner(&share->close_lock);
+ DBUG_ASSERT(share->bitmap.non_flushable == 0);
+ DBUG_ASSERT(share->bitmap.flush_all_requested == 0);
+ DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 &&
+ share->bitmap.waiting_for_flush_all_requested == 0);
+ DBUG_ASSERT(share->bitmap.pinned_pages.elements == 0);
+
+ res= _ma_bitmap_flush(share);
mysql_mutex_destroy(&share->bitmap.bitmap_lock);
mysql_cond_destroy(&share->bitmap.bitmap_cond);
delete_dynamic(&share->bitmap.pinned_pages);
my_free(share->bitmap.map);
share->bitmap.map= 0;
+ /*
+ This is to not get an assert in checkpoint. The bitmap will be flushed
+ at once by _ma_once_end_block_record() as part of the normal flush
+ of the kfile.
+ */
+ share->bitmap.changed_not_flushed= 0;
return res;
}
+/*
+ Ensure that we have incremented open count before we try to read/write
+ a page while we have the bitmap lock.
+ This is needed to ensure that we don't call _ma_mark_file_changed() as
+ part of flushing a page to disk, as this locks share->internal_lock
+ and then mutex lock would happen in the wrong order.
+*/
+
+static inline void _ma_bitmap_mark_file_changed(MARIA_SHARE *share,
+ my_bool flush_translog)
+{
+ /*
+ It's extremely unlikely that the following test is true as it
+ only happens once if the table has changed.
+ */
+ if (unlikely(!share->global_changed &&
+ (share->state.changed & STATE_CHANGED)))
+ {
+ /* purecov: begin inspected */
+ /* unlock mutex as it can't be held during _ma_mark_file_changed() */
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
+
+ /*
+ We have to flush the translog to ensure we have registered that the
+ table is open.
+ */
+ if (flush_translog && share->now_transactional)
+ (void) translog_flush(share->state.logrec_file_id);
+
+ _ma_mark_file_changed(share);
+ mysql_mutex_lock(&share->bitmap.bitmap_lock);
+ /* purecov: end */
+ }
+}
/*
Send updated bitmap to the page cache
@@ -314,6 +411,12 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *share)
mysql_mutex_lock(&share->bitmap.bitmap_lock);
if (share->bitmap.changed)
{
+ /*
+ We have to mark the file changed here, as otherwise the following
+ write to pagecache may force a page out from this file, which would
+ cause _ma_mark_file_changed() to be called with the bitmap lock held!
+ */
+ _ma_bitmap_mark_file_changed(share, 1);
res= write_changed_bitmap(share, &share->bitmap);
share->bitmap.changed= 0;
}
@@ -353,12 +456,45 @@ filter_flush_bitmap_pages(enum pagecache_page_type type
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
{
my_bool res= 0;
+ uint send_signal= 0;
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
DBUG_ENTER("_ma_bitmap_flush_all");
+
+#ifdef EXTRA_DEBUG_BITMAP
+ {
+ char buff[160];
+ uint len= my_sprintf(buff,
+ (buff, "bitmap_flush: fd: %d id: %u "
+ "changed: %d changed_not_flushed: %d "
+ "flush_all_requested: %d",
+ share->bitmap.file.file,
+ share->id,
+ bitmap->changed,
+ bitmap->changed_not_flushed,
+ bitmap->flush_all_requested));
+ (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+ (uchar*) buff, len);
+ }
+#endif
+
mysql_mutex_lock(&bitmap->bitmap_lock);
+ if (!bitmap->changed && !bitmap->changed_not_flushed)
+ {
+ mysql_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_RETURN(0);
+ }
+
+ _ma_bitmap_mark_file_changed(share, 0);
+
+ /*
+ The following should be true as it was tested above. We have to test
+ this again as _ma_bitmap_mark_file_changed() may have temporarily
+ released the bitmap mutex.
+ */
if (bitmap->changed || bitmap->changed_not_flushed)
{
bitmap->flush_all_requested++;
+ bitmap->waiting_for_non_flushable++;
#ifndef WRONG_BITMAP_FLUSH
while (bitmap->non_flushable > 0)
{
@@ -366,6 +502,16 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
#endif
+ bitmap->waiting_for_non_flushable--;
+#ifdef EXTRA_DEBUG_BITMAP
+ {
+ char tmp[MAX_BITMAP_INFO_LENGTH];
+ _ma_get_bitmap_description(bitmap, bitmap->map, bitmap->page, tmp);
+ (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+ (uchar*) tmp, strlen(tmp));
+ }
+#endif
+
DBUG_ASSERT(bitmap->flush_all_requested == 1);
/*
Bitmap is in a flushable state: its contents in memory are reflected by
@@ -401,9 +547,12 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
become false, wake them up.
*/
DBUG_PRINT("info", ("bitmap flusher waking up others"));
- mysql_cond_broadcast(&bitmap->bitmap_cond);
+ send_signal= (bitmap->waiting_for_flush_all_requested |
+ bitmap->waiting_for_non_flushable);
}
mysql_mutex_unlock(&bitmap->bitmap_lock);
+ if (send_signal)
+ mysql_cond_broadcast(&bitmap->bitmap_cond);
DBUG_RETURN(res);
}
@@ -433,11 +582,13 @@ void _ma_bitmap_lock(MARIA_SHARE *share)
mysql_mutex_lock(&bitmap->bitmap_lock);
bitmap->flush_all_requested++;
+ bitmap->waiting_for_non_flushable++;
while (bitmap->non_flushable)
{
DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
+ bitmap->waiting_for_non_flushable--;
/*
Ensure that _ma_bitmap_flush_all() and _ma_bitmap_lock() are blocked.
ma_bitmap_flushable() is blocked thanks to 'flush_all_requested'.
@@ -457,6 +608,7 @@ void _ma_bitmap_lock(MARIA_SHARE *share)
void _ma_bitmap_unlock(MARIA_SHARE *share)
{
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ uint send_signal;
DBUG_ENTER("_ma_bitmap_unlock");
if (!share->now_transactional)
@@ -464,10 +616,14 @@ void _ma_bitmap_unlock(MARIA_SHARE *share)
DBUG_ASSERT(bitmap->flush_all_requested > 0 && bitmap->non_flushable == 1);
mysql_mutex_lock(&bitmap->bitmap_lock);
- bitmap->flush_all_requested--;
bitmap->non_flushable= 0;
+ _ma_bitmap_unpin_all(share);
+ send_signal= bitmap->waiting_for_non_flushable;
+ if (!--bitmap->flush_all_requested)
+ send_signal|= bitmap->waiting_for_flush_all_requested;
mysql_mutex_unlock(&bitmap->bitmap_lock);
- mysql_cond_broadcast(&bitmap->bitmap_cond);
+ if (send_signal)
+ mysql_cond_broadcast(&bitmap->bitmap_cond);
DBUG_VOID_RETURN;
}
@@ -494,7 +650,7 @@ static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
while (pinned_page-- != page_link)
pagecache_unlock_by_link(share->pagecache, pinned_page->link,
pinned_page->unlock, PAGECACHE_UNPIN,
- LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, TRUE);
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, TRUE);
bitmap->pinned_pages.elements= 0;
DBUG_VOID_RETURN;
}
@@ -520,7 +676,7 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share)
bzero(bitmap->map, bitmap->block_size);
bitmap->changed= 1;
bitmap->page= 0;
- bitmap->used_size= bitmap->total_size;
+ bitmap->used_size= bitmap->total_size= bitmap->max_total_size;
}
DBUG_VOID_RETURN;
}
@@ -534,7 +690,8 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share)
@notes
This is called after we have swapped file descriptors and we want
- bitmap to forget all cached information
+ bitmap to forget all cached information.
+ It's also called directly after we have opened a file.
*/
void _ma_bitmap_reset_cache(MARIA_SHARE *share)
@@ -550,13 +707,20 @@ void _ma_bitmap_reset_cache(MARIA_SHARE *share)
We can't read a page yet, as in some case we don't have an active
page cache yet.
Pretend we have a dummy, full and not changed bitmap page in memory.
+
+ We set bitmap->page to a value so that if we use it in
+ move_to_next_bitmap() it will point to page 0.
+ (This can only happen if writing to a bitmap page fails)
*/
- bitmap->page= ~(ulonglong) 0;
- bitmap->used_size= bitmap->total_size;
+ bitmap->page= ((pgcache_page_no_t) 0) - bitmap->pages_covered;
+ bitmap->used_size= bitmap->total_size= bitmap->max_total_size;
bfill(bitmap->map, share->block_size, 255);
#ifndef DBUG_OFF
memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
#endif
+
+ /* Start scanning for free space from start of file */
+ share->state.first_bitmap_with_space = 0;
}
}
@@ -680,7 +844,7 @@ static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
Print bitmap for debugging
SYNOPSIS
- _ma_print_bitmap()
+ _ma_print_bitmap_changes()
bitmap Bitmap to print
IMPLEMENTATION
@@ -691,12 +855,6 @@ static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
#ifndef DBUG_OFF
-const char *bits_to_txt[]=
-{
- "empty", "00-30% full", "30-60% full", "60-90% full", "full",
- "tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
-};
-
static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap)
{
uchar *pos, *end, *org_pos;
@@ -747,12 +905,11 @@ void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
uchar *pos, *end;
char llbuff[22];
- end= bitmap->map + bitmap->used_size;
DBUG_LOCK_FILE;
fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n", llstr(page, llbuff));
page++; /* Skip bitmap page */
- for (pos= data, end= pos + bitmap->total_size;
+ for (pos= data, end= pos + bitmap->max_total_size;
pos < end ;
pos+= 6)
{
@@ -781,6 +938,70 @@ void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
#endif /* DBUG_OFF */
+/*
+ Return content of bitmap as a printable string
+*/
+
+void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap,
+ uchar *bitmap_data,
+ pgcache_page_no_t page,
+ char *out)
+{
+ uchar *pos, *end;
+ uint count=0, dot_printed= 0, len;
+ char buff[80], last[80];
+
+ page++;
+ last[0]=0;
+ for (pos= bitmap_data, end= pos+ bitmap->used_size ; pos < end ; pos+= 6)
+ {
+ ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
+ uint i;
+
+ for (i= 0; i < 16 ; i++, bits>>= 3)
+ {
+ if (count > 60)
+ {
+ if (memcmp(buff, last, count))
+ {
+ memcpy(last, buff, count);
+ len= sprintf(out, "%8lu: ", (ulong) page - count);
+ memcpy(out+len, buff, count);
+ out+= len + count + 1;
+ out[-1]= '\n';
+ dot_printed= 0;
+ }
+ else if (!(dot_printed++))
+ {
+ out= strmov(out, "...\n");
+ }
+ count= 0;
+ }
+ buff[count++]= '0' + (uint) (bits & 7);
+ page++;
+ }
+ }
+ len= sprintf(out, "%8lu: ", (ulong) page - count);
+ memcpy(out+len, buff, count);
+ out[len + count]= '\n';
+ out[len + count + 1]= 0;
+}
+
+
+/*
+ Adjust bitmap->total_size to not go over max_data_file_size
+*/
+
+static void adjust_total_size(MARIA_HA *info, pgcache_page_no_t page)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+
+ if (page < bitmap->last_bitmap_page)
+ bitmap->total_size= bitmap->max_total_size; /* Use all bits in bitmap */
+ else
+ bitmap->total_size= bitmap->last_total_size;
+}
+
/***************************************************************************
Reading & writing bitmap pages
***************************************************************************/
@@ -817,12 +1038,16 @@ static my_bool _ma_read_bitmap_page(MARIA_HA *info,
DBUG_ASSERT(!bitmap->changed);
bitmap->page= page;
- if (((page + 1) * bitmap->block_size) > share->state.state.data_file_length)
+ if ((page + 1) * bitmap->block_size > share->state.state.data_file_length)
{
/* Inexistent or half-created page */
res= _ma_bitmap_create_missing(info, bitmap, page);
+ if (!res)
+ adjust_total_size(info, page);
DBUG_RETURN(res);
}
+
+ adjust_total_size(info, page);
bitmap->used_size= bitmap->total_size;
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
res= pagecache_read(share->pagecache,
@@ -871,6 +1096,13 @@ static my_bool _ma_change_bitmap_page(MARIA_HA *info,
{
DBUG_ENTER("_ma_change_bitmap_page");
+ /*
+ We have to mark the file changed here, as otherwise the following
+ read/write to pagecache may force a page out from this file, which would
+ cause _ma_mark_file_changed() to be called with the bitmap lock held!
+ */
+ _ma_bitmap_mark_file_changed(info->s, 1);
+
if (bitmap->changed)
{
if (write_changed_bitmap(info->s, bitmap))
@@ -906,14 +1138,18 @@ static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
MARIA_STATE_INFO *state= &info->s->state;
DBUG_ENTER("move_to_next_bitmap");
- if (state->first_bitmap_with_space != ~(ulonglong) 0 &&
+ if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 &&
state->first_bitmap_with_space != page)
{
page= state->first_bitmap_with_space;
- state->first_bitmap_with_space= ~(ulonglong) 0;
+ state->first_bitmap_with_space= ~(pgcache_page_no_t) 0;
+ DBUG_ASSERT(page % bitmap->pages_covered == 0);
}
else
+ {
page+= bitmap->pages_covered;
+ DBUG_ASSERT(page % bitmap->pages_covered == 0);
+ }
DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
}
@@ -1308,10 +1544,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
best_prefix_bits|= tmp;
int6store(best_data, best_prefix_bits);
if (!(best_area_size-= best_prefix_area_size))
- {
- DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
- DBUG_RETURN(block->page_count);
- }
+ goto end;
best_data+= 6;
}
best_area_size*= 3; /* Bits to set */
@@ -1329,6 +1562,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
bitmap->used_size= (uint) (best_data - bitmap->map);
DBUG_ASSERT(bitmap->used_size <= bitmap->total_size);
}
+end:
bitmap->changed= 1;
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
DBUG_RETURN(block->page_count);
@@ -1621,7 +1855,7 @@ static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size,
find_where_to_split_row()
share Maria share
row Information of what is in the row (from calc_record_size())
- extents_length Number of bytes needed to store all extents
+ extents Max number of extents we have to store in header
split_size Free size on the page (The head length must be less
than this)
@@ -1630,7 +1864,7 @@ static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size,
*/
static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
- uint extents_length, uint split_size)
+ uint extents, uint split_size)
{
uint *lengths, *lengths_end;
/*
@@ -1640,19 +1874,20 @@ static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
- One extent
*/
uint row_length= (row->min_length +
- size_to_store_key_length(extents_length) +
+ size_to_store_key_length(extents) +
ROW_EXTENT_SIZE);
- DBUG_ASSERT(row_length < split_size);
+ DBUG_ASSERT(row_length <= split_size);
+
/*
Store first in all_field_lengths the different parts that are written
to the row. This needs to be in same order as in
ma_block_rec.c::write_block_record()
*/
- row->null_field_lengths[-3]= extents_length;
+ row->null_field_lengths[-3]= extents * ROW_EXTENT_SIZE;
row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length;
row->null_field_lengths[-1]= row->field_lengths_length;
for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS,
- lengths_end= (lengths + share->base.pack_fields - share->base.blobs +
+ lengths_end= (lengths + share->base.fields - share->base.blobs +
EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++)
{
if (row_length + *lengths > split_size)
@@ -1808,18 +2043,19 @@ my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
/* The first segment size is stored in 'row_length' */
- row_length= find_where_to_split_row(share, row, extents_length,
+ row_length= find_where_to_split_row(share, row, row->extents_count +
+ ELEMENTS_RESERVED_FOR_MAIN_PART-1,
max_page_size);
full_page_size= MAX_TAIL_SIZE(share->block_size);
position= 0;
- if (head_length - row_length <= full_page_size)
+ rest_length= head_length - row_length;
+ if (rest_length <= full_page_size)
position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
if (find_head(info, row_length, position))
goto abort;
row->space_on_head_page= row_length;
- rest_length= head_length - row_length;
if (write_rest_of_head(info, position, rest_length))
goto abort;
@@ -1886,8 +2122,7 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
goto abort;
/* Switch bitmap to current head page */
- bitmap_page= page / share->bitmap.pages_covered;
- bitmap_page*= share->bitmap.pages_covered;
+ bitmap_page= page - page % share->bitmap.pages_covered;
if (share->bitmap.page != bitmap_page &&
_ma_change_bitmap_page(info, &share->bitmap, bitmap_page))
@@ -1906,16 +2141,22 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
/* Allocate enough space */
head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
- /* The first segment size is stored in 'row_length' */
- row_length= find_where_to_split_row(share, row, extents_length, free_size);
+ /*
+ The first segment size is stored in 'row_length'
+ We have to add ELEMENTS_RESERVED_FOR_MAIN_PART here as the extent
+ information may be up to this size when the header splits.
+ */
+ row_length= find_where_to_split_row(share, row, row->extents_count +
+ ELEMENTS_RESERVED_FOR_MAIN_PART-1,
+ free_size);
position= 0;
- if (head_length - row_length < MAX_TAIL_SIZE(share->block_size))
+ rest_length= head_length - row_length;
+ if (rest_length <= MAX_TAIL_SIZE(share->block_size))
position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
use_head(info, page, row_length, position);
row->space_on_head_page= row_length;
- rest_length= head_length - row_length;
if (write_rest_of_head(info, position, rest_length))
goto abort;
@@ -2003,7 +2244,7 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
Get bitmap pattern for a given page
SYNOPSIS
- get_page_bits()
+ bitmap_get_page_bits()
info Maria handler
bitmap Bitmap handler
page Page number
@@ -2013,8 +2254,8 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
~0 Error (couldn't read page)
*/
-uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
- pgcache_page_no_t page)
+static uint bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ pgcache_page_no_t page)
{
pgcache_page_no_t bitmap_page;
uint offset_page, offset, tmp;
@@ -2040,6 +2281,19 @@ uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
}
+/* As above, but take a lock while getting the data */
+
+uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ pgcache_page_no_t page)
+{
+ uint tmp;
+ mysql_mutex_lock(&bitmap->bitmap_lock);
+ tmp= bitmap_get_page_bits(info, bitmap, page);
+ mysql_mutex_unlock(&bitmap->bitmap_lock);
+ return tmp;
+}
+
+
/*
Mark all pages in a region as free
@@ -2119,6 +2373,7 @@ my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info,
DBUG_RETURN(0);
}
+
/*
Set all pages in a region as used
@@ -2151,7 +2406,7 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
bitmap_page= page - page % bitmap->pages_covered;
if (page == bitmap_page ||
- page + page_count >= bitmap_page + bitmap->pages_covered)
+ page + page_count > bitmap_page + bitmap->pages_covered)
{
DBUG_ASSERT(0); /* Wrong in data */
DBUG_RETURN(1);
@@ -2250,7 +2505,7 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
the bitmap's mutex.
*/
_ma_bitmap_unpin_all(share);
- if (unlikely(bitmap->flush_all_requested))
+ if (unlikely(bitmap->waiting_for_non_flushable))
{
DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
mysql_cond_broadcast(&bitmap->bitmap_cond);
@@ -2263,6 +2518,8 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
}
DBUG_ASSERT(non_flushable_inc == 1);
DBUG_ASSERT(info->non_flushable_state == 0);
+
+ bitmap->waiting_for_flush_all_requested++;
while (unlikely(bitmap->flush_all_requested))
{
/*
@@ -2279,6 +2536,7 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
DBUG_PRINT("info", ("waiting for bitmap flusher"));
mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
+ bitmap->waiting_for_flush_all_requested--;
bitmap->non_flushable++;
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
mysql_mutex_unlock(&bitmap->bitmap_lock);
@@ -2352,7 +2610,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
else
{
DBUG_ASSERT(current_bitmap_value ==
- _ma_bitmap_get_page_bits(info, bitmap, block->page));
+ bitmap_get_page_bits(info, bitmap, block->page));
}
/* Handle all full pages and tail pages (for head page and blob) */
@@ -2383,16 +2641,14 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
The page has all bits set; The following test is an optimization
to not set the bits to the same value as before.
*/
+ DBUG_ASSERT(current_bitmap_value ==
+ bitmap_get_page_bits(info, bitmap, block->page));
+
if (bits != current_bitmap_value)
{
if (set_page_bits(info, bitmap, block->page, bits))
goto err;
}
- else
- {
- DBUG_ASSERT(current_bitmap_value ==
- _ma_bitmap_get_page_bits(info, bitmap, block->page));
- }
}
else if (!(block->used & BLOCKUSED_USED) &&
_ma_bitmap_reset_full_page_bits(info, bitmap,
@@ -2408,7 +2664,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
if (--bitmap->non_flushable == 0)
{
_ma_bitmap_unpin_all(info->s);
- if (unlikely(bitmap->flush_all_requested))
+ if (unlikely(bitmap->waiting_for_non_flushable))
{
DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
mysql_cond_broadcast(&bitmap->bitmap_cond);
@@ -2448,9 +2704,9 @@ my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
uint count)
{
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ my_bool res;
DBUG_ENTER("_ma_bitmap_free_full_pages");
- mysql_mutex_lock(&bitmap->bitmap_lock);
for (; count--; extents+= ROW_EXTENT_SIZE)
{
pgcache_page_no_t page= uint5korr(extents);
@@ -2461,15 +2717,15 @@ my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
if (page == 0 && page_count == 0)
continue; /* Not used extent */
if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page,
- page_count, PAGECACHE_LOCK_WRITE, 1) ||
- _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count))
- {
- mysql_mutex_unlock(&bitmap->bitmap_lock);
+ page_count, PAGECACHE_LOCK_WRITE, 1))
+ DBUG_RETURN(1);
+ mysql_mutex_lock(&bitmap->bitmap_lock);
+ res= _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count);
+ mysql_mutex_unlock(&bitmap->bitmap_lock);
+ if (res)
DBUG_RETURN(1);
- }
}
}
- mysql_mutex_unlock(&bitmap->bitmap_lock);
DBUG_RETURN(0);
}
@@ -2521,17 +2777,15 @@ my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head,
page_type What kind of page this is
page Adress to page
empty_space Empty space on page
- bitmap_pattern Store here the pattern that was in the bitmap for the
- page. This is always updated.
+ bitmap_pattern Bitmap pattern for page (from bitmap)
RETURN
0 ok
1 error
*/
-my_bool _ma_check_bitmap_data(MARIA_HA *info,
- enum en_page_type page_type, pgcache_page_no_t page,
- uint empty_space, uint *bitmap_pattern)
+my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type,
+ uint empty_space, uint bitmap_pattern)
{
uint bits;
switch (page_type) {
@@ -2552,8 +2806,7 @@ my_bool _ma_check_bitmap_data(MARIA_HA *info,
bits= 0; /* to satisfy compiler */
DBUG_ASSERT(0);
}
- return ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap,
- page)) != bits);
+ return (bitmap_pattern != bits);
}
@@ -2798,6 +3051,11 @@ static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
/* First (in offset order) bitmap page to create */
if (data_file_length < block_size)
goto err; /* corrupted, should have first bitmap page */
+ if (page * block_size >= share->base.max_data_file_length)
+ {
+ my_errno= HA_ERR_RECORD_FILE_FULL;
+ goto err;
+ }
from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1;
from*= bitmap->pages_covered;
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index 1c0e6b88d89..669dbe84fdc 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -414,14 +414,29 @@ void _ma_init_block_record_data(void)
my_bool _ma_once_init_block_record(MARIA_SHARE *share, File data_file)
{
+ my_bool res;
+ pgcache_page_no_t last_page;
+
+ /*
+ First calculate the max file length we can have with a pointer of size
+ rec_reflength.
- share->base.max_data_file_length=
- (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) *
- share->block_size;
+ The 'rec_reflength - 1' is because one byte is used for row
+ position within the page.
+ The /2 comes from _ma_transaction_recpos_to_keypos() where we use
+ the lowest bit to mark if there is a transid following the rownr.
+ */
+ last_page= ((ulonglong) 1 << ((share->base.rec_reflength-1)*8))/2;
+ if (!last_page) /* Overflow; set max size */
+ last_page= ~(pgcache_page_no_t) 0;
+
+ res= _ma_bitmap_init(share, data_file, &last_page);
+ share->base.max_data_file_length= _ma_safe_mul(last_page + 1,
+ share->block_size);
#if SIZEOF_OFF_T == 4
- set_if_smaller(share->base.max_data_file_length, INT_MAX32);
+ set_if_smaller(share->base.max_data_file_length, INT_MAX32);
#endif
- return _ma_bitmap_init(share, data_file);
+ return res;
}
@@ -891,8 +906,7 @@ static my_bool extend_area_on_page(MARIA_HA *info,
DBUG_PRINT("error", ("Not enough space: "
"length: %u request_length: %u",
length, request_length));
- my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
- DBUG_ASSERT(0); /* For debugging */
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(1); /* Error in block */
}
*empty_space= length; /* All space is here */
@@ -1020,7 +1034,7 @@ make_space_for_directory(MARIA_HA *info,
UNDO of DELETE (in which case we know the row was on the
page before) or if the bitmap told us there was space on page
*/
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
return(1);
}
}
@@ -1707,7 +1721,7 @@ struct st_row_pos_info
static my_bool get_head_or_tail_page(MARIA_HA *info,
- MARIA_BITMAP_BLOCK *block,
+ const MARIA_BITMAP_BLOCK *block,
uchar *buff, uint length, uint page_type,
enum pagecache_page_lock lock,
struct st_row_pos_info *res)
@@ -1777,7 +1791,8 @@ static my_bool get_head_or_tail_page(MARIA_HA *info,
DBUG_RETURN(0);
crashed:
- my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */
DBUG_RETURN(1);
}
@@ -1806,7 +1821,7 @@ crashed:
*/
static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info,
- MARIA_BITMAP_BLOCK *block,
+ const MARIA_BITMAP_BLOCK *block,
uchar *buff, uint length,
uint page_type,
enum pagecache_page_lock lock,
@@ -1870,7 +1885,8 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info,
DBUG_RETURN(0);
err:
- my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */
DBUG_RETURN(1);
}
@@ -2018,6 +2034,7 @@ static my_bool write_tail(MARIA_HA *info,
PAGECACHE_WRITE_DELAY, &page_link.link,
LSN_IMPOSSIBLE)))
{
+ DBUG_ASSERT(page_link.link);
page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
@@ -2094,8 +2111,7 @@ static my_bool write_full_pages(MARIA_HA *info,
{
if (!--sub_blocks)
{
- DBUG_ASSERT(0); /* Wrong in bitmap or UNDO */
- my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(1);
}
@@ -2241,7 +2257,7 @@ static void store_extent_info(uchar *to,
for (block= first_block, end_block= first_block+count ;
block < end_block; block++)
{
- /* The following is only false for marker blocks */
+ /* The following is only false for marker (unused) blocks */
if (likely(block->used & BLOCKUSED_USED))
{
uint page_count= block->page_count;
@@ -2506,7 +2522,7 @@ static my_bool free_full_page_range(MARIA_HA *info, pgcache_page_no_t page,
}
if (delete_count &&
pagecache_delete_pages(share->pagecache, &info->dfile,
- page, delete_count, PAGECACHE_LOCK_WRITE, 0))
+ page, delete_count, PAGECACHE_LOCK_WRITE, 1))
res= 1;
if (share->now_transactional)
@@ -2756,7 +2772,7 @@ static my_bool write_block_record(MARIA_HA *info,
DBUG_ASSERT(length <= column->length);
break;
default: /* Wrong data */
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
length=0;
break;
}
@@ -2815,7 +2831,6 @@ static my_bool write_block_record(MARIA_HA *info,
DBUG_PRINT("info", ("Used head length on page: %u header_length: %u",
head_length,
(uint) (flag & ROW_FLAG_TRANSID ? TRANSID_SIZE : 0)));
- DBUG_ASSERT(data <= end_of_data);
if (head_length < share->base.min_block_length)
{
/* Extend row to be of size min_block_length */
@@ -2824,6 +2839,7 @@ static my_bool write_block_record(MARIA_HA *info,
data+= diff_length;
head_length= share->base.min_block_length;
}
+ DBUG_ASSERT(data <= end_of_data);
/*
If this is a redo entry (ie, undo_lsn != LSN_ERROR) then we should have
written exactly head_length bytes (same as original record).
@@ -3070,9 +3086,10 @@ static my_bool write_block_record(MARIA_HA *info,
extent_data= row_extents_second_part +
((last_head_block - head_block) - 2) * ROW_EXTENT_SIZE;
}
- DBUG_ASSERT(uint2korr(extent_data+5) & TAIL_BIT);
+ /* Write information for tail block in the reserved space */
page_store(extent_data, head_tail_block->page);
- int2store(extent_data + PAGE_STORE_SIZE, head_tail_block->page_count);
+ pagerange_store(extent_data + PAGE_STORE_SIZE,
+ head_tail_block->page_count);
}
}
else
@@ -3146,6 +3163,7 @@ static my_bool write_block_record(MARIA_HA *info,
PAGECACHE_WRITE_DELAY, &page_link.link,
LSN_IMPOSSIBLE))
goto disk_err;
+ DBUG_ASSERT(page_link.link);
page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
@@ -3414,8 +3432,9 @@ static my_bool write_block_record(MARIA_HA *info,
DBUG_RETURN(0);
crashed:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
/* Something was wrong with data on page */
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
disk_err:
/**
@@ -3488,7 +3507,9 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
/* page will be pinned & locked by get_head_or_tail_page */
if (get_head_or_tail_page(info, blocks->block, info->buff,
- row->space_on_head_page, HEAD_PAGE,
+ max(row->space_on_head_page,
+ info->s->base.min_block_length),
+ HEAD_PAGE,
PAGECACHE_LOCK_WRITE, &row_pos))
goto err;
row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr);
@@ -3619,6 +3640,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
}
}
}
+ _ma_bitmap_unlock(share);
if (share->now_transactional)
{
if (_ma_write_clr(info, info->cur_row.orig_undo_lsn,
@@ -3628,7 +3650,6 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
&lsn, (void*) 0))
res= 1;
}
- _ma_bitmap_unlock(share);
_ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res);
}
@@ -3806,6 +3827,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
DBUG_RETURN(0);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("errpos: %d", errpos));
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
@@ -3885,7 +3907,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
("org_empty_size: %u head_length: %u length_on_page: %u",
org_empty_size, (uint) cur_row->head_length,
length_on_head_page));
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
goto err;
}
@@ -3918,8 +3940,11 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
goto err;
block= blocks->block;
block->empty_space= row_pos.empty_space;
- block->org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap,
- org_empty_size);
+ block->org_bitmap_value=
+ _ma_free_size_to_head_pattern(&share->bitmap,
+ (enough_free_entries_on_page(share, buff) ?
+ org_empty_size : 0));
+
DBUG_ASSERT(block->org_bitmap_value ==
_ma_bitmap_get_page_bits(info, &info->s->bitmap, page));
block->used|= BLOCKUSED_USE_ORG_BITMAP;
@@ -3943,6 +3968,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
DBUG_RETURN(0);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_mark_file_crashed(share);
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
@@ -4100,11 +4126,11 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
{
MARIA_SHARE *share= info->s;
uint empty_space;
- uint block_size= share->block_size;
+ int res;
+ my_bool page_is_empty;
uchar *buff;
LSN lsn;
MARIA_PINNED_PAGE page_link;
- int res;
enum pagecache_page_lock lock_at_write, lock_at_unpin;
DBUG_ENTER("delete_head_or_tail");
DBUG_PRINT("enter", ("id: %lu (%lu:%u)",
@@ -4134,13 +4160,14 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
lock_at_unpin= PAGECACHE_LOCK_READ_UNLOCK;
}
- res= delete_dir_entry(buff, block_size, record_number, &empty_space);
+ res= delete_dir_entry(buff, share->block_size, record_number, &empty_space);
if (res < 0)
DBUG_RETURN(1);
if (res == 0) /* after our deletion, page is still not empty */
{
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ page_is_empty= 0;
if (share->now_transactional)
{
/* Log REDO data */
@@ -4161,6 +4188,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
}
else /* page is now empty */
{
+ page_is_empty= 1;
if (share->now_transactional)
{
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE];
@@ -4175,6 +4203,13 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
log_data, NULL))
DBUG_RETURN(1);
}
+ /*
+ Mark that this page must be written to disk by page cache, even
+ if we could call pagecache_delete() on it.
+ This is needed to ensure that repair finds the empty page on disk
+ and not old data.
+ */
+ pagecache_set_write_on_delete_by_link(page_link.link);
DBUG_ASSERT(empty_space >= share->bitmap.sizes[0]);
}
@@ -4192,8 +4227,8 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
If there is not enough space for all possible tails, mark the
page full
*/
- if (!head && !enough_free_entries(buff, share->block_size,
- 1 + share->base.blobs))
+ if (!head && !page_is_empty && !enough_free_entries(buff, share->block_size,
+ 1 + share->base.blobs))
empty_space= 0;
DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space));
@@ -4315,6 +4350,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
DBUG_RETURN(0);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_bitmap_flushable(info, -1);
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1);
@@ -4515,7 +4551,8 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
crashed:
- my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_PRINT("error", ("wrong extent information"));
DBUG_RETURN(0);
}
@@ -4660,7 +4697,12 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
{
cur_row->trid= transid_korr(data+1);
if (!info->trn)
- DBUG_RETURN(my_errno= HA_ERR_WRONG_IN_RECORD); /* File crashed */
+ {
+ /* File crashed */
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
+ }
if (!trnman_can_read_from(info->trn, cur_row->trid))
DBUG_RETURN(my_errno= HA_ERR_ROW_NOT_VISIBLE);
}
@@ -4928,7 +4970,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
goto err;
}
#ifdef EXTRA_DEBUG
- if (share->calc_checksum)
+ if (share->calc_checksum && !info->in_check_table)
{
/* Esnure that row checksum is correct */
DBUG_ASSERT(((share->calc_checksum)(info, record) & 255) ==
@@ -4939,9 +4981,11 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
DBUG_RETURN(0);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
/* Something was wrong with data on record */
DBUG_PRINT("error", ("Found record with wrong data"));
- DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
@@ -5077,6 +5121,7 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record,
DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE);
if (!(data= get_record_position(buff, block_size, offset, &end_of_data)))
{
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("Wrong directory entry in data block"));
my_errno= HA_ERR_RECORD_DELETED; /* File crashed */
DBUG_RETURN(HA_ERR_RECORD_DELETED);
@@ -5154,7 +5199,7 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info)
(uchar *) my_malloc(share->block_size * 2, MYF(MY_WME))))))
DBUG_RETURN(1);
info->scan.page_buff= info->scan.bitmap_buff + share->block_size;
- info->scan.bitmap_end= info->scan.bitmap_buff + share->bitmap.total_size;
+ info->scan.bitmap_end= info->scan.bitmap_buff + share->bitmap.max_total_size;
/* Set scan variables to get _ma_scan_block() to start with reading bitmap */
info->scan.number_of_rows= 0;
@@ -5307,7 +5352,7 @@ restart_record_read:
#ifdef SANITY_CHECKS
if (info->scan.dir < info->scan.dir_end)
{
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
goto err;
}
#endif
@@ -5391,7 +5436,8 @@ restart_bitmap_scan:
(uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0)
{
DBUG_PRINT("error", ("Wrong page header"));
- DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
DBUG_PRINT("info", ("Page %lu has %u rows",
(ulong) page, info->scan.number_of_rows));
@@ -5418,7 +5464,7 @@ restart_bitmap_scan:
/* Read next bitmap */
info->scan.bitmap_page+= share->bitmap.pages_covered;
filepos= (my_off_t) info->scan.bitmap_page * block_size;
- if (unlikely(filepos >= share->state.state.data_file_length))
+ if (unlikely(info->scan.bitmap_page >= info->scan.max_page))
{
DBUG_PRINT("info", ("Found end of file"));
DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
@@ -5436,8 +5482,10 @@ restart_bitmap_scan:
goto restart_bitmap_scan;
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("Wrong data on page"));
- DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
@@ -6319,6 +6367,12 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
empty_space-= (uint) data_length;
int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
+ /* Fix bitmap */
+ if (!enough_free_entries_on_page(share, buff))
+ empty_space= 0; /* Page is full */
+ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
+ goto err;
+
/*
If page was not read before, write it but keep it pinned.
We don't update its LSN When we have processed all REDOs for this page
@@ -6336,12 +6390,6 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
LSN_IMPOSSIBLE))
result= my_errno;
- /* Fix bitmap */
- if (!enough_free_entries_on_page(share, buff))
- empty_space= 0; /* Page is full */
- if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
- goto err;
-
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
@@ -6355,7 +6403,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
DBUG_RETURN(result);
crashed_file:
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
err:
error= my_errno;
if (unlock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED)
@@ -6364,7 +6412,7 @@ err:
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 0, FALSE);
_ma_mark_file_crashed(share);
- DBUG_ASSERT(0); /* catch recovery errors early */
+ DBUG_ASSERT(!maria_assert_if_crashed_table); /* catch recovery error early */
DBUG_RETURN((my_errno= error));
}
@@ -6443,7 +6491,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0)
{
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
goto err;
}
@@ -6467,7 +6515,7 @@ err:
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 0, FALSE);
_ma_mark_file_crashed(share);
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN((my_errno= error));
}
@@ -6479,7 +6527,13 @@ err:
@param info Maria handler
@param header Header (without FILEID)
- @note It marks the pages free in the bitmap
+ Mark the pages free in the bitmap.
+
+ We have to check against _ma_redo_not_needed_for_page()
+ to guard against the case where we first clear a block and after
+ that insert new data into the blocks. If we would unconditionally
+ clear the bitmap here, future changes would be ignored for the page
+ if it's not in the dirty list (ie, it would be flushed).
@return Operation status
@retval 0 OK
@@ -6488,19 +6542,25 @@ err:
uint _ma_apply_redo_free_blocks(MARIA_HA *info,
LSN lsn __attribute__((unused)),
+ LSN redo_lsn,
const uchar *header)
{
MARIA_SHARE *share= info->s;
uint ranges;
+ uint16 sid;
DBUG_ENTER("_ma_apply_redo_free_blocks");
share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
STATE_NOT_MOVABLE);
+ sid= fileid_korr(header);
+ header+= FILEID_STORE_SIZE;
ranges= pagerange_korr(header);
header+= PAGERANGE_STORE_SIZE;
DBUG_ASSERT(ranges > 0);
+ /** @todo leave bitmap lock to the bitmap code... */
+ mysql_mutex_lock(&share->bitmap.bitmap_lock);
while (ranges--)
{
my_bool res;
@@ -6517,18 +6577,22 @@ uint _ma_apply_redo_free_blocks(MARIA_HA *info,
DBUG_PRINT("info", ("page: %lu pages: %u", (long) page, page_range));
- /** @todo leave bitmap lock to the bitmap code... */
- mysql_mutex_lock(&share->bitmap.bitmap_lock);
- res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page,
- page_range);
- mysql_mutex_unlock(&share->bitmap.bitmap_lock);
- if (res)
+ for ( ; page_range-- ; start_page++)
{
- _ma_mark_file_crashed(share);
- DBUG_ASSERT(0);
- DBUG_RETURN(res);
+ if (_ma_redo_not_needed_for_page(sid, redo_lsn, start_page, FALSE))
+ continue;
+ res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page,
+ 1);
+ if (res)
+ {
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
+ _ma_mark_file_crashed(share);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+ DBUG_RETURN(res);
+ }
}
}
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
DBUG_RETURN(0);
}
@@ -6609,7 +6673,7 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
err:
_ma_mark_file_crashed(share);
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN(1);
}
@@ -6681,21 +6745,23 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
uint page_range;
pgcache_page_no_t page, start_page;
uchar *buff;
+ uint data_on_page= data_size;
start_page= page= page_korr(header);
header+= PAGE_STORE_SIZE;
page_range= pagerange_korr(header);
header+= PAGERANGE_STORE_SIZE;
- for (i= page_range; i-- > 0 ; page++)
+ for (i= page_range; i-- > 0 ; page++, data+= data_on_page)
{
MARIA_PINNED_PAGE page_link;
enum pagecache_page_lock unlock_method;
enum pagecache_page_pin unpin_method;
- uint length;
set_if_smaller(first_page2, page);
set_if_bigger(last_page2, page);
+ if (i == 0 && sub_ranges == 0)
+ data_on_page= data_size - empty_space; /* data on last page */
if (_ma_redo_not_needed_for_page(sid, redo_lsn, page, FALSE))
continue;
@@ -6758,7 +6824,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 0, FALSE);
- continue;
+ goto fix_bitmap;
}
DBUG_ASSERT((found_page_type == (uchar) BLOB_PAGE) ||
(found_page_type == (uchar) UNALLOCATED_PAGE));
@@ -6774,33 +6840,32 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
lsn_store(buff, lsn);
buff[PAGE_TYPE_OFFSET]= BLOB_PAGE;
- length= data_size;
- if (i == 0 && sub_ranges == 0)
+ if (data_on_page != data_size)
{
/*
Last page may be only partly filled. We zero the rest, like
write_full_pages() does.
*/
- length-= empty_space;
bzero(buff + share->block_size - PAGE_SUFFIX_SIZE - empty_space,
empty_space);
}
- memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, length);
- data+= length;
+ memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, data_on_page);
if (pagecache_write(share->pagecache,
&info->dfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
unlock_method, unpin_method,
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
goto err;
- }
+
+ fix_bitmap:
/** @todo leave bitmap lock to the bitmap code... */
- mysql_mutex_lock(&share->bitmap.bitmap_lock);
- res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, start_page,
- page_range);
- mysql_mutex_unlock(&share->bitmap.bitmap_lock);
- if (res)
- goto err;
+ mysql_mutex_lock(&share->bitmap.bitmap_lock);
+ res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, page,
+ 1);
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
+ if (res)
+ goto err;
+ }
}
}
*first_page= first_page2;
@@ -6809,7 +6874,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
err:
_ma_mark_file_crashed(share);
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN(1);
}
@@ -6879,6 +6944,7 @@ end:
DBUG_RETURN(res);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
res= 1;
_ma_mark_file_crashed(share);
goto end;
@@ -7117,6 +7183,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
DBUG_RETURN(0);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_mark_file_crashed(share);
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
@@ -7292,6 +7359,7 @@ end:
DBUG_RETURN(error);
err:
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
error= 1;
_ma_mark_file_crashed(share);
goto end;
diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h
index a5858880dd0..45f5613bb60 100644
--- a/storage/maria/ma_blockrec.h
+++ b/storage/maria/ma_blockrec.h
@@ -59,7 +59,6 @@
/* Minimum header size needed for a new row */
#define BASE_ROW_HEADER_SIZE FLAG_SIZE
-#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE
#define PAGE_TYPE_MASK 7
enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE };
@@ -78,6 +77,10 @@ enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_
#define ROW_FLAG_EXTENTS 128
#define ROW_FLAG_ALL (1+2+4+8+128)
+/* Size for buffer to hold information about bitmap */
+#define MAX_BITMAP_INFO_LENGTH ((MARIA_MAX_KEY_BLOCK_LENGTH*8/3)*(61*11/60)+10)
+
+
/******** Variables that affects how data pages are utilized ********/
/* Minium size of tail segment */
@@ -181,7 +184,10 @@ TRANSLOG_ADDRESS
maria_page_get_lsn(uchar *page, pgcache_page_no_t page_no, uchar* data_ptr);
/* ma_bitmap.c */
-my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
+extern const char *bits_to_txt[];
+
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file,
+ pgcache_page_no_t *last_page);
my_bool _ma_bitmap_end(MARIA_SHARE *share);
my_bool _ma_bitmap_flush(MARIA_SHARE *share);
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
@@ -206,8 +212,7 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row,
MARIA_BITMAP_BLOCKS *result_blocks);
my_bool _ma_check_bitmap_data(MARIA_HA *info,
enum en_page_type page_type,
- pgcache_page_no_t page,
- uint empty_space, uint *bitmap_pattern);
+ uint empty_space, uint bitmap_pattern);
my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
enum en_page_type page_type,
pgcache_page_no_t page,
@@ -225,6 +230,10 @@ void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file,
void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
pgcache_page_no_t page);
#endif
+void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap,
+ uchar *bitmap_data,
+ pgcache_page_no_t page,
+ char *out);
uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
uint page_type,
@@ -235,7 +244,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
uint page_type,
const uchar *header);
-uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn,
+uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn, LSN rec_lsn,
const uchar *header);
uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
const uchar *header);
diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c
index 36dfe7cbd54..829189baeed 100644
--- a/storage/maria/ma_cache.c
+++ b/storage/maria/ma_cache.c
@@ -35,8 +35,8 @@
#include "maria_def.h"
-my_bool _ma_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos,
- size_t length, uint flag)
+my_bool _ma_read_cache(MARIA_HA *handler, IO_CACHE *info, uchar *buff,
+ my_off_t pos, size_t length, uint flag)
{
size_t read_length,in_buff_length;
my_off_t offset;
@@ -98,7 +98,12 @@ my_bool _ma_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos,
("Error %d reading next-multi-part block (Got %d bytes)",
my_errno, (int) read_length));
if (!my_errno || my_errno == HA_ERR_FILE_TOO_SHORT)
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ {
+ if (!handler->in_check_table)
+ _ma_set_fatal_error(handler->s, HA_ERR_WRONG_IN_RECORD);
+ else
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ }
DBUG_RETURN(1);
}
bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length -
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index c6cff8ecd68..e183e715a6e 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -100,6 +100,9 @@ static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
static TrID max_trid_in_system(void);
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
void retry_if_quick(MARIA_SORT_PARAM *param, int error);
+static void print_bitmap_description(MARIA_SHARE *share,
+ pgcache_page_no_t page,
+ uchar *buff);
/* Initialize check param with default values */
@@ -122,6 +125,7 @@ void maria_chk_init(HA_CHECK *param)
param->max_record_length= LONGLONG_MAX;
param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ param->max_stage= 1;
}
@@ -231,14 +235,14 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
{
if (test_flag & T_VERBOSE) puts("");
_ma_check_print_error(param,"Can't read delete-link at filepos: %s",
- llstr(next_link,buff));
+ llstr(next_link,buff));
DBUG_RETURN(1);
}
if (*buff != '\0')
{
if (test_flag & T_VERBOSE) puts("");
_ma_check_print_error(param,"Record at pos: %s is not remove-marked",
- llstr(next_link,buff));
+ llstr(next_link,buff));
goto wrong;
}
if (share->options & HA_OPTION_PACK_RECORD)
@@ -247,7 +251,9 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
if (empty && prev_link != old_link)
{
if (test_flag & T_VERBOSE) puts("");
- _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2));
+ _ma_check_print_error(param,
+ "Deleted block at %s doesn't point back at previous delete link",
+ llstr(next_link,buff2));
goto wrong;
}
old_link=next_link;
@@ -266,23 +272,23 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
if (empty != share->state.state.empty)
{
_ma_check_print_warning(param,
- "Found %s deleted space in delete link chain. Should be %s",
- llstr(empty,buff2),
- llstr(share->state.state.empty,buff));
+ "Found %s deleted space in delete link chain. Should be %s",
+ llstr(empty,buff2),
+ llstr(share->state.state.empty,buff));
}
if (next_link != HA_OFFSET_ERROR)
{
_ma_check_print_error(param,
- "Found more than the expected %s deleted rows in delete link chain",
- llstr(share->state.state.del, buff));
+ "Found more than the expected %s deleted rows in delete link chain",
+ llstr(share->state.state.del, buff));
goto wrong;
}
if (i != 0)
{
_ma_check_print_error(param,
- "Found %s deleted rows in delete link chain. Should be %s",
- llstr(share->state.state.del - i, buff2),
- llstr(share->state.state.del, buff));
+ "Found %s deleted rows in delete link chain. Should be %s",
+ llstr(share->state.state.del - i, buff2),
+ llstr(share->state.state.del, buff));
goto wrong;
}
}
@@ -402,26 +408,34 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
if ((skr=(my_off_t) share->state.state.key_file_length) != size)
{
- /* Don't give error if file generated by mariapack */
+ /* Don't give error if file generated by maria_pack */
if (skr > size && maria_is_any_key_active(share->state.key_map))
{
error=1;
_ma_check_print_error(param,
- "Size of indexfile is: %-8s Should be: %s",
+ "Size of indexfile is: %-8s Expected: %s",
llstr(size,buff), llstr(skr,buff2));
+ share->state.state.key_file_length= size;
}
else if (!(param->testflag & T_VERY_SILENT))
_ma_check_print_warning(param,
- "Size of indexfile is: %-8s Should be: %s",
+ "Size of indexfile is: %-8s Expected: %s",
llstr(size,buff), llstr(skr,buff2));
}
- if (!(param->testflag & T_VERY_SILENT) &&
- ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
- ulonglong2double(share->state.state.key_file_length) >
- ulonglong2double(share->base.margin_key_file_length)*0.9)
+ if (size > share->base.max_key_file_length)
+ {
+ _ma_check_print_warning(param,
+ "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
+ ullstr(size,buff),
+ ullstr(share->base.max_key_file_length, buff2));
+ }
+ else if (!(param->testflag & T_VERY_SILENT) &&
+ ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(share->state.state.key_file_length) >
+ ulonglong2double(share->base.margin_key_file_length)*0.9)
_ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
- llstr(share->state.state.key_file_length,buff),
- llstr(share->base.max_key_file_length-1,buff));
+ llstr(share->state.state.key_file_length,buff),
+ llstr(share->base.max_key_file_length,buff));
size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
skr=(my_off_t) share->state.state.data_file_length;
@@ -434,28 +448,34 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
#endif
if (skr != size)
{
+ share->state.state.data_file_length=size; /* Skip other errors */
if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
{
- share->state.state.data_file_length=size; /* Skip other errors */
error=1;
- _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s",
+ _ma_check_print_error(param,"Size of datafile is: %-9s Expected: %s",
llstr(size,buff), llstr(skr,buff2));
param->testflag|=T_RETRY_WITHOUT_QUICK;
}
else
{
_ma_check_print_warning(param,
- "Size of datafile is: %-9s Should be: %s",
- llstr(size,buff), llstr(skr,buff2));
+ "Size of datafile is: %-9s Expected: %s",
+ llstr(size,buff), llstr(skr,buff2));
}
}
- if (!(param->testflag & T_VERY_SILENT) &&
- !(share->options & HA_OPTION_COMPRESS_RECORD) &&
- ulonglong2double(share->state.state.data_file_length) >
- (ulonglong2double(share->base.max_data_file_length)*0.9))
+ if (size > share->base.max_data_file_length)
+ {
+ _ma_check_print_warning(param,
+ "Size of datafile is: %-8s which is bigger than max datafile size: %s",
+ ullstr(size,buff),
+ ullstr(share->base.max_data_file_length, buff2));
+ } else if (!(param->testflag & T_VERY_SILENT) &&
+ !(share->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(share->state.state.data_file_length) >
+ (ulonglong2double(share->base.max_data_file_length)*0.9))
_ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
- llstr(share->state.state.data_file_length,buff),
- llstr(share->base.max_data_file_length-1,buff2));
+ llstr(share->state.state.data_file_length,buff),
+ llstr(share->base.max_data_file_length,buff2));
DBUG_RETURN(error);
} /* maria_chk_size */
@@ -511,6 +531,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
continue;
}
found_keys++;
+ _ma_report_progress(param, key, share->base.keys);
param->record_checksum=init_checksum;
@@ -878,6 +899,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
}
info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
+ info->lastinx= ~0; /* Safety */
tmp_key.data= tmp_key_buff;
for ( ;; )
{
@@ -993,10 +1015,12 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
/* fall through */
}
if ((share->data_file_type != BLOCK_RECORD &&
+ share->data_file_type != NO_RECORD &&
record >= share->state.state.data_file_length) ||
(share->data_file_type == BLOCK_RECORD &&
ma_recordpos_to_page(record) * share->base.min_block_length >=
- share->state.state.data_file_length))
+ share->state.state.data_file_length) ||
+ (share->data_file_type == NO_RECORD && record != 0))
{
#ifndef DBUG_OFF
char llbuff2[22], llbuff3[22];
@@ -1114,10 +1138,14 @@ static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
param->tmp_record_checksum+= (ha_checksum) start_recpos;
param->records++;
- if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
+ if (param->records % WRITE_COUNT == 0)
{
- printf("%s\r", llstr(param->records, llbuff));
- fflush(stdout);
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ printf("%s\r", llstr(param->records, llbuff));
+ fflush(stdout);
+ }
+ _ma_report_progress(param, param->records, share->state.state.records);
}
/* Check if keys match the record */
@@ -1131,6 +1159,7 @@ static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
{
(*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
start_recpos, 0);
+ info->last_key.keyinfo= key.keyinfo;
if (extend)
{
/* We don't need to lock the key tree here as we don't allow
@@ -1242,7 +1271,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
block_info.next_filepos=pos;
do
{
- if (_ma_read_cache(&param->read_cache, block_info.header,
+ if (_ma_read_cache(info, &param->read_cache, block_info.header,
(start_block=block_info.next_filepos),
sizeof(block_info.header),
(flag ? 0 : READING_NEXT) | READING_HEADER))
@@ -1260,7 +1289,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
llstr(start_block,llbuff));
DBUG_RETURN(1);
}
- b_type= _ma_get_block_info(&block_info,-1,start_block);
+ b_type= _ma_get_block_info(info, &block_info,-1,start_block);
if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
{
@@ -1356,7 +1385,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
got_error=1;
break;
}
- if (_ma_read_cache(&param->read_cache, to, block_info.filepos,
+ if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
(uint) block_info.data_len,
flag == 1 ? READING_NEXT : 0))
{
@@ -1459,7 +1488,7 @@ static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
if (_ma_killed_ptr(param))
DBUG_RETURN(-1);
- if (_ma_read_cache(&param->read_cache, block_info.header, pos,
+ if (_ma_read_cache(info, &param->read_cache, block_info.header, pos,
share->pack.ref_length, READING_NEXT))
{
_ma_check_print_error(param,
@@ -1484,7 +1513,7 @@ static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
got_error=1;
goto end;
}
- if (_ma_read_cache(&param->read_cache, info->rec_buff,
+ if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
block_info.filepos, block_info.rec_len, READING_NEXT))
{
_ma_check_print_error(param,
@@ -1794,7 +1823,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
char llbuff[22], llbuff2[22];
uint block_size= share->block_size;
ha_rows full_page_count, tail_count;
- my_bool full_dir;
+ my_bool full_dir, now_transactional;
uint offset_page, offset, free_count;
LINT_INIT(full_dir);
@@ -1805,6 +1834,10 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
my_errno);
return 1;
}
+
+ now_transactional= info->s->now_transactional;
+ info->s->now_transactional= 0; /* Don't log changes */
+
bitmap_buff= info->scan.bitmap_buff;
page_buff= info->scan.page_buff;
full_page_count= tail_count= 0;
@@ -1817,13 +1850,15 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
pos+= block_size, page++)
{
uint row_count, real_row_count, empty_space, page_type, bitmap_pattern;
+ uint bitmap_for_page;
LINT_INIT(row_count);
LINT_INIT(empty_space);
if (_ma_killed_ptr(param))
{
_ma_scan_end_block_record(info);
- return -1;
+ info->s->now_transactional= now_transactional;
+ return -1; /* Interrupted */
}
if ((page % share->bitmap.pages_covered) == 0)
{
@@ -1842,6 +1877,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
}
param->used+= block_size;
param->link_used+= block_size;
+ if (param->verbose > 2)
+ print_bitmap_description(share, page, bitmap_buff);
continue;
}
/* Skip pages marked as empty in bitmap */
@@ -1849,7 +1886,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
offset= offset_page & 7;
data= bitmap_buff + offset_page / 8;
bitmap_pattern= uint2korr(data);
- if (!((bitmap_pattern >> offset) & 7))
+ if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
{
param->empty+= block_size;
param->del_blocks++;
@@ -1872,8 +1909,9 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
{
_ma_check_print_error(param,
- "Page: %9s Found wrong page type %d",
- llstr(page, llbuff), page_type);
+ "Page: %9s Found wrong page type %d. Bitmap: %d '%s'",
+ llstr(page, llbuff), page_type,
+ bitmap_for_page, bits_to_txt[bitmap_for_page]);
if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
goto err;
continue;
@@ -1920,20 +1958,17 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
param->used+= block_size;
break;
}
- if (_ma_check_bitmap_data(info, page_type, page,
+ if (_ma_check_bitmap_data(info, page_type,
full_dir ? 0 : empty_space,
- &bitmap_pattern))
+ bitmap_for_page))
{
- if (bitmap_pattern == ~(uint) 0)
- _ma_check_print_error(param,
- "Page %9s: Wrong bitmap for data on page",
- llstr(page, llbuff));
- else
_ma_check_print_error(param,
"Page %9s: Wrong data in bitmap. Page_type: "
- "%d full: %d empty_space: %u Bitmap-bits: %d",
+ "%d full: %d empty_space: %u Bitmap-bits: %d "
+ "'%s'",
llstr(page, llbuff), page_type, full_dir,
- empty_space, bitmap_pattern);
+ empty_space, bitmap_for_page,
+ bits_to_txt[bitmap_for_page]);
if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
goto err;
}
@@ -1956,14 +1991,22 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
{
/* Not at end of bitmap */
uint bitmap_pattern;
+ uint byte_offset;
+
offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
offset= offset_page & 7;
- data= bitmap_buff + offset_page / 8;
+ byte_offset= offset_page / 8;
+ data= bitmap_buff + byte_offset;
bitmap_pattern= uint2korr(data);
+ if (byte_offset + 1 == share->bitmap.max_total_size)
+ {
+ /* On last byte of bitmap; Remove possible checksum */
+ bitmap_pattern&= 0xff;
+ }
if (((bitmap_pattern >> offset)) ||
- (data + 2 < bitmap_buff + share->bitmap.total_size &&
- _ma_check_if_zero(data+2, bitmap_buff + share->bitmap.total_size -
- data - 2)))
+ (byte_offset + 2 < share->bitmap.max_total_size &&
+ _ma_check_if_zero(data+2, share->bitmap.max_total_size -
+ byte_offset - 2)))
{
ulonglong bitmap_page;
bitmap_page= page / share->bitmap.pages_covered;
@@ -1991,10 +2034,12 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
llstr(param->tail_count, llbuff),
llstr(tail_count, llbuff2));
+ info->s->now_transactional= now_transactional;
return param->error_printed != 0;
err:
_ma_scan_end_block_record(info);
+ info->s->now_transactional= now_transactional;
return 1;
}
@@ -2034,6 +2079,8 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
bzero((char*) param->tmp_key_crc,
share->base.keys * sizeof(param->tmp_key_crc[0]));
+ info->in_check_table= 1; /* Don't assert on checksum errors */
+
switch (share->data_file_type) {
case BLOCK_RECORD:
error= check_block_record(param, info, extend, record);
@@ -2047,8 +2094,16 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
case COMPRESSED_RECORD:
error= check_compressed_record(param, info, extend, record);
break;
+ case NO_RECORD:
+ param->records= share->state.state.records;
+ param->record_checksum= 0;
+ extend= 1; /* No row checksums */
+ /* no data, nothing to do */
+ break;
} /* switch */
+ info->in_check_table= 0;
+
if (error)
goto err;
@@ -2065,23 +2120,23 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
llstr(share->state.state.records,llbuff2));
error=1;
}
- else if (param->record_checksum &&
+ if (param->record_checksum &&
param->record_checksum != param->tmp_record_checksum)
{
_ma_check_print_error(param,
"Key pointers and record positions doesn't match");
error=1;
}
- else if (param->glob_crc != share->state.state.checksum &&
- (share->options &
- (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
+ if (param->glob_crc != share->state.state.checksum &&
+ (share->options &
+ (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
{
_ma_check_print_warning(param,
"Record checksum is not the same as checksum "
"stored in the index file");
error=1;
}
- else if (!extend)
+ if (!extend)
{
uint key;
for (key=0 ; key < share->base.keys; key++)
@@ -2178,12 +2233,17 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
llstr(param->del_length, llbuff2));
printf("Empty space: %12s Linkdata: %10s\n",
llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
- if (param->lost)
- printf("Lost space: %12s", llstr(param->lost, llbuff));
- if (param->max_found_trid)
+ if (share->data_file_type == BLOCK_RECORD)
{
- printf("Max trans. id: %11s\n",
- llstr(param->max_found_trid, llbuff));
+ printf("Full pages: %12s Tail count: %12s\n",
+ llstr(param->full_page_count, llbuff),
+ llstr(param->tail_count, llbuff2));
+ printf("Lost space: %12s\n", llstr(param->lost, llbuff));
+ if (param->max_found_trid)
+ {
+ printf("Max trans. id: %11s\n",
+ llstr(param->max_found_trid, llbuff));
+ }
}
}
my_free(record);
@@ -2278,7 +2338,14 @@ static int initialize_variables_for_repair(HA_CHECK *param,
{
MARIA_SHARE *share= info->s;
- /* Ro allow us to restore state and check how state changed */
+ if (share->data_file_type == NO_RECORD)
+ {
+ _ma_check_print_error(param,
+ "Can't repair tables with record type NO_DATA");
+ return 1;
+ }
+
+ /* Allow us to restore state and check how state changed */
memcpy(org_share, share, sizeof(*share));
/* Repair code relies on share->state.state so we have to update it here */
@@ -2315,7 +2382,8 @@ static int initialize_variables_for_repair(HA_CHECK *param,
return 1;
/* calculate max_records */
- sort_info->filelength= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+ sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+ param->max_progress= sort_info->filelength;
if ((param->testflag & T_CREATE_MISSING_KEYS) ||
sort_info->org_data_file_type == COMPRESSED_RECORD)
sort_info->max_records= share->state.state.records;
@@ -2338,6 +2406,8 @@ static int initialize_variables_for_repair(HA_CHECK *param,
maria_ignore_trids(info);
/* Don't write transid's during repair */
maria_versioning(info, 0);
+ /* remember original number of rows */
+ *info->state= info->s->state.state;
return 0;
}
@@ -2536,11 +2606,12 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
if (!rep_quick)
{
/* Get real path for data file */
- if ((new_file= mysql_file_create(key_file_dfile, fn_format(param->temp_filename,
- share->data_file_name.str, "",
- DATA_TMP_EXT, 2+4),
- 0,param->tmpfile_createflag,
- MYF(0))) < 0)
+ if ((new_file= mysql_file_create(key_file_tmp,
+ fn_format(param->temp_filename,
+ share->data_file_name.str, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
{
_ma_check_print_error(param,"Can't create new tempfile: '%s'",
param->temp_filename);
@@ -2617,6 +2688,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
maria_lock_memory(param); /* Everything is alloced */
+ sort_param.sort_info->info->in_check_table= 1;
/* Re-create all keys, which are set in key_map. */
while (!(error=sort_get_next_record(&sort_param)))
{
@@ -2745,7 +2817,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
new_file= -1;
change_data_file_descriptor(info, -1);
if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
- DATA_TMP_EXT,
+ DATA_TMP_EXT, param->backup_time,
(param->testflag & T_BACKUP_DATA ?
MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
sync_dir) ||
@@ -2785,6 +2857,7 @@ err:
end_io_cache(&sort_info.new_info->rec_cache);
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ sort_param.sort_info->info->in_check_table= 0;
/* this below could fail, shouldn't we detect error? */
if (got_error)
{
@@ -2800,7 +2873,7 @@ err:
if (new_file >= 0)
{
mysql_file_close(new_file,MYF(0));
- mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME));
+ mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
}
maria_mark_crashed_on_repair(info);
}
@@ -3061,7 +3134,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
mysql_mutex_unlock(&share->intern_lock);
mysql_file_close(new_file, MYF(MY_WME));
if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
- INDEX_TMP_EXT, sync_dir) ||
+ INDEX_TMP_EXT, 0, sync_dir) ||
_ma_open_keyfile(share))
goto err2;
info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */
@@ -3094,7 +3167,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
err:
mysql_file_close(new_file, MYF(MY_WME));
err2:
- mysql_file_delete(key_file_dfile, param->temp_filename,MYF(MY_WME));
+ mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
DBUG_RETURN(-1);
} /* maria_sort_index */
@@ -3135,7 +3208,8 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
key.keyinfo= keyinfo;
if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
- keyinfo->maxlength)))
+ keyinfo->maxlength +
+ MARIA_INDEX_OVERHEAD_SIZE)))
{
_ma_check_print_error(param,"Not enough memory for key block");
DBUG_RETURN(-1);
@@ -3234,6 +3308,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
uint block_size= share->block_size;
my_bool zero_lsn= (share->base.born_transactional &&
!(param->testflag & T_ZEROFILL_KEEP_LSN));
+ int error= 1;
DBUG_ENTER("maria_zerofill_index");
if (!(param->testflag & T_SILENT))
@@ -3258,7 +3333,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
_ma_check_print_error(param,
"Page %9s: Got error %d when reading index file",
llstr(pos, llbuff), my_errno);
- DBUG_RETURN(1);
+ goto end;
}
if (zero_lsn)
bzero(buff, LSN_SIZE);
@@ -3266,7 +3341,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
if (share->base.born_transactional)
{
uint keynr= _ma_get_keynr(share, buff);
- if (keynr != MARIA_DELETE_KEY_NR)
+ if (keynr < share->base.keys)
{
MARIA_PAGE page;
DBUG_ASSERT(keynr < share->base.keys);
@@ -3278,7 +3353,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
"Page %9s: Got error %d when reading index "
"file",
llstr(pos, llbuff), my_errno);
- DBUG_RETURN(1);
+ goto end;
}
}
}
@@ -3292,10 +3367,13 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 1, FALSE);
}
+ error= 0; /* ok */
+
+end:
if (flush_pagecache_blocks(share->pagecache, &share->kfile,
FLUSH_FORCE_WRITE))
DBUG_RETURN(1);
- DBUG_RETURN(0);
+ DBUG_RETURN(error);
}
@@ -3458,7 +3536,7 @@ int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
_ma_tmp_disable_logging_for_table(info, 0);
if (!(error= (maria_zerofill_index(param, info, name) ||
maria_zerofill_data(param, info, name) ||
- _ma_set_uuid(info, 0))))
+ _ma_set_uuid(info->s, 0))))
{
/*
Mark that we have done zerofill of data and index. If we zeroed pages'
@@ -3494,20 +3572,15 @@ int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
*/
int maria_change_to_newfile(const char * filename, const char * old_ext,
- const char * new_ext, myf MyFlags)
+ const char * new_ext, time_t backup_time,
+ myf MyFlags)
{
char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
-#ifdef USE_RAID
- if (raid_chunks)
- return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4),
- fn_format(new_filename,filename,"",new_ext,2+4),
- raid_chunks,
- MYF(MY_WME | MY_LINK_WARNING | MyFlags));
-#endif
/* Get real path to filename */
(void) fn_format(old_filename,filename,"",old_ext,2+4+32);
return my_redel(old_filename,
fn_format(new_filename,old_filename,"",new_ext,2+4),
+ backup_time,
MYF(MY_WME | MY_LINK_WARNING | MyFlags));
} /* maria_change_to_newfile */
@@ -3569,7 +3642,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
const char * name, my_bool rep_quick)
{
int got_error;
- uint i;
+ uint i, keys_to_repair;
ha_rows start_records;
my_off_t new_header_length, org_header_length, del;
File new_file;
@@ -3610,11 +3683,12 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
if (!rep_quick)
{
/* Get real path for data file */
- if ((new_file=mysql_file_create(key_file_dfile, fn_format(param->temp_filename,
- share->data_file_name.str, "",
- DATA_TMP_EXT, 2+4),
- 0,param->tmpfile_createflag,
- MYF(0))) < 0)
+ if ((new_file=mysql_file_create(key_file_tmp,
+ fn_format(param->temp_filename,
+ share->data_file_name.str, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
{
_ma_check_print_error(param,"Can't create new tempfile: '%s'",
param->temp_filename);
@@ -3695,6 +3769,17 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
del=share->state.state.del;
+ /* Calculate number of keys to repair */
+ keys_to_repair= 0;
+ for (sort_param.key=0 ; sort_param.key < share->base.keys ;
+ sort_param.key++)
+ {
+ if (maria_is_key_active(key_map, sort_param.key))
+ keys_to_repair++;
+ }
+ /* For each key we scan and merge sort the keys */
+ param->max_stage= keys_to_repair*2;
+
rec_per_key_part= param->new_rec_per_key_part;
for (sort_param.key=0 ; sort_param.key < share->base.keys ;
rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
@@ -3815,6 +3900,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
/* Set for next loop */
sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
+ param->stage++; /* Next stage */
+ param->progress= 0;
+
if (param->testflag & T_STATISTICS)
maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
sort_param.unique,
@@ -3842,11 +3930,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_SAFE_REPAIR)
{
/* Don't repair if we loosed more than one row */
- if (share->state.state.records+1 < start_records)
+ if (sort_info.new_info->s->state.state.records+1 < start_records)
{
_ma_check_print_error(param,
- "Rows lost; Aborting because safe repair was "
- "requested");
+ "Rows lost (Found %lu of %lu); Aborting "
+ "because safe repair was requested",
+ (ulong) share->state.state.records,
+ (ulong) start_records);
share->state.state.records=start_records;
goto err;
}
@@ -3877,7 +3967,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
}
change_data_file_descriptor(info, -1);
if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
- DATA_TMP_EXT,
+ DATA_TMP_EXT, param->backup_time,
(param->testflag & T_BACKUP_DATA ?
MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
sync_dir) ||
@@ -3893,6 +3983,10 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
sort_info.org_data_file_type= share->data_file_type;
sort_info.filelength= share->state.state.data_file_length;
sort_param.fix_datafile=0;
+
+ /* Offsets are now in proportion to the new file length */
+ param->max_progress= sort_info.filelength;
+
}
else
share->state.state.data_file_length=sort_param.max_pos;
@@ -3981,7 +4075,7 @@ err:
if (new_file >= 0)
{
mysql_file_close(new_file, MYF(0));
- mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME));
+ mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
}
maria_mark_crashed_on_repair(info);
}
@@ -4166,12 +4260,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
if (!rep_quick)
{
/* Get real path for data file */
- if ((new_file= mysql_file_create(key_file_dfile, fn_format(param->temp_filename,
- share->data_file_name.str, "",
- DATA_TMP_EXT,
- 2+4),
- 0,param->tmpfile_createflag,
- MYF(0))) < 0)
+ if ((new_file= mysql_file_create(key_file_tmp,
+ fn_format(param->temp_filename,
+ share->data_file_name.str, "",
+ DATA_TMP_EXT,
+ 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
{
_ma_check_print_error(param,"Can't create new tempfile: '%s'",
param->temp_filename);
@@ -4403,8 +4498,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_SAFE_REPAIR)
{
/* Don't repair if we loosed more than one row */
- if (share->state.state.records+1 < start_records)
+ if (sort_info.new_info->s->state.state.records+1 < start_records)
{
+ _ma_check_print_error(param,
+ "Rows lost (Found %lu of %lu); Aborting "
+ "because safe repair was requested",
+ (ulong) share->state.state.records,
+ (ulong) start_records);
share->state.state.records=start_records;
goto err;
}
@@ -4500,7 +4600,7 @@ err:
mysql_file_close(new_file,MYF(0));
info->dfile.file= new_file= -1;
if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
- DATA_TMP_EXT,
+ DATA_TMP_EXT, param->backup_time,
MYF((param->testflag & T_BACKUP_DATA ?
MY_REDEL_MAKE_BACKUP : 0) |
sync_dir)) ||
@@ -4516,7 +4616,7 @@ err:
if (new_file >= 0)
{
mysql_file_close(new_file,MYF(0));
- mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME));
+ mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
if (info->dfile.file == new_file)
info->dfile.file= -1;
}
@@ -4685,6 +4785,11 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
if (_ma_killed_ptr(param))
DBUG_RETURN(1);
+ if (param->progress_counter++ >= WRITE_COUNT)
+ {
+ param->progress_counter= 0;
+ _ma_report_progress(param, param->progress, param->max_progress);
+ }
switch (sort_info->org_data_file_type) {
case BLOCK_RECORD:
@@ -4725,6 +4830,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
flag= HA_ERR_ROW_NOT_VISIBLE;
}
}
+ param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
+ share->block_size);
+
share->page_type= save_page_type;
if (!flag)
{
@@ -4757,7 +4865,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
DBUG_RETURN(-1);
}
/* Retry only if wrong record, not if disk error */
- if (flag != HA_ERR_WRONG_IN_RECORD)
+ if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC)
{
retry_if_quick(sort_param, flag);
DBUG_RETURN(flag);
@@ -4777,6 +4885,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
DBUG_RETURN(-1);
}
sort_param->start_recpos=sort_param->pos;
+ param->progress= sort_param->pos;
if (!sort_param->fix_datafile)
{
sort_param->current_filepos= sort_param->pos;
@@ -4804,6 +4913,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
LINT_INIT(to);
pos=sort_param->pos;
+ param->progress= pos;
searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
for (;;)
@@ -4832,7 +4942,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
_ma_check_print_info(param,"Block: %s used by record at %s",
llstr(param->search_after_block,llbuff),
llstr(sort_param->start_recpos,llbuff2));
- if (_ma_read_cache(&sort_param->read_cache,
+ if (_ma_read_cache(info, &sort_param->read_cache,
block_info.header, pos,
MARIA_BLOCK_INFO_HEADER_LENGTH,
(! found_record ? READING_NEXT : 0) |
@@ -4854,7 +4964,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
param->testflag|=T_RETRY_WITHOUT_QUICK;
DBUG_RETURN(1); /* Something wrong with data */
}
- b_type= _ma_get_block_info(&block_info,-1,pos);
+ b_type= _ma_get_block_info(info, &block_info,-1,pos);
if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
((b_type & BLOCK_FIRST) &&
(block_info.rec_len < (uint) share->base.min_pack_length ||
@@ -5045,7 +5155,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
}
if (block_info.data_len &&
- _ma_read_cache(&sort_param->read_cache,to,block_info.filepos,
+ _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
block_info.data_len,
(found_record == 1 ? READING_NEXT : 0) |
parallel_flag))
@@ -5113,9 +5223,10 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
}
case COMPRESSED_RECORD:
+ param->progress= sort_param->pos;
for (searching=0 ;; searching=1, sort_param->pos++)
{
- if (_ma_read_cache(&sort_param->read_cache, block_info.header,
+ if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
sort_param->pos,
share->pack.ref_length,READING_NEXT))
DBUG_RETURN(-1);
@@ -5147,7 +5258,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
llstr(sort_param->pos,llbuff));
continue;
}
- if (_ma_read_cache(&sort_param->read_cache, sort_param->rec_buff,
+ if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
block_info.filepos, block_info.rec_len,
READING_NEXT))
{
@@ -5187,8 +5298,10 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
DBUG_RETURN(0);
}
+ case NO_RECORD:
+ DBUG_RETURN(1); /* Impossible */
}
- DBUG_RETURN(1); /* Impossible */
+ DBUG_RETURN(1); /* Impossible */
}
@@ -5231,7 +5344,10 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
if ((sort_param->current_filepos=
(*share->write_record_init)(info, sort_param->record)) ==
HA_OFFSET_ERROR)
+ {
+ _ma_check_print_error(param, "%d when writing to datafile", my_errno);
DBUG_RETURN(1);
+ }
/* Pointer to end of file */
sort_param->filepos= share->state.state.data_file_length;
break;
@@ -5308,6 +5424,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
sort_param->filepos+=reclength+length;
share->state.split++;
break;
+ case NO_RECORD:
+ DBUG_RETURN(1); /* Impossible */
}
}
if (sort_param->master)
@@ -5851,6 +5969,9 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
MARIA_CREATE_INFO create_info;
DBUG_ENTER("maria_recreate_table");
+ if ((!(param->testflag & T_SILENT)))
+ printf("Recreating table '%s'\n", param->isam_file_name);
+
error=1; /* Default error */
info= **org_info;
status_info= (*org_info)->state[0];
@@ -5996,7 +6117,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
(*org_info)->s->state.state.records= info.state->records;
if (share.state.create_time)
(*org_info)->s->state.create_time=share.state.create_time;
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
(*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
#endif
(*org_info)->s->state.state.checksum= info.state->checksum;
@@ -6051,6 +6172,7 @@ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
{
share->state.open_count=0;
share->global_changed=0;
+ share->changed= 1;
}
if (update & UPDATE_STAT)
{
@@ -6078,7 +6200,6 @@ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
MA_STATE_INFO_WRITE_FULL_INFO))
goto err;
- share->changed=0;
}
{ /* Force update of status */
int error;
@@ -6447,6 +6568,9 @@ static void change_data_file_descriptor(MARIA_HA *info, File new_file)
static void unuse_data_file_descriptor(MARIA_HA *info)
{
+ (void) flush_pagecache_blocks(info->s->pagecache, /* Result is deliberately ignored */
+ &info->s->bitmap.file,
+ FLUSH_IGNORE_CHANGED); /* NOTE(review): presumably drops changed pages unwritten; confirm */
info->dfile.file= info->s->bitmap.file.file= -1; /* Handle and share bitmap file both get -1 */
_ma_bitmap_reset_cache(info->s);
}
@@ -6473,6 +6597,17 @@ static void copy_data_file_state(MARIA_STATE_INFO *to,
}
+/*
+  Return 1 if block is full of zeros
+
+  Scans length bytes starting at tmp and stops at the first non-zero
+  byte. A length of 0 gives 1, as the loop body then never runs.
+
+  @param tmp     Start of the block to test
+  @param length  Number of bytes to test
+
+  @retval 1  All tested bytes are zero
+  @retval 0  At least one tested byte is not zero
+*/
+
+static my_bool zero_filled_block(uchar *tmp, uint length)
+{
+ while (length--)
+ if (*(tmp++) != 0)
+ return 0; /* Found a non-zero byte */
+ return 1;
+}
+
+
/*
Read 'safely' next record while scanning table.
@@ -6574,9 +6709,21 @@ read_next_page:
{
if (my_errno == HA_ERR_WRONG_CRC)
{
- _ma_check_print_info(sort_info->param,
- "Wrong CRC on datapage at %s",
- llstr(page, llbuff));
+ /*
+ Don't give errors for zero filled blocks. These can
+ sometimes be found at end of a bitmap when we wrote a big
+ record last that was moved to the next bitmap.
+ */
+ if (!zero_filled_block(info->scan.page_buff, share->block_size) ||
+ _ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
+ _ma_bitmap_get_page_bits(info,
+ &share->bitmap,
+ page)))
+ {
+ _ma_check_print_info(sort_info->param,
+ "Wrong CRC on datapage at %s",
+ llstr(page, llbuff));
+ }
continue;
}
DBUG_RETURN(my_errno);
@@ -6802,3 +6949,17 @@ void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
param->testflag|=T_RETRY_WITHOUT_QUICK;
}
}
+
+/*
+  Print information about bitmap page
+
+  Asks _ma_get_bitmap_description() to describe the page into a temporary
+  buffer of MAX_BITMAP_INFO_LENGTH bytes, prints the page number and that
+  description with printf(), and frees the buffer.
+
+  @param share        Share whose bitmap member is handed to the describer
+  @param page         Page number; printed and passed on to the describer
+  @param bitmap_data  Passed on unchanged; presumably the raw contents of
+                      the bitmap page (TODO confirm against the caller)
+
+  @note Prints nothing if the buffer cannot be allocated.
+*/
+
+static void print_bitmap_description(MARIA_SHARE *share,
+ pgcache_page_no_t page,
+ uchar *bitmap_data)
+{
+ char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME));
+ if (!tmp)
+ return; /* Allocation failed; nothing to print */
+ _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp);
+ printf("Bitmap page %lu\n%s", (ulong) page, tmp);
+ my_free(tmp);
+}
diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h
index d692b2de94c..3ac8cdb5e38 100644
--- a/storage/maria/ma_check_standalone.h
+++ b/storage/maria/ma_check_standalone.h
@@ -45,6 +45,13 @@ int _ma_killed_ptr(HA_CHECK *param __attribute__((unused)))
return 0;
}
+
+void _ma_report_progress(HA_CHECK *param __attribute__((unused)),
+ ulonglong progress __attribute__((unused)),
+ ulonglong max_progress __attribute__((unused)))
+{ /* Intentionally empty: all three arguments are marked unused, so
+     progress reports are discarded by this implementation */
+}
+
/* print warnings and errors */
/* VARARGS */
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 6ced2976c29..71f03f4db16 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -130,6 +130,9 @@ int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait)
/* from then on, we are sure to be and stay the only checkpointer */
result= really_execute_checkpoint();
+ DBUG_EXECUTE_IF("maria_crash_after_checkpoint",
+ { DBUG_PRINT("maria_crash", ("now")); DBUG_ABORT(); });
+
mysql_cond_broadcast(&COND_checkpoint);
end:
DBUG_RETURN(result);
@@ -531,10 +534,12 @@ filter_flush_file_evenly(enum pagecache_page_type type,
risk could be that while a checkpoint happens no LRD flushing happens.
*/
+static uint maria_checkpoint_min_activity= 2*1024*1024;
+
+
pthread_handler_t ma_checkpoint_background(void *arg)
{
/** @brief At least this of log/page bytes written between checkpoints */
- const uint checkpoint_min_activity= 2*1024*1024;
/*
If the interval could be changed by the user while we are in this thread,
it could be annoying: for example it could cause "case 2" to be executed
@@ -574,6 +579,12 @@ pthread_handler_t ma_checkpoint_background(void *arg)
switch (sleeps % interval)
{
case 0:
+ /* If checkpoints are disabled, wait 1 second and try again */
+ if (maria_checkpoint_disabled)
+ {
+ sleep_time= 1;
+ break;
+ }
/*
With background flushing evenly distributed over the time
between two checkpoints, we should have only little flushing to do
@@ -586,12 +597,13 @@ pthread_handler_t ma_checkpoint_background(void *arg)
would decrease the amount of read pages in recovery).
In case of one short statement per minute (very low load), we don't
want to checkpoint every minute, hence the positive
- checkpoint_min_activity.
+ maria_checkpoint_min_activity.
*/
+
if (((translog_get_horizon() - log_horizon_at_last_checkpoint) +
(maria_pagecache->global_cache_write -
pagecache_flushes_at_last_checkpoint) *
- maria_pagecache->block_size) < checkpoint_min_activity)
+ maria_pagecache->block_size) < maria_checkpoint_min_activity)
{
/* don't take checkpoint, so don't know what to flush */
pages_to_flush_before_next_checkpoint= 0;
@@ -1009,17 +1021,25 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
possible that Recovery does not start from before the REDO and thus
the state is not recovered. A solution may be to set
share->changed=1 under log mutex when writing log records.
- But as anyway we have another problem below, this optimization would
- be of little use.
+
+ The current solution is to keep a copy of the last saved state and
+ not write the state if it is the same as last time. It's ok if
+ is_of_horizon is different on disk as long as all other data is
+ the same.
*/
- /** @todo flush state only if changed since last checkpoint */
DBUG_ASSERT(share->last_version != 0);
state_copy->state.is_of_horizon= share->state.is_of_horizon=
- state_copies_horizon;
- if (kfile.file >= 0)
+ share->checkpoint_state.is_of_horizon= state_copies_horizon;
+ if (kfile.file >= 0 && memcmp(&share->checkpoint_state,
+ &state_copy->state,
+ sizeof(state_copy->state)))
+ {
sync_error|=
_ma_state_info_write_sub(kfile.file, &state_copy->state,
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET);
+ memcpy(&share->checkpoint_state,
+ &state_copy->state, sizeof(state_copy->state));
+ }
/*
We don't set share->changed=0 because it may interfere with a
concurrent _ma_writeinfo() doing share->changed=1 (cancel its
@@ -1028,6 +1048,14 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
*/
}
}
+#ifdef EXTRA_DEBUG_BITMAP
+ else
+ {
+ DBUG_ASSERT(share->bitmap.changed == 0 &&
+ share->bitmap.changed_not_flushed == 0);
+ }
+#endif
+
/*
_ma_bitmap_flush_all() may wait, so don't keep intern_lock as
otherwise this would deadlock with allocate_and_write_block_record()
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index 88d63252693..a29fe607d6e 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -28,7 +28,8 @@ int maria_close(register MARIA_HA *info)
my_bool share_can_be_freed= FALSE;
MARIA_SHARE *share= info->s;
DBUG_ENTER("maria_close");
- DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u",
+ DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u",
+ share->open_file_name.str,
(long) info, (uint) share->reopen,
(uint) share->tot_locks));
@@ -39,9 +40,6 @@ int maria_close(register MARIA_HA *info)
if (info->lock_type == F_EXTRA_LCK)
info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
- if (share->reopen == 1 && share->kfile.file >= 0)
- _ma_decrement_open_count(info);
-
if (info->lock_type != F_UNLCK)
{
if (maria_lock_database(info,F_UNLCK))
@@ -76,6 +74,11 @@ int maria_close(register MARIA_HA *info)
if (share->kfile.file >= 0)
{
+ my_bool save_global_changed= share->global_changed;
+
+ /* Avoid _ma_mark_file_changed() when flushing pages */
+ share->global_changed= 1;
+
if ((*share->once_end)(share))
error= my_errno;
if (flush_pagecache_blocks(share->pagecache, &share->kfile,
@@ -97,6 +100,16 @@ int maria_close(register MARIA_HA *info)
if (((share->changed && share->base.born_transactional) ||
maria_is_crashed(info)))
{
+ if (save_global_changed)
+ {
+ /*
+ Reset effect of _ma_mark_file_changed(). Better to do it
+ here than in _ma_decrement_open_count(), as
+ _ma_state_info_write() will write the open_count.
+ */
+ save_global_changed= 0;
+ share->state.open_count--;
+ }
/*
State must be written to file as it was not done at table's
unlocking.
@@ -104,6 +117,19 @@ int maria_close(register MARIA_HA *info)
if (_ma_state_info_write(share, MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
error= my_errno;
}
+ DBUG_ASSERT(maria_is_crashed(info) || !share->base.born_transactional ||
+ share->state.open_count == 0 ||
+ share->open_count_not_zero_on_open);
+
+ /* Ensure that open_count is zero on close */
+ share->global_changed= save_global_changed;
+ _ma_decrement_open_count(info, 0);
+
+ /* Ensure that open_count really is zero */
+ DBUG_ASSERT(maria_is_crashed(info) || share->temporary ||
+ share->state.open_count == 0 ||
+ share->open_count_not_zero_on_open);
+
/*
File must be synced as it is going out of the maria_open_list and so
becoming unknown to future Checkpoints.
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index 467c977da07..c89700be29a 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -204,7 +204,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
pack_reclength++;
not_block_record_extra_length++;
max_field_lengths++;
- packed++;
+ if (datafile_type != DYNAMIC_RECORD)
+ packed++;
column->fill_length= 1;
options|= HA_OPTION_NULL_FIELDS; /* Use ma_checksum() */
@@ -250,10 +251,16 @@ int maria_create(const char *name, enum data_file_type datafile_type,
datafile_type= BLOCK_RECORD;
}
+ if (datafile_type == NO_RECORD && uniques)
+ {
+ /* Can't do unique without data, revert to block records */
+ datafile_type= BLOCK_RECORD;
+ }
+
if (datafile_type == DYNAMIC_RECORD)
options|= HA_OPTION_PACK_RECORD; /* Must use packed records */
- if (datafile_type == STATIC_RECORD)
+ if (datafile_type == STATIC_RECORD || datafile_type == NO_RECORD)
{
/* We can't use checksum with static length rows */
flags&= ~HA_CREATE_CHECKSUM;
@@ -319,7 +326,15 @@ int maria_create(const char *name, enum data_file_type datafile_type,
(~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength)
ci->data_file_length= ~(ulonglong) 0;
else
- ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength;
+ {
+ ci->data_file_length= _ma_safe_mul(ci->max_rows, pack_reclength);
+ if (datafile_type == BLOCK_RECORD)
+ {
+ /* Assume that blocks are only half full (very pessimistic!) */
+ ci->data_file_length= _ma_safe_mul(ci->data_file_length, 2);
+ set_if_bigger(ci->data_file_length, maria_block_size*2);
+ }
+ }
}
else if (!ci->max_rows)
{
@@ -331,7 +346,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
ulonglong data_file_length= ci->data_file_length;
if (!data_file_length)
data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) *
- 8)) -1) * maria_block_size);
+ 8))/2 -1) * maria_block_size);
if (rows_per_page > 0)
{
set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE);
@@ -353,11 +368,11 @@ int maria_create(const char *name, enum data_file_type datafile_type,
{
/*
The + 1 is for the record position within the page
- The / 2 is because we need one bit for knowing if there is transid's
+ The * 2 is because we need one bit for knowing if there are transids
after the row pointer
*/
pointer= maria_get_pointer_length((ci->data_file_length /
- (maria_block_size * 2)), 3) + 1;
+ maria_block_size) * 2, 3) + 1;
set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE);
if (!max_rows)
@@ -366,7 +381,9 @@ int maria_create(const char *name, enum data_file_type datafile_type,
}
else
{
- if (datafile_type != STATIC_RECORD)
+ if (datafile_type == NO_RECORD)
+ pointer= 0;
+ else if (datafile_type != STATIC_RECORD)
pointer= maria_get_pointer_length(ci->data_file_length,
maria_data_pointer_size);
else
@@ -676,7 +693,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.state.dellink = HA_OFFSET_ERROR;
share.state.first_bitmap_with_space= 0;
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
share.state.process= (ulong) getpid();
#endif
share.state.version= (ulong) time((time_t*) 0);
@@ -1392,7 +1409,13 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid,
share->state.skip_redo_lsn= share->state.is_of_horizon= lsn;
share->state.create_trid= create_trid;
mi_int8store(trid_buff, create_trid);
- if (update_create_rename_lsn)
+
+ /*
+ Update create_rename_lsn if update was requested or if the old one had an
+ impossible value.
+ */
+ if (update_create_rename_lsn ||
+ (share->state.create_rename_lsn > lsn && lsn != LSN_IMPOSSIBLE))
{
share->state.create_rename_lsn= lsn;
if (share->id != 0)
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
index 22ffb05af38..fa2ee166b7c 100644
--- a/storage/maria/ma_delete.c
+++ b/storage/maria/ma_delete.c
@@ -63,7 +63,7 @@ int maria_delete(MARIA_HA *info,const uchar *record)
if ((*share->compare_record)(info,record))
goto err; /* Error on read-check */
- if (_ma_mark_file_changed(info))
+ if (_ma_mark_file_changed(share))
goto err;
/* Ensure we don't change the autoincrement value */
@@ -134,17 +134,12 @@ err:
save_errno= HA_ERR_INTERNAL_ERROR; /* Should never happen */
mi_sizestore(lastpos, info->cur_row.lastpos);
- if (save_errno != HA_ERR_RECORD_CHANGED)
- {
- maria_print_error(share, HA_ERR_CRASHED);
- maria_mark_crashed(info); /* mark table crashed */
- }
- _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE);
+ (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
info->update|=HA_STATE_WRITTEN; /* Buffer changed */
- if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ if (save_errno != HA_ERR_RECORD_CHANGED)
{
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
+ save_errno= HA_ERR_CRASHED;
}
DBUG_RETURN(my_errno= save_errno);
} /* maria_delete */
@@ -209,7 +204,7 @@ my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key,
if ((old_root=*root) == HA_OFFSET_ERROR)
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(info->s, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
@@ -344,7 +339,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag,
if (!(tmp_key_length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag,
&kpos)))
{
- my_errno= HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(-1);
}
root= _ma_row_pos_from_key(&tmp_key);
@@ -406,8 +401,9 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag,
{
if (!nod_flag)
{
+ /* This should never happen */
DBUG_PRINT("error",("Didn't find key"));
- my_errno=HA_ERR_CRASHED; /* This should newer happend */
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
goto err;
}
save_flag=0;
@@ -571,6 +567,7 @@ static int del(MARIA_HA *info, MARIA_KEY *key,
endpos= leaf_page->buff + leaf_length;
tmp_key.keyinfo= keyinfo;
tmp_key.data= keybuff;
+ next_buff= 0;
if (!(key_start= _ma_get_last_key(&tmp_key, leaf_page, endpos)))
DBUG_RETURN(-1);
@@ -597,9 +594,11 @@ static int del(MARIA_HA *info, MARIA_KEY *key,
/* underflow writes "next_page" to disk */
ret_value= underflow(info, keyinfo, leaf_page, &next_page,
endpos);
- if (ret_value == 0 && leaf_page->size >
- share->max_index_block_size)
+ if (ret_value < 0)
+ goto err;
+ if (leaf_page->size > share->max_index_block_size)
{
+ DBUG_ASSERT(ret_value == 0);
ret_value= (_ma_split_page(info, key, leaf_page,
share->max_index_block_size,
(uchar*) 0, 0, 0,
@@ -632,6 +631,7 @@ static int del(MARIA_HA *info, MARIA_KEY *key,
goto err;
}
my_afree(next_buff);
+ DBUG_ASSERT(leaf_page->size <= share->max_index_block_size);
DBUG_RETURN(ret_value);
}
@@ -709,10 +709,14 @@ static int del(MARIA_HA *info, MARIA_KEY *key,
KEY_OP_DEBUG_LOG_ADD_2))
goto err;
+ DBUG_ASSERT(leaf_page->size <= share->max_index_block_size);
DBUG_RETURN(new_leaf_length <=
(info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
(uint) keyinfo->underflow_block_length));
err:
+ if (next_buff)
+ my_afree(next_buff);
+
DBUG_RETURN(-1);
} /* del */
@@ -731,9 +735,18 @@ err:
leaf_page is saved to disk
Caller must save anc_buff
+ For the algorithm to work, we have to ensure for packed keys that
+ key_length + (underflow_length + max_block_length + key_length) / 2
+ <= block_length.
+ From which follows that underflow_length <= block_length - key_length *3
+ For not packed keys we have:
+ (underflow_length + max_block_length + key_length) / 2 <= block_length
+ From which follows that underflow_length < block_length - key_length
+ This is ensured by setting of underflow_block_length.
+
@return
@retval 0 ok
- @retval 1 ok, but anc_buff did underflow
+ @retval 1 ok, but anc_page did underflow
@retval -1 error
*/
@@ -1153,7 +1166,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
_ma_kpointer(info,leaf_key.data + leaf_key.data_length +
leaf_key.ref_length, leaf_page->pos);
- /* Save key in anc_page */
+ /* Save parting key found by _ma_find_half_pos() in anc_page */
DBUG_DUMP("anc_buff", anc_buff, new_anc_length);
DBUG_DUMP_KEY("key_to_anc", &leaf_key);
anc_end_pos= anc_buff + new_anc_length;
@@ -1191,6 +1204,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
bmove(leaf_buff+p_length+t_length, half_pos, tmp_length);
(*keyinfo->store_key)(keyinfo,leaf_buff+p_length, &key_inserted);
new_leaf_length= tmp_length + t_length + p_length;
+ DBUG_ASSERT(new_leaf_length <= share->max_index_block_size);
leaf_page->size= new_leaf_length;
leaf_page->flag= page_flag;
@@ -1232,7 +1246,6 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
/*
Log changes to next page
This contains original data with some suffix data deleted
-
*/
DBUG_ASSERT(new_buff_length <= buff_length);
if (_ma_log_suffix(&next_page, buff_length, new_buff_length))
diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c
index 3716e679bb1..b5bb9d3ddf5 100644
--- a/storage/maria/ma_delete_all.c
+++ b/storage/maria/ma_delete_all.c
@@ -52,8 +52,6 @@ int maria_delete_all_rows(MARIA_HA *info)
if (_ma_readinfo(info,F_WRLCK,1))
DBUG_RETURN(my_errno);
log_record= share->now_transactional && !share->temporary;
- if (_ma_mark_file_changed(info))
- goto err;
if (log_record)
{
@@ -75,14 +73,19 @@ int maria_delete_all_rows(MARIA_HA *info)
If we fail in this function after this point, log and table will be
inconsistent.
*/
+ if (_ma_mark_file_changed(share))
+ goto err;
}
else
{
+ if (_ma_mark_file_changed(share))
+ goto err;
/* Other branch called function below when writing log record, in hook */
_ma_reset_status(info);
}
/* Remove old history as the table is now empty for everyone */
_ma_reset_state(info);
+ share->state.changed= 0;
/*
If we are using delayed keys or if the user has done changes to the tables
@@ -178,6 +181,10 @@ void _ma_reset_status(MARIA_HA *info)
state->state.data_file_length= 0;
state->state.empty= state->state.key_empty= 0;
state->state.checksum= 0;
+ share->state.open_count= 0;
+ share->global_changed= 0;
+
+ share->changed= 1; /* We must write state */
*info->state= state->state;
diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c
index f8b7eefb4ad..9e91638fa27 100644
--- a/storage/maria/ma_delete_table.c
+++ b/storage/maria/ma_delete_table.c
@@ -28,10 +28,6 @@
int maria_delete_table(const char *name)
{
- char from[FN_REFLEN];
-#ifdef USE_RAID
- uint raid_type=0,raid_chunks=0;
-#endif
MARIA_HA *info;
myf sync_dir;
DBUG_ENTER("maria_delete_table");
@@ -53,17 +49,10 @@ int maria_delete_table(const char *name)
*/
if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR)))
{
-#ifdef USE_RAID
- raid_type= 0;
-#endif
sync_dir= 0;
}
else
{
-#ifdef USE_RAID
- raid_type= info->s->base.raid_type;
- raid_chunks= info->s->base.raid_chunks;
-#endif
sync_dir= (info->s->now_transactional && !info->s->temporary &&
!maria_in_recovery) ?
MY_SYNC_DIR : 0;
@@ -93,6 +82,15 @@ int maria_delete_table(const char *name)
DBUG_RETURN(1);
}
+ DBUG_RETURN(maria_delete_table_files(name, sync_dir));
+}
+
+
+int maria_delete_table_files(const char *name, myf sync_dir)
+{
+ char from[FN_REFLEN];
+ DBUG_ENTER("maria_delete_table_files");
+
fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
if (mysql_file_delete_with_symlink(key_file_kfile, from,
MYF(MY_WME | sync_dir)))
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
index 60423507792..cc03d621a26 100644
--- a/storage/maria/ma_dynrec.c
+++ b/storage/maria/ma_dynrec.c
@@ -389,12 +389,12 @@ static int _ma_find_writepos(MARIA_HA *info,
*filepos=info->s->state.dellink;
block_info.second_read=0;
info->rec_cache.seek_not_done=1;
- if (!(_ma_get_block_info(&block_info, info->dfile.file,
+ if (!(_ma_get_block_info(info, &block_info, info->dfile.file,
info->s->state.dellink) &
BLOCK_DELETED))
{
DBUG_PRINT("error",("Delete link crashed"));
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(-1);
}
info->s->state.dellink=block_info.next_filepos;
@@ -450,7 +450,8 @@ static my_bool unlink_deleted_block(MARIA_HA *info,
MARIA_BLOCK_INFO tmp;
tmp.second_read=0;
/* Unlink block from the previous block */
- if (!(_ma_get_block_info(&tmp, info->dfile.file, block_info->prev_filepos)
+ if (!(_ma_get_block_info(info, &tmp, info->dfile.file,
+ block_info->prev_filepos)
& BLOCK_DELETED))
DBUG_RETURN(1); /* Something is wrong */
mi_sizestore(tmp.header+4,block_info->next_filepos);
@@ -460,7 +461,7 @@ static my_bool unlink_deleted_block(MARIA_HA *info,
/* Unlink block from next block */
if (block_info->next_filepos != HA_OFFSET_ERROR)
{
- if (!(_ma_get_block_info(&tmp, info->dfile.file,
+ if (!(_ma_get_block_info(info, &tmp, info->dfile.file,
block_info->next_filepos)
& BLOCK_DELETED))
DBUG_RETURN(1); /* Something is wrong */
@@ -512,7 +513,7 @@ static my_bool update_backward_delete_link(MARIA_HA *info,
if (delete_block != HA_OFFSET_ERROR)
{
block_info.second_read=0;
- if (_ma_get_block_info(&block_info, info->dfile.file, delete_block)
+ if (_ma_get_block_info(info, &block_info, info->dfile.file, delete_block)
& BLOCK_DELETED)
{
uchar buff[8];
@@ -522,7 +523,7 @@ static my_bool update_backward_delete_link(MARIA_HA *info,
}
else
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(1); /* Wrong delete link */
}
}
@@ -548,19 +549,21 @@ static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
do
{
/* Remove block at 'filepos' */
- if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos))
+ if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file,
+ filepos))
& (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR) ||
(length=(uint) (block_info.filepos-filepos) +block_info.block_len) <
MARIA_MIN_BLOCK_LENGTH)
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(1);
}
/* Check if next block is a delete block */
del_block.second_read=0;
remove_next_block=0;
- if (_ma_get_block_info(&del_block, info->dfile.file, filepos + length) &
+ if (_ma_get_block_info(info, &del_block, info->dfile.file,
+ filepos + length) &
BLOCK_DELETED && del_block.block_len+length <
MARIA_DYN_MAX_BLOCK_LENGTH)
{
@@ -720,7 +723,7 @@ int _ma_write_part_record(MARIA_HA *info,
if (next_block < info->state->data_file_length &&
info->s->state.dellink != HA_OFFSET_ERROR)
{
- if ((_ma_get_block_info(&del_block, info->dfile.file, next_block)
+ if ((_ma_get_block_info(info, &del_block, info->dfile.file, next_block)
& BLOCK_DELETED) &&
res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH)
{
@@ -832,13 +835,14 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
if (filepos != info->s->state.dellink)
{
block_info.next_filepos= HA_OFFSET_ERROR;
- if ((error= _ma_get_block_info(&block_info, info->dfile.file, filepos))
+ if ((error= _ma_get_block_info(info, &block_info, info->dfile.file,
+ filepos))
& (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
{
DBUG_PRINT("error",("Got wrong block info"));
if (!(error & BLOCK_FATAL_ERROR))
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
goto err;
}
length=(ulong) (block_info.filepos-filepos) + block_info.block_len;
@@ -873,7 +877,7 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
MARIA_BLOCK_INFO del_block;
del_block.second_read=0;
- if (_ma_get_block_info(&del_block, info->dfile.file,
+ if (_ma_get_block_info(info, &del_block, info->dfile.file,
block_info.filepos + block_info.block_len) &
BLOCK_DELETED)
{
@@ -1344,7 +1348,7 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from,
DBUG_RETURN(found_length);
err:
- my_errno= HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx",
(long) to, (long) to_end, (long) from, (long) from_end));
DBUG_DUMP("from", info->rec_buff, info->s->base.min_pack_length);
@@ -1471,7 +1475,7 @@ int _ma_read_dynamic_record(MARIA_HA *info, uchar *buf,
flush_io_cache(&info->rec_cache))
goto err;
info->rec_cache.seek_not_done=1;
- if ((b_type= _ma_get_block_info(&block_info, file, filepos)) &
+ if ((b_type= _ma_get_block_info(info, &block_info, file, filepos)) &
(BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
{
@@ -1543,7 +1547,7 @@ err:
DBUG_RETURN(my_errno);
panic:
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
goto err;
}
@@ -1622,7 +1626,7 @@ my_bool _ma_cmp_dynamic_record(register MARIA_HA *info,
block_info.next_filepos=filepos;
while (reclength > 0)
{
- if ((b_type= _ma_get_block_info(&block_info, info->dfile.file,
+ if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file,
block_info.next_filepos))
& (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
@@ -1641,7 +1645,7 @@ my_bool _ma_cmp_dynamic_record(register MARIA_HA *info,
}
} else if (reclength < block_info.data_len)
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
goto err;
}
reclength-= block_info.data_len;
@@ -1759,6 +1763,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
{
if (filepos >= info->state->data_file_length)
{
+#ifdef MARIA_EXTERNAL_LOCKING
if (!info_read)
{ /* Check if changed */
info_read=1;
@@ -1771,15 +1776,19 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
my_errno= HA_ERR_END_OF_FILE;
goto err;
}
+#else
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+#endif
}
if (info->opt_flag & READ_CACHE_USED)
{
- if (_ma_read_cache(&info->rec_cache, block_info.header, filepos,
+ if (_ma_read_cache(info, &info->rec_cache, block_info.header, filepos,
sizeof(block_info.header),
(!block_of_record && skip_deleted_blocks ?
READING_NEXT : 0) | READING_HEADER))
goto panic;
- b_type= _ma_get_block_info(&block_info,-1,filepos);
+ b_type= _ma_get_block_info(info, &block_info,-1,filepos);
}
else
{
@@ -1788,7 +1797,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
flush_io_cache(&info->rec_cache))
DBUG_RETURN(my_errno);
info->rec_cache.seek_not_done=1;
- b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos);
+ b_type= _ma_get_block_info(info, &block_info, info->dfile.file, filepos);
}
if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
@@ -1850,7 +1859,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
{
if (info->opt_flag & READ_CACHE_USED)
{
- if (_ma_read_cache(&info->rec_cache, to,filepos,
+ if (_ma_read_cache(info, &info->rec_cache, to,filepos,
block_info.data_len,
(!block_of_record && skip_deleted_blocks) ?
READING_NEXT : 0))
@@ -1867,7 +1876,10 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
if (mysql_file_read(info->dfile.file, to, block_info.data_len, MYF(MY_NABP)))
{
if (my_errno == HA_ERR_FILE_TOO_SHORT)
- my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */
+ {
+ /* Unexpected end of file */
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ }
goto err;
}
}
@@ -1894,7 +1906,8 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
DBUG_RETURN(my_errno); /* Wrong record */
panic:
- my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */
+ /* Something is fatally wrong */
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
err:
fast_ma_writeinfo(info);
DBUG_RETURN(my_errno);
@@ -1903,7 +1916,8 @@ err:
/* Read and process header from a dynamic-record-file */
-uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos)
+uint _ma_get_block_info(MARIA_HA *handler, MARIA_BLOCK_INFO *info, File file,
+ my_off_t filepos)
{
uint return_val=0;
uchar *header=info->header;
@@ -1918,7 +1932,14 @@ uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos)
mysql_file_seek(file,filepos,MY_SEEK_SET,MYF(0));
if (mysql_file_read(file, header, sizeof(info->header),MYF(0)) !=
sizeof(info->header))
- goto err;
+ {
+ /*
+ This is either an error or just reading at end of file.
+ Don't give a fatal error for this case.
+ */
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ return BLOCK_ERROR;
+ }
}
DBUG_DUMP("header",header,MARIA_BLOCK_INFO_HEADER_LENGTH);
if (info->second_read)
@@ -2032,6 +2053,10 @@ uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos)
}
err:
- my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */
+ if (!handler->in_check_table)
+ {
+ /* We may be scanning the table for new rows; Don't give an error */
+ _ma_set_fatal_error(handler->s, HA_ERR_WRONG_IN_RECORD);
+ }
return BLOCK_ERROR;
}
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index d5c698e2087..c8b969363fa 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -50,7 +50,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
switch (function) {
case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */
- info->lastinx= 0; /* Use first index as def */
+ info->lastinx= ~0; /* Detect index changes */
info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
info->page_changed= 1;
/* Next/prev gives first/last */
@@ -143,7 +143,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
(READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) &&
!share->state.header.uniques)
if (!(init_io_cache(&info->rec_cache, info->dfile.file, cache_size,
- WRITE_CACHE,share->state.state.data_file_length,
+ WRITE_CACHE, info->state->data_file_length,
(pbool) (info->lock_type != F_UNLCK),
MYF(share->write_flag & MY_WAIT_IF_FULL))))
{
@@ -175,8 +175,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
{
if ((error= flush_io_cache(&info->rec_cache)))
{
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info); /* Fatal error found */
+ /* Fatal error found */
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
}
}
break;
@@ -254,8 +254,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
if (!share->changed)
{
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
share->changed= 1; /* Update on close */
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
if (!share->global_changed)
{
share->global_changed= 1;
@@ -291,14 +291,15 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
if (!error && share->changed)
{
mysql_mutex_lock(&share->intern_lock);
- if (!(error= _ma_state_info_write(share,
- MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET|
- MA_STATE_INFO_WRITE_FULL_INFO)))
- share->changed= 0;
+ error= _ma_state_info_write(share,
+ MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET|
+ MA_STATE_INFO_WRITE_FULL_INFO);
mysql_mutex_unlock(&share->intern_lock);
}
mysql_mutex_lock(&THR_LOCK_maria);
mysql_mutex_lock(&share->intern_lock); /* protect against Checkpoint */
+ /* Safety against assert in checkpoint */
+ share->bitmap.changed_not_flushed= 0;
/* this makes the share not be re-used next time the table is opened */
share->last_version= 0L; /* Impossible version */
mysql_mutex_unlock(&share->intern_lock);
@@ -309,13 +310,15 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
share->deleting= TRUE;
share->global_changed= FALSE; /* force writing changed flag */
/* To force repair if reopened */
- _ma_mark_file_changed(info);
+ share->state.open_count= 1;
+ share->changed= 1;
+ _ma_mark_file_changed_now(share);
/* Fall trough */
case HA_EXTRA_PREPARE_FOR_RENAME:
{
my_bool do_flush= test(function != HA_EXTRA_PREPARE_FOR_DROP);
+ my_bool save_global_changed;
enum flush_type type;
- mysql_mutex_lock(&THR_LOCK_maria);
/*
This share, to have last_version=0, needs to save all its data/index
blocks to disk if this is not for a DROP TABLE. Otherwise they would be
@@ -338,7 +341,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
*/
mysql_mutex_lock(&share->intern_lock);
if (share->kfile.file >= 0 && function != HA_EXTRA_PREPARE_FOR_DROP)
- _ma_decrement_open_count(info);
+ _ma_decrement_open_count(info, 0);
if (info->trn)
{
_ma_remove_table_from_trnman(share, info->trn);
@@ -347,12 +350,17 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
}
type= do_flush ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED;
+ save_global_changed= share->global_changed;
+ share->global_changed= 1; /* Don't increment open count */
+ mysql_mutex_unlock(&share->intern_lock);
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
type, type))
{
error=my_errno;
share->changed= 1;
}
+ mysql_mutex_lock(&share->intern_lock);
+ share->global_changed= save_global_changed;
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
{
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
@@ -370,25 +378,27 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
MA_STATE_INFO_WRITE_FULL_INFO)) ||
mysql_file_sync(share->kfile.file, MYF(0)))
error= my_errno;
- else
- share->changed= 0;
}
else
{
/* be sure that state is not tried for write as file may be closed */
share->changed= 0;
+ share->global_changed= 0;
+ share->state.open_count= 0;
}
}
if (share->data_file_type == BLOCK_RECORD &&
share->bitmap.file.file >= 0)
{
- if (do_flush && mysql_file_sync(share->bitmap.file.file, MYF(0)))
+ DBUG_ASSERT(share->bitmap.non_flushable == 0 &&
+ share->bitmap.changed == 0);
+ if (do_flush && my_sync(share->bitmap.file.file, MYF(0)))
error= my_errno;
+ share->bitmap.changed_not_flushed= 0;
}
- /* For protection against Checkpoint, we set under intern_lock: */
+ /* last_version must be protected by intern_lock; See collect_tables() */
share->last_version= 0L; /* Impossible version */
mysql_mutex_unlock(&share->intern_lock);
- mysql_mutex_unlock(&THR_LOCK_maria);
break;
}
case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
@@ -405,9 +415,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
if (!share->temporary)
error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_KEEP, FLUSH_KEEP);
-#ifdef HAVE_PWRITE
- _ma_decrement_open_count(info);
-#endif
+
+ _ma_decrement_open_count(info, 1);
if (share->not_flushed)
{
share->not_flushed= 0;
@@ -415,9 +424,9 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
error= my_errno;
if (error)
{
+ /* Fatal error found */
share->changed= 1;
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info); /* Fatal error found */
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
}
}
break;
@@ -553,7 +562,7 @@ int maria_reset(MARIA_HA *info)
#endif
info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
info->quick_mode= 0;
- info->lastinx= 0; /* Use first index as def */
+ info->lastinx= ~0; /* detect index changes */
info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
info->page_changed= 1;
info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
@@ -568,6 +577,12 @@ int _ma_sync_table_files(const MARIA_HA *info)
mysql_file_sync(info->s->kfile.file, MYF(MY_WME)));
}
+uint _ma_file_callback_to_id(void *callback_data)
+{
+ MARIA_SHARE *share= (MARIA_SHARE*) callback_data;
+ return share ? share->id : 0;
+}
+
/**
@brief flushes the data and/or index file of a table
@@ -598,6 +613,8 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
{
int error= 0;
MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_flush_table_files");
+
/* flush data file first because it's more critical */
if (flush_data_or_index & MARIA_FLUSH_DATA)
{
@@ -616,6 +633,7 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
{
mysql_mutex_lock(&share->bitmap.bitmap_lock);
share->bitmap.changed= 0;
+ share->bitmap.changed_not_flushed= 0;
mysql_mutex_unlock(&share->bitmap.bitmap_lock);
}
if (flush_pagecache_blocks(share->pagecache, &info->dfile,
@@ -628,10 +646,15 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
flush_type_for_index))
error= 1;
if (!error)
- return 0;
+ DBUG_RETURN(0);
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info);
- return 1;
+ _ma_set_fatal_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(1);
+}
+
+
+my_bool ma_killed_standalone(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
}
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
index 10df277510d..ce0dca9e75e 100644
--- a/storage/maria/ma_ft_boolean_search.c
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -356,7 +356,8 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
{
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
- key.keyinfo= ftbw->keyinfo;
+ info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
+ info->lastinx= ~0; /* Safety */
key.data= ftbw->word;
key.data_length= ftbw->len;
key.ref_length= 0;
@@ -380,7 +381,8 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
max_docid);
}
- key.keyinfo= ftbw->keyinfo;
+ info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
+ info->lastinx= ~0; /* Safety */
key.data= lastkey_buf;
key.data_length= USE_WHOLE_KEY;
key.ref_length= 0;
diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c
index c3d52fc64e0..78ca7ed9bf8 100644
--- a/storage/maria/ma_init.c
+++ b/storage/maria/ma_init.c
@@ -103,7 +103,7 @@ void maria_end(void)
trid, recovery_failures);
}
trnman_destroy();
- if (translog_status == TRANSLOG_OK)
+ if (translog_status == TRANSLOG_OK || translog_status == TRANSLOG_READONLY)
translog_destroy();
end_pagecache(maria_log_pagecache, TRUE);
end_pagecache(maria_pagecache, TRUE);
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
index 0e1891fb249..f62ffcc49a0 100644
--- a/storage/maria/ma_key.c
+++ b/storage/maria/ma_key.c
@@ -644,8 +644,7 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
{ /* Read only key */
if (_ma_put_key_in_record(info, (uint)info->lastinx, TRUE, buf))
{
- maria_print_error(info->s, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(info->s, HA_ERR_CRASHED);
return -1;
}
info->update|= HA_STATE_AKTIV; /* We should find a record */
@@ -669,25 +668,39 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
will look for column values there)
RETURN
- ICP_ERROR Error
+ ICP_ERROR Error ; my_errno set to HA_ERR_CRASHED
ICP_NO_MATCH Index condition is not satisfied, continue scanning
ICP_MATCH Index condition is satisfied
- ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
+ ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
+ my_errno set to HA_ERR_END_OF_FILE
+
+ info->cur_row.lastpos is set to HA_OFFSET_ERROR in case of ICP_ERROR or
+ ICP_OUT_OF_RANGE to indicate that we don't have any active row.
*/
-int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record)
+ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr,
+ uchar *record)
{
+ ICP_RESULT res= ICP_MATCH;
if (info->index_cond_func)
{
if (_ma_put_key_in_record(info, keynr, FALSE, record))
{
+ /* Impossible case; Can only happen if bug in code */
maria_print_error(info->s, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- return -1;
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_CRASHED;
+ res= ICP_ERROR;
+ }
+ else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ ICP_OUT_OF_RANGE)
+ {
+ /* We got beyond the end of scanned range */
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_END_OF_FILE;
}
- return info->index_cond_func(info->index_cond_func_arg);
}
- return 1;
+ return res;
}
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
index bc85ad025ff..920f5a08013 100644
--- a/storage/maria/ma_key_recover.c
+++ b/storage/maria/ma_key_recover.c
@@ -66,7 +66,7 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
#ifdef EXTRA_DEBUG
DBUG_ASSERT((!pinned_page->changed ||
undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional) ||
- (info->s->state.changed & STATE_CRASHED));
+ (info->s->state.changed & STATE_CRASHED_FLAGS));
#endif
pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
pinned_page->unlock, PAGECACHE_UNPIN,
@@ -1027,7 +1027,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
insert_length, changed_length));
DBUG_ASSERT(insert_length <= changed_length &&
- page_length + changed_length <= max_page_size);
+ page_length + insert_length <= max_page_size);
bmove_upp(buff + page_length + insert_length, buff + page_length,
page_length - keypage_header);
diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c
index ef893d076bd..e3c57801410 100644
--- a/storage/maria/ma_keycache.c
+++ b/storage/maria/ma_keycache.c
@@ -79,8 +79,8 @@ int maria_assign_to_pagecache(MARIA_HA *info,
if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE))
{
error= my_errno;
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info); /* Mark that table must be checked */
+ /* Mark that table must be checked */
+ _ma_set_fatal_error(share, error);
}
/*
diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c
index 8d2d3c0ad6e..9bab4cdfe0e 100644
--- a/storage/maria/ma_locking.c
+++ b/storage/maria/ma_locking.c
@@ -80,9 +80,8 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
{
if (end_io_cache(&info->rec_cache))
{
- error=my_errno;
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info);
+ error= my_errno;
+ _ma_set_fatal_error(share, error);
}
}
if (!count)
@@ -104,7 +103,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
mysql_rwlock_unlock(&share->mmap_lock);
}
#endif
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
share->state.process= share->last_process=share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
@@ -129,10 +128,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
else
share->not_flushed=1;
if (error)
- {
- maria_print_error(info->s, HA_ERR_CRASHED);
- maria_mark_crashed(info);
- }
+ _ma_set_fatal_error(share, error);
}
}
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
@@ -307,7 +303,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
{ /* Two threads can't be here */
olderror= my_errno; /* Remember last error */
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
/*
The following only makes sense if we want to be allow two different
processes access the same table at the same time
@@ -345,7 +341,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
int _ma_test_if_changed(register MARIA_HA *info)
{
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
MARIA_SHARE *share= info->s;
if (share->state.process != share->last_process ||
share->state.unique != info->last_unique ||
@@ -390,12 +386,39 @@ int _ma_test_if_changed(register MARIA_HA *info)
#define _MA_ALREADY_MARKED_FILE_CHANGED \
((share->state.changed & STATE_CHANGED) && share->global_changed)
-int _ma_mark_file_changed(MARIA_HA *info)
+int _ma_mark_file_changed(register MARIA_SHARE *share)
+{
+ if (!share->base.born_transactional)
+ {
+ if (!_MA_ALREADY_MARKED_FILE_CHANGED)
+ return _ma_mark_file_changed_now(share);
+ }
+ else
+ {
+ /*
+ For transactional tables, the table is marked changed when the first page
+ is written. Here we just mark the state to be updated so that caller
+ can do 'analyze table' and find that it has changed before any pages
+ are written.
+ */
+ if (! test_all_bits(share->state.changed,
+ (STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_KEYS)))
+ {
+ mysql_mutex_lock(&share->intern_lock);
+ share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_KEYS);
+ mysql_mutex_unlock(&share->intern_lock);
+ }
+ }
+ return 0;
+}
+
+int _ma_mark_file_changed_now(register MARIA_SHARE *share)
{
uchar buff[3];
- register MARIA_SHARE *share= info->s;
int error= 1;
- DBUG_ENTER("_ma_mark_file_changed");
+ DBUG_ENTER("_ma_mark_file_changed_now");
if (_MA_ALREADY_MARKED_FILE_CHANGED)
DBUG_RETURN(0);
@@ -406,7 +429,7 @@ int _ma_mark_file_changed(MARIA_HA *info)
STATE_NOT_OPTIMIZED_KEYS);
if (!share->global_changed)
{
- share->global_changed=1;
+ share->changed= share->global_changed= 1;
share->state.open_count++;
}
/*
@@ -434,7 +457,7 @@ int _ma_mark_file_changed(MARIA_HA *info)
!(share->state.changed & STATE_NOT_MOVABLE))
{
/* Lock table to current installation */
- if (_ma_set_uuid(info, 0) ||
+ if (_ma_set_uuid(share, 0) ||
(share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
_ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
trnman_get_min_trid(),
@@ -476,22 +499,31 @@ my_bool _ma_check_if_zero(uchar *pos, size_t length)
call. In these context the following code should be safe!
*/
-int _ma_decrement_open_count(MARIA_HA *info)
+int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
{
uchar buff[2];
register MARIA_SHARE *share= info->s;
int lock_error=0,write_error=0;
+ DBUG_ENTER("_ma_decrement_open_count");
+
if (share->global_changed)
{
uint old_lock=info->lock_type;
share->global_changed=0;
- lock_error= my_disable_locking ? 0 : maria_lock_database(info, F_WRLCK);
+ lock_error= (my_disable_locking || ! lock_tables ? 0 :
+ maria_lock_database(info, F_WRLCK));
/* Its not fatal even if we couldn't get the lock ! */
if (share->state.open_count > 0)
{
share->state.open_count--;
share->changed= 1; /* We have to update state */
- if (!share->temporary)
+ /*
+ For temporary tables that will just be deleted, we don't have
+ to decrement state. For transactional tables the state will be
+ updated in maria_close().
+ */
+
+ if (!share->temporary && !share->now_transactional)
{
mi_int2store(buff,share->state.open_count);
write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff),
@@ -500,10 +532,10 @@ int _ma_decrement_open_count(MARIA_HA *info)
MYF(MY_NABP));
}
}
- if (!lock_error && !my_disable_locking)
+ if (!lock_error && !my_disable_locking && lock_tables)
lock_error=maria_lock_database(info,old_lock);
}
- return test(lock_error || write_error);
+ DBUG_RETURN(test(lock_error || write_error));
}
@@ -528,17 +560,40 @@ void _ma_mark_file_crashed(MARIA_SHARE *share)
DBUG_VOID_RETURN;
}
+/*
+ Handle a fatal error
+
+ - Mark the table as crashed
+ - Print an error message, if we had not issued an error message before
+ that the table had been crashed.
+ - set my_errno to error
+ - If 'maria_assert_if_crashed_table' is set, then assert.
+*/
+
+void _ma_set_fatal_error(MARIA_SHARE *share, int error)
+{
+ DBUG_PRINT("error", ("error: %d", error));
+ maria_mark_crashed_share(share);
+ if (!(share->state.changed & STATE_CRASHED_PRINTED))
+ {
+ share->state.changed|= STATE_CRASHED_PRINTED;
+ maria_print_error(share, error);
+ }
+ my_errno= error;
+ DBUG_ASSERT(!maria_assert_if_crashed_table);
+}
+
/**
@brief Set uuid of for a Maria file
@fn _ma_set_uuid()
- @param info Maria handler
+ @param share Maria share
@param reset_uuid Instead of setting file to maria_uuid, set it to
0 to mark it as movable
*/
-my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid)
+my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
{
uchar buff[MY_UUID_SIZE], *uuid;
@@ -548,7 +603,7 @@ my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid)
bzero(buff, sizeof(buff));
uuid= buff;
}
- return (my_bool) my_pwrite(info->s->kfile.file, uuid, MY_UUID_SIZE,
- mi_uint2korr(info->s->state.header.base_pos),
+ return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE,
+ mi_uint2korr(share->state.header.base_pos),
MYF(MY_NABP));
}
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index 98818e9f4f1..18a6179d056 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007 MySQL AB & Sanja Belkin
+/* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -458,7 +458,9 @@ void translog_lock_handler_assert_owner()
@param num how many records should be filled
*/
-static void check_translog_description_table(int num)
+static uint max_allowed_translog_type= 0;
+
+void check_translog_description_table(int num)
{
int i;
DBUG_ENTER("check_translog_description_table");
@@ -467,6 +469,7 @@ static void check_translog_description_table(int num)
/* last is reserved for extending the table */
DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
+ max_allowed_translog_type= num;
for (i= 0; i <= num; i++)
{
@@ -973,7 +976,7 @@ static File open_logfile_by_number_no_cache(uint32 file_no)
DBUG_ENTER("open_logfile_by_number_no_cache");
/* TODO: add O_DIRECT to open flags (when buffer is aligned) */
- /* TODO: use my_create() */
+ /* TODO: use mysql_file_create() */
if ((file= mysql_file_open(key_file_translog,
translog_filename_by_fileno(file_no, path),
log_descriptor.open_flags,
@@ -1080,7 +1083,7 @@ static my_bool translog_write_file_header()
memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
page+= sizeof(maria_trans_file_magic);
/* timestamp */
- timestamp= my_getsystime();
+ timestamp= my_hrtime().val;
int8store(page, timestamp);
page+= 8;
/* maria version */
@@ -1151,34 +1154,14 @@ static my_bool translog_max_lsn_to_header(File file, LSN lsn)
/*
- Information from transaction log file header
-*/
-
-typedef struct st_loghandler_file_info
-{
- /*
- LSN_IMPOSSIBLE for current file (not finished file).
- Maximum LSN of the record which parts stored in the
- file.
- */
- LSN max_lsn;
- ulonglong timestamp; /* Time stamp */
- ulong maria_version; /* Version of maria loghandler */
- ulong mysql_version; /* Version of mysql server */
- ulong server_id; /* Server ID */
- ulong page_size; /* Loghandler page size */
- ulong file_number; /* Number of the file (from the file header) */
-} LOGHANDLER_FILE_INFO;
-
-/*
@brief Extract hander file information from loghandler file page
@param desc header information descriptor to be filled with information
@param page_buff buffer with the page content
*/
-static void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
- uchar *page_buff)
+void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
+ uchar *page_buff)
{
uchar *ptr;
@@ -2560,24 +2543,13 @@ my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
LSN_IN_PARTS(buffer->prev_sent_to_disk),
LSN_IN_PARTS(buffer->prev_buffer_offset)));
translog_buffer_lock_assert_owner(buffer);
- /*
- if prev_sent_to_disk == LSN_IMPOSSIBLE then
- prev_buffer_offset should be LSN_IMPOSSIBLE
- because it means that this buffer was never used
- */
- DBUG_ASSERT((buffer->prev_sent_to_disk == LSN_IMPOSSIBLE &&
- buffer->prev_buffer_offset == LSN_IMPOSSIBLE) ||
- buffer->prev_sent_to_disk != LSN_IMPOSSIBLE);
if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
{
do {
mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
if (buffer->file != file || buffer->offset != offset ||
buffer->ver != ver)
- {
- translog_buffer_unlock(buffer);
DBUG_RETURN(1); /* some the thread flushed the buffer already */
- }
} while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
}
DBUG_RETURN(0);
@@ -2624,11 +2596,10 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
{
/* some other flush in progress */
translog_wait_for_closing(buffer);
+ if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
+ DBUG_RETURN(0); /* some the thread flushed the buffer already */
}
- if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
- DBUG_RETURN(0); /* some the thread flushed the buffer already */
-
if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
DBUG_RETURN(0); /* some the thread flushed the buffer already */
@@ -3525,7 +3496,7 @@ my_bool translog_walk_filenames(const char *directory,
@brief Fills table of dependence length of page header from page flags
*/
-static void translog_fill_overhead_table()
+void translog_fill_overhead_table()
{
uint i;
for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
@@ -3620,6 +3591,7 @@ my_bool translog_init_with_table(const char *directory,
log_descriptor.flush_no= 0;
log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
+ /* Normally in Aria this calls translog_table_init() */
(*init_table_func)();
compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
TRANSLOG_BUFFERS_NO);
@@ -6262,13 +6234,15 @@ my_bool translog_write_record(LSN *lsn,
(uint) short_trid, (ulong) rec_len));
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
translog_status == TRANSLOG_READONLY);
+ DBUG_ASSERT(type != 0);
+ DBUG_ASSERT((uint)type <= max_allowed_translog_type);
if (unlikely(translog_status != TRANSLOG_OK))
{
DBUG_PRINT("error", ("Transaction log is write protected"));
DBUG_RETURN(1);
}
- if (tbl_info)
+ if (tbl_info && type != LOGREC_FILE_ID)
{
MARIA_SHARE *share= tbl_info->s;
DBUG_ASSERT(share->now_transactional);
@@ -6360,9 +6334,9 @@ my_bool translog_write_record(LSN *lsn,
/* process this parts */
if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
- (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
- tbl_info,
- hook_arg))))
+ (*log_record_type_descriptor[type].prewrite_hook)(type, trn,
+ tbl_info,
+ hook_arg))))
{
switch (log_record_type_descriptor[type].rclass) {
case LOGRECTYPE_VARIABLE_LENGTH:
@@ -6375,6 +6349,7 @@ my_bool translog_write_record(LSN *lsn,
short_trid, &parts, trn, hook_arg);
break;
case LOGRECTYPE_NOT_ALLOWED:
+ DBUG_ASSERT(0);
default:
DBUG_ASSERT(0);
rc= 1;
@@ -7748,7 +7723,7 @@ static my_bool translog_sync_files(uint32 min, uint32 max,
flush_interval= group_commit_wait;
if (flush_interval)
- flush_start= my_micro_time();
+ flush_start= microsecond_interval_timer();
for (fn= min; fn <= max; fn++)
{
TRANSLOG_FILE *file= get_logfile_by_number(fn);
@@ -7796,6 +7771,7 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
uint i;
uint8 last_buffer_no, start_buffer_no;
DBUG_ENTER("translog_flush_buffers");
+ LINT_INIT(last_buffer_no);
/*
We will recheck information when will lock buffers one by
@@ -7816,7 +7792,6 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
(uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));
-
/*
if LSN up to which we have to flush bigger then maximum LSN of previous
buffer and at least one LSN was saved in the current buffer (last_lsn !=
@@ -7828,18 +7803,28 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
*lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
DBUG_PRINT("info", ("LSN to flush fixed to last lsn: (%lu,0x%lx)",
- LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn)));
+ LSN_IN_PARTS(*lsn)));
last_buffer_no= log_descriptor.bc.buffer_no;
log_descriptor.is_everything_flushed= 1;
translog_force_current_buffer_to_finish();
translog_buffer_unlock(buffer);
}
- else
+ else if (log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE)
{
+ /* fix lsn if it was horizon */
+ *lsn= log_descriptor.bc.buffer->prev_last_lsn;
+ DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: (%lu,0x%lx)",
+ LSN_IN_PARTS(*lsn)));
last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
TRANSLOG_BUFFERS_NO);
translog_unlock();
}
+ else if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE)
+ {
+ DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing"));
+ translog_unlock();
+ DBUG_VOID_RETURN;
+ }
/* flush buffers */
*sent_to_disk= translog_get_sent_to_disk();
@@ -8005,7 +7990,8 @@ retest:
/*
We do not check time here because mysql_mutex_lock rarely takes
a lot of time so we can sacrifice a bit precision to performance
- (taking into account that my_micro_time() might be expensive call).
+ (taking into account that microsecond_interval_timer() might be
+ expensive call).
*/
if (flush_interval == 0)
break; /* flush pass is ended */
@@ -8014,7 +8000,8 @@ retest:
if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
{
if (flush_interval == 0 ||
- (time_spent= (my_micro_time() - flush_start)) >= flush_interval)
+ (time_spent= (microsecond_interval_timer() - flush_start)) >=
+ flush_interval)
{
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
break;
@@ -8116,6 +8103,7 @@ out:
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
{
+ uint16 id;
MARIA_SHARE *share= tbl_info->s;
/*
If you give an id to a non-BLOCK_RECORD table, you also need to release
@@ -8131,6 +8119,7 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
uchar log_data[FILEID_STORE_SIZE];
/* Inspired by set_short_trid() of trnman.c */
uint i= share->kfile.file % SHARE_ID_MAX + 1;
+ id= 0;
do
{
my_atomic_rwlock_wrlock(&LOCK_id_to_share);
@@ -8140,14 +8129,15 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
if (id_to_share[i] == NULL &&
my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
{
- share->id= (uint16)i;
+ id= (uint16) i;
break;
}
}
my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
i= 1; /* scan the whole array */
- } while (share->id == 0);
- DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
+ } while (id == 0);
+ DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, id));
+ fileid_store(log_data, id);
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
/*
@@ -8169,11 +8159,18 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
log_array[TRANSLOG_INTERNAL_PARTS +
1].length),
sizeof(log_array)/sizeof(log_array[0]),
- log_array, log_data, NULL)))
+ log_array, NULL, NULL)))
{
mysql_mutex_unlock(&share->intern_lock);
return 1;
}
+ /*
+ Now when translog record is done, we can set share->id.
+ If we set it before, then translog_write_record may pick up the id
+ before it's written to the log.
+ */
+ share->id= id;
+ share->state.logrec_file_id= lsn;
}
mysql_mutex_unlock(&share->intern_lock);
return 0;
@@ -8799,7 +8796,7 @@ ma_soft_sync_background( void *arg __attribute__((unused)))
DBUG_ENTER("ma_soft_sync_background");
for(;;)
{
- ulonglong prev_loop= my_micro_time();
+ ulonglong prev_loop= microsecond_interval_timer();
ulonglong time, sleep;
uint32 min, max, sync_request;
min= soft_sync_min;
@@ -8811,7 +8808,7 @@ ma_soft_sync_background( void *arg __attribute__((unused)))
sleep= group_commit_wait;
if (sync_request)
translog_sync_files(min, max, FALSE);
- time= my_micro_time() - prev_loop;
+ time= microsecond_interval_timer() - prev_loop;
if (time > sleep)
sleep= 0;
else
@@ -8869,116 +8866,6 @@ void translog_soft_sync_end(void)
}
-#ifdef MARIA_DUMP_LOG
-#include <my_getopt.h>
-extern void translog_example_table_init();
-static const char *load_default_groups[]= { "aria_dump_log",0 };
-static void get_options(int *argc,char * * *argv);
-#ifndef DBUG_OFF
-#if defined(__WIN__)
-const char *default_dbug_option= "d:t:i:O,\\aria_dump_log.trace";
-#else
-const char *default_dbug_option= "d:t:i:o,/tmp/aria_dump_log.trace";
-#endif
-#endif
-static ulonglong opt_offset;
-static ulong opt_pages;
-static const char *opt_file= NULL;
-static File handler= -1;
-static my_bool opt_unit= 0;
-static struct my_option my_long_options[] =
-{
-#ifdef IMPLTMENTED
- {"body", 'b',
- "Print chunk body dump",
- (uchar **) &opt_body, (uchar **) &opt_body, 0,
- GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
-#endif
-#ifndef DBUG_OFF
- {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
- 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
-#endif
- {"file", 'f', "Path to file which will be read",
- (uchar**) &opt_file, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
- {"help", '?', "Display this help and exit.",
- 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- { "offset", 'o', "Start reading log from this offset",
- (uchar**) &opt_offset, (uchar**) &opt_offset,
- 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
- { "pages", 'n', "Number of pages to read",
- (uchar**) &opt_pages, (uchar**) &opt_pages, 0,
- GET_ULONG, REQUIRED_ARG, (long) ~(ulong) 0,
- (long) 1, (long) ~(ulong) 0, (long) 0,
- (long) 1, 0},
- {"unit-test", 'U',
- "Use unit test record table (for logs created by unittests",
- (uchar **) &opt_unit, (uchar **) &opt_unit, 0,
- GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"version", 'V', "Print version and exit.",
- 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
- { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
-};
-
-
-static void print_version(void)
-{
- printf("%s Ver 1.0 for %s on %s\n",
- my_progname_short, SYSTEM_TYPE, MACHINE_TYPE);
-}
-
-
-static void usage(void)
-{
- print_version();
- puts("Copyright (C) 2008 MySQL AB");
- puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
- puts("and you are welcome to modify and redistribute it under the GPL license\n");
-
- puts("Dump content of aria log pages.");
- printf("\nUsage: %s -f file OPTIONS\n", my_progname_short);
- my_print_help(my_long_options);
- print_defaults("my", load_default_groups);
- my_print_variables(my_long_options);
-}
-
-
-static my_bool
-get_one_option(int optid __attribute__((unused)),
- const struct my_option *opt __attribute__((unused)),
- char *argument __attribute__((unused)))
-{
- switch (optid) {
- case '?':
- usage();
- exit(0);
- case 'V':
- print_version();
- exit(0);
-#ifndef DBUG_OFF
- case '#':
- DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
- break;
-#endif
- }
- return 0;
-}
-
-
-static void get_options(int *argc,char ***argv)
-{
- int ho_error;
-
- if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
- exit(ho_error);
-
- if (opt_file == NULL)
- {
- usage();
- exit(1);
- }
-}
-
-
/**
@brief Dump information about file header page.
*/
@@ -8987,7 +8874,6 @@ static void dump_header_page(uchar *buff)
{
LOGHANDLER_FILE_INFO desc;
char strbuff[21];
-
translog_interpret_file_header(&desc, buff);
printf(" This can be header page:\n"
" Timestamp: %s\n"
@@ -9164,7 +9050,7 @@ static uchar *dump_chunk(uchar *buffer, uchar *ptr)
@brief Dump information about page with data.
*/
-static void dump_datapage(uchar *buffer)
+static void dump_datapage(uchar *buffer, File handler)
{
uchar *ptr;
ulong offset;
@@ -9245,82 +9131,12 @@ static void dump_datapage(uchar *buffer)
@brief Dump information about page.
*/
-static void dump_page(uchar *buffer)
+void dump_page(uchar *buffer, File handler)
{
- printf("Page by offset %llu (0x%llx)\n", opt_offset, opt_offset);
if (strncmp((char*)maria_trans_file_magic, (char*)buffer,
sizeof(maria_trans_file_magic)) == 0)
{
dump_header_page(buffer);
}
- dump_datapage(buffer);
-}
-
-
-/**
- @brief maria_dump_log main function.
-*/
-
-int main(int argc, char **argv)
-{
- char **default_argv;
- uchar buffer[TRANSLOG_PAGE_SIZE];
- MY_INIT(argv[0]);
-
- load_defaults("my", load_default_groups, &argc, &argv);
- default_argv= argv;
- get_options(&argc, &argv);
-
- if (opt_unit)
- translog_example_table_init();
- else
- translog_table_init();
- translog_fill_overhead_table();
-
- maria_data_root= (char *)".";
-
- if ((handler= my_open(opt_file, O_RDONLY, MYF(MY_WME))) < 0)
- {
- fprintf(stderr, "Can't open file: '%s' errno: %d\n",
- opt_file, my_errno);
- goto err;
- }
- if (mysql_file_seek(handler, opt_offset, SEEK_SET, MYF(MY_WME)) !=
- opt_offset)
- {
- fprintf(stderr, "Can't set position %lld file: '%s' errno: %d\n",
- opt_offset, opt_file, my_errno);
- goto err;
- }
- for (;
- opt_pages;
- opt_offset+= TRANSLOG_PAGE_SIZE, opt_pages--)
- {
- if (mysql_file_pread(handler, buffer, TRANSLOG_PAGE_SIZE, opt_offset,
- MYF(MY_NABP)))
- {
- if (my_errno == HA_ERR_FILE_TOO_SHORT)
- goto end;
- fprintf(stderr, "Can't read page at position %lld file: '%s' "
- "errno: %d\n", opt_offset, opt_file, my_errno);
- goto err;
- }
- dump_page(buffer);
- }
-
-end:
- my_close(handler, MYF(0));
- free_defaults(default_argv);
- exit(0);
- return 0; /* No compiler warning */
-
-err:
- my_close(handler, MYF(0));
- fprintf(stderr, "%s: FAILED\n", my_progname_short);
- free_defaults(default_argv);
- exit(1);
+ dump_datapage(buffer, handler);
}
-
-#include "ma_check_standalone.h"
-#endif
-
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
index 698a8ead7b6..5ac6d67413a 100644
--- a/storage/maria/ma_loghandler.h
+++ b/storage/maria/ma_loghandler.h
@@ -312,6 +312,9 @@ extern my_bool translog_init_with_table(const char *directory,
my_bool readonly,
void (*init_table_func)(),
my_bool no_error);
+#ifndef DBUG_OFF
+void check_translog_description_table(int num);
+#endif
extern my_bool
translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn,
@@ -360,6 +363,7 @@ translog_assign_id_to_share_from_recovery(struct st_maria_share *share,
extern my_bool translog_walk_filenames(const char *directory,
my_bool (*callback)(const char *,
const char *));
+extern void dump_page(uchar *buffer, File handler);
extern my_bool translog_log_debug_info(TRN *trn,
enum translog_debug_info_type type,
uchar *info, size_t length);
@@ -386,8 +390,31 @@ void translog_set_group_commit_interval(uint32 interval);
ma_loghandler_for_recovery.h ?
*/
+/*
+ Information from transaction log file header
+*/
+
+typedef struct st_loghandler_file_info
+{
+ /*
+ LSN_IMPOSSIBLE for current file (not finished file).
+ Maximum LSN of the record which parts stored in the
+ file.
+ */
+ LSN max_lsn;
+ ulonglong timestamp; /* Time stamp */
+ ulong maria_version; /* Version of maria loghandler */
+ ulong mysql_version; /* Version of mysql server */
+ ulong server_id; /* Server ID */
+ ulong page_size; /* Loghandler page size */
+ ulong file_number; /* Number of the file (from the file header) */
+} LOGHANDLER_FILE_INFO;
+
#define SHARE_ID_MAX 65535 /* array's size */
+extern void translog_fill_overhead_table();
+extern void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
+ uchar *page_buff);
extern LSN translog_first_lsn_in_log();
extern LSN translog_first_theoretical_lsn();
extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
diff --git a/storage/maria/ma_norec.c b/storage/maria/ma_norec.c
new file mode 100644
index 00000000000..6d4f37e34fd
--- /dev/null
+++ b/storage/maria/ma_norec.c
@@ -0,0 +1,66 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions to handle tables with no row data (only index)
+ This is useful when you just want to do key reads or want to use
+ the index to check against duplicates.
+*/
+
+#include "maria_def.h"
+
+my_bool _ma_write_no_record(MARIA_HA *info __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return 0;
+}
+
+my_bool _ma_update_no_record(MARIA_HA *info __attribute__((unused)),
+ MARIA_RECORD_POS pos __attribute__((unused)),
+ const uchar *oldrec __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+my_bool _ma_delete_no_record(MARIA_HA *info __attribute__((unused)),
+ const uchar *record __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+int _ma_read_no_record(MARIA_HA *info __attribute__((unused)),
+ uchar *record __attribute__((unused)),
+ MARIA_RECORD_POS pos __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+
+int _ma_read_rnd_no_record(MARIA_HA *info __attribute__((unused)),
+ uchar *buf __attribute__((unused)),
+ MARIA_RECORD_POS filepos __attribute__((unused)),
+ my_bool skip_deleted_blocks __attribute__((unused)))
+{
+ return HA_ERR_WRONG_COMMAND;
+}
+
+my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share __attribute__ ((unused)),
+ my_off_t pos __attribute__ ((unused)))
+{
+ return 0;
+}
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 0784a567b45..d545ed76592 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -13,7 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* open a isam-database */
+/* open an Aria table */
#include "ma_fulltext.h"
#include "ma_sp_defs.h"
@@ -41,10 +41,10 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state);
pos+=size;}
-#define disk_pos_assert(pos, end_pos) \
+#define disk_pos_assert(share, pos, end_pos) \
if (pos > end_pos) \
{ \
- my_errno=HA_ERR_CRASHED; \
+ _ma_set_fatal_error(share, HA_ERR_CRASHED); \
goto err; \
}
@@ -130,10 +130,12 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
info.s=share;
info.cur_row.lastpos= HA_OFFSET_ERROR;
+ /* Impossible first index to force initialization in _ma_check_index() */
+ info.lastinx= ~0;
info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
info.opt_flag=READ_CHECK_USED;
info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
if (share->data_file_type == COMPRESSED_RECORD)
info.this_unique= share->state.unique;
info.this_loop=0; /* Update counter */
@@ -201,6 +203,10 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
*m_info=info;
thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
+
+ if (share->options & HA_OPTION_TMP_TABLE)
+ m_info->lock.type= TL_WRITE;
+
m_info->open_list.data=(void*) m_info;
maria_open_list=list_add(maria_open_list,&m_info->open_list);
@@ -385,7 +391,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
errpos= 3;
if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
goto err;
}
len=mi_uint2korr(share->state.header.state_info_length);
@@ -411,9 +417,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
}
disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
share->state.state_length=base_pos;
+ /* For newly opened tables we reset the error-has-been-printed flag */
+ share->state.changed&= ~STATE_CRASHED_PRINTED;
if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
- ((share->state.changed & STATE_CRASHED) ||
+ ((share->state.changed & STATE_CRASHED_FLAGS) ||
((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
(my_disable_locking && share->state.open_count))))
{
@@ -425,6 +433,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
goto err;
}
+ if (share->state.open_count)
+ share->open_count_not_zero_on_open= 1;
/*
We can ignore testing uuid if STATE_NOT_MOVABLE is set, as in this
@@ -454,7 +464,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
/* sanity check */
if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
goto err;
}
@@ -485,6 +495,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
(uint) share->base.block_size,
(uint) maria_block_size));
my_errno=HA_ERR_UNSUPPORTED;
+ my_printf_error(my_errno, "Wrong block size %u; Expected %u",
+ MYF(0),
+ (uint) share->base.block_size,
+ (uint) maria_block_size);
goto err;
}
@@ -496,7 +510,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
(ulonglong) 1 << (share->base.rec_reflength*8))-1);
max_key_file_length=
- _ma_safe_mul(maria_block_size,
+ _ma_safe_mul(share->base.block_size,
((ulonglong) 1 << (share->base.key_reflength*8))-1);
#if SIZEOF_OFF_T == 4
set_if_smaller(max_data_file_length, INT_MAX32);
@@ -557,20 +571,40 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->block_size= share->base.block_size; /* Convenience */
share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE;
+ share->keypage_header= ((share->base.born_transactional ?
+ LSN_STORE_SIZE + TRANSID_SIZE :
+ 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
+ KEYPAGE_USED_SIZE);
{
HA_KEYSEG *pos=share->keyparts;
uint32 ftkey_nr= 1;
for (i=0 ; i < keys ; i++)
{
- share->keyinfo[i].share= share;
- disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]);
- share->keyinfo[i].key_nr= i;
- disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
+ MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
+ keyinfo->share= share;
+ disk_pos=_ma_keydef_read(disk_pos, keyinfo);
+ keyinfo->key_nr= i;
+
+ /* See ma_delete.c::underflow() */
+ if (!(keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
+ keyinfo->underflow_block_length= keyinfo->block_length/3;
+ else
+ {
+ /* Packed key, ensure we don't get overflow in underflow() */
+ keyinfo->underflow_block_length=
+ max((int) (share->max_index_block_size - keyinfo->maxlength * 3),
+ (int) (share->keypage_header + share->base.key_reflength));
+ set_if_smaller(keyinfo->underflow_block_length,
+ keyinfo->block_length/3);
+ }
+
+ disk_pos_assert(share,
+ disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE,
end_pos);
- if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE)
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
share->have_rtree= 1;
- share->keyinfo[i].seg=pos;
- for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++)
+ keyinfo->seg=pos;
+ for (j=0 ; j < keyinfo->keysegs; j++,pos++)
{
disk_pos=_ma_keyseg_read(disk_pos, pos);
if (pos->type == HA_KEYTYPE_TEXT ||
@@ -588,32 +622,32 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
else if (pos->type == HA_KEYTYPE_BINARY)
pos->charset= &my_charset_bin;
}
- if (share->keyinfo[i].flag & HA_SPATIAL)
+ if (keyinfo->flag & HA_SPATIAL)
{
#ifdef HAVE_SPATIAL
uint sp_segs=SPDIMS*2;
- share->keyinfo[i].seg=pos-sp_segs;
- share->keyinfo[i].keysegs--;
+ keyinfo->seg=pos-sp_segs;
+ keyinfo->keysegs--;
versioning= 0;
#else
my_errno=HA_ERR_UNSUPPORTED;
goto err;
#endif
}
- else if (share->keyinfo[i].flag & HA_FULLTEXT)
+ else if (keyinfo->flag & HA_FULLTEXT)
{
versioning= 0;
DBUG_ASSERT(fulltext_keys);
{
uint k;
- share->keyinfo[i].seg=pos;
+ keyinfo->seg=pos;
for (k=0; k < FT_SEGS; k++)
{
*pos= ft_keysegs[k];
pos[0].language= pos[-1].language;
if (!(pos[0].charset= pos[-1].charset))
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
goto err;
}
pos++;
@@ -621,8 +655,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
}
if (!share->ft2_keyinfo.seg)
{
- memcpy(&share->ft2_keyinfo, &share->keyinfo[i],
- sizeof(MARIA_KEYDEF));
+ memcpy(&share->ft2_keyinfo, keyinfo, sizeof(MARIA_KEYDEF));
share->ft2_keyinfo.keysegs=1;
share->ft2_keyinfo.flag=0;
share->ft2_keyinfo.keylength=
@@ -632,10 +665,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->ft2_keyinfo.end=pos;
setup_key_functions(& share->ft2_keyinfo);
}
- share->keyinfo[i].ftkey_nr= ftkey_nr++;
+ keyinfo->ftkey_nr= ftkey_nr++;
}
- setup_key_functions(share->keyinfo+i);
- share->keyinfo[i].end=pos;
+ setup_key_functions(keyinfo);
+ keyinfo->end=pos;
pos->type=HA_KEYTYPE_END; /* End */
pos->length=share->base.rec_reflength;
pos->null_bit=0;
@@ -645,7 +678,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
for (i=0 ; i < uniques ; i++)
{
disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
- disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs *
+ disk_pos_assert(share,
+ disk_pos + share->uniqueinfo[i].keysegs *
HA_KEYSEG_SIZE, end_pos);
share->uniqueinfo[i].seg=pos;
for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
@@ -678,10 +712,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->base.null_bytes +
share->base.pack_bytes +
test(share->options & HA_OPTION_CHECKSUM));
- share->keypage_header= ((share->base.born_transactional ?
- LSN_STORE_SIZE + TRANSID_SIZE :
- 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
- KEYPAGE_USED_SIZE);
share->kfile.file= kfile;
if (open_flags & HA_OPEN_COPY)
@@ -749,7 +779,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->base.extra_rec_buff_size,
share->base.max_key_length);
- disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
+ disk_pos_assert(share,
+ disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
end_pos);
for (i= j= 0 ; i < share->base.fields ; i++)
{
@@ -763,6 +794,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->blobs[j].offset= share->columndef[i].offset;
j++;
}
+ if (share->columndef[i].type == FIELD_VARCHAR)
+ share->has_varchar_fields= 1;
+ if (share->columndef[i].null_bit)
+ share->has_null_fields= 1;
}
share->columndef[i].type= FIELD_LAST; /* End marker */
disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
@@ -783,7 +818,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->options|= HA_OPTION_READ_ONLY_DATA;
share->is_log_table= FALSE;
- if (open_flags & HA_OPEN_TMP_TABLE)
+ if (open_flags & HA_OPEN_TMP_TABLE ||
+ (share->options & HA_OPTION_TMP_TABLE))
{
share->options|= HA_OPTION_TMP_TABLE;
share->temporary= share->delay_key_write= 1;
@@ -794,7 +830,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
_ma_set_index_pagecache_callbacks(&share->kfile, share);
share->this_process=(ulong) getpid();
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
share->last_process= share->state.process;
#endif
share->base.key_parts=key_parts;
@@ -805,7 +841,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->base.margin_key_file_length=(share->base.max_key_file_length -
(keys ? MARIA_INDEX_BLOCK_MARGIN *
share->block_size * keys : 0));
- share->block_size= share->base.block_size;
my_free(disk_cache);
_ma_setup_functions(share);
if ((*share->once_init)(share, info.dfile.file))
@@ -909,6 +944,19 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->lock.start_trans= _ma_block_start_trans_no_versioning;
}
}
+#ifdef SAFE_MUTEX
+ if (share->data_file_type == BLOCK_RECORD)
+ {
+ /*
+ We must have internal_lock before bitmap_lock because we call
+ _ma_flush_table_files() with internal_lock locked.
+ */
+ mysql_mutex_lock(&share->intern_lock);
+ mysql_mutex_lock(&share->bitmap.bitmap_lock);
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
+ mysql_mutex_unlock(&share->intern_lock);
+ }
+#endif
/*
Memory mapping can only be requested after initializing intern_lock.
*/
@@ -933,6 +981,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
share->state.changed));
mysql_mutex_unlock(&THR_LOCK_maria);
+
+ m_info->open_flags= open_flags;
DBUG_RETURN(m_info);
err:
@@ -1074,6 +1124,20 @@ void _ma_setup_functions(register MARIA_SHARE *share)
else
share->calc_checksum= _ma_checksum;
break;
+ case NO_RECORD:
+ share->read_record= _ma_read_no_record;
+ share->scan= _ma_read_rnd_no_record;
+ share->delete_record= _ma_delete_no_record;
+ share->update_record= _ma_update_no_record;
+ share->write_record= _ma_write_no_record;
+ share->recpos_to_keypos= _ma_no_keypos_to_recpos;
+ share->keypos_to_recpos= _ma_no_keypos_to_recpos;
+
+ /* Abort if following functions are called */
+ share->compare_record= 0;
+ share->compare_unique= 0;
+ share->calc_checksum= 0;
+ break;
case BLOCK_RECORD:
share->once_init= _ma_once_init_block_record;
share->once_end= _ma_once_end_block_record;
@@ -1244,7 +1308,8 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
res= _ma_state_info_write_sub(share->kfile.file, &share->state, pWrite);
if (pWrite & MA_STATE_INFO_WRITE_LOCK)
mysql_mutex_unlock(&share->intern_lock);
- share->changed= 0;
+ /* If open_count != 0 we have to write the state again at close */
+ share->changed= share->state.open_count != 0;
return res;
}
@@ -1419,7 +1484,7 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state)
uint _ma_state_info_read_dsk(File file __attribute__((unused)),
MARIA_STATE_INFO *state __attribute__((unused)))
{
-#ifdef EXTERNAL_LOCKING
+#ifdef MARIA_EXTERNAL_LOCKING
uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
/* trick to detect transactional tables */
@@ -1556,7 +1621,6 @@ uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef)
keydef->keylength = mi_uint2korr(ptr); ptr+= 2;
keydef->minlength = mi_uint2korr(ptr); ptr+= 2;
keydef->maxlength = mi_uint2korr(ptr); ptr+= 2;
- keydef->underflow_block_length=keydef->block_length/3;
keydef->version = 0; /* Not saved */
keydef->parser = &ft_default_parser;
keydef->ftkey_nr = 0;
@@ -1874,7 +1938,7 @@ int maria_enable_indexes(MARIA_HA *info)
DBUG_PRINT("error", ("data_file_length: %lu key_file_length: %lu",
(ulong) share->state.state.data_file_length,
(ulong) share->state.state.key_file_length));
- maria_print_error(info->s, HA_ERR_CRASHED);
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
error= HA_ERR_CRASHED;
}
else
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
index ed4ecd0258d..025787f4a10 100644
--- a/storage/maria/ma_packrec.c
+++ b/storage/maria/ma_packrec.c
@@ -193,7 +193,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
/* Only the first three bytes of magic number are independent of version. */
if (memcmp(header, maria_pack_file_magic, 3))
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
goto err0;
}
share->pack.version= header[3]; /* fourth uchar of magic number */
@@ -330,7 +330,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
DBUG_RETURN(0);
err3:
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
err2:
my_free(share->decode_tables);
err1:
@@ -759,7 +759,7 @@ int _ma_read_pack_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos)
DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf,
info->rec_buff, block_info.rec_len));
panic:
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD);
err:
DBUG_RETURN(my_errno);
}
@@ -794,7 +794,8 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
DBUG_RETURN(0);
info->update&= ~HA_STATE_AKTIV;
- DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
} /* _ma_pack_rec_unpack */
@@ -1358,7 +1359,7 @@ int _ma_read_rnd_pack_record(MARIA_HA *info,
file= info->dfile.file;
if (info->opt_flag & READ_CACHE_USED)
{
- if (_ma_read_cache(&info->rec_cache, block_info.header,
+ if (_ma_read_cache(info, &info->rec_cache, block_info.header,
filepos, share->pack.ref_length,
skip_deleted_blocks ? READING_NEXT : 0))
goto err;
@@ -1371,14 +1372,14 @@ int _ma_read_rnd_pack_record(MARIA_HA *info,
#ifndef DBUG_OFF
if (block_info.rec_len > share->max_pack_length)
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
goto err;
}
#endif
if (info->opt_flag & READ_CACHE_USED)
{
- if (_ma_read_cache(&info->rec_cache, info->rec_buff,
+ if (_ma_read_cache(info, &info->rec_cache, info->rec_buff,
block_info.filepos, block_info.rec_len,
skip_deleted_blocks ? READING_NEXT : 0))
goto err;
@@ -1644,7 +1645,7 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info,
#ifndef DBUG_OFF
if (block_info.rec_len > info->s->max_pack_length)
{
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
goto err;
}
#endif
diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c
index 6cca2fed559..ed62a80e4f7 100644
--- a/storage/maria/ma_page.c
+++ b/storage/maria/ma_page.c
@@ -127,8 +127,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info,
{
DBUG_PRINT("error",("Got errno: %d from pagecache_read",my_errno));
info->last_keypage=HA_OFFSET_ERROR;
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
info->last_keypage= pos;
@@ -159,8 +158,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info,
_ma_get_keynr(share, tmp)));
DBUG_DUMP("page", tmp, page_size);
info->last_keypage = HA_OFFSET_ERROR;
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno= HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
}
@@ -195,6 +193,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock,
nod_flag= _ma_test_if_nod(share, buff);
DBUG_ASSERT(page->size == page_length);
+ DBUG_ASSERT(page->size <= share->max_index_block_size);
DBUG_ASSERT(page->flag == _ma_get_keypage_flag(share, buff));
if (page->pos < share->base.keystart ||
@@ -552,8 +551,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from)
{
DBUG_PRINT("error",("Couldn't find last key: page_pos: 0x%lx",
(long) page));
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
if (key_has_transid(page-1))
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index 02d98cf1e66..2618d6a5b50 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2008 MySQL AB
+/* Copyright (C) 2000-2008 MySQL AB, 2008-2011 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -62,8 +62,8 @@
accessing it;
to set this number equal to <N> add
#define MAX_THREADS <N>
- - to substitute calls of pthread_cond_wait for calls of
- pthread_cond_timedwait (wait with timeout set up);
+ - to substitute calls of mysql_cond_wait for calls of
+ mysql_cond_timedwait (wait with timeout set up);
this setting should be used only when you want to trap a deadlock
situation, which theoretically should not happen;
to set timeout equal to <T> seconds add
@@ -97,9 +97,9 @@
#define PCBLOCK_INFO(B) \
DBUG_PRINT("info", \
- ("block: 0x%lx fd: %lu page: %lu s: %0x hshL: " \
- " 0x%lx req: %u/%u wrlocks: %u rdlocks %u " \
- "rdlocks_q: %u pins: %u status: %u type: %s", \
+ ("block: 0x%lx fd: %lu page: %lu status: 0x%x " \
+ "hshL: 0x%lx requests: %u/%u wrlocks: %u rdlocks: %u " \
+ "rdlocks_q: %u pins: %u type: %s", \
(ulong)(B), \
(ulong)((B)->hash_link ? \
(B)->hash_link->file.file : \
@@ -107,14 +107,14 @@
(ulong)((B)->hash_link ? \
(B)->hash_link->pageno : \
0), \
- (B)->status, \
+ (uint) (B)->status, \
(ulong)(B)->hash_link, \
(uint) (B)->requests, \
(uint)((B)->hash_link ? \
(B)->hash_link->requests : \
0), \
- block->wlocks, block->rlocks, block->rlocks_queue, \
- (uint)(B)->pins, (uint)(B)->status, \
+ (B)->wlocks, (B)->rlocks, (B)->rlocks_queue, \
+ (uint)(B)->pins, \
page_cache_page_type_str[(B)->type]))
/* TODO: put it to my_static.c */
@@ -129,6 +129,8 @@ my_bool my_disable_flush_pagecache_blocks= 0;
#define COND_FOR_WRLOCK 2 /* queue of write lock */
#define COND_SIZE 3 /* number of COND_* queues */
+typedef mysql_cond_t KEYCACHE_CONDVAR;
+
/* descriptor of the page in the page cache block buffer */
struct st_pagecache_page
{
@@ -151,11 +153,27 @@ struct st_pagecache_hash_link
/* simple states of a block */
#define PCBLOCK_ERROR 1 /* an error occurred when performing disk i/o */
#define PCBLOCK_READ 2 /* the is page in the block buffer */
-#define PCBLOCK_IN_SWITCH 4 /* block is preparing to read new page */
-#define PCBLOCK_REASSIGNED 8 /* block does not accept requests for old page */
+
+/*
+ A thread is reading the data to the page.
+ If the page contained old changed data, it will be written out with
+ this state set on the block.
+ The page is not yet ready to be used for reading.
+*/
+#define PCBLOCK_IN_SWITCH 4
+/*
+ Block does not accept new requests for old page that would cause
+ the page to be pinned or written to.
+ (Reads that copy the block can still continue).
+ This state happens when another thread is waiting for readers to finish
+ to read data to the block (after the block, if it was changed, has been
+ flushed out to disk).
+*/
+#define PCBLOCK_REASSIGNED 8
#define PCBLOCK_IN_FLUSH 16 /* block is in flush operation */
#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */
#define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */
+#define PCBLOCK_DEL_WRITE 128 /* should be written on delete */
/* page status, returned by find_block */
#define PAGE_READ 0
@@ -303,7 +321,7 @@ struct st_pagecache_block_link
PAGECACHE_PIN_INFO *pin_list;
PAGECACHE_LOCK_INFO *lock_list;
#endif
- mysql_cond_t *condvar; /* condition variable for 'no readers' event */
+ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
uchar *buffer; /* buffer for the block page */
pthread_t write_locker;
@@ -476,6 +494,7 @@ error:
#define FLUSH_CACHE 2000 /* sort this many blocks at once */
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
+static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link);
#ifndef DBUG_OFF
static void test_key_cache(PAGECACHE *pagecache,
const char *where, my_bool lock);
@@ -513,6 +532,7 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...));
#endif /* defined(PAGECACHE_DEBUG_LOG) */
#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
+#define KEYCACHE_PRINT(l, m) KEYCACHE_DBUG_PRINT(l,m)
#define KEYCACHE_DBUG_PRINT(l, m) \
{ if (pagecache_debug_log) \
fprintf(pagecache_debug_log, "%s: ", l); \
@@ -521,8 +541,9 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...));
#define KEYCACHE_DBUG_ASSERT(a) \
{ if (! (a) && pagecache_debug_log) \
fclose(pagecache_debug_log); \
- assert(a); }
+ DBUG_ASSERT(a); }
#else
+#define KEYCACHE_PRINT(l, m)
#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
@@ -540,6 +561,7 @@ static long pagecache_thread_id;
#define KEYCACHE_THREAD_TRACE_END(l) \
KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
#else
+#define KEYCACHE_PRINT(l,m)
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
#define KEYCACHE_THREAD_TRACE_END(l)
#define KEYCACHE_THREAD_TRACE(l)
@@ -552,16 +574,16 @@ static long pagecache_thread_id;
sizeof(PAGECACHE_HASH_LINK)))
#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
-static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
- pthread_mutex_t *mutex);
+static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
+ mysql_mutex_t *mutex);
#else
-#define pagecache_pthread_cond_wait pthread_cond_wait
+#define pagecache_pthread_cond_wait mysql_cond_wait
#endif
#if defined(PAGECACHE_DEBUG)
-static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex);
-static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex);
-static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
+static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex);
+static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex);
+static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond);
#define pagecache_pthread_mutex_lock(M) \
{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
___pagecache_pthread_mutex_lock(M);}
@@ -572,9 +594,9 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
___pagecache_pthread_cond_signal(M);}
#else
-#define pagecache_pthread_mutex_lock pthread_mutex_lock
-#define pagecache_pthread_mutex_unlock pthread_mutex_unlock
-#define pagecache_pthread_cond_signal pthread_cond_signal
+#define pagecache_pthread_mutex_lock mysql_mutex_lock
+#define pagecache_pthread_mutex_unlock mysql_mutex_unlock
+#define pagecache_pthread_cond_signal mysql_cond_signal
#endif /* defined(PAGECACHE_DEBUG) */
extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
@@ -606,6 +628,26 @@ static my_bool pagecache_fwrite(PAGECACHE *pagecache,
DBUG_ENTER("pagecache_fwrite");
DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
+#ifdef EXTRA_DEBUG_BITMAP
+ /*
+ This code is very good when debugging changes in bitmaps or dirty lists
+ The above define should be defined for all Aria files if you want to
+ debug either of the above issues.
+ */
+
+ if (pagecache->extra_debug)
+ {
+ char buff[80];
+ uint len= my_sprintf(buff,
+ (buff, "fwrite: fd: %d id: %u page: %lu",
+ filedesc->file,
+ _ma_file_callback_to_id(filedesc->callback_data),
+ (ulong) pageno));
+ (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+ (uchar*) buff, len);
+ }
+#endif
+
/* Todo: Integrate this with write_callback so we have only one callback */
if ((*filedesc->flush_log_callback)(buffer, pageno, filedesc->callback_data))
DBUG_RETURN(1);
@@ -723,9 +765,9 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
if (mysql_mutex_init(key_PAGECACHE_cache_lock,
&pagecache->cache_lock, MY_MUTEX_INIT_FAST) ||
my_hash_init(&pagecache->files_in_flush, &my_charset_bin, 32,
- offsetof(struct st_file_in_flush, file),
- sizeof(((struct st_file_in_flush *)NULL)->file),
- NULL, NULL, 0))
+ offsetof(struct st_file_in_flush, file),
+ sizeof(((struct st_file_in_flush *)NULL)->file),
+ NULL, NULL, 0))
goto err;
pagecache->inited= 1;
pagecache->in_init= 0;
@@ -752,6 +794,8 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
{
if (blocks < 8)
{
+ my_message(ENOMEM, "Not enough memory to allocate 8 pagecache pages",
+ MYF(0));
my_errno= ENOMEM;
goto err;
}
@@ -953,7 +997,7 @@ ulong resize_pagecache(PAGECACHE *pagecache,
DBUG_RETURN(pagecache->disk_blocks);
}
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
wqueue= &pagecache->resize_queue;
thread= my_thread_var;
@@ -961,7 +1005,7 @@ ulong resize_pagecache(PAGECACHE *pagecache,
while (wqueue->last_thread->next != thread)
{
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
}
pagecache->resize_in_flush= 1;
@@ -977,9 +1021,8 @@ ulong resize_pagecache(PAGECACHE *pagecache,
pagecache->can_be_used= 0;
while (pagecache->cnt_for_resize_op)
{
- KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait", ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
}
end_pagecache(pagecache, 0); /* Don't free mutex */
@@ -993,11 +1036,12 @@ finish:
/* Signal for the next resize request to proceeed if any */
if (wqueue->last_thread)
{
- KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
- ("thread %ld", wqueue->last_thread->next->id));
- mysql_cond_signal(&wqueue->last_thread->next->suspend);
+ DBUG_PRINT("signal",
+ ("thread %s %ld", wqueue->last_thread->next->name,
+ wqueue->last_thread->next->id));
+ pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
}
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_RETURN(blocks);
}
#endif /* 0 */
@@ -1008,6 +1052,7 @@ finish:
*/
static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
{
+ mysql_mutex_assert_owner(&pagecache->cache_lock);
pagecache->cnt_for_resize_op++;
}
@@ -1016,15 +1061,18 @@ static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
Decrement counter blocking resize key cache operation;
Signal the operation to proceed when counter becomes equal zero
*/
+
static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
{
struct st_my_thread_var *last_thread;
+ mysql_mutex_assert_owner(&pagecache->cache_lock);
if (!--pagecache->cnt_for_resize_op &&
(last_thread= pagecache->resize_queue.last_thread))
{
- KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal",
- ("thread %ld", last_thread->next->id));
- mysql_cond_signal(&last_thread->next->suspend);
+ DBUG_PRINT("signal",
+ ("thread %s %ld", last_thread->next->name,
+ last_thread->next->id));
+ pagecache_pthread_cond_signal(&last_thread->next->suspend);
}
}
@@ -1051,16 +1099,47 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
{
DBUG_ENTER("change_pagecache_param");
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (division_limit)
pagecache->min_warm_blocks= (pagecache->disk_blocks *
division_limit / 100 + 1);
if (age_threshold)
pagecache->age_threshold= (pagecache->disk_blocks *
age_threshold / 100);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Check that pagecache was used and cleaned up properly.
+*/
+
+#ifndef DBUG_OFF
+void check_pagecache_is_cleaned_up(PAGECACHE *pagecache)
+{
+ DBUG_ENTER("check_pagecache_is_cleaned_up");
+ /*
+ Ensure we called inc_counter_for_resize_op and dec_counter_for_resize_op
+ the same number of times. (If not, a resize() could never happen.)
+ */
+ DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
+
+ if (pagecache->disk_blocks > 0)
+ {
+ if (pagecache->block_mem)
+ {
+ uint i;
+ for (i=0 ; i < pagecache->blocks_used ; i++)
+ {
+ DBUG_ASSERT(pagecache->block_root[i].status == 0);
+ DBUG_ASSERT(pagecache->block_root[i].type == PAGECACHE_EMPTY_PAGE);
+ }
+ }
+ }
DBUG_VOID_RETURN;
}
+#endif
/*
@@ -1085,6 +1164,10 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
if (pagecache->disk_blocks > 0)
{
+#ifndef DBUG_OFF
+ check_pagecache_is_cleaned_up(pagecache);
+#endif
+
if (pagecache->block_mem)
{
my_large_free(pagecache->block_mem);
@@ -1157,7 +1240,7 @@ static void link_to_file_list(PAGECACHE *pagecache,
link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
if (block->status & PCBLOCK_CHANGED)
{
- block->status&= ~PCBLOCK_CHANGED;
+ block->status&= ~(PCBLOCK_CHANGED | PCBLOCK_DEL_WRITE);
block->rec_lsn= LSN_MAX;
pagecache->blocks_changed--;
pagecache->global_blocks_changed--;
@@ -1223,6 +1306,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
{
PAGECACHE_BLOCK_LINK *ins;
PAGECACHE_BLOCK_LINK **ptr_ins;
+ DBUG_ENTER("link_block");
PCBLOCK_INFO(block);
KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
@@ -1236,6 +1320,11 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
PAGECACHE_HASH_LINK *hash_link=
(PAGECACHE_HASH_LINK *) first_thread->opt_info;
struct st_my_thread_var *thread;
+
+ DBUG_ASSERT(block->requests + block->wlocks + block->rlocks +
+ block->pins == 0);
+ DBUG_ASSERT(block->next_used == NULL);
+
do
{
thread= next_thread;
@@ -1246,22 +1335,25 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
*/
if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link)
{
- KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id));
- mysql_cond_signal(&thread->suspend);
+ DBUG_PRINT("signal", ("thread: %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
block->requests++;
}
}
while (thread != last_thread);
hash_link->block= block;
- KEYCACHE_THREAD_TRACE("link_block: after signaling");
+ /* Ensure that no other thread tries to use this block */
+ block->status|= PCBLOCK_REASSIGNED;
+
+ DBUG_PRINT("signal", ("after signal"));
#if defined(PAGECACHE_DEBUG)
KEYCACHE_DBUG_PRINT("link_block",
("linked,unlinked block: %u status: %x #requests: %u #available: %u",
PCBLOCK_NUMBER(pagecache, block), block->status,
block->requests, pagecache->blocks_available));
#endif
- return;
+ DBUG_VOID_RETURN;
}
ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
ins= *ptr_ins;
@@ -1290,6 +1382,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <=
pagecache->blocks_used);
#endif
+ DBUG_VOID_RETURN;
}
@@ -1298,7 +1391,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
SYNOPSIS
unlink_block()
- pagecache pointer to a page cache data structure
+ pagecache pointer to a page cache data structure
block pointer to the block to unlink from the LRU chain
RETURN VALUE
@@ -1311,7 +1404,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
DBUG_ENTER("unlink_block");
- DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
+ DBUG_PRINT("pagecache", ("unlink 0x%lx", (ulong)block));
DBUG_ASSERT(block->next_used != NULL);
if (block->next_used == block)
{
@@ -1335,7 +1428,7 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
#if defined(PAGECACHE_DEBUG)
KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
pagecache->blocks_available--;
- KEYCACHE_DBUG_PRINT("unlink_block",
+ KEYCACHE_DBUG_PRINT("pagecache",
("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u",
(ulong)block, PCBLOCK_NUMBER(pagecache, block),
block->status,
@@ -1363,9 +1456,6 @@ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
int count)
{
DBUG_ENTER("reg_requests");
- DBUG_PRINT("enter", ("block: 0x%lx (%u) status: %x reqs: %u",
- (ulong)block, PCBLOCK_NUMBER(pagecache, block),
- block->status, block->requests));
PCBLOCK_INFO(block);
if (! block->requests)
/* First request for the block unlinks it */
@@ -1408,7 +1498,7 @@ static void unreg_request(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block, int at_end)
{
DBUG_ENTER("unreg_request");
- DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x reqs: %u",
+ DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x requests: %u",
(ulong)block, PCBLOCK_NUMBER(pagecache, block),
block->status, block->requests));
PCBLOCK_INFO(block);
@@ -1461,7 +1551,7 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
PCBLOCK_INFO(block);
DBUG_ASSERT(block->hash_link->requests > 0);
if (! --block->hash_link->requests && block->condvar)
- mysql_cond_signal(block->condvar);
+ pagecache_pthread_cond_signal(block->condvar);
DBUG_VOID_RETURN;
}
@@ -1473,22 +1563,51 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
static inline void wait_for_readers(PAGECACHE *pagecache
__attribute__((unused)),
- PAGECACHE_BLOCK_LINK *block)
+ PAGECACHE_BLOCK_LINK *block
+ __attribute__((unused)))
{
struct st_my_thread_var *thread= my_thread_var;
+ DBUG_ASSERT(block->condvar == NULL);
while (block->hash_link->requests)
{
- KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
- ("suspend thread: %ld block: %u",
- thread->id, PCBLOCK_NUMBER(pagecache, block)));
+ DBUG_ENTER("wait_for_readers");
+ DBUG_PRINT("wait",
+ ("suspend thread: %s %ld block: %u",
+ thread->name, thread->id,
+ PCBLOCK_NUMBER(pagecache, block)));
block->condvar= &thread->suspend;
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
block->condvar= NULL;
+ DBUG_VOID_RETURN;
}
}
/*
+ Wait until the flush of the page is done.
+*/
+
+static void wait_for_flush(PAGECACHE *pagecache
+ __attribute__((unused)),
+ PAGECACHE_BLOCK_LINK *block
+ __attribute__((unused)))
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_ENTER("wait_for_flush");
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ do
+ {
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
Add a hash link to a bucket in the hash_table
*/
@@ -1509,10 +1628,14 @@ static inline void link_hash(PAGECACHE_HASH_LINK **start,
static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
{
- KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
- (uint) hash_link->file.file, (ulong) hash_link->pageno,
- hash_link->requests));
- KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
+ DBUG_ENTER("unlink_hash");
+ DBUG_PRINT("enter", ("hash_link: %p fd: %u pos: %lu requests: %u",
+ hash_link, (uint) hash_link->file.file,
+ (ulong) hash_link->pageno,
+ hash_link->requests));
+ DBUG_ASSERT(hash_link->requests == 0);
+ DBUG_ASSERT(!hash_link->block || hash_link->block->pins == 0);
+
if ((*hash_link->prev= hash_link->next))
hash_link->next->prev= hash_link->prev;
hash_link->block= NULL;
@@ -1542,20 +1665,29 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
if (page->file.file == hash_link->file.file &&
page->pageno == hash_link->pageno)
{
- KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
- mysql_cond_signal(&thread->suspend);
+ DBUG_PRINT("signal", ("thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
}
}
while (thread != last_thread);
+
+ /*
+ Add this to the hash, so that the waiting threads can find it
+ when they retry the call to get_hash_link(). This entry is special
+ in that it has no associated block.
+ */
link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
hash_link->file,
hash_link->pageno)],
hash_link);
- return;
+ DBUG_VOID_RETURN;
}
+
+ /* Add hash to free hash list */
hash_link->next= pagecache->free_hash_list;
pagecache->free_hash_list= hash_link;
+ DBUG_VOID_RETURN;
}
@@ -1585,9 +1717,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
int cnt;
#endif
DBUG_ENTER("get_present_hash_link");
-
- KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
- (uint) file->file, (ulong) pageno));
+ DBUG_PRINT("enter", ("fd: %u pos: %lu", (uint) file->file, (ulong) pageno));
/*
Find the bucket in the hash table for the pair (file, pageno);
@@ -1622,6 +1752,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
}
if (hash_link)
{
+ DBUG_PRINT("exit", ("hash_link: %p", hash_link));
/* Register the request for the page */
hash_link->requests++;
}
@@ -1643,9 +1774,7 @@ static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
{
reg1 PAGECACHE_HASH_LINK *hash_link;
PAGECACHE_HASH_LINK **start;
-
- KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
- (uint) file->file, (ulong) pageno));
+ DBUG_ENTER("get_hash_link");
restart:
/* try to find the page in the cache */
@@ -1656,6 +1785,9 @@ restart:
/* There is no hash link in the hash table for the pair (file, pageno) */
if (pagecache->free_hash_list)
{
+ DBUG_PRINT("info", ("free_hash_list: %p free_hash_list->next: %p",
+ pagecache->free_hash_list,
+ pagecache->free_hash_list->next));
hash_link= pagecache->free_hash_list;
pagecache->free_hash_list= hash_link->next;
}
@@ -1668,16 +1800,16 @@ restart:
/* Wait for a free hash link */
struct st_my_thread_var *thread= my_thread_var;
PAGECACHE_PAGE page;
- KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
page.file= *file;
page.pageno= pageno;
thread->opt_info= (void *) &page;
wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
- KEYCACHE_DBUG_PRINT("get_hash_link: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
thread->opt_info= NULL;
- DBUG_PRINT("info", ("restarting..."));
+ DBUG_PRINT("thread", ("restarting..."));
goto restart;
}
hash_link->file= *file;
@@ -1686,9 +1818,20 @@ restart:
link_hash(start, hash_link);
/* Register the request for the page */
hash_link->requests++;
+ DBUG_ASSERT(hash_link->block == 0);
+ DBUG_ASSERT(hash_link->requests == 1);
}
-
- return hash_link;
+ else
+ {
+ /*
+ We have to copy the flush_log callback, as it may change if the table
+ goes from non_transactional to transactional during recovery
+ */
+ hash_link->file.flush_log_callback= file->flush_log_callback;
+ }
+ DBUG_PRINT("exit", ("hash_link: %p block: %p", hash_link,
+ hash_link->block));
+ DBUG_RETURN(hash_link);
}
@@ -1705,7 +1848,12 @@ restart:
pageno number of the page in the file
init_hits_left how initialize the block counter for the page
wrmode <-> get for writing
- reg_req Register request to thye page
+ block_is_copied 1 if block will be copied from page cache under
+ the pagelock mutex.
+ reg_req Register request to the page. Normally all pages
+ should be registered; The only time it's ok to
+ not register a page is when the page is already
+ pinned (and thus registered) by the same thread.
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
RETURN VALUE
@@ -1734,6 +1882,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
pgcache_page_no_t pageno,
int init_hits_left,
my_bool wrmode,
+ my_bool block_is_copied,
my_bool reg_req,
int *page_st)
{
@@ -1741,14 +1890,12 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block;
int error= 0;
int page_status;
-
DBUG_ENTER("find_block");
- KEYCACHE_THREAD_TRACE("find_block:begin");
- DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
- file->file, (ulong) pageno, wrmode));
- KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
- file->file, (ulong) pageno,
- wrmode));
+ DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d block_is_copied: %d",
+ file->file, (ulong) pageno, wrmode, block_is_copied));
+ KEYCACHE_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
+ file->file, (ulong) pageno,
+ wrmode));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_pagecache",
test_key_cache(pagecache, "start of find_block", 0););
@@ -1796,18 +1943,10 @@ restart:
/* Wait until the page is flushed on disk */
DBUG_ASSERT(hash_link->requests > 0);
hash_link->requests--;
- {
- struct st_my_thread_var *thread= my_thread_var;
- wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
- do
- {
- KEYCACHE_DBUG_PRINT("find_block: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
- }
- while(thread->next);
- }
+ wait_for_flush(pagecache, block);
+
/* Invalidate page in the block if it has not been done yet */
+ DBUG_ASSERT(block->status); /* Should always be true */
if (block->status)
free_block(pagecache, block);
return 0;
@@ -1827,7 +1966,7 @@ restart:
Only reading requests can proceed until the old dirty page is flushed,
all others are to be suspended, then resubmitted
*/
- if (!wrmode && !(block->status & PCBLOCK_REASSIGNED))
+ if (!wrmode && block_is_copied && !(block->status & PCBLOCK_REASSIGNED))
{
if (reg_req)
reg_requests(pagecache, block, 1);
@@ -1845,9 +1984,10 @@ restart:
/* Wait until the request can be resubmitted */
do
{
- KEYCACHE_DBUG_PRINT("find_block: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
}
while(thread->next);
}
@@ -1892,6 +2032,7 @@ restart:
#ifndef DBUG_OFF
block->type= PAGECACHE_EMPTY_PAGE;
#endif
+ DBUG_ASSERT(reg_req);
block->requests= 1;
block->temperature= PCBLOCK_COLD;
block->hits_left= init_hits_left;
@@ -1901,59 +2042,75 @@ restart:
block->hash_link= hash_link;
hash_link->block= block;
page_status= PAGE_TO_BE_READ;
- DBUG_PRINT("info", ("page to be read set for page 0x%lx",
- (ulong)block));
- KEYCACHE_DBUG_PRINT("find_block",
- ("got free or never used block %u",
- PCBLOCK_NUMBER(pagecache, block)));
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx (%u)",
+ (ulong) block, PCBLOCK_NUMBER(pagecache, block)));
+ KEYCACHE_PRINT("find_block",
+ ("got free or never used block %u",
+ PCBLOCK_NUMBER(pagecache, block)));
}
else
{
/* There are no never used blocks, use a block from the LRU chain */
/*
- Wait until a new block is added to the LRU chain;
- several threads might wait here for the same page,
- all of them must get the same block
+ Ensure that we are going to register the block.
+ (This should be true as a new block could not have been
+ pinned by caller).
*/
+ DBUG_ASSERT(reg_req);
if (! pagecache->used_last)
{
+ /*
+ Wait until a new block is added to the LRU chain;
+ several threads might wait here for the same page,
+ all of them must get the same block.
+
+ The block is given to us by the next thread executing
+ link_block().
+ */
+
struct st_my_thread_var *thread= my_thread_var;
thread->opt_info= (void *) hash_link;
wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
do
{
- KEYCACHE_DBUG_PRINT("find_block: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
}
while (thread->next);
thread->opt_info= NULL;
+ block= hash_link->block;
+ /* Ensure that the block is registered */
+ DBUG_ASSERT(block->requests >= 1);
}
- block= hash_link->block;
- if (! block)
+ else
{
/*
Take the first block from the LRU chain
unlinking it from the chain
*/
block= pagecache->used_last->next_used;
- block->hits_left= init_hits_left;
- block->last_hit_time= 0;
if (reg_req)
reg_requests(pagecache, block, 1);
hash_link->block= block;
+ DBUG_ASSERT(block->requests == 1);
}
+
PCBLOCK_INFO(block);
- DBUG_ASSERT(block->wlocks == 0);
- DBUG_ASSERT(block->rlocks == 0);
- DBUG_ASSERT(block->rlocks_queue == 0);
- DBUG_ASSERT(block->pins == 0);
+
+ DBUG_ASSERT(block->hash_link == hash_link ||
+ !(block->status & PCBLOCK_IN_SWITCH));
if (block->hash_link != hash_link &&
! (block->status & PCBLOCK_IN_SWITCH) )
{
+ /* If another thread is flushing the block, wait for it. */
+ if (block->status & PCBLOCK_IN_FLUSH)
+ wait_for_flush(pagecache, block);
+
/* this is a primary request for a new page */
DBUG_ASSERT(block->wlocks == 0);
DBUG_ASSERT(block->rlocks == 0);
@@ -1971,19 +2128,19 @@ restart:
KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
- mysql_mutex_unlock(&pagecache->cache_lock);
/*
The call is thread safe because only the current
thread might change the block->hash_link value
*/
DBUG_ASSERT(block->pins == 0);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
error= pagecache_fwrite(pagecache,
&block->hash_link->file,
block->buffer,
block->hash_link->pageno,
block->type,
pagecache->readwrite_flags);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
pagecache->global_cache_write++;
}
@@ -2000,21 +2157,25 @@ restart:
/* Remove the hash link for this page from the hash table */
unlink_hash(pagecache, block->hash_link);
+
/* All pending requests for this page must be resubmitted */
if (block->wqueue[COND_FOR_SAVED].last_thread)
wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
}
link_to_file_list(pagecache, block, file,
(my_bool)(block->hash_link ? 1 : 0));
+
+ block->hash_link= hash_link;
PCBLOCK_INFO(block);
+ block->hits_left= init_hits_left;
+ block->last_hit_time= 0;
block->status= error ? PCBLOCK_ERROR : 0;
- block->error= (int16) my_errno;
+ block->error= error ? (int16) my_errno : 0;
#ifndef DBUG_OFF
block->type= PAGECACHE_EMPTY_PAGE;
if (error)
my_debug_put_break_here();
#endif
- block->hash_link= hash_link;
page_status= PAGE_TO_BE_READ;
DBUG_PRINT("info", ("page to be read set for page 0x%lx",
(ulong)block));
@@ -2037,12 +2198,24 @@ restart:
}
else
{
+ /*
+ The block was found in the cache. It's either an already read
+ block or a block waiting to be read by another thread.
+ */
if (reg_req)
reg_requests(pagecache, block, 1);
KEYCACHE_DBUG_PRINT("find_block",
("block->hash_link: %p hash_link: %p "
"block->status: %u", block->hash_link,
hash_link, block->status ));
+ /*
+ block->hash_link != hash_link can only happen when
+ the block is in PCBLOCK_IN_SWITCH above (is flushed out
+ to be replaced by another block). The SWITCH code will change
+ block->hash_link to point to hash_link.
+ */
+ KEYCACHE_DBUG_ASSERT(block->hash_link == hash_link ||
+ block->status & PCBLOCK_IN_SWITCH);
page_status= (((block->hash_link == hash_link) &&
(block->status & PCBLOCK_READ)) ?
PAGE_READ : PAGE_WAIT_TO_BE_READ);
@@ -2055,11 +2228,11 @@ restart:
("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u",
(ulong) block, (uint) file->file,
(ulong) pageno, block->status, (uint) page_status));
- KEYCACHE_DBUG_PRINT("find_block",
- ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
- (ulong) block,
- file->file, (ulong) pageno, block->status,
- page_status));
+ KEYCACHE_PRINT("find_block",
+ ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
+ (ulong) block,
+ file->file, (ulong) pageno, block->status,
+ page_status));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_pagecache",
@@ -2175,21 +2348,26 @@ static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
dec_counter_for_resize_op(pagecache);
do
{
- KEYCACHE_DBUG_PRINT("get_wrlock: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
}
while(thread->next);
+ inc_counter_for_resize_op(pagecache);
PCBLOCK_INFO(block);
if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) ||
+ !block->hash_link ||
file.file != block->hash_link->file.file ||
pageno != block->hash_link->pageno)
{
DBUG_PRINT("info", ("the block 0x%lx changed => need retry "
"status: %x files %d != %d or pages %lu != %lu",
(ulong)block, block->status,
- file.file, block->hash_link->file.file,
- (ulong) pageno, (ulong) block->hash_link->pageno));
+ file.file,
+ block->hash_link ? block->hash_link->file.file : -1,
+ (ulong) pageno,
+ (ulong) (block->hash_link ? block->hash_link->pageno : 0)));
DBUG_RETURN(1);
}
DBUG_RETURN(0);
@@ -2396,25 +2574,17 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache,
my_bool any)
{
DBUG_ENTER("make_lock_and_pin");
+ DBUG_PRINT("enter", ("block: 0x%lx (%u) lock: %s pin: %s any %d",
+ (ulong)block, PCBLOCK_NUMBER(pagecache, block),
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin], (int)any));
+ PCBLOCK_INFO(block);
- DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block));
-#ifndef DBUG_OFF
- if (block)
- {
- DBUG_PRINT("enter", ("block: 0x%lx (%u) wrlocks: %u rdlocks: %u "
- "rdlocks_q: %u pins: %u lock: %s pin: %s any %d",
- (ulong)block, PCBLOCK_NUMBER(pagecache, block),
- block->wlocks, block->rlocks, block->rlocks_queue,
- block->pins,
- page_cache_page_lock_str[lock],
- page_cache_page_pin_str[pin], (int)any));
- PCBLOCK_INFO(block);
- }
-#endif
-
+ DBUG_ASSERT(block);
DBUG_ASSERT(!any ||
((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) &&
(pin == PAGECACHE_UNPIN)));
+ DBUG_ASSERT(block->hash_link->block == block);
switch (lock) {
case PAGECACHE_LOCK_WRITE: /* free -> write */
@@ -2479,17 +2649,13 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache,
DBUG_ASSERT(0); /* Never should happened */
}
-#ifndef DBUG_OFF
- if (block)
- PCBLOCK_INFO(block);
-#endif
+ PCBLOCK_INFO(block);
DBUG_RETURN(0);
retry:
DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
PCBLOCK_INFO(block);
DBUG_ASSERT(block->hash_link->requests > 0);
block->hash_link->requests--;
- PCBLOCK_INFO(block);
DBUG_RETURN(1);
}
@@ -2521,7 +2687,6 @@ static void read_block(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block,
my_bool primary)
{
-
DBUG_ENTER("read_block");
DBUG_PRINT("enter", ("read block: 0x%lx primary: %d",
(ulong)block, primary));
@@ -2534,19 +2699,20 @@ static void read_block(PAGECACHE *pagecache,
*/
pagecache->global_cache_read++;
- /* Page is not in buffer yet, is to be read from disk */
- mysql_mutex_unlock(&pagecache->cache_lock);
/*
+ Page is not in buffer yet, is to be read from disk
Here other threads may step in and register as secondary readers.
They will register in block->wqueue[COND_FOR_REQUESTED].
*/
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
error= pagecache_fread(pagecache, &block->hash_link->file,
block->buffer,
block->hash_link->pageno,
pagecache->readwrite_flags);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (error)
{
+ DBUG_ASSERT(maria_in_recovery || !maria_assert_if_crashed_table);
block->status|= PCBLOCK_ERROR;
block->error= (int16) my_errno;
my_debug_put_break_here();
@@ -2583,9 +2749,10 @@ static void read_block(PAGECACHE *pagecache,
wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
do
{
- DBUG_PRINT("read_block: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
}
while (thread->next);
DBUG_PRINT("read_block",
@@ -2675,10 +2842,9 @@ void pagecache_unlock(PAGECACHE *pagecache,
page_cache_page_pin_str[pin]));
/* we do not allow any lock/pin increasing here */
DBUG_ASSERT(pin != PAGECACHE_PIN);
- DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
- DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
/*
As soon as we keep lock cache can be used, and we have lock because want
to unlock.
@@ -2687,7 +2853,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
inc_counter_for_resize_op(pagecache);
/* See NOTE for pagecache_unlock about registering requests */
- block= find_block(pagecache, file, pageno, 0, 0,
+ block= find_block(pagecache, file, pageno, 0, 0, 0,
pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
PCBLOCK_INFO(block);
DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
@@ -2738,7 +2904,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_VOID_RETURN;
}
@@ -2767,7 +2933,7 @@ void pagecache_unpin(PAGECACHE *pagecache,
DBUG_ENTER("pagecache_unpin");
DBUG_PRINT("enter", ("fd: %u page: %lu",
(uint) file->file, (ulong) pageno));
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
/*
As soon as we keep lock cache can be used, and we have lock bacause want
aunlock.
@@ -2776,7 +2942,7 @@ void pagecache_unpin(PAGECACHE *pagecache,
inc_counter_for_resize_op(pagecache);
/* See NOTE for pagecache_unlock about registering requests */
- block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
+ block= find_block(pagecache, file, pageno, 0, 0, 0, 0, &page_st);
DBUG_ASSERT(block != 0);
DBUG_ASSERT(page_st == PAGE_READ);
/* we can't unpin such page without unlock */
@@ -2805,7 +2971,7 @@ void pagecache_unpin(PAGECACHE *pagecache,
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_VOID_RETURN;
}
@@ -2856,13 +3022,13 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
lock == PAGECACHE_LOCK_READ_UNLOCK)
{
if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
DBUG_ASSERT(0); /* should not happend */
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_VOID_RETURN;
}
@@ -2931,7 +3097,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_VOID_RETURN;
}
@@ -2960,7 +3126,7 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache,
(uint) block->hash_link->file.file,
(ulong) block->hash_link->pageno));
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
/*
As soon as we keep lock cache can be used, and we have lock because want
unlock.
@@ -2993,7 +3159,7 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache,
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_VOID_RETURN;
}
@@ -3207,10 +3373,10 @@ restart:
uint status;
int page_st;
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (!pagecache->can_be_used)
{
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
goto no_key_cache;
}
@@ -3220,7 +3386,7 @@ restart:
reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
(new_pin == PAGECACHE_PIN));
block= find_block(pagecache, file, pageno, level,
- lock == PAGECACHE_LOCK_WRITE,
+ lock == PAGECACHE_LOCK_WRITE, buff != 0,
reg_request, &page_st);
DBUG_PRINT("info", ("Block type: %s current type %s",
page_cache_page_type_str[block->type],
@@ -3262,7 +3428,7 @@ restart:
*/
if (reg_request)
unreg_request(pagecache, block, 1);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_PRINT("info", ("restarting..."));
goto restart;
}
@@ -3273,8 +3439,7 @@ restart:
buff= block->buffer;
/* possibly we will write here (resolved on unlock) */
if ((lock == PAGECACHE_LOCK_WRITE ||
- lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) &&
- !(block->status & PCBLOCK_CHANGED))
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED))
{
block->status|= PCBLOCK_DIRECT_W;
DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
@@ -3283,10 +3448,10 @@ restart:
}
else
{
- if (!(status & PCBLOCK_ERROR))
+ if (status & PCBLOCK_READ)
{
#if !defined(SERIALIZED_READ_FROM_CACHE)
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
#endif
DBUG_ASSERT((pagecache->block_size & 511) == 0);
@@ -3294,10 +3459,10 @@ restart:
memcpy(buff, block->buffer, pagecache->block_size);
#if !defined(SERIALIZED_READ_FROM_CACHE)
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
#endif
}
- else
+ if (status & PCBLOCK_ERROR)
my_errno= block->error;
}
@@ -3307,7 +3472,10 @@ restart:
if (make_lock_and_pin(pagecache, block,
lock_to_read[lock].unlock_lock,
unlock_pin, FALSE))
+ {
DBUG_ASSERT(0);
+ return (uchar*) 0;
+ }
}
/*
Link the block into the LRU chain if it's the last submitted request
@@ -3322,7 +3490,7 @@ restart:
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
if (status & PCBLOCK_ERROR)
{
@@ -3347,6 +3515,31 @@ no_key_cache: /* Key cache is not used */
/*
+ @brief Set flag that page always should be flushed on delete
+
+ @param block direct link to page (returned by read or write);
+ must be pinned and write locked (DBUG_ASSERTed)
+
+ @note Sets PCBLOCK_DEL_WRITE only; this function never clears the flag
+*/
+
+void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("pagecache_set_write_on_delete_by_link");
+ DBUG_PRINT("enter", ("fd: %d block 0x%lx %d -> TRUE",
+ block->hash_link->file.file,
+ (ulong) block,
+ (int) block->status & PCBLOCK_DEL_WRITE));
+ DBUG_ASSERT(block->pins); /* should be pinned */
+ DBUG_ASSERT(block->wlocks); /* should be write locked */
+
+ block->status|= PCBLOCK_DEL_WRITE;
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
@brief Delete page from the buffer (common part for link and file/page)
@param pagecache pointer to a page cache data structure
@@ -3365,27 +3558,36 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
my_bool flush)
{
my_bool error= 0;
+ if (block->status & PCBLOCK_IN_FLUSH)
+ {
+ /*
+ this call is just a 'hint' for the cache to free the page, so we will
+ not interfere with the flushing process but must return success
+ */
+ goto out;
+ }
if (block->status & PCBLOCK_CHANGED)
{
+ flush= (flush || (block->status & PCBLOCK_DEL_WRITE));
if (flush)
{
/* The block contains a dirty page - push it out of the cache */
KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
- mysql_mutex_unlock(&pagecache->cache_lock);
/*
The call is thread safe because only the current
thread might change the block->hash_link value
*/
DBUG_ASSERT(block->pins == 1);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
error= pagecache_fwrite(pagecache,
&block->hash_link->file,
block->buffer,
block->hash_link->pageno,
block->type,
pagecache->readwrite_flags);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
pagecache->global_cache_write++;
if (error)
@@ -3393,7 +3595,26 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
block->status|= PCBLOCK_ERROR;
block->error= (int16) my_errno;
my_debug_put_break_here();
- goto err;
+ goto out;
+ }
+ }
+ else
+ {
+ PAGECACHE_FILE *filedesc= &block->hash_link->file;
+ /* We are not going to write the page but have to call callbacks */
+ DBUG_PRINT("info", ("flush_callback :0x%lx"
+ "write_callback: 0x%lx data: 0x%lx",
+ (ulong) filedesc->flush_log_callback,
+ (ulong) filedesc->write_callback,
+ (ulong) filedesc->callback_data));
+ if ((*filedesc->flush_log_callback)
+ (block->buffer, block->hash_link->pageno, filedesc->callback_data) ||
+ (*filedesc->write_callback)
+ (block->buffer, block->hash_link->pageno, filedesc->callback_data))
+ {
+ DBUG_PRINT("error", ("flush or write callback problem"));
+ error= 1;
+ goto out;
}
}
pagecache->blocks_changed--;
@@ -3410,10 +3631,19 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
DBUG_ASSERT(0);
DBUG_ASSERT(block->hash_link->requests > 0);
page_link->requests--;
- /* See NOTE for pagecache_unlock about registering requests. */
+ /* See NOTE for pagecache_unlock() about registering requests. */
free_block(pagecache, block);
+ dec_counter_for_resize_op(pagecache);
+ return 0;
-err:
+out:
+ /* Cache is locked, so we can release page before freeing it */
+ if (make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, FALSE))
+ DBUG_ASSERT(0);
+ page_link->requests--;
+ unreg_request(pagecache, block, 1);
dec_counter_for_resize_op(pagecache);
return error;
}
@@ -3454,7 +3684,7 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
if (pagecache->can_be_used)
{
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (!pagecache->can_be_used)
goto end;
@@ -3464,6 +3694,8 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
*/
DBUG_ASSERT((block->status &
(PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)) == 0);
+
+ inc_counter_for_resize_op(pagecache);
/*
make_lock_and_pin() can't fail here, because we are keeping pin on the
block and it can't be evicted (which is cause of lock fail and retry)
@@ -3480,7 +3712,7 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
error= pagecache_delete_internal(pagecache, block, block->hash_link,
flush);
end:
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
}
DBUG_RETURN(error);
@@ -3571,7 +3803,7 @@ restart:
reg1 PAGECACHE_BLOCK_LINK *block;
PAGECACHE_HASH_LINK **unused_start, *page_link;
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (!pagecache->can_be_used)
goto end;
@@ -3580,7 +3812,8 @@ restart:
if (!page_link)
{
DBUG_PRINT("info", ("There is no such page in the cache"));
- mysql_mutex_unlock(&pagecache->cache_lock);
+ dec_counter_for_resize_op(pagecache);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_RETURN(0);
}
block= page_link->block;
@@ -3592,12 +3825,12 @@ restart:
"reassigned" : "in switch")));
PCBLOCK_INFO(block);
page_link->requests--;
+ dec_counter_for_resize_op(pagecache);
goto end;
}
/* See NOTE for pagecache_unlock about registering requests. */
if (pin == PAGECACHE_PIN)
reg_requests(pagecache, block, 1);
- DBUG_ASSERT(block != 0);
if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
{
/*
@@ -3606,7 +3839,7 @@ restart:
*/
if (pin == PAGECACHE_PIN)
unreg_request(pagecache, block, 1);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_PRINT("info", ("restarting..."));
goto restart;
}
@@ -3616,7 +3849,7 @@ restart:
error= pagecache_delete_internal(pagecache, block, page_link, flush);
end:
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
}
DBUG_RETURN(error);
@@ -3763,27 +3996,30 @@ restart:
int page_st;
my_bool need_page_ready_signal= FALSE;
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (!pagecache->can_be_used)
{
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
goto no_key_cache;
}
inc_counter_for_resize_op(pagecache);
pagecache->global_cache_w_requests++;
- /* See NOTE for pagecache_unlock about registering requests. */
+ /*
+ Here we register a request if the page was not already pinned.
+ See NOTE for pagecache_unlock about registering requests.
+ */
reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
(pin == PAGECACHE_PIN));
block= find_block(pagecache, file, pageno, level,
- TRUE,
+ TRUE, FALSE,
reg_request, &page_st);
if (!block)
{
DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
/* It happens only for requests submitted during resize operation */
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
/* Write to the disk key cache is in resize at the moment*/
goto no_key_cache;
}
@@ -3827,7 +4063,7 @@ restart:
*/
if (reg_request)
unreg_request(pagecache, block, 1);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_PRINT("info", ("restarting..."));
goto restart;
}
@@ -3911,7 +4147,10 @@ restart:
block->hash_link->requests--;
/* See NOTE for pagecache_unlock about registering requests. */
if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+ {
unreg_request(pagecache, block, 1);
+ DBUG_ASSERT(page_link == &fake_link);
+ }
else
*page_link= block;
@@ -3923,7 +4162,7 @@ restart:
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
goto end;
}
@@ -3987,11 +4226,13 @@ end:
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
+ uint status= block->status;
KEYCACHE_THREAD_TRACE("free block");
KEYCACHE_DBUG_PRINT("free_block",
("block: %u hash_link 0x%lx",
PCBLOCK_NUMBER(pagecache, block),
(long) block->hash_link));
+ mysql_mutex_assert_owner(&pagecache->cache_lock);
if (block->hash_link)
{
/*
@@ -4010,27 +4251,44 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
DBUG_ASSERT(block->rlocks == 0);
DBUG_ASSERT(block->rlocks_queue == 0);
DBUG_ASSERT(block->pins == 0);
+ DBUG_ASSERT((block->status & ~(PCBLOCK_ERROR | PCBLOCK_READ | PCBLOCK_IN_FLUSH | PCBLOCK_CHANGED | PCBLOCK_REASSIGNED | PCBLOCK_DEL_WRITE)) == 0);
+ DBUG_ASSERT(block->requests >= 1);
+ DBUG_ASSERT(block->next_used == NULL);
block->status= 0;
#ifndef DBUG_OFF
block->type= PAGECACHE_EMPTY_PAGE;
#endif
block->rec_lsn= LSN_MAX;
+ block->hash_link= NULL;
+ if (block->temperature == PCBLOCK_WARM)
+ pagecache->warm_blocks--;
+ block->temperature= PCBLOCK_COLD;
KEYCACHE_THREAD_TRACE("free block");
KEYCACHE_DBUG_PRINT("free_block",
("block is freed"));
unreg_request(pagecache, block, 0);
- block->hash_link= NULL;
- /* Remove the free block from the LRU ring. */
- unlink_block(pagecache, block);
- if (block->temperature == PCBLOCK_WARM)
- pagecache->warm_blocks--;
- block->temperature= PCBLOCK_COLD;
- /* Insert the free block in the free list. */
- block->next_used= pagecache->free_block_list;
- pagecache->free_block_list= block;
- /* Keep track of the number of currently unused blocks. */
- pagecache->blocks_unused++;
+ /*
+ Block->requests is != 0 if unreg_request()/link_block() gave the block
+ to a waiting thread
+ */
+ if (!block->requests)
+ {
+ DBUG_ASSERT(block->next_used != 0);
+
+ /* Remove the free block from the LRU ring. */
+ unlink_block(pagecache, block);
+ /* Insert the free block in the free list. */
+ block->next_used= pagecache->free_block_list;
+ pagecache->free_block_list= block;
+ /* Keep track of the number of currently unused blocks. */
+ pagecache->blocks_unused++;
+ }
+ else
+ {
+ /* keep flag set by link_block() */
+ block->status= status & PCBLOCK_REASSIGNED;
+ }
/* All pending requests for this page must be resubmitted. */
if (block->wqueue[COND_FOR_SAVED].last_thread)
@@ -4078,14 +4336,14 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
*first_errno= 0;
/* Don't lock the cache during the flush */
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
/*
As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH
we are guaranteed that no thread will change them
*/
qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
for (; cache != end; cache++)
{
PAGECACHE_BLOCK_LINK *block= *cache;
@@ -4120,16 +4378,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
PAGECACHE_LOCK_READ, PAGECACHE_PIN, FALSE))
DBUG_ASSERT(0);
- KEYCACHE_DBUG_PRINT("flush_cached_blocks",
- ("block: %u (0x%lx) to be flushed",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block));
- DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed",
+ KEYCACHE_PRINT("flush_cached_blocks",
+ ("block: %u (0x%lx) to be flushed",
+ PCBLOCK_NUMBER(pagecache, block), (ulong)block));
+ DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed",
PCBLOCK_NUMBER(pagecache, block), (ulong)block));
PCBLOCK_INFO(block);
- mysql_mutex_unlock(&pagecache->cache_lock);
- DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u",
- PCBLOCK_NUMBER(pagecache, block), (ulong)block,
- block->pins));
+
/**
@todo IO If page is contiguous with next page to flush, group flushes
in one single my_pwrite().
@@ -4140,12 +4395,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
content (see StaleFilePointersInFlush in ma_checkpoint.c).
@todo change argument of functions to be File.
*/
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
error= pagecache_fwrite(pagecache, &block->hash_link->file,
block->buffer,
block->hash_link->pageno,
block->type,
pagecache->readwrite_flags);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (make_lock_and_pin(pagecache, block,
PAGECACHE_LOCK_READ_UNLOCK,
@@ -4285,9 +4541,10 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
wqueue_add_to_queue(&other_flusher->flush_queue, thread);
do
{
- KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait1",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend,
+ DBUG_PRINT("wait",
+ ("(1) suspend thread %s %ld",
+ thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
while (thread->next);
@@ -4307,11 +4564,11 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
and thus require a table check.
*/
DBUG_ASSERT(0);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
if (my_thread_var->abort)
DBUG_RETURN(1); /* End if aborted by user */
sleep(10);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
}
if (type != FLUSH_IGNORE_CHANGED)
@@ -4330,6 +4587,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
}
}
+ count++; /* Allocate one extra for easy end-of-buffer test */
/* Allocate a new buffer only if its bigger than the one we have */
if (count > FLUSH_CACHE &&
!(cache=
@@ -4367,22 +4625,24 @@ restart:
DBUG_ASSERT(filter_res == FLUSH_FILTER_OK);
}
{
+ DBUG_ASSERT(!(block->status & PCBLOCK_IN_FLUSH));
/*
- Mark the block with BLOCK_IN_FLUSH in order not to let
- other threads to use it for new pages and interfere with
- our sequence of flushing dirty file pages
+ We care only for the blocks for which flushing was not
+ initiated by other threads as a result of page swapping
*/
- block->status|= PCBLOCK_IN_FLUSH;
-
if (! (block->status & PCBLOCK_IN_SWITCH))
{
- /*
- We care only for the blocks for which flushing was not
- initiated by other threads as a result of page swapping
+ /*
+ Mark the block with BLOCK_IN_FLUSH in order not to let
+ other threads to use it for new pages and interfere with
+ our sequence of flushing dirty file pages
*/
+ block->status|= PCBLOCK_IN_FLUSH;
+
reg_requests(pagecache, block, 1);
if (type != FLUSH_IGNORE_CHANGED)
{
+ *pos++= block;
/* It's not a temporary file */
if (pos == end)
{
@@ -4402,7 +4662,6 @@ restart:
*/
goto restart;
}
- *pos++= block;
}
else
{
@@ -4443,9 +4702,10 @@ restart:
wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
do
{
- KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait2",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend,
+ DBUG_PRINT("wait",
+ ("(2) suspend thread %s %ld",
+ thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
while (thread->next);
@@ -4539,11 +4799,11 @@ int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
if (pagecache->disk_blocks <= 0)
DBUG_RETURN(0);
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
inc_counter_for_resize_op(pagecache);
res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg);
dec_counter_for_resize_op(pagecache);
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_RETURN(res);
}
@@ -4620,7 +4880,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
We lock the entire cache but will be quick, just reading/writing a few MBs
of memory at most.
*/
- mysql_mutex_lock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
for (;;)
{
struct st_file_in_flush *other_flusher;
@@ -4647,9 +4907,9 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
wqueue_add_to_queue(&other_flusher->flush_queue, thread);
do
{
- KEYCACHE_DBUG_PRINT("pagecache_collect_changed_blocks_with_lsn: wait",
- ("suspend thread %ld", thread->id));
- mysql_cond_wait(&thread->suspend,
+ DBUG_PRINT("wait",
+ ("suspend thread %s %ld", thread->name, thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
while (thread->next);
@@ -4727,7 +4987,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
}
}
end:
- mysql_mutex_unlock(&pagecache->cache_lock);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
*min_rec_lsn= minimum_rec_lsn;
DBUG_RETURN(error);
@@ -4791,7 +5051,7 @@ static void pagecache_dump(PAGECACHE *pagecache)
PAGECACHE_PAGE *page;
uint i;
- fprintf(pagecache_dump_file, "thread:%u\n", thread->id);
+ fprintf(pagecache_dump_file, "thread: %s %ld\n", thread->name, thread->id);
i=0;
thread=last=waiting_for_hash_link.last_thread;
@@ -4802,8 +5062,9 @@ static void pagecache_dump(PAGECACHE *pagecache)
thread= thread->next;
page= (PAGECACHE_PAGE *) thread->opt_info;
fprintf(pagecache_dump_file,
- "thread:%u, (file,pageno)=(%u,%lu)\n",
- thread->id,(uint) page->file.file,(ulong) page->pageno);
+ "thread: %s %ld, (file,pageno)=(%u,%lu)\n",
+ thread->name, thread->id,
+ (uint) page->file.file,(ulong) page->pageno);
if (++i == MAX_QUEUE_LEN)
break;
}
@@ -4818,8 +5079,9 @@ static void pagecache_dump(PAGECACHE *pagecache)
thread=thread->next;
hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info;
fprintf(pagecache_dump_file,
- "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n",
- thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
+ "thread: %s %u hash_link:%u (file,pageno)=(%u,%lu)\n",
+ thread->name, thread->id,
+ (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
(uint) hash_link->file.file,(ulong) hash_link->pageno);
if (++i == MAX_QUEUE_LEN)
break;
@@ -4848,7 +5110,7 @@ static void pagecache_dump(PAGECACHE *pagecache)
{
thread=thread->next;
fprintf(pagecache_dump_file,
- "thread:%u\n", thread->id);
+ "thread: %s %ld\n", thread->name, thread->id);
if (++i == MAX_QUEUE_LEN)
break;
}
@@ -4878,8 +5140,8 @@ static void pagecache_dump(PAGECACHE *pagecache)
#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
-static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
- pthread_mutex_t *mutex)
+static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
+ mysql_mutex_t *mutex)
{
int rc;
struct timeval now; /* time when we started waiting */
@@ -4906,7 +5168,7 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
fprintf(pagecache_debug_log, "waiting...\n");
fflush(pagecache_debug_log);
#endif
- rc= pthread_cond_timedwait(cond, mutex, &timeout);
+ rc= mysql_cond_timedwait(cond, mutex, &timeout);
KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
if (rc == ETIMEDOUT || rc == ETIME)
{
@@ -4927,12 +5189,12 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
}
#else
#if defined(PAGECACHE_DEBUG)
-static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
- pthread_mutex_t *mutex)
+static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
+ mysql_mutex_t *mutex)
{
int rc;
KEYCACHE_THREAD_TRACE_END("started waiting");
- rc= pthread_cond_wait(cond, mutex);
+ rc= mysql_cond_wait(cond, mutex);
KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
return rc;
}
@@ -4940,27 +5202,27 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
#if defined(PAGECACHE_DEBUG)
-static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
+static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
int rc;
- rc= pthread_mutex_lock(mutex);
+ rc= mysql_mutex_lock(mutex);
KEYCACHE_THREAD_TRACE_BEGIN("");
return rc;
}
-static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex)
+static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex)
{
KEYCACHE_THREAD_TRACE_END("");
- pthread_mutex_unlock(mutex);
+ mysql_mutex_unlock(mutex);
}
-static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond)
+static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond)
{
int rc;
KEYCACHE_THREAD_TRACE("signal");
- rc= pthread_cond_signal(cond);
+ rc= mysql_cond_signal(cond);
return rc;
}
diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h
index 648f93d5c74..8460eaddc57 100644
--- a/storage/maria/ma_pagecache.h
+++ b/storage/maria/ma_pagecache.h
@@ -173,6 +173,7 @@ typedef struct st_pagecache
my_bool resize_in_flush; /* true during flush of resize operation */
my_bool can_be_used; /* usage of cache for read/write is allowed */
my_bool in_init; /* Set to 1 in MySQL during init/resize */
+ my_bool extra_debug; /* set to 1 if one wants extra logging */
HASH files_in_flush; /**< files in flush_pagecache_blocks_int() */
} PAGECACHE;
@@ -251,6 +252,7 @@ extern void pagecache_unpin(PAGECACHE *pagecache,
extern void pagecache_unpin_by_link(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *link,
LSN lsn);
+extern void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block);
/* Results of flush operation (bit field in fact) */
diff --git a/storage/maria/ma_pagecrc.c b/storage/maria/ma_pagecrc.c
index 640bb8880f4..58e3b4b203d 100644
--- a/storage/maria/ma_pagecrc.c
+++ b/storage/maria/ma_pagecrc.c
@@ -355,9 +355,7 @@ my_bool maria_flush_log_for_page(uchar *page,
uchar *data_ptr __attribute__((unused)))
{
LSN lsn;
-#ifndef DBUG_OFF
- const MARIA_SHARE *share= (MARIA_SHARE*) data_ptr;
-#endif
+ MARIA_SHARE *share= (MARIA_SHARE*) data_ptr;
DBUG_ENTER("maria_flush_log_for_page");
/* share is 0 here only in unittest */
DBUG_ASSERT(!share || (share->page_type == PAGECACHE_LSN_PAGE &&
@@ -365,6 +363,12 @@ my_bool maria_flush_log_for_page(uchar *page,
lsn= lsn_korr(page);
if (translog_flush(lsn))
DBUG_RETURN(1);
+ /*
+ Now that the log is written, it's safe to increment the 'open' counter
+ for the table so that we know it was not closed properly.
+ */
+ if (share && !share->global_changed)
+ _ma_mark_file_changed_now(share);
DBUG_RETURN(0);
}
diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c
index f3380e9b68e..8ccb17af81d 100644
--- a/storage/maria/ma_panic.c
+++ b/storage/maria/ma_panic.c
@@ -67,8 +67,8 @@ int maria_panic(enum ha_panic_function flag)
if (info->s->options & HA_OPTION_READ_ONLY_DATA)
break;
#endif
- if (flush_pagecache_blocks(info->s->pagecache, &info->s->kfile,
- FLUSH_RELEASE))
+ if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_RELEASE, FLUSH_RELEASE))
error=my_errno;
if (info->opt_flag & WRITE_CACHE_USED)
if (flush_io_cache(&info->rec_cache))
@@ -92,8 +92,8 @@ int maria_panic(enum ha_panic_function flag)
if (info->dfile.file >= 0 && mysql_file_close(info->dfile.file, MYF(0)))
error = my_errno;
info->s->kfile.file= info->dfile.file= -1;/* Files aren't open anymore */
- break;
#endif
+ break;
case HA_PANIC_READ: /* Restore to before WRITE */
#ifdef CANT_OPEN_FILES_TWICE
{ /* Open closed files */
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 4e1e3dd0608..d773c4fc343 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -1,5 +1,5 @@
/* Copyright (C) 2006, 2007 MySQL AB
- Copyright (C) 2010 Monty Program Ab
+ Copyright (C) 2010-2011 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@
#include "trnman.h"
#include "ma_key_recover.h"
#include "ma_recovery_util.h"
+#include "hash.h"
struct st_trn_for_recovery /* used only in the REDO phase */
{
@@ -58,6 +59,8 @@ static ulonglong now; /**< for tracking execution time of phases */
static void (*save_error_handler_hook)(uint, const char *,myf);
static uint recovery_warnings; /**< count of warnings */
static uint recovery_found_crashed_tables;
+HASH tables_to_redo; /* For maria_read_log */
+ulong maria_recovery_force_crash_counter;
#define prototype_redo_exec_hook(R) \
static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec)
@@ -184,6 +187,21 @@ static void print_preamble()
}
+static my_bool table_is_part_of_recovery_set(LEX_STRING *file_name)
+{ /* Returns non-zero if the table named by file_name should be recovered */
+ uint offset =0;
+ if (!tables_to_redo.records)
+ return 1; /* Default, recover table: the tables_to_redo hash is empty */
+
+ /* Skip base directory: a leading ./ or .\ prefix is left out of the key */
+ if (file_name->str[0] == '.' &&
+ (file_name->str[1] == '/' || file_name->str[1] == '\\'))
+ offset= 2;
+ /* Only recover if table is in hash (looked up by name without prefix) */
+ return my_hash_search(&tables_to_redo, (uchar*) file_name->str + offset,
+ file_name->length - offset) != 0;
+}
+
/**
@brief Recovers from the last checkpoint.
@@ -302,25 +320,32 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
skip_DDLs= skip_DDLs_arg;
skipped_undo_phase= 0;
+ trnman_init(max_trid_in_control_file);
+
if (from_lsn == LSN_IMPOSSIBLE)
{
if (last_checkpoint_lsn == LSN_IMPOSSIBLE)
{
from_lsn= translog_first_lsn_in_log();
if (unlikely(from_lsn == LSN_ERROR))
+ {
+ trnman_destroy();
goto err;
+ }
}
else
{
from_lsn= parse_checkpoint_record(last_checkpoint_lsn);
if (from_lsn == LSN_ERROR)
+ {
+ trnman_destroy();
goto err;
+ }
}
}
- now= my_getsystime();
+ now= microsecond_interval_timer();
in_redo_phase= TRUE;
- trnman_init(max_trid_in_control_file);
if (run_redo_phase(from_lsn, end_lsn, apply))
{
ma_message_no_user(0, "Redo phase failed");
@@ -349,10 +374,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
in_redo_phase= FALSE;
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_REDO)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
/*
Detailed progress info goes to stderr, because ma_message_no_user()
cannot put several messages on one line.
@@ -418,10 +443,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
}
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_UNDO)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
procent_printed= 1;
fprintf(stderr, " (%.1f seconds); ", phase_took);
fflush(stderr);
@@ -438,10 +463,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn,
}
old_now= now;
- now= my_getsystime();
+ now= microsecond_interval_timer();
if (recovery_message_printed == REC_MSG_FLUSH)
{
- double phase_took= (now - old_now)/10000000.0;
+ double phase_took= (now - old_now)/1000000.0;
procent_printed= 1;
fprintf(stderr, " (%.1f seconds); ", phase_took);
fflush(stderr);
@@ -625,6 +650,7 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn,
prototype_redo_exec_hook_dummy(CHECKPOINT)
{
/* the only checkpoint we care about was found via control file, ignore */
+ tprint(tracef, "CHECKPOINT found\n");
return 0;
}
@@ -1276,6 +1302,22 @@ prototype_redo_exec_hook(FILE_ID)
{
tprint(tracef, " Closing table '%s'\n", info->s->open_file_name.str);
prepare_table_for_close(info, rec->lsn);
+
+ /*
+ Ensure that open count is 1 on close. This is needed as the
+ table may have had an open_count > 0 when we initially
+ opened it, as the server may have crashed without closing it
+ properly. As we have applied all redos for the table up to
+ this point, we know the table is ok, so it's safe to reset the
+ open count to 0.
+ */
+ if (info->s->state.open_count != 0 && info->s->reopen == 1)
+ {
+ /* let ma_close() mark the table properly closed */
+ info->s->state.open_count= 1;
+ info->s->global_changed= 1;
+ info->s->changed= 1;
+ }
if (maria_close(info))
{
eprint(tracef, "Failed to close table");
@@ -1645,8 +1687,8 @@ prototype_redo_exec_hook(REDO_FREE_BLOCKS)
}
buff= log_record_buffer.str;
- if (_ma_apply_redo_free_blocks(info, current_group_end_lsn,
- buff + FILEID_STORE_SIZE))
+ if (_ma_apply_redo_free_blocks(info, current_group_end_lsn, rec->lsn,
+ buff))
goto end;
error= 0;
end:
@@ -2907,6 +2949,12 @@ static int run_undo_phase(uint uncommitted)
translog_free_record_header(&rec);
}
+ /* Force a crash to test recovery of recovery */
+ if (maria_recovery_force_crash_counter)
+ {
+ DBUG_ASSERT(--maria_recovery_force_crash_counter > 0);
+ }
+
if (trnman_rollback_trn(trn))
DBUG_RETURN(1);
/* We could want to span a few threads (4?) instead of 1 */
@@ -3017,10 +3065,11 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
page= page_korr(rec->header + FILEID_STORE_SIZE);
llstr(page, llbuf);
break;
+ case LOGREC_REDO_FREE_BLOCKS:
/*
- For REDO_FREE_BLOCKS, no need to look at dirty pages list: it does not
- read data pages, only reads/modifies bitmap page(s) which is cheap.
+ We are checking against the dirty pages in _ma_apply_redo_free_blocks()
*/
+ break;
default:
break;
}
@@ -3038,6 +3087,12 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
share= info->s;
tprint(tracef, ", '%s'", share->open_file_name.str);
DBUG_ASSERT(in_redo_phase);
+ if (!table_is_part_of_recovery_set(&share->open_file_name))
+ {
+ tprint(tracef, ", skipped by user\n");
+ return NULL;
+ }
+
if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0)
{
/*
@@ -3071,7 +3126,6 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
REDO_INSERT_ROW_BLOBS will consult list by itself, as it covers several
pages.
*/
- tprint(tracef, " page %s", llbuf);
if (_ma_redo_not_needed_for_page(sid, rec->lsn, page,
index_page_redo_entry))
return NULL;
@@ -3108,6 +3162,13 @@ static MARIA_HA *get_MARIA_HA_from_UNDO_record(const
}
share= info->s;
tprint(tracef, ", '%s'", share->open_file_name.str);
+
+ if (!table_is_part_of_recovery_set(&share->open_file_name))
+ {
+ tprint(tracef, ", skipped by user\n");
+ return NULL;
+ }
+
if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0)
{
tprint(tracef, ", table's LOGREC_FILE_ID has LSN (%lu,0x%lx) more recent"
@@ -3383,13 +3444,20 @@ static int close_all_tables(void)
*/
if (info->s->state.open_count != 0)
{
- /* let ma_close() mark the table properly closed */
+ /* let maria_close() mark the table properly closed */
info->s->state.open_count= 1;
info->s->global_changed= 1;
+ info->s->changed= 1;
}
prepare_table_for_close(info, addr);
error|= maria_close(info);
mysql_mutex_lock(&THR_LOCK_maria);
+
+ /* Force a crash to test recovery of recovery */
+ if (maria_recovery_force_crash_counter)
+ {
+ DBUG_ASSERT(--maria_recovery_force_crash_counter > 0);
+ }
}
end:
mysql_mutex_unlock(&THR_LOCK_maria);
@@ -3464,7 +3532,7 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
/*
Reset state pointers. This is needed as in ALTER table we may do
- commit fllowed by _ma_renable_logging_for_table and then
+ commit followed by _ma_renable_logging_for_table and then
info->state may point to a state that was deleted by
_ma_trnman_end_trans_hook()
*/
diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h
index 0bfcdd17d39..45dba0e86b3 100644
--- a/storage/maria/ma_recovery.h
+++ b/storage/maria/ma_recovery.h
@@ -30,4 +30,7 @@ int maria_apply_log(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply,
FILE *trace_file,
my_bool execute_undo_phase, my_bool skip_DDLs,
my_bool take_checkpoints, uint *warnings_count);
+/* Table of tables to recover */
+extern HASH tables_to_redo;
+extern ulong maria_recovery_force_crash_counter;
C_MODE_END
diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c
index 53f3c2f94b1..57cb5724561 100644
--- a/storage/maria/ma_recovery_util.c
+++ b/storage/maria/ma_recovery_util.c
@@ -59,9 +59,11 @@ void tprint(FILE *trace_file __attribute__ ((unused)),
va_list args;
#ifndef DBUG_OFF
{
- char buff[1024];
+ char buff[1024], *end;
va_start(args, format);
vsnprintf(buff, sizeof(buff)-1, format, args);
+ if (*(end= strend(buff)) == '\n')
+ *end= 0; /* Don't print end \n */
DBUG_PRINT("info", ("%s", buff));
va_end(args);
}
@@ -129,16 +131,20 @@ my_bool _ma_redo_not_needed_for_page(uint16 shortid, LSN lsn,
Next 2 bytes: table's short id
Next 5 bytes: page number
*/
+ char llbuf[22];
uint64 file_and_page_id=
(((uint64)((index << 16) | shortid)) << 40) | page;
struct st_dirty_page *dirty_page= (struct st_dirty_page *)
my_hash_search(&all_dirty_pages,
(uchar *)&file_and_page_id, sizeof(file_and_page_id));
- DBUG_PRINT("info", ("in dirty pages list: %d", dirty_page != NULL));
+ DBUG_PRINT("info", ("page %lld in dirty pages list: %d",
+ (ulonglong) page,
+ dirty_page != NULL));
if ((dirty_page == NULL) ||
cmp_translog_addr(lsn, dirty_page->rec_lsn) < 0)
{
- tprint(tracef, ", ignoring because of dirty_pages list\n");
+ tprint(tracef, ", ignoring page %s because of dirty_pages list\n",
+ llstr((ulonglong) page, llbuf));
return TRUE;
}
}
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
index 3df7f1b9941..06db57dfab7 100644
--- a/storage/maria/ma_rkey.c
+++ b/storage/maria/ma_rkey.c
@@ -34,7 +34,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
HA_KEYSEG *last_used_keyseg;
uint32 nextflag;
MARIA_KEY key;
- int icp_res= 1;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rkey");
DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
(long) info, (long) buf, inx, search_flag));
@@ -44,7 +44,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->last_key_func= search_flag;
- keyinfo= share->keyinfo + inx;
+ keyinfo= info->last_key.keyinfo;
key_buff= info->lastkey_buff+info->s->base.max_key_length;
@@ -83,17 +83,17 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
mysql_rwlock_rdlock(&keyinfo->root_lock);
nextflag= maria_read_vec[search_flag] | key.flag;
- if (search_flag != HA_READ_KEY_EXACT ||
- ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME))
+ if (search_flag != HA_READ_KEY_EXACT)
+ {
+ /* Assume we will get a read next/previous call after this one */
nextflag|= SEARCH_SAVE_BUFF;
-
+ }
switch (keyinfo->key_alg) {
#ifdef HAVE_RTREE_KEYS
case HA_KEY_ALG_RTREE:
if (maria_rtree_find_first(info, &key, nextflag) < 0)
{
- maria_print_error(info->s, HA_ERR_CRASHED);
- my_errno= HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
info->cur_row.lastpos= HA_OFFSET_ERROR;
}
break;
@@ -103,8 +103,6 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
if (!_ma_search(info, &key, nextflag, info->s->state.key_root[inx]))
{
MARIA_KEY lastkey;
- lastkey.keyinfo= keyinfo;
- lastkey.data= info->lastkey_buff;
/*
Found a key, but it might not be usable. We cannot use rows that
are inserted by other threads after we got our table lock
@@ -116,7 +114,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
not satisfied with an out-of-range condition.
*/
if ((*share->row_is_visible)(info) &&
- ((icp_res= ma_check_index_cond(info, inx, buf)) != 0))
+ ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
/* The key references a concurrently inserted record. */
@@ -129,6 +127,8 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
break;
}
+ lastkey.keyinfo= keyinfo;
+ lastkey.data= info->lastkey_buff;
do
{
uint not_used[2];
@@ -144,6 +144,18 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
if (_ma_search_next(info, &lastkey, maria_readnext_vec[search_flag],
info->s->state.key_root[inx]))
break; /* purecov: inspected */
+
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ DBUG_ASSERT(info->cur_row.lastpos == HA_OFFSET_ERROR);
+ break;
+ }
+
/*
Check that the found key does still match the search.
_ma_search_next() delivers the next key regardless of its
@@ -163,15 +175,19 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data,
} while (!(*share->row_is_visible)(info) ||
((icp_res= ma_check_index_cond(info, inx, buf)) == 0));
}
+ else
+ {
+ DBUG_ASSERT(info->cur_row.lastpos);
+ }
}
if (share->lock_key_trees)
mysql_rwlock_unlock(&keyinfo->root_lock);
- if (info->cur_row.lastpos == HA_OFFSET_ERROR || (icp_res != 1))
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR)
{
- if (icp_res == 2)
+ if (icp_res == ICP_OUT_OF_RANGE)
{
- info->cur_row.lastpos= HA_OFFSET_ERROR;
+ /* We don't want HA_ERR_END_OF_FILE in this particular case */
my_errno= HA_ERR_KEY_NOT_FOUND;
}
fast_ma_writeinfo(info);
@@ -213,3 +229,37 @@ err:
info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */
DBUG_RETURN(my_errno);
} /* _ma_rkey */
+
+
+/*
+ Yield to possible other writers during an index scan.
+ Also check if we got killed by the user; if so, mark that there is no
+ active row and set my_errno to HA_ERR_ABORTED_BY_USER.
+
+ return 0 ok
+ return 1 Query has been requested to be killed
+*/
+
+my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx)
+{
+ MARIA_SHARE *share;
+ if (ma_killed(info))
+ {
+ /* purecov: begin tested */
+ /* Mark that we don't have an active row */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ /* Set error that we were aborted by kill from application */
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
+ /* purecov: end */
+ }
+
+ if ((share= info->s)->lock_key_trees)
+ {
+ /* Give writers a chance to access index: drop and re-take the read lock */
+ mysql_rwlock_unlock(&share->keyinfo[inx].root_lock);
+ mysql_rwlock_rdlock(&share->keyinfo[inx].root_lock);
+ }
+ return 0;
+}
+
diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c
index 9142921dbb5..d3fab041d75 100644
--- a/storage/maria/ma_rnext.c
+++ b/storage/maria/ma_rnext.c
@@ -30,7 +30,8 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
uint flag;
MARIA_SHARE *share= info->s;
MARIA_KEYDEF *keyinfo;
- int icp_res= 1;
+ ICP_RESULT icp_res= ICP_MATCH;
+ uint update_mask= HA_STATE_NEXT_FOUND;
DBUG_ENTER("maria_rnext");
if ((inx = _ma_check_index(info,inx)) < 0)
@@ -62,6 +63,20 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
error= _ma_search_first(info, keyinfo, share->state.key_root[inx]);
break;
}
+ /*
+ "search first" failed. This means we have no pivot for
+ "search next", or in other words MI_INFO::lastkey is
+ likely uninitialized.
+
+ Normally SQL layer would never request "search next" if
+ "search first" failed. But HANDLER may do anything.
+
+ As mi_rnext() without preceding mi_rkey()/mi_rfirst()
+ is equivalent to mi_rfirst(), we must restore the original
+ state as if the failing mi_rfirst() had not been called.
+ */
+ if (error)
+ update_mask|= HA_STATE_PREV_FOUND;
}
else
{
@@ -92,8 +107,20 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
if (!error)
{
while (!(*share->row_is_visible)(info) ||
- ((icp_res= ma_check_index_cond(info, inx, buf)) == 0))
+ ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ /* my_errno is set by ma_yield_and_check_if_killed() */
+ error= 1;
+ break;
+ }
+
/* Skip rows inserted by other threads since we got a lock */
if ((error= _ma_search_next(info, &info->last_key,
SEARCH_BIGGER,
@@ -106,18 +133,17 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx)
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
- info->update|= HA_STATE_NEXT_FOUND;
+ info->update|= update_mask;
- if (icp_res == 2)
- my_errno=HA_ERR_END_OF_FILE; /* got beyond the end of scanned range */
-
- if (error || icp_res != 1)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c
index 5822e8787e1..353d06adaf4 100644
--- a/storage/maria/ma_rnext_same.c
+++ b/storage/maria/ma_rnext_same.c
@@ -30,7 +30,7 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
int error;
uint inx,not_used[2];
MARIA_KEYDEF *keyinfo;
- int icp_res= 1;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rnext_same");
if ((int) (inx= info->lastinx) < 0 ||
@@ -80,9 +80,19 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
info->cur_row.lastpos= HA_OFFSET_ERROR;
break;
}
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ ma_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
/* Skip rows that are inserted by other threads since we got a lock */
if ((info->s->row_is_visible)(info) &&
- ((icp_res= ma_check_index_cond(info, inx, buf)) != 0))
+ ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
}
}
@@ -92,16 +102,15 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf)
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
- if (icp_res == 2)
- my_errno=HA_ERR_END_OF_FILE; /* got beyond the end of scanned range */
-
- if (error || icp_res != 1)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c
index f64e875c2ba..f4d25c0f676 100644
--- a/storage/maria/ma_rprev.c
+++ b/storage/maria/ma_rprev.c
@@ -28,6 +28,7 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
register uint flag;
MARIA_SHARE *share= info->s;
MARIA_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("maria_rprev");
if ((inx = _ma_check_index(info,inx)) < 0)
@@ -55,8 +56,24 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
if (!error)
{
- while (!(*share->row_is_visible)(info))
+ my_off_t cur_keypage= info->last_keypage;
+ while (!(*share->row_is_visible)(info) ||
+ ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If the scan has moved to another key page, allow writers to
+ access the index.
+ */
+ if (info->last_keypage != cur_keypage)
+ {
+ cur_keypage= info->last_keypage;
+ if (ma_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
+ }
+
/* Skip rows that are inserted by other threads since we got a lock */
if ((error= _ma_search_next(info, &info->last_key,
SEARCH_SMALLER,
@@ -68,13 +85,16 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx)
mysql_rwlock_unlock(&keyinfo->root_lock);
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_PREV_FOUND;
- if (error)
+
+ if (error || icp_res != ICP_MATCH)
{
+ fast_ma_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
- my_errno=HA_ERR_END_OF_FILE;
+ my_errno= HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_ma_writeinfo(info);
DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c
index c20faf965b4..0f29cb71370 100644
--- a/storage/maria/ma_rsame.c
+++ b/storage/maria/ma_rsame.c
@@ -19,7 +19,7 @@
Find current row with read on position or read on key
@notes
- If inx >= 0 find record using key
+ If inx >= 0 find record using key else re-read row on last position
@warning
This function is not row version safe.
@@ -29,6 +29,7 @@
@retval 0 Ok
@retval HA_ERR_KEY_NOT_FOUND Row is deleted
@retval HA_ERR_END_OF_FILE End of file
+ @retval HA_ERR_WRONG_INDEX Wrong inx argument
*/
@@ -36,10 +37,10 @@ int maria_rsame(MARIA_HA *info, uchar *record, int inx)
{
DBUG_ENTER("maria_rsame");
- if (inx != -1 && ! maria_is_key_active(info->s->state.key_map, inx))
+ if (inx >= 0 && _ma_check_index(info, inx) < 0)
{
DBUG_PRINT("error", ("wrong index usage"));
- DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ DBUG_RETURN(my_errno);
}
if (info->cur_row.lastpos == HA_OFFSET_ERROR ||
info->update & HA_STATE_DELETED)
@@ -55,8 +56,7 @@ int maria_rsame(MARIA_HA *info, uchar *record, int inx)
if (inx >= 0)
{
- MARIA_KEYDEF *keyinfo= info->s->keyinfo + inx;
- info->lastinx= inx;
+ MARIA_KEYDEF *keyinfo= info->last_key.keyinfo;
(*keyinfo->make_key)(info, &info->last_key, (uint) inx,
info->lastkey_buff, record,
info->cur_row.lastpos,
diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c
index 8feac7711e2..2c2090bf343 100644
--- a/storage/maria/ma_rt_index.c
+++ b/storage/maria/ma_rt_index.c
@@ -134,7 +134,6 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
tmp_key.data_length= key_data_length;
info->cur_row.lastpos= _ma_row_pos_from_key(&tmp_key);
- info->last_key.keyinfo= keyinfo;
info->last_key.data_length= key_data_length;
info->last_key.ref_length= share->base.rec_reflength;
info->last_key.flag= 0;
diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c
index 856edc60490..6f32a60c073 100644
--- a/storage/maria/ma_rt_split.c
+++ b/storage/maria/ma_rt_split.c
@@ -544,8 +544,7 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page,
}
DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs));
- my_afree(new_page);
-
+ my_afree(new_page_buff);
split_err:
my_afree(coord_buf);
DBUG_RETURN(err_code);
diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c
index 4c0ffcf72b1..29244bab6ce 100644
--- a/storage/maria/ma_rt_test.c
+++ b/storage/maria/ma_rt_test.c
@@ -93,9 +93,10 @@ static enum data_file_type record_type= DYNAMIC_RECORD;
int main(int argc, char *argv[])
{
+ char buff[FN_REFLEN];
MY_INIT(argv[0]);
- get_options(argc, argv);
maria_data_root= (char *)".";
+ get_options(argc, argv);
/* Maria requires that we always have a page cache */
if (maria_init() ||
(init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0,
@@ -113,7 +114,7 @@ int main(int argc, char *argv[])
exit(1);
}
- exit(run_test("rt_test"));
+ exit(run_test(fn_format(buff, "test1", maria_data_root, "", MYF(0))));
}
@@ -614,6 +615,8 @@ static struct my_option my_long_options[] =
#endif
{"help", '?', "Display help and exit",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"datadir", 'h', "Path to the database root.", &maria_data_root,
+ &maria_data_root, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"row-fixed-size", 'S', "Fixed size records",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"rows-in-block", 'M', "Store rows in block format",
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
index 9e5513f388b..5dd0296e17b 100644
--- a/storage/maria/ma_search.c
+++ b/storage/maria/ma_search.c
@@ -38,12 +38,18 @@ int _ma_check_index(MARIA_HA *info, int inx)
if (info->lastinx != inx) /* Index changed */
{
info->lastinx = inx;
+ info->last_key.keyinfo= info->s->keyinfo + inx;
+ info->last_key.flag= 0;
info->page_changed=1;
info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) |
HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND);
}
- if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ if ((info->opt_flag & WRITE_CACHE_USED) && flush_io_cache(&info->rec_cache))
+ {
+ if (unlikely(!my_errno))
+ my_errno= HA_ERR_INTERNAL_ERROR; /* Impossible */
return(-1);
+ }
return(inx);
} /* _ma_check_index */
@@ -95,6 +101,7 @@ int _ma_search(register MARIA_HA *info, MARIA_KEY *key, uint32 nextflag,
@note
Position to row is stored in info->lastpos
+ Last used key is stored in info->last_key
@return
@retval 0 ok (key found)
@@ -120,6 +127,7 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key,
(ulong) (pos / info->s->block_size),
nextflag, (ulong) info->cur_row.lastpos));
DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, key););
+ DBUG_ASSERT(info->last_key.keyinfo == key->keyinfo);
if (pos == HA_OFFSET_ERROR)
{
@@ -141,7 +149,11 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key,
flag= (*keyinfo->bin_search)(key, &page, nextflag, &keypos, lastkey,
&last_key_not_used);
if (flag == MARIA_FOUND_WRONG_KEY)
- DBUG_RETURN(-1);
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
page_flag= page.flag;
used_length= page.size;
nod_flag= page.node;
@@ -180,7 +192,6 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key,
}
}
- info->last_key.keyinfo= keyinfo;
if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0)
{
uint not_used[2];
@@ -372,8 +383,7 @@ int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &page);
if (length == 0 || page > end)
{
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_PRINT("error",
("Found wrong key: length: %u page: 0x%lx end: 0x%lx",
length, (long) page, (long) end));
@@ -555,8 +565,7 @@ int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page,
if (page > end)
{
- maria_print_error(share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_PRINT("error",
("Found wrong key: length: %u page: 0x%lx end: %lx",
length, (long) page, (long) end));
@@ -785,6 +794,7 @@ MARIA_RECORD_POS _ma_row_pos_from_key(const MARIA_KEY *key)
case 4: pos= (my_off_t) mi_uint4korr(after_key); break;
case 3: pos= (my_off_t) mi_uint3korr(after_key); break;
case 2: pos= (my_off_t) mi_uint2korr(after_key); break;
+ case 0: /* NO_RECORD */
default:
pos=0L; /* Shut compiler up */
}
@@ -894,6 +904,7 @@ void _ma_dpointer(MARIA_SHARE *share, uchar *buff, my_off_t pos)
case 4: mi_int4store(buff,pos); break;
case 3: mi_int3store(buff,pos); break;
case 2: mi_int2store(buff,(uint) pos); break;
+ case 0: break; /* For NO_RECORD */
default: abort(); /* Impossible */
}
} /* _ma_dpointer */
@@ -1036,8 +1047,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
{
if (length > (uint) keyseg->length)
{
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
return 0; /* Error */
}
if (length == 0) /* Same key */
@@ -1052,8 +1062,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
("Found too long null packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
DBUG_DUMP("key", *page_pos, 16);
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
return 0;
}
continue;
@@ -1110,8 +1119,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag,
DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
DBUG_DUMP("key", *page_pos, 16);
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
return 0; /* Error */
}
store_key_length_inc(key,length);
@@ -1270,8 +1278,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag,
("Found too long binary packed key: %u of %u at 0x%lx",
length, keyinfo->maxlength, (long) *page_pos));
DBUG_DUMP("key", *page_pos, 16);
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0); /* Wrong key */
}
/* Key is packed against prev key, take prefix from prev key. */
@@ -1362,8 +1369,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag,
if (from_end != page_end)
{
DBUG_PRINT("error",("Error when unpacking key"));
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0); /* Error */
}
}
@@ -1449,8 +1455,7 @@ uchar *_ma_get_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *keypos)
{
if (!(*keyinfo->get_key)(key, page_flag, nod_flag, &page))
{
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0);
}
}
@@ -1500,8 +1505,7 @@ static my_bool _ma_get_prev_key(MARIA_KEY *key, MARIA_PAGE *ma_page,
{
if (! (*keyinfo->get_key)(key, page_flag, nod_flag, &page))
{
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(1);
}
}
@@ -1554,8 +1558,7 @@ uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *endpos)
{
DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx",
(long) page));
- maria_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED);
DBUG_RETURN(0);
}
}
@@ -1696,7 +1699,7 @@ int _ma_search_next(register MARIA_HA *info, MARIA_KEY *key,
}
tmp_key.data= lastkey;
- info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
+ tmp_key.keyinfo= keyinfo;
if (nextflag & SEARCH_BIGGER) /* Next key */
{
@@ -1778,8 +1781,6 @@ int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
first_pos= page.buff + share->keypage_header + page.node;
} while ((pos= _ma_kpos(page.node, first_pos)) != HA_OFFSET_ERROR);
- info->last_key.keyinfo= keyinfo;
-
if (!(*keyinfo->get_key)(&info->last_key, page.flag, page.node, &first_pos))
DBUG_RETURN(-1); /* Crashed */
@@ -1830,8 +1831,6 @@ int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
end_of_page= page.buff + page.size;
} while ((pos= _ma_kpos(page.node, end_of_page)) != HA_OFFSET_ERROR);
- info->last_key.keyinfo= keyinfo;
-
if (!_ma_get_last_key(&info->last_key, &page, end_of_page))
DBUG_RETURN(-1);
info->cur_row.lastpos= _ma_row_pos_from_key(&info->last_key);
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index 0a4259d0cb3..88e82d647a8 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -191,6 +191,9 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
&tempfile,&tempfile_for_exceptions))
== HA_POS_ERROR)
goto err; /* purecov: tested */
+
+ info->sort_info->param->stage++; /* Merge stage */
+
if (maxbuffer == 0)
{
if (!no_messages)
@@ -275,12 +278,13 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys,
idx=error=0;
sort_keys[0]= (uchar*) (sort_keys+keys);
+ info->sort_info->info->in_check_table= 1;
while (!(error=(*info->key_read)(info,sort_keys[idx])))
{
if (info->real_key_length > info->key_length)
{
if (write_key(info,sort_keys[idx],tempfile_for_exceptions))
- DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ goto err; /* purecov: inspected */
continue;
}
@@ -289,7 +293,7 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys,
if (info->write_keys(info,sort_keys,idx-1,
(BUFFPEK *)alloc_dynamic(buffpek),
tempfile))
- DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ goto err; /* purecov: inspected */
sort_keys[0]=(uchar*) (sort_keys+keys);
memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length);
@@ -298,18 +302,23 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys,
sort_keys[idx]=sort_keys[idx-1]+info->key_length;
}
if (error > 0)
- DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */
+ goto err; /* purecov: inspected */
if (buffpek->elements)
{
if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek),
tempfile))
- DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ goto err; /* purecov: inspected */
*maxbuffer=buffpek->elements-1;
}
else
*maxbuffer=0;
+ info->sort_info->info->in_check_table= 0;
DBUG_RETURN((*maxbuffer)*(keys-1)+idx);
+
+err:
+ info->sort_info->info->in_check_table= 0; /* purecov: inspected */
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
} /* find_all_keys */
@@ -761,6 +770,8 @@ static int write_index(MARIA_SORT_PARAM *info,
if ((*info->key_write)(info, *sort_keys++))
DBUG_RETURN(-1); /* purecov: inspected */
}
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, 1, 1);
DBUG_RETURN(0);
} /* write_index */
@@ -771,7 +782,7 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
uchar **sort_keys, BUFFPEK *buffpek,
int *maxbuffer, IO_CACHE *t_file)
{
- register int i;
+ int tmp, merges, max_merges;
IO_CACHE t_file2, *from_file, *to_file, *temp;
BUFFPEK *lastbuff;
DBUG_ENTER("merge_many_buff");
@@ -783,9 +794,21 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
DBUG_RETURN(1); /* purecov: inspected */
+ /* Calculate how many merges are needed */
+ max_merges= 1; /* Count merge_index */
+ tmp= *maxbuffer;
+ while (tmp >= MERGEBUFF2)
+ {
+ merges= (tmp-MERGEBUFF*3/2 + 1) / MERGEBUFF + 1;
+ max_merges+= merges;
+ tmp= merges;
+ }
+ merges= 0;
+
from_file= t_file ; to_file= &t_file2;
while (*maxbuffer >= MERGEBUFF2)
{
+ int i;
reinit_io_cache(from_file,READ_CACHE,0L,0,0);
reinit_io_cache(to_file,WRITE_CACHE,0L,0,0);
lastbuff=buffpek;
@@ -794,6 +817,8 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
buffpek+i,buffpek+i+MERGEBUFF-1))
goto cleanup;
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, merges++, max_merges);
}
if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
buffpek+i,buffpek+ *maxbuffer))
@@ -802,6 +827,8 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
break; /* purecov: inspected */
temp=from_file; from_file=to_file; to_file=temp;
*maxbuffer= (int) (lastbuff-buffpek)-1;
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, merges++, max_merges);
}
cleanup:
close_cached_file(to_file); /* This holds old result */
@@ -1058,6 +1085,8 @@ merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys,
if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek,
buffpek+maxbuffer))
DBUG_RETURN(1); /* purecov: inspected */
+ if (info->sort_info->param->max_stage != 1) /* If not parallel */
+ _ma_report_progress(info->sort_info->param, 1, 1);
DBUG_RETURN(0);
} /* merge_index */
diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c
index 19f7cfa4ea2..a075459d389 100644
--- a/storage/maria/ma_static.c
+++ b/storage/maria/ma_static.c
@@ -38,12 +38,17 @@ my_bool maria_delay_key_write= 0, maria_page_checksums= 1;
my_bool maria_inited= FALSE;
my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */
my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0;
+my_bool maria_assert_if_crashed_table= 0;
+my_bool maria_checkpoint_disabled= 0;
+
mysql_mutex_t THR_LOCK_maria;
#ifdef DONT_USE_RW_LOCKS
ulong maria_concurrent_insert= 0;
#else
+/* Do concurrent inserts at file end or in old holes */
ulong maria_concurrent_insert= 2;
#endif
+
my_off_t maria_max_temp_length= MAX_FILE_SIZE;
ulong maria_bulk_insert_tree_size=8192*1024;
ulong maria_data_pointer_size= 4;
@@ -107,6 +112,7 @@ static int always_valid(const char *filename __attribute__((unused)))
}
int (*maria_test_invalid_symlink)(const char *filename)= always_valid;
+my_bool (*ma_killed)(MARIA_HA *)= ma_killed_standalone;
#ifdef HAVE_PSI_INTERFACE
@@ -138,7 +144,6 @@ PSI_thread_key key_thread_checkpoint, key_thread_find_all_keys,
key_thread_soft_sync;
PSI_file_key key_file_translog, key_file_kfile, key_file_dfile,
- key_file_control;
+ key_file_control, key_file_tmp;
#endif /* HAVE_PSI_INTERFACE */
-
diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c
index e085821b9d0..89a5a30f490 100644
--- a/storage/maria/ma_statrec.c
+++ b/storage/maria/ma_statrec.c
@@ -294,6 +294,6 @@ int _ma_read_rnd_static_record(MARIA_HA *info, uchar *buf,
}
/* my_errno should be set if rec_cache.error == -1 */
if (info->rec_cache.error != -1 || my_errno == 0)
- my_errno=HA_ERR_WRONG_IN_RECORD;
+ _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(my_errno); /* Something wrong (EOF?) */
}
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
index cb83116a7cd..945654a0bbe 100644
--- a/storage/maria/ma_test1.c
+++ b/storage/maria/ma_test1.c
@@ -70,12 +70,13 @@ extern int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
int main(int argc,char *argv[])
{
+ char buff[FN_REFLEN];
#ifdef SAFE_MUTEX
safe_mutex_deadlock_detector= 1;
#endif
MY_INIT(argv[0]);
- get_options(argc,argv);
maria_data_root= (char *)".";
+ get_options(argc,argv);
/* Maria requires that we always have a page cache */
if (maria_init() ||
(init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0,
@@ -95,7 +96,7 @@ int main(int argc,char *argv[])
if (opt_versioning)
init_thr_lock();
- exit(run_test("test1"));
+ exit(run_test(fn_format(buff, "test1", maria_data_root, "", MYF(0))));
}
@@ -409,6 +410,10 @@ static int run_test(const char *filename)
if (!silent)
printf("- Reading rows with key\n");
record[1]= 0; /* For nicer printf */
+
+ if (record_type == NO_RECORD)
+ maria_extra(file, HA_EXTRA_KEYREAD, 0);
+
for (i=0 ; i <= 25 ; i++)
{
create_key(key,i);
@@ -422,9 +427,15 @@ static int run_test(const char *filename)
(int) key_length,key+offset_to_key,error,my_errno,record+1);
}
}
+ if (record_type == NO_RECORD)
+ {
+ maria_extra(file, HA_EXTRA_NO_KEYREAD, 0);
+ goto end;
+ }
if (!silent)
printf("- Reading rows with position\n");
+
if (maria_scan_init(file))
{
fprintf(stderr, "maria_scan_init failed\n");
@@ -724,6 +735,8 @@ static struct my_option my_long_options[] =
{"debug", '#', "Undocumented",
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
#endif
+ {"datadir", 'h', "Path to the database root.", &maria_data_root,
+ &maria_data_root, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"delete-rows", 'd', "Abort after this many rows has been deleted",
(uchar**) &remove_count, (uchar**) &remove_count, 0, GET_UINT, REQUIRED_ARG,
1000, 0, 0, 0, 0, 0},
@@ -757,6 +770,8 @@ static struct my_option my_long_options[] =
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"rows-in-block", 'M', "Store rows in block format",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"rows-no-data", 'n', "Don't store any data, only keys",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"row-pointer-size", 'R', "Undocumented", (uchar**) &rec_pointer_size,
(uchar**) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"silent", 's', "Undocumented",
@@ -816,6 +831,9 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
case 'M':
record_type= BLOCK_RECORD;
break;
+ case 'n':
+ record_type= NO_RECORD;
+ break;
case 'S':
if (key_field == FIELD_VARCHAR)
{
@@ -887,6 +905,10 @@ static void get_options(int argc, char *argv[])
exit(ho_error);
if (transactional)
record_type= BLOCK_RECORD;
+ if (record_type == NO_RECORD)
+ skip_update= skip_delete= 1;
+
+
return;
} /* get options */
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
index 5fa27d331ba..5d0882f3fcb 100644
--- a/storage/maria/ma_test2.c
+++ b/storage/maria/ma_test2.c
@@ -69,24 +69,25 @@ int main(int argc, char *argv[])
MARIA_KEYDEF keyinfo[10];
MARIA_COLUMNDEF recinfo[10];
MARIA_INFO info;
- const char *filename;
char *blob_buffer;
MARIA_CREATE_INFO create_info;
+ char filename[FN_REFLEN];
#ifdef SAFE_MUTEX
safe_mutex_deadlock_detector= 1;
#endif
MY_INIT(argv[0]);
- filename= "test2";
+ maria_data_root= (char *)".";
get_options(argc,argv);
+ fn_format(filename, "test2", maria_data_root, "", MYF(0));
+
if (! async_io)
my_disable_async_io=1;
/* If we sync or not have no affect on this test */
my_disable_sync= 1;
- maria_data_root= (char *)".";
/* Maria requires that we always have a page cache */
if (maria_init() ||
(init_pagecache(maria_pagecache, pagecache_size, 0, 0,
@@ -1101,6 +1102,9 @@ static void get_options(int argc, char **argv)
case 'H':
checkpoint= atoi(++pos);
break;
+ case 'h':
+ maria_data_root= ++pos;
+ break;
case 'k':
if ((keys=(uint) atoi(++pos)) < 1 ||
keys > (uint) (MARIA_KEYS-first_key))
diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c
index d9f8306488e..ef7aec86834 100644
--- a/storage/maria/ma_unique.c
+++ b/storage/maria/ma_unique.c
@@ -34,6 +34,7 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
MARIA_KEYDEF *keyinfo= &info->s->keyinfo[def->key];
uchar *key_buff= info->lastkey_buff2;
MARIA_KEY key;
+ int error= 0;
DBUG_ENTER("_ma_check_unique");
DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash));
@@ -44,12 +45,19 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
/* The above changed info->lastkey_buff2. Inform maria_rnext_same(). */
info->update&= ~HA_STATE_RNEXT_SAME;
+ /* Setup that unique key is active key */
+ info->last_key.keyinfo= keyinfo;
+
+ /* any key pointer in data is destroyed */
+ info->lastinx= ~0;
+
DBUG_ASSERT(key.data_length == MARIA_UNIQUE_HASH_LENGTH);
- if (_ma_search(info, &key, SEARCH_FIND, info->s->state.key_root[def->key]))
+ if (_ma_search(info, &key, SEARCH_FIND | SEARCH_SAVE_BUFF,
+ info->s->state.key_root[def->key]))
{
info->page_changed=1; /* Can't optimize read next */
info->cur_row.lastpos= lastpos;
- DBUG_RETURN(0); /* No matching rows */
+ goto end;
}
for (;;)
@@ -63,7 +71,8 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
info->page_changed= 1; /* Can't optimize read next */
info->cur_row.lastpos= lastpos;
DBUG_PRINT("info",("Found duplicate"));
- DBUG_RETURN(1); /* Found identical */
+ error= 1; /* Found identical */
+ goto end;
}
DBUG_ASSERT(info->last_key.data_length == MARIA_UNIQUE_HASH_LENGTH);
if (_ma_search_next(info, &info->last_key, SEARCH_BIGGER,
@@ -72,9 +81,12 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record,
{
info->page_changed= 1; /* Can't optimize read next */
info->cur_row.lastpos= lastpos;
- DBUG_RETURN(0); /* end of tree */
+ break; /* end of tree */
}
}
+
+end:
+ DBUG_RETURN(error);
}
@@ -134,13 +146,14 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record)
keyseg->charset->coll->hash_sort(keyseg->charset,
(const uchar*) pos, length, &seed1,
&seed2);
- crc^= seed1;
+ crc+= seed1;
}
else
- while (pos != end)
- crc=((crc << 8) +
- (((uchar) *pos++))) +
- (crc >> (8*sizeof(ha_checksum)-8));
+ {
+ my_hash_sort_bin((CHARSET_INFO*) 0, pos, (size_t) (end-pos),
+ &seed1, &seed2);
+ crc+= seed1;
+ }
}
return crc;
}
diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c
index 4051da022c0..0a726c1b7f9 100644
--- a/storage/maria/ma_update.c
+++ b/storage/maria/ma_update.c
@@ -74,7 +74,8 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec)
goto err_end;
}
}
- if (_ma_mark_file_changed(info))
+
+ if (_ma_mark_file_changed(share))
{
save_errno=my_errno;
goto err_end;
@@ -215,7 +216,10 @@ err:
{
if ((flag++ && _ma_ft_del(info,i,new_key_buff,newrec,pos)) ||
_ma_ft_add(info,i,old_key_buff,oldrec,pos))
+ {
+ _ma_set_fatal_error(share, my_errno);
break;
+ }
}
else
{
@@ -227,25 +231,23 @@ err:
oldrec, pos, info->cur_row.trid);
if ((flag++ && _ma_ck_delete(info, &new_key)) ||
_ma_ck_write(info, &old_key))
+ {
+ _ma_set_fatal_error(share, my_errno);
break;
+ }
}
}
} while (i-- != 0);
}
else
- {
- maria_print_error(share, HA_ERR_CRASHED);
- maria_mark_crashed(info);
- }
+ _ma_set_fatal_error(share, save_errno);
+
info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED |
key_changed);
err_end:
_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE);
if (save_errno == HA_ERR_KEY_NOT_FOUND)
- {
- maria_print_error(share, HA_ERR_CRASHED);
- save_errno=HA_ERR_CRASHED;
- }
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(my_errno=save_errno);
} /* maria_update */
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 9fdbc9e8a98..5e70fde956a 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -22,8 +22,6 @@
#include "ma_key_recover.h"
#include "ma_blockrec.h"
-#define MAX_POINTER_LENGTH 8
-
/* Functions declared in this file */
static int w_search(MARIA_HA *info, uint32 comp_flag,
@@ -121,16 +119,27 @@ int maria_write(MARIA_HA *info, uchar *record)
my_errno=HA_ERR_INDEX_FILE_FULL;
goto err2;
}
- if (_ma_mark_file_changed(info))
+ if (_ma_mark_file_changed(share))
goto err2;
/* Calculate and check all unique constraints */
- for (i=0 ; i < share->state.header.uniques ; i++)
+
+ if (share->state.header.uniques)
{
- if (_ma_check_unique(info,share->uniqueinfo+i,record,
- _ma_unique_hash(share->uniqueinfo+i,record),
- HA_OFFSET_ERROR))
- goto err2;
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ MARIA_UNIQUEDEF *def= share->uniqueinfo + i;
+ ha_checksum unique_hash= _ma_unique_hash(share->uniqueinfo+i,record);
+ if (maria_is_key_active(share->state.key_map, def->key))
+ {
+ if (_ma_check_unique(info, def, record,
+ unique_hash, HA_OFFSET_ERROR))
+ goto err2;
+ }
+ else
+ maria_unique_store(record+ share->keyinfo[def->key].seg->start,
+ unique_hash);
+ }
}
/* Ensure we don't try to restore auto_increment if it doesn't change */
@@ -798,18 +807,18 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key,
#endif
if (t_length > 0)
{
- if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ if (t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE)
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(-1);
}
bmove_upp(endpos+t_length, endpos, (uint) (endpos-key_pos));
}
else
{
- if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ if (-t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE)
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(-1);
}
bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length);
@@ -1066,7 +1075,6 @@ int _ma_split_page(MARIA_HA *info, MARIA_KEY *key, MARIA_PAGE *split_page,
Returns pointer to start of key.
key will contain the key.
- return_key_length will contain the length of key
after_key will contain the position to where the next key starts
*/
@@ -1174,7 +1182,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page,
if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page)))
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(0);
}
@@ -1187,7 +1195,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page,
memcpy(int_key->data, key_buff, length); /* previous key */
if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page)))
{
- my_errno=HA_ERR_CRASHED;
+ _ma_set_fatal_error(share, HA_ERR_CRASHED);
DBUG_RETURN(0);
}
} while (page < end);
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
index 41e15018e3e..22d1433b008 100644
--- a/storage/maria/maria_chk.c
+++ b/storage/maria/maria_chk.c
@@ -28,20 +28,17 @@
#include <sys/mman.h>
#endif
-#ifndef USE_RAID
-#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G)
-#define my_raid_delete(A,B,C) my_delete(A,B)
-#endif
-
static uint decode_bits;
static char **default_argv;
static const char *load_default_groups[]= { "aria_chk", 0 };
static const char *set_collation_name, *opt_tmpdir, *opt_log_dir;
+static const char *default_log_dir;
static CHARSET_INFO *set_collation;
static int stopwords_inited= 0;
static MY_TMPDIR maria_chk_tmpdir;
-static my_bool opt_transaction_logging, opt_debug, opt_require_control_file;
-static my_bool opt_warning_for_wrong_transid;
+static my_bool opt_transaction_logging, opt_debug;
+static my_bool opt_ignore_control_file, opt_require_control_file;
+static my_bool opt_warning_for_wrong_transid, opt_update_state;
static const char *type_names[]=
{
@@ -67,7 +64,7 @@ static const char *field_pack[]=
static const char *record_formats[]=
{
- "Fixed length", "Packed", "Compressed", "Block", "?"
+ "Fixed length", "Packed", "Compressed", "Block", "No data", "?", "?"
};
static const char *bitmap_description[]=
@@ -104,7 +101,7 @@ int main(int argc, char **argv)
int error;
MY_INIT(argv[0]);
- opt_log_dir= maria_data_root= (char *)".";
+ default_log_dir= opt_log_dir= maria_data_root= (char *)".";
maria_chk_init(&check_param);
check_param.opt_lock_memory= 1; /* Lock memory if possible */
check_param.using_global_keycache = 0;
@@ -114,10 +111,11 @@ int main(int argc, char **argv)
maria_init();
maria_block_size= 0; /* Use block size from control file */
- if (ma_control_file_open(FALSE, opt_require_control_file ||
- !(check_param.testflag & T_SILENT)) &&
- (opt_require_control_file ||
- (opt_transaction_logging && (check_param.testflag & T_REP_ANY))))
+ if (!opt_ignore_control_file &&
+ (ma_control_file_open(FALSE, opt_require_control_file ||
+ !(check_param.testflag & T_SILENT)) &&
+ (opt_require_control_file ||
+ (opt_transaction_logging && (check_param.testflag & T_REP_ANY)))))
{
error= 1;
goto end;
@@ -202,8 +200,9 @@ enum options_mc {
OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN,
OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE,
OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD, OPT_TRANSACTION_LOG,
- OPT_ZEROFILL_KEEP_LSN, OPT_REQUIRE_CONTROL_FILE,
- OPT_LOG_DIR, OPT_DATADIR, OPT_WARNING_FOR_WRONG_TRANSID
+ OPT_ZEROFILL_KEEP_LSN,
+ OPT_REQUIRE_CONTROL_FILE, OPT_IGNORE_CONTROL_FILE,
+ OPT_LOG_DIR, OPT_WARNING_FOR_WRONG_TRANSID
};
static struct my_option my_long_options[] =
@@ -264,12 +263,16 @@ static struct my_option my_long_options[] =
{"information", 'i',
"Print statistics information about table that is checked.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { "ignore-control-file", OPT_IGNORE_CONTROL_FILE,
+ "Ignore the control file",
+ (uchar**)&opt_ignore_control_file, 0, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
{"keys-used", 'k',
"Tell Aria to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.",
&check_param.keys_in_use,
&check_param.keys_in_use,
0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0},
- {"datadir", OPT_DATADIR,
+ {"datadir", 'h',
"Path for control file (and logs if --logdir not used).",
&maria_data_root, 0, 0, GET_STR, REQUIRED_ARG,
0, 0, 0, 0, 0, 0},
@@ -337,10 +340,13 @@ static struct my_option my_long_options[] =
&opt_transaction_logging, &opt_transaction_logging,
0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"update-state", 'U',
- "Mark tables as crashed if any errors were found and clean if check didn't "
- "find any errors. This allows one to get rid of warnings like 'table not "
- "properly closed'",
- 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ "Mark tables as crashed if any errors were found and clean if check "
+ "didn't find any errors but table was marked as 'not clean' before. This "
+ "allows one to get rid of warnings like 'table not properly closed'. "
+ "If table was updated, update also the timestamp for when check was made. "
+ "This option is on by default!",
+ &opt_update_state, &opt_update_state, 0, GET_BOOL, NO_ARG,
+ 1, 0, 0, 0, 0, 0},
{"unpack", 'u',
"Unpack file packed with aria_pack.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
@@ -415,7 +421,7 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
- printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE,
+ printf("%s Ver 1.1 for %s at %s\n", my_progname, SYSTEM_TYPE,
MACHINE_TYPE);
}
@@ -438,6 +444,9 @@ static void usage(void)
-?, --help Display this help and exit.\n\
--datadir=path Path for control file (and logs if --logdir not used)\n\
--logdir=path Path for log files\n\
+ --ignore-control-file Don't open the control file. Only use this if you\n\
+ are sure the tables are not in use by another\n\
+ program!\n\
--require-control-file Abort if we can't find/read the maria_log_control\n\
file\n\
-s, --silent Only print errors. One can use two -s to make\n\
@@ -472,8 +481,18 @@ static void usage(void)
-i, --information Print statistics information about table that is checked.\n\
-m, --medium-check Faster than extend-check, but only finds 99.99% of\n\
all errors. Should be good enough for most cases.\n\
- -U, --update-state Mark tables as crashed if you find any errors.\n\
- -T, --read-only Don't mark table as checked.\n");
+ -T, --read-only Don't mark table as checked.\n\
+ -U, --update-state Mark tables as crashed if any errors were found and\n\
+ clean if check didn't find any errors but table was\n\
+ marked as 'not clean' before. This allows one to get\n\
+ rid of warnings like 'table not properly closed'. If\n\
+ table was updated, update also the timestamp for when\n\
+ the check was made. This option is on by default!\n\
+ Use --skip-update-state to disable.\n\
+ --warning-for-wrong-transaction-id\n\
+ Give a warning if we find a transaction id in the table that is bigger\n\
+ than what exists in the control file. Use --skip-... to disable warning\n\
+ ");
puts("\
Recover (repair)/ options (When using '--recover' or '--safe-recover'):\n\
@@ -836,6 +855,7 @@ static void get_options(register int *argc,register char ***argv)
load_defaults("my", load_default_groups, argc, argv);
default_argv= *argv;
+ check_param.testflag= T_UPDATE_STATE;
if (isatty(fileno(stdout)))
check_param.testflag|=T_WRITE_LOOP;
@@ -884,15 +904,27 @@ static void get_options(register int *argc,register char ***argv)
MYF(MY_WME))))
exit(1);
+ if (maria_data_root != default_log_dir && opt_log_dir == default_log_dir)
+ {
+ /* --datadir was used and --log-dir was not. Set log-dir to datadir */
+ opt_log_dir= maria_data_root;
+ }
return;
} /* get options */
- /* Check table */
+/**
+ Check/repair table
+
+ @return 0 table is ok
+ @return 1 Got warning during check
+ @return 2 Got error during check/repair.
+*/
static int maria_chk(HA_CHECK *param, char *filename)
{
int error,lock_type,recreate;
+ uint warning_printed_by_chk_status;
my_bool rep_quick= test(param->testflag & (T_QUICK | T_FORCE_UNIQUENESS));
MARIA_HA *info;
File datafile;
@@ -905,6 +937,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
recreate=0;
datafile=0;
param->isam_file_name=filename; /* For error messages */
+ warning_printed_by_chk_status= 0;
if (!(info=maria_open(filename,
(param->testflag & (T_DESCRIPT | T_READONLY)) ?
O_RDONLY : O_RDWR,
@@ -992,8 +1025,8 @@ static int maria_chk(HA_CHECK *param, char *filename)
share->state.open_count != 0);
if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) &&
- ((share->state.changed & (STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR) ||
+ ((share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR) ||
!(param->testflag & T_CHECK_ONLY_CHANGED))))
need_to_check=1;
@@ -1010,8 +1043,8 @@ static int maria_chk(HA_CHECK *param, char *filename)
need_to_check=1;
}
if ((param->testflag & T_CHECK_ONLY_CHANGED) &&
- (share->state.changed & (STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR)))
+ (share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR)))
need_to_check=1;
if (!need_to_check)
{
@@ -1188,9 +1221,9 @@ static int maria_chk(HA_CHECK *param, char *filename)
#ifndef TO_BE_REMOVED
if (param->out_flag & O_NEW_DATA)
{ /* Change temp file to org file */
- my_close(info->dfile.file, MYF(MY_WME)); /* Close new file */
+ mysql_file_close(info->dfile.file, MYF(MY_WME)); /* Close new file */
error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
- MYF(0));
+ 0, MYF(0));
if (_ma_open_datafile(info,info->s, NullS, -1))
error=1;
param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */
@@ -1229,8 +1262,8 @@ static int maria_chk(HA_CHECK *param, char *filename)
if (!error)
{
DBUG_PRINT("info", ("Reseting crashed state"));
- share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR);
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR);
}
else
maria_mark_crashed(info);
@@ -1246,7 +1279,12 @@ static int maria_chk(HA_CHECK *param, char *filename)
maria_chk_init_for_check(param, info);
if (opt_warning_for_wrong_transid == 0)
param->max_trid= ~ (ulonglong) 0;
+
error= maria_chk_status(param,info);
+ /* Forget warning printed by maria_chk_status if no problems found */
+ warning_printed_by_chk_status= param->warning_printed;
+ param->warning_printed= 0;
+
maria_intersect_keys_active(share->state.key_map, param->keys_in_use);
error|= maria_chk_size(param,info);
if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE)))
@@ -1283,14 +1321,13 @@ static int maria_chk(HA_CHECK *param, char *filename)
if (!error)
{
if (((share->state.changed &
- (STATE_CHANGED | STATE_CRASHED | STATE_CRASHED_ON_REPAIR |
- STATE_IN_REPAIR)) ||
+ (STATE_CHANGED | STATE_CRASHED_FLAGS | STATE_IN_REPAIR)) ||
share->state.open_count != 0)
&& (param->testflag & T_UPDATE_STATE))
info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
DBUG_PRINT("info", ("Reseting crashed state"));
- share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
- STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR);
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR);
}
else if (!maria_is_crashed(info) &&
(param->testflag & T_UPDATE_STATE))
@@ -1306,33 +1343,40 @@ static int maria_chk(HA_CHECK *param, char *filename)
(my_bool) !test(param->testflag & T_AUTO_INC));
if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY))
+ {
error|=maria_update_state_info(param, info,
UPDATE_OPEN_COUNT |
- (((param->testflag & T_REP_ANY) ?
+ (((param->testflag &
+ (T_REP_ANY | T_UPDATE_STATE)) ?
UPDATE_TIME : 0) |
(state_updated ? UPDATE_STAT : 0) |
((param->testflag & T_SORT_RECORDS) ?
UPDATE_SORT : 0)));
+ if (warning_printed_by_chk_status)
+ _ma_check_print_info(param, "Aria table '%s' was ok. Status updated",
+ filename);
+ else if (!(param->testflag & T_SILENT))
+ printf("State updated\n");
+ warning_printed_by_chk_status= 0;
+ }
info->update&= ~HA_STATE_CHANGED;
_ma_reenable_logging_for_table(info, FALSE);
maria_lock_database(info, F_UNLCK);
end2:
- end_pagecache(maria_pagecache, 1);
if (maria_close(info))
{
_ma_check_print_error(param, default_close_errmsg, my_errno, filename);
DBUG_RETURN(1);
}
+ end_pagecache(maria_pagecache, 1);
if (error == 0)
{
if (param->out_flag & O_NEW_DATA)
error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
+ param->backup_time,
((param->testflag & T_BACKUP_DATA) ?
MYF(MY_REDEL_MAKE_BACKUP) : MYF(0)));
- if (param->out_flag & O_NEW_INDEX)
- error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT,
- MYF(0));
}
if (opt_transaction_logging &&
share->base.born_transactional && !error &&
@@ -1352,6 +1396,7 @@ end2:
if (param->error_printed)
{
+ error= 2;
if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
{
fprintf(stderr, "Aria table '%s' is not fixed because of errors\n",
@@ -1366,12 +1411,17 @@ end2:
fprintf(stderr, "Aria table '%s' is corrupted\nFix it using switch "
"\"-r\" or \"-o\"\n", filename);
}
- else if (param->warning_printed &&
+ else if ((param->warning_printed || warning_printed_by_chk_status) &&
! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX |
T_FORCE_CREATE)))
- fprintf(stderr, "Aria table '%s' is usable but should be fixed\n",
- filename);
- fflush(stderr);
+ {
+ if (!error)
+ error= 1;
+ (void) fprintf(stderr, "Aria table '%s' is usable but should be fixed\n",
+ filename);
+ }
+
+ (void) fflush(stderr);
DBUG_RETURN(error);
} /* maria_chk */
@@ -1400,7 +1450,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
DBUG_VOID_RETURN;
}
- printf("Aria file: %s\n",name);
+ printf("Aria file: %s\n",name);
printf("Record format: %s\n", record_formats[share->data_file_type]);
printf("Crashsafe: %s\n",
share->base.born_transactional ? "yes" : "no");
@@ -1420,7 +1470,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
if (share->state.check_time)
{
get_date(buff,1,share->state.check_time);
- printf("Recover time: %s\n",buff);
+ printf("Check/recover time: %s\n",buff);
}
if (share->base.born_transactional)
{
@@ -1436,7 +1486,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
printf("UUID: %s\n", buff);
pos=buff;
if (share->state.changed & STATE_CRASHED)
- strmov(buff,"crashed");
+ strmov(buff, share->state.changed & STATE_CRASHED_ON_REPAIR ?
+ "crashed on repair" : "crashed");
else
{
if (share->state.open_count)
@@ -1499,8 +1550,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
if (share->base.max_data_file_length != HA_OFFSET_ERROR ||
share->base.max_key_file_length != HA_OFFSET_ERROR)
printf("Max datafile length: %16s Max keyfile length: %18s\n",
- llstr(share->base.max_data_file_length-1,llbuff),
- llstr(share->base.max_key_file_length-1,llbuff2));
+ ullstr(share->base.max_data_file_length,llbuff),
+ ullstr(share->base.max_key_file_length,llbuff2));
}
}
printf("Block_size: %16d\n",(int) share->block_size);
@@ -1700,14 +1751,14 @@ static int maria_sort_records(HA_CHECK *param,
{
_ma_check_print_warning(param,
"Can't sort table '%s' on key %d; No such key",
- name,sort_key+1);
+ name,sort_key+1);
param->error_printed=0;
DBUG_RETURN(0); /* Nothing to do */
}
if (keyinfo->flag & HA_FULLTEXT)
{
_ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d",
- name,sort_key+1);
+ name,sort_key+1);
param->error_printed=0;
DBUG_RETURN(0); /* Nothing to do */
}
@@ -1759,12 +1810,12 @@ static int maria_sort_records(HA_CHECK *param,
}
fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32);
- new_file= my_create(fn_format(param->temp_filename,
- param->temp_filename,"",
- DATA_TMP_EXT,
- MY_REPLACE_EXT | MY_UNPACK_FILENAME),
- 0, param->tmpfile_createflag,
- MYF(0));
+ new_file= mysql_file_create(key_file_tmp,
+ fn_format(param->temp_filename,
+ param->temp_filename, "",
+ DATA_TMP_EXT,
+ MY_REPLACE_EXT | MY_UNPACK_FILENAME),
+ 0, param->tmpfile_createflag, MYF(0));
if (new_file < 0)
{
_ma_check_print_error(param,"Can't create new tempfile: '%s'",
@@ -1782,10 +1833,10 @@ static int maria_sort_records(HA_CHECK *param,
for (key=0 ; key < share->base.keys ; key++)
share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME;
- if (my_pread(share->kfile.file, temp_buff,
- (uint) keyinfo->block_length,
- share->state.key_root[sort_key],
- MYF(MY_NABP+MY_WME)))
+ if (mysql_file_pread(share->kfile.file, temp_buff,
+ (uint) keyinfo->block_length,
+ share->state.key_root[sort_key],
+ MYF(MY_NABP+MY_WME)))
{
_ma_check_print_error(param, "Can't read indexpage from filepos: %s",
llstr(share->state.key_root[sort_key], llbuff));
@@ -1818,7 +1869,7 @@ static int maria_sort_records(HA_CHECK *param,
goto err;
}
- my_close(info->dfile.file, MYF(MY_WME));
+ mysql_file_close(info->dfile.file, MYF(MY_WME));
param->out_flag|=O_NEW_DATA; /* Data in new file */
info->dfile.file= new_file; /* Use new datafile */
_ma_set_data_pagecache_callbacks(&info->dfile, info->s);
@@ -1843,8 +1894,8 @@ err:
if (got_error && new_file >= 0)
{
end_io_cache(&info->rec_cache);
- (void) my_close(new_file,MYF(MY_WME));
- (void) my_delete(param->temp_filename, MYF(MY_WME));
+ (void) mysql_file_close(new_file,MYF(MY_WME));
+ (void) mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
}
if (temp_buff)
{
@@ -1902,9 +1953,9 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param,
if (nod_flag)
{
next_page= _ma_kpos(nod_flag, keypos);
- if (my_pread(share->kfile.file, temp_buff,
- (uint) tmp_key.keyinfo->block_length, next_page,
- MYF(MY_NABP+MY_WME)))
+ if (mysql_file_pread(share->kfile.file, temp_buff,
+ (uint) tmp_key.keyinfo->block_length, next_page,
+ MYF(MY_NABP+MY_WME)))
{
_ma_check_print_error(param,"Can't read keys from filepos: %s",
llstr(next_page,llbuff));
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index ef65a9eb3af..cd3294e8975 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -123,6 +123,8 @@ typedef struct st_maria_state_info
increased.
*/
LSN skip_redo_lsn;
+ /* LSN when we wrote file id to the log */
+ LSN logrec_file_id;
/* the following isn't saved on disk */
uint state_diff_length; /* Should be 0 */
@@ -149,11 +151,13 @@ typedef struct st_maria_state_info
#define MARIA_COLUMNDEF_SIZE (2*7+1+1+4)
#define MARIA_BASE_INFO_SIZE (MY_UUID_SIZE + 5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16)
#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+#define MARIA_MAX_POINTER_LENGTH 7 /* Node pointer */
/* Internal management bytes needed to store 2 transid/key on an index page */
#define MARIA_MAX_PACK_TRANSID_SIZE (TRANSID_SIZE+1)
#define MARIA_TRANSID_PACK_OFFSET (256- TRANSID_SIZE - 1)
#define MARIA_MIN_TRANSID_PACK_OFFSET (MARIA_TRANSID_PACK_OFFSET-TRANSID_SIZE)
-#define MARIA_INDEX_OVERHEAD_SIZE (MARIA_MAX_PACK_TRANSID_SIZE * 2)
+#define MARIA_INDEX_OVERHEAD_SIZE (MARIA_MAX_PACK_TRANSID_SIZE * 2 + \
+ MARIA_MAX_POINTER_LENGTH)
#define MARIA_DELETE_KEY_NR 255 /* keynr for deleted blocks */
/*
@@ -240,11 +244,14 @@ typedef struct st_maria_file_bitmap
{
uchar *map;
pgcache_page_no_t page; /* Page number for current bitmap */
- uint used_size; /* Size of bitmap head that is not 0 */
+ pgcache_page_no_t last_bitmap_page; /* Last possible bitmap page */
my_bool changed; /* 1 if page needs to be written */
my_bool changed_not_flushed; /* 1 if some bitmap is not flushed */
+ uint used_size; /* Size of bitmap head that is not 0 */
uint flush_all_requested; /**< If _ma_bitmap_flush_all waiting */
+ uint waiting_for_flush_all_requested; /* If someone is waiting for above */
uint non_flushable; /**< 0 if bitmap and log are in sync */
+ uint waiting_for_non_flushable; /* If someone is waiting for above */
PAGECACHE_FILE file; /* datafile where bitmap is stored */
mysql_mutex_t bitmap_lock;
@@ -252,6 +259,8 @@ typedef struct st_maria_file_bitmap
/* Constants, allocated when initiating bitmaps */
uint sizes[8]; /* Size per bit combination */
uint total_size; /* Total usable size of bitmap page */
+ uint max_total_size; /* Max value for total_size */
+ uint last_total_size; /* Size of bitmap on last_bitmap_page */
uint block_size; /* Block size of file */
ulong pages_covered; /* Pages covered by bitmap + 1 */
DYNAMIC_ARRAY pinned_pages; /**< not-yet-flushable bitmap pages */
@@ -264,6 +273,7 @@ typedef struct st_maria_file_bitmap
typedef struct st_maria_share
{ /* Shared between opens */
MARIA_STATE_INFO state;
+ MARIA_STATE_INFO checkpoint_state; /* Copy of saved state by checkpoint */
MARIA_BASE_INFO base;
MARIA_STATE_HISTORY *state_history;
MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
@@ -371,6 +381,13 @@ typedef struct st_maria_share
my_bool temporary;
/* Below flag is needed to make log tables work with concurrent insert */
my_bool is_log_table;
+ my_bool has_null_fields;
+ my_bool has_varchar_fields; /* If table has varchar fields */
+ /*
+ Set to 1 if open_count was wrong at open. Set to avoid asserts for
+ wrong open count on close.
+ */
+ my_bool open_count_not_zero_on_open;
my_bool changed, /* If changed since lock */
global_changed, /* If changed since open */
@@ -475,11 +492,12 @@ typedef struct st_maria_block_scan
MARIA_RECORD_POS row_base_page;
} MARIA_BLOCK_SCAN;
+//typedef ICP_RESULT (*index_cond_func_t)(void *param);
+
struct st_maria_handler
{
MARIA_SHARE *s; /* Shared between open:s */
struct st_ma_transaction *trn; /* Pointer to active transaction */
- void *external_ptr; /* Pointer to THD in mysql */
MARIA_STATUS_INFO *state, state_save;
MARIA_STATUS_INFO *state_start; /* State at start of transaction */
MARIA_ROW cur_row; /* The active row that we just read */
@@ -496,6 +514,7 @@ struct st_maria_handler
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ void *external_ref; /* For MariaDB TABLE */
uchar *buff; /* page buffer */
uchar *keyread_buff; /* Buffer for last key read */
uchar *lastkey_buff; /* Last used search key */
@@ -537,6 +556,7 @@ struct st_maria_handler
ulong row_base_length; /* Length of row header */
uint row_flag; /* Flag to store in row header */
uint opt_flag; /* Optim. for space/speed */
+ uint open_flags; /* Flags used in open() */
uint update; /* If file changed since open */
int lastinx; /* Last used index */
uint last_rkey_length; /* Last length in maria_rkey() */
@@ -560,6 +580,7 @@ struct st_maria_handler
my_bool was_locked; /* Was locked in panic */
my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
+ my_bool in_check_table; /* We are running check tables */
/* Marker if key_del_changed */
/* If info->keyread_buff can't be used for rnext */
my_bool page_changed;
@@ -609,6 +630,9 @@ struct st_maria_handler
#define STATE_NOT_MOVABLE 256
#define STATE_MOVED 512 /* set if base->uuid != maria_uuid */
#define STATE_IN_REPAIR 1024 /* We are running repair on table */
+#define STATE_CRASHED_PRINTED 2048
+
+#define STATE_CRASHED_FLAGS (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_CRASHED_PRINTED)
/* options to maria_read_cache */
@@ -691,7 +715,6 @@ struct st_maria_handler
#endif
#define DBUG_DUMP_KEY(name, key) DBUG_DUMP(name, (key)->data, (key)->data_length + (key)->ref_length)
-
/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
#define store_key_length(key,length) \
@@ -715,7 +738,7 @@ struct st_maria_handler
{ length=mi_uint2korr((key)+1)+3; } \
}
-#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/2 - MARIA_INDEX_OVERHEAD_SIZE)
+#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/3 - MARIA_INDEX_OVERHEAD_SIZE)
#define get_pack_length(length) ((length) >= 255 ? 3 : 1)
#define _ma_have_versioning(info) ((info)->row_flag & ROW_FLAG_TRANSID)
@@ -767,9 +790,9 @@ struct st_maria_handler
extern mysql_mutex_t THR_LOCK_maria;
#ifdef DONT_USE_RW_LOCKS
-#define rw_wrlock(A) {}
-#define rw_rdlock(A) {}
-#define rw_unlock(A) {}
+#define mysql_rwlock_wrlock(A) {}
+#define mysql_rwlock_rdlock(A) {}
+#define mysql_rwlock_unlock(A) {}
#endif
/* Some tuning parameters */
@@ -792,9 +815,11 @@ extern uint maria_quick_table_bits;
extern char *maria_data_root;
extern uchar maria_zero_string[];
extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data;
-extern my_bool maria_recovery_verbose;
+extern my_bool maria_recovery_verbose, maria_checkpoint_disabled;
+extern my_bool maria_assert_if_crashed_table;
extern HASH maria_stored_state;
extern int (*maria_create_trn_hook)(MARIA_HA *);
+extern my_bool (*ma_killed)(MARIA_HA *);
#ifdef HAVE_PSI_INTERFACE
extern PSI_mutex_key key_SHARE_BITMAP_lock, key_SORT_INFO_mutex,
@@ -825,7 +850,7 @@ extern PSI_thread_key key_thread_checkpoint, key_thread_find_all_keys,
key_thread_soft_sync;
extern PSI_file_key key_file_translog, key_file_kfile, key_file_dfile,
- key_file_control;
+ key_file_control, key_file_tmp;
#endif
@@ -887,6 +912,18 @@ extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS,
const uchar *, const uchar *);
extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record);
+
+extern my_bool _ma_write_no_record(MARIA_HA *info, const uchar *record);
+extern my_bool _ma_update_no_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec, const uchar *record);
+extern my_bool _ma_delete_no_record(MARIA_HA *info, const uchar *record);
+extern int _ma_read_no_record(MARIA_HA *info, uchar *record,
+ MARIA_RECORD_POS pos);
+extern int _ma_read_rnd_no_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks);
+my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos);
+
extern my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key);
extern my_bool _ma_enlarge_root(MARIA_HA *info, MARIA_KEY *key,
MARIA_RECORD_POS *root);
@@ -937,11 +974,13 @@ extern my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key,
extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer);
extern int _ma_writeinfo(MARIA_HA *info, uint options);
extern int _ma_test_if_changed(MARIA_HA *info);
-extern int _ma_mark_file_changed(MARIA_HA *info);
+extern int _ma_mark_file_changed(MARIA_SHARE *info);
+extern int _ma_mark_file_changed_now(MARIA_SHARE *info);
extern void _ma_mark_file_crashed(MARIA_SHARE *share);
-extern my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid);
+void _ma_set_fatal_error(MARIA_SHARE *share, int error);
+extern my_bool _ma_set_uuid(MARIA_SHARE *info, my_bool reset_uuid);
extern my_bool _ma_check_if_zero(uchar *pos, size_t size);
-extern int _ma_decrement_open_count(MARIA_HA *info);
+extern int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_table);
extern int _ma_check_index(MARIA_HA *info, int inx);
extern int _ma_search(MARIA_HA *info, MARIA_KEY *key, uint32 nextflag,
my_off_t pos);
@@ -1035,7 +1074,7 @@ extern MARIA_KEY *_ma_pack_key(MARIA_HA *info, MARIA_KEY *int_key,
HA_KEYSEG ** last_used_keyseg);
extern void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from);
extern int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS);
-extern my_bool _ma_read_cache(IO_CACHE *info, uchar *buff,
+extern my_bool _ma_read_cache(MARIA_HA *, IO_CACHE *info, uchar *buff,
MARIA_RECORD_POS pos, size_t length,
uint re_read_if_possibly);
extern ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type);
@@ -1117,7 +1156,7 @@ typedef struct st_maria_block_info
#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)
-extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t);
+extern uint _ma_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, my_off_t);
extern uint _ma_rec_pack(MARIA_HA *info, uchar *to, const uchar *from);
extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
MARIA_BLOCK_INFO *info, uchar **rec_buff_p,
@@ -1196,6 +1235,7 @@ void _ma_remap_file(MARIA_HA *info, my_off_t size);
MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record);
my_bool _ma_write_abort_default(MARIA_HA *info);
+int maria_delete_table_files(const char *name, myf sync_dir);
C_MODE_START
#define MARIA_FLUSH_DATA 1
@@ -1208,6 +1248,8 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
See ma_check_standalone.h .
*/
int _ma_killed_ptr(HA_CHECK *param);
+void _ma_report_progress(HA_CHECK *param, ulonglong progress,
+ ulonglong max_progress);
void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
ATTRIBUTE_FORMAT(printf, 2, 3);
void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
@@ -1282,5 +1324,9 @@ extern my_bool maria_flush_log_for_page_none(uchar *page,
extern PAGECACHE *maria_log_pagecache;
extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
void *func_arg);
-int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record);
+ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record);
+
+extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx);
+extern my_bool ma_killed_standalone(MARIA_HA *);
+extern uint _ma_file_callback_to_id(void *callback_data);
diff --git a/storage/maria/maria_dump_log.c b/storage/maria/maria_dump_log.c
new file mode 100644
index 00000000000..d5ce3913474
--- /dev/null
+++ b/storage/maria/maria_dump_log.c
@@ -0,0 +1,192 @@
+/* Copyright (C) 2007 MySQL AB & Sanja Belkin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include <my_getopt.h>
+extern void translog_example_table_init();
+static const char *load_default_groups[]= { "aria_dump_log",0 };
+static void get_options(int *argc,char * * *argv);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+const char *default_dbug_option= "d:t:i:O,\\aria_dump_log.trace";
+#else
+const char *default_dbug_option= "d:t:i:o,/tmp/aria_dump_log.trace";
+#endif
+#endif
+static ulonglong opt_offset;
+static ulong opt_pages;
+static const char *opt_file= NULL;
+static File handler= -1;
+static my_bool opt_unit= 0;
+/*
+  Command line options of the log dump tool. The table is passed to
+  handle_options(), my_print_help() and my_print_variables(); it is
+  terminated by an all-zero entry.
+*/
+static struct my_option my_long_options[] =
+{
+  /*
+    NOTE(review): 'IMPLTMENTED' looks like a typo for 'IMPLEMENTED'. opt_body
+    is not declared in this file, so the entry must stay disabled until the
+    option is really implemented.
+  */
+#ifdef IMPLTMENTED
+  {"body", 'b',
+   "Print chunk body dump",
+   (uchar **) &opt_body, (uchar **) &opt_body, 0,
+   GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+#ifndef DBUG_OFF
+  {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
+   0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+  /* Mandatory: get_options() prints the usage and exits if it is missing */
+  {"file", 'f', "Path to file which will be read",
+   (uchar**) &opt_file, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"help", '?', "Display this help and exit.",
+   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+  { "offset", 'o', "Start reading log from this offset",
+    (uchar**) &opt_offset, (uchar**) &opt_offset,
+    0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
+  { "pages", 'n', "Number of pages to read",
+    (uchar**) &opt_pages, (uchar**) &opt_pages, 0,
+    GET_ULONG, REQUIRED_ARG, (long) ~(ulong) 0,
+    (long) 1, (long) ~(ulong) 0, (long) 0,
+    (long) 1, 0},
+  /* Help text fixed: the closing parenthesis was missing */
+  {"unit-test", 'U',
+   "Use unit test record table (for logs created by unittests)",
+   (uchar **) &opt_unit, (uchar **) &opt_unit, 0,
+   GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"version", 'V', "Print version and exit.",
+   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+/* Print the one-line program/version banner on stdout. */
+static void print_version(void)
+{
+  const char *progname= my_progname_short;
+
+  printf("%s Ver 1.0 for %s on %s\n", progname, SYSTEM_TYPE, MACHINE_TYPE);
+}
+
+
+/*
+  Print the full help text on stdout: version banner, license note,
+  a short description, the usage line, the option help, the defaults
+  groups and the current option values.
+*/
+static void usage(void)
+{
+  /* Banner and license */
+  print_version();
+  puts("Copyright (C) 2008 MySQL AB");
+  puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
+  puts("and you are welcome to modify and redistribute it under the GPL license\n");
+
+  /* What the tool does and how to call it */
+  puts("Dump content of aria log pages.");
+  printf("\nUsage: %s -f file OPTIONS\n", my_progname_short);
+
+  /* Everything generated from the option table */
+  my_print_help(my_long_options);
+  print_defaults("my", load_default_groups);
+  my_print_variables(my_long_options);
+}
+
+
+/*
+  Per-option callback given to handle_options().
+
+  '?' prints the usage and 'V' the version; both end the program with
+  exit status 0. In debug builds '#' sets the initial DBUG options, using
+  default_dbug_option when no argument was given. Any other option id is
+  ignored here.
+
+  Always returns 0.
+*/
+static my_bool
+get_one_option(int optid __attribute__((unused)),
+               const struct my_option *opt __attribute__((unused)),
+               char *argument __attribute__((unused)))
+{
+  if (optid == '?')
+  {
+    usage();
+    exit(0);
+  }
+  if (optid == 'V')
+  {
+    print_version();
+    exit(0);
+  }
+#ifndef DBUG_OFF
+  if (optid == '#')
+  {
+    DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
+  }
+#endif
+  return 0;
+}
+
+
+/*
+  Parse the command line with my_long_options.
+
+  Exits with the handle_options() error code if parsing fails, and with
+  status 1 (after printing the usage) when no --file was given.
+*/
+static void get_options(int *argc,char ***argv)
+{
+  int ho_error= handle_options(argc, argv, my_long_options, get_one_option);
+
+  if (ho_error)
+    exit(ho_error);
+
+  /* The file to dump is the only mandatory option */
+  if (!opt_file)
+  {
+    usage();
+    exit(1);
+  }
+}
+
+
+/**
+  @brief maria_dump_log main function.
+
+  Opens the file given with --file, seeks to --offset and dumps up to
+  --pages pages of TRANSLOG_PAGE_SIZE bytes each with dump_page().
+  Reading stops silently when the file turns out to be too short.
+
+  The process ends through exit(): status 0 on success, 1 on failure.
+*/
+
+int main(int argc, char **argv)
+{
+  char **default_argv;
+  uchar buffer[TRANSLOG_PAGE_SIZE];
+  MY_INIT(argv[0]);
+
+  load_defaults("my", load_default_groups, &argc, &argv);
+  /* Remember the vector from load_defaults() so it can be freed at exit */
+  default_argv= argv;
+  get_options(&argc, &argv);
+
+  /* Select the record type table that matches the log being dumped */
+  if (opt_unit)
+    translog_example_table_init();
+  else
+    translog_table_init();
+  translog_fill_overhead_table();
+
+  maria_data_root= (char *)".";
+
+  if ((handler= my_open(opt_file, O_RDONLY, MYF(MY_WME))) < 0)
+  {
+    fprintf(stderr, "Can't open file: '%s' errno: %d\n",
+            opt_file, my_errno);
+    goto err;
+  }
+  if (my_seek(handler, opt_offset, SEEK_SET, MYF(MY_WME)) !=
+      opt_offset)
+  {
+    /* opt_offset is unsigned: print it with %llu, as the page header does */
+    fprintf(stderr, "Can't set position %llu file: '%s' errno: %d\n",
+            opt_offset, opt_file, my_errno);
+    goto err;
+  }
+  for (;
+       opt_pages;
+       opt_offset+= TRANSLOG_PAGE_SIZE, opt_pages--)
+  {
+    if (my_pread(handler, buffer, TRANSLOG_PAGE_SIZE, opt_offset,
+                 MYF(MY_NABP)))
+    {
+      /* A short read is the normal end of the file, not an error */
+      if (my_errno == HA_ERR_FILE_TOO_SHORT)
+        goto end;
+      fprintf(stderr, "Can't read page at position %llu file: '%s' "
+              "errno: %d\n", opt_offset, opt_file, my_errno);
+      goto err;
+    }
+    printf("Page by offset %llu (0x%llx)\n", opt_offset, opt_offset);
+    dump_page(buffer, handler);
+  }
+
+end:
+  my_close(handler, MYF(0));
+  free_defaults(default_argv);
+  exit(0);
+  return 0; /* No compiler warning */
+
+err:
+  /* We also get here when my_open() failed; then there is nothing to close */
+  if (handler >= 0)
+    my_close(handler, MYF(0));
+  fprintf(stderr, "%s: FAILED\n", my_progname_short);
+  free_defaults(default_argv);
+  exit(1);
+}
+
+#include "ma_check_standalone.h"
+
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
index 351a2014059..4480dabbcad 100644
--- a/storage/maria/maria_pack.c
+++ b/storage/maria/maria_pack.c
@@ -683,6 +683,8 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
error|=my_close(new_file,MYF(MY_WME));
if (!result_table)
{
+ (void) flush_pagecache_blocks(isam_file->s->pagecache, &isam_file->dfile,
+ FLUSH_RELEASE);
error|=my_close(isam_file->dfile.file, MYF(MY_WME));
isam_file->dfile.file= -1; /* Tell maria_close file is closed */
isam_file->s->bitmap.file.file= -1;
@@ -729,7 +731,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
my_delete(new_name,MYF(MY_WME));
}
else
- error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
+ error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME));
}
if (! error)
error=save_state(isam_file,mrg,new_length,glob_crc);
@@ -756,13 +758,13 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
DBUG_RETURN(0);
err:
- end_pagecache(maria_pagecache, 1);
free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
if (new_file >= 0)
my_close(new_file,MYF(0));
if (join_maria_file >= 0)
my_close(join_maria_file,MYF(0));
mrg_close(mrg);
+ end_pagecache(maria_pagecache, 1);
fprintf(stderr, "Aborted: %s is not compressed\n", org_name);
DBUG_RETURN(-1);
}
diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c
index 6f273b11ce8..74aa8bd9d11 100644
--- a/storage/maria/maria_read_log.c
+++ b/storage/maria/maria_read_log.c
@@ -32,7 +32,7 @@ const char *default_dbug_option= "d:t:o,/tmp/aria_read_log.trace";
static my_bool opt_display_only, opt_apply, opt_apply_undo, opt_silent;
static my_bool opt_check;
static const char *opt_tmpdir;
-static ulong opt_page_buffer_size;
+static ulong opt_page_buffer_size, opt_translog_buffer_size;
static ulonglong opt_start_from_lsn, opt_end_lsn, opt_start_from_checkpoint;
static MY_TMPDIR maria_chk_tmpdir;
@@ -44,9 +44,9 @@ int main(int argc, char **argv)
uint warnings_count;
MY_INIT(argv[0]);
+ maria_data_root= (char *)".";
load_defaults("my", load_default_groups, &argc, &argv);
default_argv= argv;
- maria_data_root= (char *)".";
get_options(&argc, &argv);
maria_in_recovery= TRUE;
@@ -80,9 +80,8 @@ int main(int argc, char **argv)
But if it finds a log and this log was crashed, it will create a new log,
which is useless. TODO: start log handler in read-only mode.
*/
- if (init_pagecache(maria_log_pagecache,
- TRANSLOG_PAGECACHE_SIZE, 0, 0,
- TRANSLOG_PAGE_SIZE, MY_WME) == 0 ||
+ if (init_pagecache(maria_log_pagecache, opt_translog_buffer_size,
+ 0, 0, TRANSLOG_PAGE_SIZE, MY_WME) == 0 ||
translog_init(maria_data_root, TRANSLOG_FILE_SIZE,
0, 0, maria_log_pagecache, TRANSLOG_DEFAULT_FLAGS,
opt_display_only))
@@ -166,7 +165,7 @@ err:
#include "ma_check_standalone.h"
enum options_mc {
- OPT_CHARSETS_DIR=256
+ OPT_CHARSETS_DIR=256, OPT_FORCE_CRASH, OPT_TRANSLOG_BUFFER_SIZE
};
static struct my_option my_long_options[] =
@@ -186,20 +185,27 @@ static struct my_option my_long_options[] =
#ifndef DBUG_OFF
{"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"force-crash", OPT_FORCE_CRASH, "Force crash after # recovery events",
+ &maria_recovery_force_crash_counter, 0,0, GET_ULONG, REQUIRED_ARG,
+ 0, 0, ~(long) 0, 0, 0, 0},
#endif
{"help", '?', "Display this help and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"display-only", 'd', "display brief info read from records' header",
&opt_display_only, &opt_display_only, 0, GET_BOOL,
NO_ARG,0, 0, 0, 0, 0, 0},
- {"aria-log-dir-path", 'l',
+ { "end-lsn", 'e', "Stop applying at this lsn. If end-lsn is used, UNDO:s "
+ "will not be applied", &opt_end_lsn, &opt_end_lsn,
+ 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
+ {"aria-log-dir-path", 'h',
"Path to the directory where to store transactional log",
(uchar **) &maria_data_root, (uchar **) &maria_data_root, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
- { "page-buffer-size", 'P', "",
+ { "page-buffer-size", 'P',
+ "The size of the buffer used for index blocks for Aria tables",
&opt_page_buffer_size, &opt_page_buffer_size, 0,
GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT,
- (long) USE_BUFFER_INIT, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD,
+ 1024L*1024L, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD,
(long) IO_SIZE, 0},
{ "start-from-lsn", 'o', "Start reading log from this lsn",
&opt_start_from_lsn, &opt_start_from_lsn,
@@ -207,15 +213,12 @@ static struct my_option my_long_options[] =
{"start-from-checkpoint", 'C', "Start applying from last checkpoint",
&opt_start_from_checkpoint, &opt_start_from_checkpoint, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
- { "end-lsn", 'e', "Stop applying at this lsn. If end-lsn is used, UNDO:s "
- "will not be applied", &opt_end_lsn, &opt_end_lsn,
- 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
{"silent", 's', "Print less information during apply/undo phase",
&opt_silent, &opt_silent, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
- {"verbose", 'v', "Print more information during apply/undo phase",
- &maria_recovery_verbose, &maria_recovery_verbose, 0,
- GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"tables-to-redo", 'T',
+ "List of tables sepearated with , that we should apply REDO on. Use this if you only want to recover some tables",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"tmpdir", 't', "Path for temporary files. Multiple paths can be specified, "
"separated by "
#if defined( __WIN__) || defined(__NETWARE__)
@@ -224,9 +227,18 @@ static struct my_option my_long_options[] =
"colon (:)"
#endif
, (char**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ { "translog-buffer-size", OPT_TRANSLOG_BUFFER_SIZE,
+ "The size of the buffer used for transaction log for Aria tables",
+ &opt_translog_buffer_size, &opt_translog_buffer_size, 0,
+ GET_ULONG, REQUIRED_ARG, (long) TRANSLOG_PAGECACHE_SIZE,
+ 1024L*1024L, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD,
+ (long) IO_SIZE, 0},
{"undo", 'u', "Apply UNDO records to tables. (disable with --disable-undo)",
(uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0,
GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Print more information during apply/undo phase",
+ &maria_recovery_verbose, &maria_recovery_verbose, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"version", 'V', "Print version and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
@@ -243,7 +255,7 @@ static void print_version(void)
static void usage(void)
{
print_version();
- puts("Copyright (C) 2007 MySQL AB");
+ puts("Copyright (C) 2007 MySQL AB, 2009-2011 Monty Program Ab");
puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
puts("and you are welcome to modify and redistribute it under the GPL license\n");
@@ -263,10 +275,18 @@ static void usage(void)
}
+/*
+  Key extraction callback for the tables_to_redo hash.
+
+  The key is the record itself; its length is the distance from the start
+  of the record to the position returned by strcend(record, ','), so
+  the names do not have to be copied out of the comma separated list.
+*/
+static uchar* my_hash_get_string(const uchar *record, size_t *length,
+                                 my_bool first __attribute__ ((unused)))
+{
+  const char *name_start= (const char*) record;
+  const char *name_end= strcend(name_start, ',');
+
+  *length= (size_t) (name_end - name_start);
+  return (uchar*) record;
+}
+
+
static my_bool
get_one_option(int optid __attribute__((unused)),
const struct my_option *opt __attribute__((unused)),
- char *argument __attribute__((unused)))
+ char *argument)
{
switch (optid) {
case '?':
@@ -275,6 +295,23 @@ get_one_option(int optid __attribute__((unused)),
case 'V':
print_version();
exit(0);
+ case 'T':
+ {
+ char *pos;
+ if (!my_hash_inited(&tables_to_redo))
+ {
+ my_hash_init2(&tables_to_redo, 16, &my_charset_bin,
+ 16, 0, 0, my_hash_get_string, 0, HASH_UNIQUE);
+ }
+ do
+ {
+ pos= strcend(argument, ',');
+ if (pos != argument) /* Skip empty strings */
+ my_hash_insert(&tables_to_redo, (uchar*) argument);
+ argument= pos+1;
+ } while (*(pos++));
+ break;
+ }
#ifndef DBUG_OFF
case '#':
DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
@@ -287,6 +324,7 @@ get_one_option(int optid __attribute__((unused)),
static void get_options(int *argc,char ***argv)
{
int ho_error;
+ my_bool need_help= 0;
if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
exit(ho_error);
@@ -294,8 +332,23 @@ static void get_options(int *argc,char ***argv)
if (!opt_apply)
opt_apply_undo= FALSE;
- if (((opt_display_only + opt_apply) != 1) || (*argc > 0))
+ if (*argc > 0)
+ {
+ need_help= 1;
+ fprintf(stderr, "Too many arguments given\n");
+ }
+ if ((opt_display_only + opt_apply) != 1)
+ {
+ need_help= 1;
+ fprintf(stderr,
+ "You must use one and only one of the options 'display-only' or "
+ "'apply'\n");
+ }
+
+ if (need_help)
{
+ fflush(stderr);
+ need_help =1;
usage();
exit(1);
}
diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c
index e4fede54425..6b538381329 100644
--- a/storage/maria/tablockman.c
+++ b/storage/maria/tablockman.c
@@ -445,7 +445,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
mysql_mutex_unlock(& table->mutex);
/* now really wait */
- i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
+ i= mysql_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
mysql_mutex_unlock(wait_for->mutex);
@@ -543,7 +543,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
lock_compatibility_matrix[lock->next->lock_type][lock->lock_type])
{
mysql_mutex_lock(lo->waiting_for->mutex);
- pthread_cond_broadcast(lo->waiting_for->cond);
+ mysql_cond_broadcast(lo->waiting_for->cond);
mysql_mutex_unlock(lo->waiting_for->mutex);
}
lo->waiting_for= 0;
@@ -589,7 +589,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
in case somebody's waiting for it
*/
mysql_mutex_lock(lo->mutex);
- pthread_cond_broadcast(lo->cond);
+ mysql_cond_broadcast(lo->cond);
mysql_mutex_unlock(lo->mutex);
/* and push all freed locks to the lockman's pool */
@@ -605,7 +605,7 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
lm->loid_to_tlo= func;
lm->lock_timeout= timeout;
mysql_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
- my_getsystime(); /* ensure that my_getsystime() is initialized */
+ my_interval_timer(); /* ensure that my_interval_timer() is initialized */
}
void tablockman_destroy(TABLOCKMAN *lm)
diff --git a/storage/maria/unittest/CMakeLists.txt b/storage/maria/unittest/CMakeLists.txt
index 1d63bed8e8e..8a83a589706 100644
--- a/storage/maria/unittest/CMakeLists.txt
+++ b/storage/maria/unittest/CMakeLists.txt
@@ -13,6 +13,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
${CMAKE_SOURCE_DIR}/unittest/mytap)
LINK_LIBRARIES(aria myisam mytap mysys ${DBUG_LIBRARY} strings ${ZLIB_LIBRARY})
diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c
index aad1a6978b2..8533e461361 100644
--- a/storage/maria/unittest/ma_control_file-t.c
+++ b/storage/maria/unittest/ma_control_file-t.c
@@ -117,6 +117,26 @@ static CONTROL_FILE_ERROR local_ma_control_file_open(void)
return error;
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+static char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
int main(int argc,char *argv[])
@@ -124,11 +144,12 @@ int main(int argc,char *argv[])
MY_INIT(argv[0]);
my_init();
- maria_data_root= (char *)".";
default_error_handler_hook= error_handler_hook;
plan(12);
+ maria_data_root= create_tmpdir(argv[0]);
+
diag("Unit tests for control file");
get_options(argc,argv);
@@ -155,6 +176,9 @@ int main(int argc,char *argv[])
ok(0 == test_bad_blocksize(), "test of bad blocksize");
ok(0 == test_bad_size(), "test of too small/big file");
+ delete_file(0);
+ rmdir(maria_data_root);
+
return exit_status();
}
diff --git a/storage/maria/unittest/ma_loghandler_examples.c b/storage/maria/unittest/ma_loghandler_examples.c
index 0c11a3b9a8e..cd5d927587a 100644
--- a/storage/maria/unittest/ma_loghandler_examples.c
+++ b/storage/maria/unittest/ma_loghandler_examples.c
@@ -59,6 +59,9 @@ void translog_example_table_init()
i < LOGREC_NUMBER_OF_TYPES;
i++)
log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
+#ifndef DBUG_OFF
+ check_translog_description_table(LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE);
+#endif
}
diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c
index f85c75b1a88..5e84acf41af 100644
--- a/storage/maria/unittest/ma_maria_log_cleanup.c
+++ b/storage/maria/unittest/ma_maria_log_cleanup.c
@@ -16,7 +16,7 @@
#include "../maria_def.h"
#include <my_dir.h>
-my_bool maria_log_remove()
+my_bool maria_log_remove(const char *testdir)
{
MY_DIR *dirp;
uint i;
@@ -59,6 +59,28 @@ my_bool maria_log_remove()
}
}
my_dirend(dirp);
+ if (testdir)
+ rmdir(testdir);
return 0;
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c
index d9b814e92f1..60c196fddbf 100644
--- a/storage/maria/unittest/ma_pagecache_consist.c
+++ b/storage/maria/unittest/ma_pagecache_consist.c
@@ -30,7 +30,8 @@
static const char* default_dbug_option;
#endif
-static char *file1_name= (char*)"page_cache_test_file_1";
+static const char *base_file1_name= "page_cache_test_file_1";
+static char file1_name[FN_REFLEN];
static PAGECACHE_FILE file1;
static pthread_cond_t COND_thread_count;
static pthread_mutex_t LOCK_thread_count;
@@ -330,6 +331,27 @@ static void *test_thread_writer(void *arg)
return 0;
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+static char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
+
int main(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
@@ -337,7 +359,6 @@ int main(int argc __attribute__((unused)),
pthread_t tid;
pthread_attr_t thr_attr;
int *param, error, pagen;
-
MY_INIT(argv[0]);
#ifndef DBUG_OFF
@@ -357,9 +378,13 @@ int main(int argc __attribute__((unused)),
DBUG_ENTER("main");
DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
plan(number_of_writers + number_of_readers);
+
SKIP_BIG_TESTS(number_of_writers + number_of_readers)
{
+ char *test_dirname= create_tmpdir(argv[0]);
+ fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0));
+
if ((file1.file= my_open(file1_name,
O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
{
@@ -476,6 +501,7 @@ int main(int argc __attribute__((unused)),
pthread_mutex_unlock(&LOCK_thread_count);
DBUG_PRINT("info", ("thread ended"));
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_IGNORE_CHANGED);
end_pagecache(&pagecache, 1);
DBUG_PRINT("info", ("Page cache ended"));
@@ -490,6 +516,7 @@ int main(int argc __attribute__((unused)),
DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
DBUG_PRINT("info", ("Program end"));
+ rmdir(test_dirname);
} /* SKIP_BIG_TESTS */
my_end(0);
diff --git a/storage/maria/unittest/ma_pagecache_rwconsist.c b/storage/maria/unittest/ma_pagecache_rwconsist.c
index 88ecbe864e8..4ade9c536ed 100644
--- a/storage/maria/unittest/ma_pagecache_rwconsist.c
+++ b/storage/maria/unittest/ma_pagecache_rwconsist.c
@@ -33,7 +33,8 @@ static const char* default_dbug_option;
#define SLEEP my_sleep(5)
-static char *file1_name= (char*)"page_cache_test_file_1";
+static const char *base_file1_name= "page_cache_test_file_1";
+static char file1_name[FN_REFLEN];
static PAGECACHE_FILE file1;
static pthread_cond_t COND_thread_count;
static pthread_mutex_t LOCK_thread_count;
@@ -200,6 +201,27 @@ static void *test_thread_writer(void *arg)
return 0;
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
+
int main(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
@@ -230,6 +252,9 @@ int main(int argc __attribute__((unused)),
SKIP_BIG_TESTS(number_of_writers + number_of_readers)
{
+ char *test_dirname= create_tmpdir(argv[0]);
+ fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0));
+
if ((file1.file= my_open(file1_name,
O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
{
@@ -341,6 +366,7 @@ int main(int argc __attribute__((unused)),
pthread_mutex_unlock(&LOCK_thread_count);
DBUG_PRINT("info", ("thread ended"));
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_IGNORE_CHANGED);
end_pagecache(&pagecache, 1);
DBUG_PRINT("info", ("Page cache ended"));
@@ -354,6 +380,8 @@ int main(int argc __attribute__((unused)),
DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
DBUG_PRINT("info", ("Program end"));
+
+ rmdir(test_dirname);
} /* SKIP_BIG_TESTS */
my_end(0);
diff --git a/storage/maria/unittest/ma_pagecache_rwconsist2.c b/storage/maria/unittest/ma_pagecache_rwconsist2.c
index e63d45ceb3a..a5c50bc15da 100644
--- a/storage/maria/unittest/ma_pagecache_rwconsist2.c
+++ b/storage/maria/unittest/ma_pagecache_rwconsist2.c
@@ -40,7 +40,8 @@ static const char* default_dbug_option;
#define SLEEP my_sleep(5)
-static char *file1_name= (char*)"page_cache_test_file_1";
+static const char *base_file1_name= "page_cache_test_file_1";
+static char file1_name[FN_REFLEN];
static PAGECACHE_FILE file1;
static pthread_cond_t COND_thread_count;
static pthread_mutex_t LOCK_thread_count;
@@ -196,6 +197,27 @@ static void *test_thread_writer(void *arg)
return 0;
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+static char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
+
int main(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
@@ -226,6 +248,9 @@ int main(int argc __attribute__((unused)),
SKIP_BIG_TESTS(number_of_writers + number_of_readers)
{
+ char *test_dirname= create_tmpdir(argv[0]);
+ fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0));
+
if ((file1.file= my_open(file1_name,
O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
{
@@ -350,6 +375,8 @@ int main(int argc __attribute__((unused)),
DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
DBUG_PRINT("info", ("Program end"));
+
+ rmdir(test_dirname);
} /* SKIP_BIG_TESTS */
my_end(0);
diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c
index 3291346a8b5..bb39b20ce59 100644
--- a/storage/maria/unittest/ma_pagecache_single.c
+++ b/storage/maria/unittest/ma_pagecache_single.c
@@ -35,8 +35,9 @@ static const char* default_dbug_option;
#define SKIP_BIG_TESTS(X) /* no-op */
#endif
-static char *file1_name= (char*)"page_cache_test_file_1";
-static char *file2_name= (char*)"page_cache_test_file_2";
+static const char *base_file1_name= "page_cache_test_file_1";
+static const char *base_file2_name= "page_cache_test_file_2";
+static char file1_name[FN_REFLEN], file2_name[FN_REFLEN];
static PAGECACHE_FILE file1;
static pthread_cond_t COND_thread_count;
static pthread_mutex_t LOCK_thread_count;
@@ -720,6 +721,28 @@ static void *test_thread(void *arg)
}
+/*
+  Create a scratch directory for the test and return its name.
+
+  The name is "TMP-" followed by the executable name without directory,
+  extension and trailing "-t". The returned pointer refers to a static
+  buffer, so a later call overwrites the earlier result.
+*/
+static char *create_tmpdir(const char *progname)
+{
+  static char test_dirname[FN_REFLEN];
+  char tmp_name[FN_REFLEN];
+  size_t length;
+
+  /* Create a temporary directory of name TMP-'executable', but without the -t extension */
+  fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
+  length= strlen(tmp_name);
+  if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't')
+    tmp_name[length-2]= 0;
+  /* Bounded: "TMP-" + tmp_name may not fit in FN_REFLEN bytes */
+  strxnmov(test_dirname, sizeof(test_dirname) - 1, "TMP-", tmp_name, NullS);
+
+  /*
+    Don't give an error if we can't create dir, as it may already exist from a previously aborted
+    run
+  */
+  (void) my_mkdir(test_dirname, 0777, MYF(0));
+  return test_dirname;
+}
+
+
int main(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
{
@@ -748,6 +771,9 @@ int main(int argc __attribute__((unused)),
plan(18);
SKIP_BIG_TESTS(18)
{
+ char *test_dirname= create_tmpdir(argv[0]);
+ fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0));
+ fn_format(file2_name, base_file2_name, test_dirname, "", MYF(0));
if ((tmp_file= my_open(file2_name, O_CREAT | O_TRUNC | O_RDWR,
MYF(MY_WME))) < 0)
@@ -841,13 +867,13 @@ int main(int argc __attribute__((unused)),
exit(1);
my_delete(file1_name, MYF(0));
+ rmdir(test_dirname);
} /* SKIP_BIG_TESTS */
DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
DBUG_PRINT("info", ("Program end"));
my_end(0);
-
}
return exit_status();
}
diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t
index 0b11daf7f98..e66d269ab93 100755
--- a/storage/maria/unittest/ma_test_all-t
+++ b/storage/maria/unittest/ma_test_all-t
@@ -8,7 +8,7 @@ use File::Basename;
$|= 1;
$^W = 1; # warnings, because env cannot parse 'perl -w'
-$VER= "1.4";
+$VER= "1.5";
$opt_version= 0;
$opt_help= 0;
@@ -28,7 +28,10 @@ my $NEW_TEST= 0; # Test group separator in an array of tests
my $test_begin= 0;
my $test_end= 0;
my $test_counter= 0;
-
+my $using_internal_tmpdir= 0;
+my $full_tmpdir;
+my $tmpdir="tmp";
+my $exec_dir="TMP-ma_test_all"; # Run test in this directory
run_tests();
####
@@ -46,6 +49,7 @@ sub run_tests
"abort-on-error" => \$opt_abort_on_error,
"valgrind=s" => \$opt_valgrind,
"silent=s" => \$opt_silent,
+ "tmpdir=s" => \$full_tmpdir,
"number-of-tests" => \$opt_number_of_tests,
"run-tests=s" => \$opt_run_tests,
"start-from=s" => \$opt_run_tests))
@@ -57,7 +61,14 @@ sub run_tests
print "$my_progname version $VER\n";
exit(0);
}
- $maria_path= dirname($0) . "/..";
+
+ if (! -d $exec_dir)
+ {
+ die if (!mkdir("$exec_dir"));
+ }
+ chdir($exec_dir);
+
+ $maria_path= "../" . dirname($0) . "/..";
my $suffix= ( $^O =~ /win/i && $^O !~ /darwin/i ) ? ".exe" : "";
$maria_exe_path= "$maria_path/release";
@@ -73,14 +84,28 @@ sub run_tests
$maria_exe_path= $maria_path;
if ( ! -f "$maria_exe_path/ma_test1$suffix" )
{
- die("Cannot find ma_test1 executable\n");
+ die("Cannot find ma_test1 executable in $maria_path\n");
}
}
}
- }
+ }
usage() if ($opt_help || $flag_exit);
+ if (defined($full_tmpdir))
+ {
+ $tmpdir= $full_tmpdir;
+ }
+ else
+ {
+ $full_tmpdir= $tmpdir;
+ $using_internal_tmpdir= 1;
+ if (! -d "$full_tmpdir")
+ {
+ die if (!mkdir("$full_tmpdir"));
+ }
+ }
+
#
# IMPORTANT: If you modify this file, please read this:
#
@@ -146,7 +171,7 @@ sub run_tests
# clean-up
#
- unlink <*.TMD aria_log*>; # Delete temporary files
+ unlink_all_possible_tmp_files();
#
# Run tests
@@ -210,6 +235,14 @@ sub run_tests
run_ma_test_recovery($opt_verbose, 0);
run_tests_on_clrs($suffix, $opt_verbose, 0);
+ unlink_all_possible_tmp_files();
+ if ($using_internal_tmpdir)
+ {
+ rmdir($tmpdir);
+ }
+ rmdir($exec_dir);
+ chdir("..");
+ rmdir($exec_dir);
exit($runtime_error);
}
@@ -250,6 +283,7 @@ sub run_check_tests
["-p -B --key_length=480","-sm"],
["--checksum --unique","-se"],
["--unique","-se"],
+ ["--rows-no-data", "-s"],
["--key_multiple -N -S","-sm"],
["--key_multiple -a -p --key_length=480","-sm"],
["--key_multiple -a -B --key_length=480","-sm"],
@@ -280,38 +314,38 @@ sub run_check_tests
for ($i= 0; defined($ma_test1_opt[$i]); $i++)
{
- unlink <aria_log_control aria_log.*>;
- ok("$maria_exe_path/ma_test1$suffix $silent $ma_test1_opt[$i][0] $row_type",
+ unlink_log_files();
+ ok("$maria_exe_path/ma_test1$suffix $silent -h$tmpdir $ma_test1_opt[$i][0] $row_type",
$verbose, $i + 1);
- ok("$maria_exe_path/aria_chk$suffix $ma_test1_opt[$i][1] test1",
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_test1_opt[$i][1] $tmpdir/test1",
$verbose, $i + 1);
}
#
# These tests are outside the loops. Make sure to include them in
# nr_tests manually
#
- ok("$maria_exe_path/aria_pack$suffix --force -s test1", $verbose, 0);
- ok("$maria_exe_path/aria_chk$suffix -ess test1", $verbose, 0);
+ ok("$maria_exe_path/aria_pack$suffix --force -s $tmpdir/test1", $verbose, 0);
+ ok("$maria_exe_path/aria_chk$suffix -ess $tmpdir/test1", $verbose, 0);
for ($i= 0; defined($ma_test2_opt[$i]); $i++)
{
- unlink <aria_log_control aria_log.*>;
- ok("$maria_exe_path/ma_test2$suffix $silent $ma_test2_opt[$i][0] $row_type",
+ unlink_log_files();
+ ok("$maria_exe_path/ma_test2$suffix $silent -h$tmpdir $ma_test2_opt[$i][0] $row_type",
$verbose, $i + 1);
- ok("$maria_exe_path/aria_chk$suffix $ma_test2_opt[$i][1] test2",
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_test2_opt[$i][1] $tmpdir/test2",
$verbose, $i + 1);
}
for ($i= 0; defined($ma_rt_test_opt[$i]); $i++)
{
- unlink <aria_log_control aria_log.*>;
- ok("$maria_exe_path/ma_rt_test$suffix $silent $ma_rt_test_opt[$i][0] $row_type",
+ unlink_log_files();
+ ok("$maria_exe_path/ma_rt_test$suffix $silent -h$tmpdir $ma_rt_test_opt[$i][0] $row_type",
$verbose, $i + 1);
- ok("$maria_exe_path/aria_chk$suffix $ma_rt_test_opt[$i][1] rt_test",
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_rt_test_opt[$i][1] $tmpdir/rt_test",
$verbose, $i + 1);
}
- unlink <aria_log_control aria_log.*>;
+ unlink_log_files();
return 0;
}
@@ -412,13 +446,16 @@ sub run_pack_tests()
"cp test1.MAD test2.MAD",
"cp test1.MAI test2.MAI",
"$maria_exe_path/aria_pack$suffix --force -s --join=test3 test1 test2",
- "$maria_exe_path/aria_chk -s test3",
- "$maria_exe_path/aria_chk -s --safe-recover test3",
- "$maria_exe_path/aria_chk -s test3"
);
- return &count_tests(\@t) if ($count);
+ return (&count_tests(\@t) + 3) if ($count);
&run_test_bunch(\@t, $verbose, 0);
+
+ ok("$maria_exe_path/aria_chk -s test3", $verbose, 0, 1);
+ @t= ("$maria_exe_path/aria_chk -s --safe-recover test3",
+ "$maria_exe_path/aria_chk -s test3");
+ &run_test_bunch(\@t, $verbose, 0);
+
return 0;
}
@@ -433,25 +470,25 @@ sub run_tests_on_warnings_and_errors
return 9 if ($count); # Number of tests in this function, e.g. calls to ok()
- ok("$maria_exe_path/ma_test2$suffix $silent -L -K -W -P -S -R1 -m500",
+ ok("$maria_exe_path/ma_test2$suffix -h$tmpdir $silent -L -K -W -P -S -R1 -m500",
$verbose, 0);
- ok("$maria_exe_path/aria_chk$suffix -sm test2", $verbose, 0);
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2", $verbose, 0);
# ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135\n
# In the following a failure is a success and success is a failure
- $com= "$maria_exe_path/ma_test2$suffix $silent -L -K -R1 -m2000 ";
+ $com= "$maria_exe_path/ma_test2$suffix -h$tmpdir $silent -L -K -R1 -m2000 ";
$com.= ">ma_test2_message.txt 2>&1";
ok($com, $verbose, 0, 1);
ok("cat ma_test2_message.txt", $verbose, 0);
ok("grep \"Error: 135\" ma_test2_message.txt > /dev/null", $verbose, 0);
- # maria_exe_path/aria_chk$suffix -sm test2 will warn that
+ # maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2 will warn that
# Datafile is almost full
- ok("$maria_exe_path/aria_chk$suffix -sm test2 >ma_test2_message.txt 2>&1",
- $verbose, 0);
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2 >ma_test2_message.txt 2>&1",
+ $verbose, 0, 1);
ok("cat ma_test2_message.txt", $verbose, 0);
ok("grep \"warning: Datafile is almost full\" ma_test2_message.txt>/dev/null",
$verbose, 0);
unlink <ma_test2_message.txt>;
- ok("$maria_exe_path/aria_chk$suffix -ssm test2", $verbose, 0);
+ ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -ssm $tmpdir/test2", $verbose, 0);
return 0;
}
@@ -479,33 +516,33 @@ sub run_tests_on_clrs
my ($i);
my @t= ($NEW_TEST,
- "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b -t2 -A1",
- "cp aria_log_control tmp",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -s -e test2",
- "cp tmp/aria_log_control .",
- "rm test2.MA?",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -s -e test2",
- "rm test2.MA?",
+ "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b -t2 -A1",
+ "cp $tmpdir/aria_log_control $tmpdir/aria_log_control.backup",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2",
+ "mv $tmpdir/aria_log_control.backup $tmpdir/aria_log_control",
+ "rm $tmpdir/test2.MA?",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2",
+ "rm $tmpdir/test2.MA?",
$NEW_TEST,
- "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b -t2 -A1",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -s -e test2",
- "rm test2.MA?",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -e -s test2",
- "rm test2.MA?",
+ "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b -t2 -A1",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir ",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2",
+ "rm $tmpdir/test2.MA?",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -e -s $tmpdir/test2",
+ "rm $tmpdir/test2.MA?",
$NEW_TEST,
- "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b32768 -t4 -A1",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -es test2",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -es test2",
- "rm test2.MA?",
- "$maria_exe_path/aria_read_log$suffix -a -s",
- "$maria_exe_path/aria_chk$suffix -es test2",
- "rm test2.MA?"
+ "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b32768 -t4 -A1",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir ",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2",
+ "rm $tmpdir/test2.MA?",
+ "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir",
+ "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2",
+ "rm $tmpdir/test2.MA?"
);
return &count_tests(\@t) if ($count);
@@ -533,7 +570,7 @@ sub run_tests_on_clrs
sub ok
{
my ($com, $verbose, $iteration, $expected_error)= @_;
- my ($msg, $output, $err, $len);
+ my ($msg, $output, $err, $errcode, $len);
$test_counter++;
if ($test_begin > $test_counter)
@@ -550,17 +587,22 @@ sub ok
if ($verbose)
{
- print "$com ";
+ # Print command without the long unittest/../ prefix
+ my $tmp;
+ $tmp= $com;
+ $tmp =~ s|^unittest/../||;
+ print "$tmp ";
+ $len= length($tmp);
}
$output= `$com 2>&1`;
- $len= length($com);
if ($verbose)
{
print " " x (62 - $len);
}
$err= $?;
+ $errcode= ($? >> 8);
if ((!$err && !$expected_error) ||
- (($err >> 8) == $expected_error && $expected_error))
+ ($errcode == $expected_error && $expected_error))
{
print "[ " if ($verbose);
print "ok";
@@ -597,7 +639,7 @@ sub ok
}
$msg.= "at line ";
$msg.= (caller)[2];
- $msg.= "\n(errcode: $err, test: $test_counter)\n";
+ $msg.= "\n(errcode: $errcode, test: $test_counter)\n";
if ($expected_error)
{
$msg.= "Was expecting errcode: $expected_error\n";
@@ -650,6 +692,19 @@ sub count_tests
return $nr_tests;
}
+sub unlink_log_files   # Delete aria_log_control and log files 00000001 and 00000002 in $full_tmpdir
+{
+ unlink "$full_tmpdir/aria_log_control", "$full_tmpdir/aria_log.00000001", "$full_tmpdir/aria_log.00000002";   # return value is not checked
+}
+
+sub unlink_all_possible_tmp_files()   # Delete the log files plus any leftover temporary test files
+{
+ unlink_log_files();
+
+ # Unlink tmp files that may have been left behind by the test programs: patterns prefixed with $full_tmpdir are matched there, the others in the current directory. NOTE(review): aria_logtest1.MA? looks like two names run together - confirm
+ unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? $full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA?>;
+}
+
####
#### Run a bunch of tests
#### Arguments: $t: an array of the tests
@@ -666,7 +721,7 @@ sub run_test_bunch
{
if ($clear && @$t[$i] eq $NEW_TEST)
{
- unlink <aria_log.* aria_log_control>;
+ unlink_log_files();
}
if (@$t[$i] ne $NEW_TEST)
{
@@ -699,6 +754,7 @@ Options
might depend on previous ones.
--start-from=... Alias for --run-tests
--silent=... Silent option passed to ma_test* tests ('$opt_silent')
+--tmpdir=... Store tests data in this directory (works for most tests)
--valgrind=... Options for valgrind.
('$opt_valgrind')
--verbose Be more verbose. Will print each unittest on a line
diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c
index 2de06914412..ccaa6f7dc8e 100644
--- a/storage/maria/unittest/ma_test_loghandler-t.c
+++ b/storage/maria/unittest/ma_test_loghandler-t.c
@@ -19,7 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
extern void example_loghandler_init();
#ifndef DBUG_OFF
@@ -161,7 +162,6 @@ int main(int argc __attribute__((unused)), char *argv[])
LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 3];
struct st_translog_scanner_data scanner;
int rc;
-
MY_INIT(argv[0]);
if (my_set_max_open_files(100) < 100)
@@ -170,10 +170,14 @@ int main(int argc __attribute__((unused)), char *argv[])
exit(1);
}
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
- if (maria_log_remove())
+
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
+ /* We don't need to do physical syncs in this test */
+ my_disable_sync= 1;
+
for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2)
{
int2store(long_buffer + i, (i >> 1));
@@ -205,7 +209,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -654,7 +658,7 @@ err:
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
return(test(exit_status()));
diff --git a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
index e941d860adb..9ebd56c754c 100644
--- a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
@@ -19,7 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
extern void translog_example_table_init();
#ifndef DBUG_OFF
@@ -31,7 +32,6 @@ static const char *default_dbug_option;
#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
#define LOG_FLAGS 0
-static char *first_translog_file= (char*)"maria_log.00000001";
int main(int argc __attribute__((unused)), char *argv[])
{
@@ -40,18 +40,18 @@ int main(int argc __attribute__((unused)), char *argv[])
PAGECACHE pagecache;
LSN lsn, first_lsn, theor_lsn;
LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1];
-
MY_INIT(argv[0]);
plan(2);
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
- if (maria_log_remove())
+ /*
+ Don't give an error if we can't create dir, as it may already exist from a previously aborted
+ run
+ */
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
- /* be sure that we have no logs in the directory*/
- my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
- my_delete(first_translog_file, MYF(0));
bzero(long_tr_id, 6);
#ifndef DBUG_OFF
@@ -78,9 +78,8 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
- LOG_FLAGS, 0, &translog_example_table_init,
- 0))
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
+ LOG_FLAGS, 0, &translog_example_table_init, 0))
{
fprintf(stderr, "Can't init loghandler (%d)\n", errno);
exit(1);
@@ -154,7 +153,7 @@ int main(int argc __attribute__((unused)), char *argv[])
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
exit(0);
}
diff --git a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
index 924daac5f3c..4ae9def8598 100644
--- a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
@@ -19,7 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
extern void translog_example_table_init();
#ifndef DBUG_OFF
@@ -40,14 +41,14 @@ int main(int argc __attribute__((unused)), char *argv[])
PAGECACHE pagecache;
LSN lsn, max_lsn, last_lsn= LSN_IMPOSSIBLE;
LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1];
-
MY_INIT(argv[0]);
plan(2);
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
- if (maria_log_remove())
+
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
bzero(long_tr_id, 6);
@@ -75,7 +76,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -150,7 +151,7 @@ int main(int argc __attribute__((unused)), char *argv[])
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
exit(0);
}
diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
index 44c174ee1b0..56d0e55607e 100644
--- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
@@ -21,7 +21,8 @@
#include "sequence_storage.h"
#include <my_getopt.h>
-extern my_bool maria_log_remove();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
extern void translog_example_table_init();
#ifndef DBUG_OFF
@@ -238,19 +239,23 @@ int main(int argc __attribute__((unused)), char *argv[])
TRANSLOG_HEADER_BUFFER rec;
LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 2];
struct st_translog_scanner_data scanner;
+ const char *progname=argv[0];
int rc;
-
MY_INIT(argv[0]);
- bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
+
load_defaults("my", load_default_groups, &argc, &argv);
- default_argv= argv;
get_options(&argc, &argv);
+ default_argv= argv;
- if (maria_log_remove())
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= create_tmpdir(progname);
+ if (maria_log_remove(0))
exit(1);
+ /* We don't need to do physical syncs in this test */
+ my_disable_sync= 1;
+
{
uchar buff[4];
for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++)
@@ -274,7 +279,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
0, 0, &translog_example_table_init, 0))
{
fprintf(stderr, "Can't init loghandler (%d)\n", errno);
@@ -437,7 +442,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
0, READONLY, &translog_example_table_init, 0))
{
fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
@@ -739,7 +744,7 @@ err:
ma_control_file_end();
free_defaults(default_argv);
seq_storage_destroy(&seq);
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
return (test(exit_status()));
diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
index 1e9120e655f..86543ca60fb 100644
--- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
@@ -19,8 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
-extern void translog_example_table_init();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
#ifndef DBUG_OFF
static const char *default_dbug_option;
@@ -268,17 +268,18 @@ int main(int argc __attribute__((unused)),
pthread_attr_t thr_attr;
int *param, error;
int rc;
-
- /* Disabled until Sanja tests */
- plan(1);
- ok(1, "disabled");
- exit(0);
+ MY_INIT(argv[0]);
plan(WRITERS + FLUSHERS +
ITERATIONS * WRITERS * 3 + FLUSH_ITERATIONS * FLUSHERS );
+ /* We don't need to do physical syncs in this test */
+ my_disable_sync= 1;
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
+ exit(1);
+
long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2);
if (long_buffer == 0)
{
@@ -288,11 +289,6 @@ int main(int argc __attribute__((unused)),
for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++)
long_buffer[i]= (i & 0xFF);
- MY_INIT(argv[0]);
- if (maria_log_remove())
- exit(1);
-
-
#ifndef DBUG_OFF
#if defined(__WIN__)
default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
@@ -350,7 +346,7 @@ int main(int argc __attribute__((unused)),
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -549,7 +545,7 @@ err:
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
return(exit_status());
diff --git a/storage/maria/unittest/ma_test_loghandler_noflush-t.c b/storage/maria/unittest/ma_test_loghandler_noflush-t.c
index 2994ead8c3a..c8c0f7d1873 100644
--- a/storage/maria/unittest/ma_test_loghandler_noflush-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c
@@ -19,7 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
extern void translog_example_table_init();
#ifndef DBUG_OFF
@@ -31,8 +32,6 @@ static const char *default_dbug_option;
#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
#define LOG_FLAGS 0
-static char *first_translog_file= (char*)"maria_log.00000001";
-
int main(int argc __attribute__((unused)), char *argv[])
{
uint pagen;
@@ -49,12 +48,9 @@ int main(int argc __attribute__((unused)), char *argv[])
plan(1);
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
- if (maria_log_remove())
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
- /* be sure that we have no logs in the directory*/
- my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
- my_delete(first_translog_file, MYF(0));
bzero(long_tr_id, 6);
#ifndef DBUG_OFF
@@ -81,7 +77,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -139,7 +135,7 @@ err:
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
exit(rc);
diff --git a/storage/maria/unittest/ma_test_loghandler_nologs-t.c b/storage/maria/unittest/ma_test_loghandler_nologs-t.c
index 32ada1e58bd..24c93e428e1 100644
--- a/storage/maria/unittest/ma_test_loghandler_nologs-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c
@@ -19,8 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
-extern void example_loghandler_init();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
#ifndef DBUG_OFF
static const char *default_dbug_option;
@@ -49,8 +49,8 @@ int main(int argc __attribute__((unused)), char *argv[])
bzero(&pagecache, sizeof(pagecache));
bzero(long_buffer, LONG_BUFFER_SIZE);
- maria_data_root= (char *)".";
- if (maria_log_remove())
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
bzero(long_tr_id, 6);
@@ -78,7 +78,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -151,7 +151,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
1))
{
@@ -189,7 +189,7 @@ int main(int argc __attribute__((unused)), char *argv[])
ok(1, "New log is OK");
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
exit(0);
}
diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
index 5b115b426b7..0cc94befb39 100644
--- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
@@ -19,8 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
-extern void translog_example_table_init();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
#ifndef DBUG_OFF
static const char *default_dbug_option;
@@ -31,8 +31,10 @@ static const char *default_dbug_option;
#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
#define LOG_FLAGS 0
-static char *first_translog_file= (char*)"aria_log.00000001";
-static char *file1_name= (char*)"page_cache_test_file_1";
+static const char *base_first_translog_file= "aria_log.00000001";
+static const char *base_file1_name= "page_cache_test_file_1";
+static char file1_name[FN_REFLEN], first_translog_file[FN_REFLEN];
+
static PAGECACHE_FILE file1;
@@ -68,18 +70,15 @@ int main(int argc __attribute__((unused)), char *argv[])
LSN lsn;
my_off_t file_size;
LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1];
-
MY_INIT(argv[0]);
plan(1);
bzero(&pagecache, sizeof(pagecache));
- maria_data_root= (char *)".";
- if (maria_log_remove())
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
- /* be sure that we have no logs in the directory*/
- my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
- my_delete(first_translog_file, MYF(0));
+ fn_format(first_translog_file, base_first_translog_file, maria_data_root, "", MYF(0));
bzero(long_tr_id, 6);
#ifndef DBUG_OFF
@@ -106,7 +105,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -145,6 +144,7 @@ int main(int argc __attribute__((unused)), char *argv[])
exit(1);
}
+ fn_format(file1_name, base_file1_name, maria_data_root, "", MYF(0));
if ((file1.file= my_open(file1_name,
O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
{
@@ -168,7 +168,7 @@ int main(int argc __attribute__((unused)), char *argv[])
PAGECACHE_PIN_LEFT_UNPINNED,
PAGECACHE_WRITE_DELAY,
0, LSN_IMPOSSIBLE);
- flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
}
my_close(file1.file, MYF(MY_WME));
if ((file1.file= my_open(first_translog_file, O_RDONLY, MYF(MY_WME))) < 0)
@@ -192,10 +192,10 @@ int main(int argc __attribute__((unused)), char *argv[])
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
- my_delete(first_translog_file, MYF(0));
- my_delete(file1_name, MYF(0));
+ my_delete(file1_name, MYF(MY_WME));
+ if (maria_log_remove(maria_data_root))
+ exit(1);
exit(0);
}
diff --git a/storage/maria/unittest/ma_test_loghandler_purge-t.c b/storage/maria/unittest/ma_test_loghandler_purge-t.c
index e7b604eb172..6ae0e7830ae 100644
--- a/storage/maria/unittest/ma_test_loghandler_purge-t.c
+++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c
@@ -19,8 +19,8 @@
#include <tap.h>
#include "../trnman.h"
-extern my_bool maria_log_remove();
-extern void translog_example_table_init();
+extern my_bool maria_log_remove(const char *testdir);
+extern char *create_tmpdir(const char *progname);
#ifndef DBUG_OFF
static const char *default_dbug_option;
@@ -49,8 +49,8 @@ int main(int argc __attribute__((unused)), char *argv[])
bzero(&pagecache, sizeof(pagecache));
bzero(long_buffer, LONG_BUFFER_SIZE);
- maria_data_root= (char *)".";
- if (maria_log_remove())
+ maria_data_root= create_tmpdir(argv[0]);
+ if (maria_log_remove(0))
exit(1);
bzero(long_tr_id, 6);
@@ -78,7 +78,7 @@ int main(int argc __attribute__((unused)), char *argv[])
fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
exit(1);
}
- if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache,
+ if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache,
LOG_FLAGS, 0, &translog_example_table_init,
0))
{
@@ -186,7 +186,7 @@ int main(int argc __attribute__((unused)), char *argv[])
translog_destroy();
end_pagecache(&pagecache, 1);
ma_control_file_end();
- if (maria_log_remove())
+ if (maria_log_remove(maria_data_root))
exit(1);
exit(0);
}
diff --git a/storage/maria/unittest/ma_test_recovery.pl b/storage/maria/unittest/ma_test_recovery.pl
index d9be82f4e58..f3a5bffbc36 100755
--- a/storage/maria/unittest/ma_test_recovery.pl
+++ b/storage/maria/unittest/ma_test_recovery.pl
@@ -114,7 +114,7 @@ sub main
die("can't guess table name");
}
$com= "$maria_exe_path/aria_chk$suffix -dvv $table ";
- $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\"";
+ $com.= "| grep -v \"Creation time:\" | grep -v \"recover time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\"";
$com.= "> $tmp/aria_chk_message.good.txt 2>&1";
my_exec($com);
my $checksum= my_exec("$maria_exe_path/aria_chk$suffix -dss $table");
@@ -197,7 +197,7 @@ sub main
die("can't guess table name");
}
$com= "$maria_exe_path/aria_chk$suffix -dvv $table ";
- $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" ";
+ $com.= "| grep -v \"Creation time:\" | grep -v \"recover time:\" | grep -v \"recover time:\" |grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" ";
$com.= "> $tmp/aria_chk_message.good.txt 2>&1";
$res= my_exec($com);
print MY_LOG $res;
@@ -296,7 +296,7 @@ sub check_table_is_same
print "checking if table $table has changed\n";
}
- $com= "$maria_exe_path/aria_chk$suffix -dvv $table | grep -v \"Creation time:\" ";
+ $com= "$maria_exe_path/aria_chk$suffix -dvv $table | grep -v \"Creation time:\" | grep -v \"recover time:\"";
$com.= "| grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" > $tmp/aria_chk_message.txt 2>&1";
$res= `$com`;
print MY_LOG $res;
@@ -415,7 +415,7 @@ sub physical_cmp
# save original tables to restore them later
copy("$table.MAD", "$tmp/before_zerofill$table_no.MAD") || die();
copy("$table.MAI", "$tmp/before_zerofill$table_no.MAI") || die();
- $com= "$maria_exe_path/aria_chk$suffix -ss --zerofill-keep-lsn $table";
+ $com= "$maria_exe_path/aria_chk$suffix -ss --zerofill-keep-lsn --skip-update-state $table";
$res= `$com`;
print MY_LOG $res;
$table_no= $table_no + 1;
diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c
index 5d27fe39d14..c2bc993e2ff 100644
--- a/storage/maria/unittest/trnman-t.c
+++ b/storage/maria/unittest/trnman-t.c
@@ -75,7 +75,7 @@ pthread_handler_t test_trnman(void *arg)
void run_test(const char *test, pthread_handler handler, int n, int m)
{
pthread_t *threads;
- ulonglong now= my_getsystime();
+ ulonglong now= microsecond_interval_timer();
int i;
litmus= 0;
@@ -97,8 +97,8 @@ void run_test(const char *test, pthread_handler handler, int n, int m)
}
for (i= 0 ; i < n ; i++)
pthread_join(threads[i], 0);
- now= my_getsystime()-now;
- ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ now= microsecond_interval_timer() - now;
+ ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e6, litmus);
my_free(threads);
}
@@ -162,10 +162,10 @@ int main(int argc __attribute__((unused)), char **argv)
diag("mallocs: %d", trnman_allocated_transactions);
{
- ulonglong now= my_getsystime();
+ ulonglong now= microsecond_interval_timer();
trnman_destroy();
- now= my_getsystime()-now;
- diag("trnman_destroy: %g", ((double)now)/1e7);
+ now= microsecond_interval_timer() - now;
+ diag("trnman_destroy: %g", ((double)now)/1e6);
}
pthread_mutex_destroy(&rt_mutex);
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index deb645b3b15..366b7bc3061 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -79,7 +79,6 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
#else
#error
#endif
-
DBUG_ENTER("walk_and_match");
word->weight=LWS_FOR_QUERY;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 4ba7bfbb21d..f20182f8823 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -24,9 +24,7 @@
#include "sql_plugin.h"
#include <m_ctype.h>
#include <my_bit.h>
-#include <myisampack.h>
#include "ha_myisam.h"
-#include <stdarg.h>
#include "myisamdef.h"
#include "rt_index.h"
#include "sql_table.h" // tablename_to_filename
@@ -37,7 +35,7 @@ static ulong opt_myisam_block_size;
/* bits in myisam_recover_options */
const char *myisam_recover_names[] =
-{ "DEFAULT", "BACKUP", "FORCE", "QUICK", "OFF", NullS};
+{ "DEFAULT", "BACKUP", "FORCE", "QUICK", "BACKUP_ALL", "OFF", NullS};
TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"",
myisam_recover_names, NULL};
@@ -48,7 +46,7 @@ TYPELIB myisam_stats_method_typelib= {
myisam_stats_method_names, NULL};
static MYSQL_SYSVAR_ULONG(block_size, opt_myisam_block_size,
- PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_READONLY | PLUGIN_VAR_RQCMDARG,
"Block size to be used for MyISAM index pages", NULL, NULL,
MI_KEY_BLOCK_LENGTH, MI_MIN_KEY_BLOCK_LENGTH, MI_MAX_KEY_BLOCK_LENGTH,
MI_MIN_KEY_BLOCK_LENGTH);
@@ -66,7 +64,7 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, myisam_max_temp_length,
static MYSQL_SYSVAR_SET(recover_options, myisam_recover_options,
PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_READONLY,
"Syntax: myisam-recover-options[=option[,option...]], where option can be "
- "DEFAULT, BACKUP, FORCE, QUICK, or OFF",
+ "DEFAULT, BACKUP, BACKUP_ALL, FORCE, QUICK, or OFF",
NULL, NULL, 1, &myisam_recover_typelib);
static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
@@ -178,6 +176,8 @@ static void mi_check_print_msg(HA_CHECK *param, const char* msg_type,
if (protocol->write())
sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
msgbuf);
+ else if (thd->variables.log_warnings > 2)
+ sql_print_error("%s", msgbuf);
if (param->need_print_msg_lock)
mysql_mutex_unlock(&param->print_msg_mutex);
@@ -348,6 +348,8 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
if (found->flags & BLOB_FLAG)
recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_TIMESTAMP)
+ recinfo_pos->type= FIELD_NORMAL;
else if (found->type() == MYSQL_TYPE_VARCHAR)
recinfo_pos->type= FIELD_VARCHAR;
else if (!(options & HA_OPTION_PACK_RECORD))
@@ -628,6 +630,13 @@ void _mi_report_crashed(MI_INFO *file, const char *message,
mysql_mutex_unlock(&file->s->intern_lock);
}
+/* Return 1 if the user has requested the query to be killed (killed flag of the TABLE in info->external_ref) */
+
+my_bool mi_killed_in_mariadb(MI_INFO *info)
+{
+ return (((TABLE*) (info->external_ref))->in_use->killed != 0);
+}
+
}
@@ -635,6 +644,7 @@ ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg), file(0),
int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
+ HA_CAN_VIRTUAL_COLUMNS |
HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS |
HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS |
@@ -705,6 +715,10 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
if (!(file=mi_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
return (my_errno ? my_errno : -1);
+ file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
+ /* Set external_ref, mainly for temporary tables */
+ file->external_ref= (void*) table; // For mi_killed()
+
if (!table->s->tmp_table) /* No need to perform a check for tmp table */
{
if ((my_errno= table2myisam(table, &keyinfo, &recinfo, &recs)))
@@ -750,6 +764,16 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked)
int_table_flags|= HA_HAS_OLD_CHECKSUM;
}
+ /*
+ For static size rows, tell MariaDB that we will access all bytes
+ in the record when writing it. This signals MariaDB to initialize
+ the full row to ensure we don't get any errors from valgrind and
+ that all bytes in the row are properly reset.
+ */
+ if (!(file->s->options & HA_OPTION_PACK_RECORD) &&
+ (file->s->has_varchar_fields | file->s->has_null_fields))
+ int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
+
for (i= 0; i < table->s->keys; i++)
{
plugin_ref parser= table->key_info[i].parser;
@@ -816,7 +840,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
param.thd = thd;
param.op_name = "check";
param.db_name= table->s->db.str;
- param.table_name= table->alias;
+ param.table_name= table->alias.c_ptr();
param.testflag = check_opt->flags | T_CHECK | T_SILENT;
param.stats_method= (enum_handler_stats_method)THDVAR(thd, stats_method);
@@ -909,7 +933,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
param.thd = thd;
param.op_name= "analyze";
param.db_name= table->s->db.str;
- param.table_name= table->alias;
+ param.table_name= table->alias.c_ptr();
param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
T_DONT_CHECK_CHECKSUM);
param.using_global_keycache = 1;
@@ -946,6 +970,7 @@ int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
(check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
param.sort_buffer_length= THDVAR(thd, sort_buffer_size);
+ param.backup_time= check_opt->start_time;
start_records=file->state->records;
while ((error=repair(thd,param,0)) && param.retry_repair)
{
@@ -956,7 +981,7 @@ int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
param.testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
/* Ensure we don't loose any rows when retrying without quick */
param.testflag|= T_SAFE_REPAIR;
- sql_print_information("Retrying repair of: '%s' without quick",
+ sql_print_information("Retrying repair of: '%s' including modifying data file",
table->s->path.str);
continue;
}
@@ -1018,7 +1043,7 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
DBUG_ENTER("ha_myisam::repair");
param.db_name= table->s->db.str;
- param.table_name= table->alias;
+ param.table_name= table->alias.c_ptr();
param.tmpfile_createflag = O_RDWR | O_TRUNC;
param.using_global_keycache = 1;
param.thd= thd;
@@ -1360,7 +1385,7 @@ int ha_myisam::enable_indexes(uint mode)
}
else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
{
- THD *thd=current_thd;
+ THD *thd= table->in_use;
HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param));
const char *save_proc_info=thd->proc_info;
@@ -1474,7 +1499,15 @@ void ha_myisam::start_bulk_insert(ha_rows rows)
*/
if (file->state->records == 0 && can_enable_indexes &&
(!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
- mi_disable_non_unique_index(file,rows);
+ {
+ if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
+ {
+ file->update|= HA_STATE_CHANGED;
+ mi_clear_all_keys_active(file->s->state.key_map);
+ }
+ else
+ mi_disable_non_unique_index(file,rows);
+ }
else
if (!file->bulk_insert &&
(!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT))
@@ -1548,8 +1581,18 @@ bool ha_myisam::check_and_repair(THD *thd)
if ((marked_crashed= mi_is_crashed(file)) || check(thd, &check_opt))
{
sql_print_warning("Recovering table: '%s'",table->s->path.str);
+ if (myisam_recover_options & HA_RECOVER_FULL_BACKUP)
+ {
+ char buff[MY_BACKUP_NAME_EXTRA_LENGTH+1];
+ my_create_backup_name(buff, "", check_opt.start_time);
+ sql_print_information("Making backup of index file with extension '%s'",
+ buff);
+ mi_make_backup_of_index(file, check_opt.start_time,
+ MYF(MY_WME | ME_JUST_WARNING));
+ }
check_opt.flags=
- ((myisam_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
+ (((myisam_recover_options &
+ (HA_RECOVER_BACKUP | HA_RECOVER_FULL_BACKUP)) ? T_BACKUP_DATA : 0) |
(marked_crashed ? 0 : T_QUICK) |
(myisam_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
T_AUTO_REPAIR);
@@ -1633,8 +1676,14 @@ int ha_myisam::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
- int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
- return error;
+ int res;
+ /* Use the pushed index condition if it matches the index we're scanning */
+ end_range= NULL;
+ if (index == pushed_idx_cond_keyno)
+ mi_set_index_cond_func(file, index_cond_func_myisam, this);
+ res= mi_rkey(file, buf, index, key, keypart_map, find_flag);
+ mi_set_index_cond_func(file, NULL, 0);
+ return res;
}
int ha_myisam::index_next(uchar *buf)
@@ -1720,6 +1769,9 @@ int ha_myisam::info(uint flag)
MI_ISAMINFO misam_info;
char name_buff[FN_REFLEN];
+ if (!table)
+ return 1;
+
(void) mi_status(file,&misam_info,flag);
if (flag & HA_STATUS_VARIABLE)
{
@@ -1807,6 +1859,7 @@ int ha_myisam::reset(void)
{
pushed_idx_cond= NULL;
pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= FALSE;
mi_set_index_cond_func(file, NULL, 0);
ds_mrr.dsmrr_close();
return mi_reset(file);
@@ -1842,6 +1895,7 @@ int ha_myisam::delete_table(const char *name)
int ha_myisam::external_lock(THD *thd, int lock_type)
{
file->in_use.data= thd;
+ file->external_ref= (void*) table; // For mi_killed()
return mi_lock_database(file, !table->s->tmp_table ?
lock_type : ((lock_type == F_UNLCK) ?
F_UNLCK : F_EXTRA_LCK));
@@ -2102,6 +2156,7 @@ static int myisam_init(void *p)
myisam_hton->create= myisam_create_handler;
myisam_hton->panic= myisam_panic;
myisam_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
+ mi_killed= mi_killed_in_mariadb;
return 0;
}
@@ -2129,7 +2184,7 @@ int ha_myisam::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int ha_myisam::multi_range_read_next(char **range_info)
+int ha_myisam::multi_range_read_next(range_id_t *range_info)
{
return ds_mrr.dsmrr_next(range_info);
}
@@ -2150,11 +2205,18 @@ ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
}
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
+}
+
+/* Delegates to ds_mrr.dsmrr_explain_info() and returns its result */
+int ha_myisam::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
}
/* MyISAM MRR implementation ends */
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index b1916881dab..35d52a22c71 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -21,7 +21,6 @@
/* class for the the myisam handler */
#include <myisam.h>
-#include <myisamchk.h>
#include <ft_global.h>
#include "handler.h" /* handler */
#include "table.h" /* TABLE_SHARE */
@@ -33,7 +32,8 @@ typedef struct st_ha_create_information HA_CREATE_INFO;
#define HA_RECOVER_BACKUP 2 /* Make a backupfile on recover */
#define HA_RECOVER_FORCE 4 /* Recover even if we loose rows */
#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */
-#define HA_RECOVER_OFF 16 /* No automatic recover */
+#define HA_RECOVER_FULL_BACKUP 16 /* Make a copy of index file too */
+#define HA_RECOVER_OFF 32 /* No automatic recover */
extern ulong myisam_sort_buffer_size;
extern TYPELIB myisam_recover_typelib;
@@ -145,7 +145,6 @@ class ha_myisam: public handler
int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt);
int preload_keys(THD* thd, HA_CHECK_OPT* check_opt);
bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
- bool check_if_supported_virtual_columns(void) { return TRUE;}
#ifdef HAVE_QUERY_CACHE
my_bool register_query_cache_table(THD *thd, char *table_key,
uint key_length,
@@ -163,14 +162,16 @@ public:
*/
int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- int multi_range_read_next(char **range_info);
+ int multi_range_read_next(range_id_t *range_info);
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
-
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
private:
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index e86c5a7c0fb..65d218b216d 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -80,6 +80,8 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
uint buffer_length);
static ha_checksum mi_byte_checksum(const uchar *buf, uint length);
static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share);
+static int replace_data_file(HA_CHECK *param, MI_INFO *info,
+ const char *name, File new_file);
void myisamchk_init(HA_CHECK *param)
{
@@ -989,9 +991,6 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend)
if (killed_ptr(param))
goto err2;
switch (info->s->data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
if (my_b_read(&param->read_cache,(uchar*) record,
info->s->base.pack_reclength))
@@ -1209,6 +1208,9 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info, my_bool extend)
link_used+= (block_info.filepos - start_recpos);
used+= (pos-start_recpos);
break;
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
} /* switch */
if (! got_error)
{
@@ -1727,29 +1729,8 @@ err:
/* Replace the actual file with the temporary file */
if (new_file >= 0)
{
- mysql_file_close(new_file, MYF(0));
- info->dfile=new_file= -1;
- /*
- On Windows, the old data file cannot be deleted if it is either
- open, or memory mapped. Closing the file won't remove the memory
- map implicilty on Windows. We closed the data file, but we keep
- the MyISAM table open. A memory map will be closed on the final
- mi_close() only. So we need to unmap explicitly here. After
- renaming the new file under the hook, we couldn't use the map of
- the old file any more anyway.
- */
- if (info->s->file_map)
- {
- (void) my_munmap((char*) info->s->file_map,
- (size_t) info->s->mmaped_length);
- info->s->file_map= NULL;
- }
- if (change_to_newfile(share->data_file_name, MI_NAME_DEXT, DATA_TMP_EXT,
- (param->testflag & T_BACKUP_DATA ?
- MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
- mi_open_datafile(info,share,name,-1))
- got_error=1;
-
+ got_error= replace_data_file(param, info, name, new_file);
+ new_file= -1;
param->retry_repair= 0;
}
}
@@ -2007,8 +1988,8 @@ int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name)
(void) mysql_file_close(share->kfile, MYF(MY_WME));
share->kfile = -1;
(void) mysql_file_close(new_file, MYF(MY_WME));
- if (change_to_newfile(share->index_file_name, MI_NAME_IEXT, INDEX_TMP_EXT,
- MYF(0)) ||
+ if (change_to_newfile(share->index_file_name,MI_NAME_IEXT,INDEX_TMP_EXT,
+ 0, MYF(0)) ||
mi_open_keyfile(share))
goto err2;
info->lock_type= F_UNLCK; /* Force mi_readinfo to lock */
@@ -2137,14 +2118,16 @@ err:
*/
int change_to_newfile(const char * filename, const char * old_ext,
- const char * new_ext, myf MyFlags)
+ const char * new_ext,
+ time_t backup_time,
+ myf MyFlags)
{
char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
/* Get real path to filename */
(void) fn_format(old_filename,filename,"",old_ext,2+4+32);
return my_redel(old_filename,
fn_format(new_filename,old_filename,"",new_ext,2+4),
- MYF(MY_WME | MY_LINK_WARNING | MyFlags));
+ backup_time, MYF(MY_WME | MY_LINK_WARNING | MyFlags));
} /* change_to_newfile */
@@ -2548,13 +2531,8 @@ err:
/* Replace the actual file with the temporary file */
if (new_file >= 0)
{
- mysql_file_close(new_file, MYF(0));
- info->dfile=new_file= -1;
- if (change_to_newfile(share->data_file_name,MI_NAME_DEXT, DATA_TMP_EXT,
- (param->testflag & T_BACKUP_DATA ?
- MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
- mi_open_datafile(info,share,name,-1))
- got_error=1;
+ got_error= replace_data_file(param, info, name, new_file);
+ new_file= -1;
}
}
if (got_error)
@@ -3080,13 +3058,8 @@ err:
/* Replace the actual file with the temporary file */
if (new_file >= 0)
{
- mysql_file_close(new_file, MYF(0));
- info->dfile=new_file= -1;
- if (change_to_newfile(share->data_file_name, MI_NAME_DEXT, DATA_TMP_EXT,
- (param->testflag & T_BACKUP_DATA ?
- MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
- mi_open_datafile(info,share,name,-1))
- got_error=1;
+ got_error= replace_data_file(param, info, name, new_file);
+ new_file= -1;
}
}
if (got_error)
@@ -3257,9 +3230,6 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
DBUG_RETURN(1);
switch (share->data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
for (;;)
{
@@ -3653,6 +3623,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
record));
DBUG_RETURN(0);
}
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
}
DBUG_RETURN(1); /* Impossible */
}
@@ -3689,9 +3662,6 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
if (sort_param->fix_datafile)
{
switch (sort_info->new_data_file_type) {
- case BLOCK_RECORD:
- DBUG_ASSERT(0); /* Impossible */
- break;
case STATIC_RECORD:
if (my_b_write(&info->rec_cache,sort_param->record,
share->base.pack_reclength))
@@ -3765,6 +3735,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
sort_param->filepos+=reclength+length;
info->s->state.split++;
break;
+ default:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
}
}
if (sort_param->master)
@@ -4747,3 +4720,52 @@ set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share)
}
}
+/* Copy the index file to the name my_create_backup_name() builds from backup_time; returns the result of my_copy() */
+int mi_make_backup_of_index(MI_INFO *info, time_t backup_time, myf flags)
+{
+ char backup_name[FN_REFLEN + MY_BACKUP_NAME_EXTRA_LENGTH];
+ my_create_backup_name(backup_name, info->s->index_file_name, backup_time);
+ return my_copy(info->s->index_file_name, backup_name, flags);
+}
+
+/* Close new_file, announce the backup if T_BACKUP_DATA is set, unmap any file_map, then change_to_newfile() + mi_open_datafile(); returns 0 if both succeed, else 1 */
+static int replace_data_file(HA_CHECK *param, MI_INFO *info,
+ const char *name, File new_file)
+{
+ MYISAM_SHARE *share=info->s;
+
+ mysql_file_close(new_file,MYF(0));
+ info->dfile= -1;
+ if (param->testflag & T_BACKUP_DATA)
+ {
+ char buff[MY_BACKUP_NAME_EXTRA_LENGTH+1];
+ my_create_backup_name(buff, "", param->backup_time);
+ my_printf_error(0, /* No error, just info */
+ "Making backup of data file with extension '%s'",
+ MYF(ME_JUST_INFO | ME_NOREFRESH), buff);
+ }
+
+ /*
+ On Windows, the old data file cannot be deleted if it is either
+ open, or memory mapped. Closing the file won't remove the memory
+ map implicitly on Windows. We closed the data file, but we keep
+ the MyISAM table open. A memory map will be closed on the final
+ mi_close() only. So we need to unmap explicitly here. After
+ renaming the new file under the hook, we couldn't use the map of
+ the old file any more anyway.
+ */
+ if (info->s->file_map)
+ {
+ (void) my_munmap((char*) info->s->file_map,
+ (size_t) info->s->mmaped_length);
+ info->s->file_map= NULL;
+ }
+
+ if (change_to_newfile(share->data_file_name,MI_NAME_DEXT,
+ DATA_TMP_EXT, param->backup_time,
+ (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
+ mi_open_datafile(info, share, name, -1))
+ return 1;
+ return 0;
+}
diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c
index 660863d4f7c..473c3086c41 100644
--- a/storage/myisam/mi_close.c
+++ b/storage/myisam/mi_close.c
@@ -61,7 +61,7 @@ int mi_close(register MI_INFO *info)
if (flag)
{
DBUG_EXECUTE_IF("crash_before_flush_keys",
- if (share->kfile >= 0) abort(););
+ if (share->kfile >= 0) DBUG_ABORT(););
if (share->kfile >= 0 &&
flush_key_blocks(share->key_cache, share->kfile,
&share->dirty_part_map,
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index 3ec429535d4..8347c17260d 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -34,7 +34,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
MI_CREATE_INFO *ci,uint flags)
{
register uint i,j;
- File UNINIT_VAR(dfile), UNINIT_VAR(file);
+ File dfile,file;
int errpos,save_errno, create_mode= O_RDWR | O_TRUNC;
myf create_flag;
uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
@@ -69,6 +69,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
{
DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION);
}
+ LINT_INIT(dfile);
+ LINT_INIT(file);
errpos=0;
options=0;
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index d1512970683..f139dd1aad7 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -473,3 +473,8 @@ int mi_reset(MI_INFO *info)
HA_STATE_PREV_FOUND);
DBUG_RETURN(error);
}
+/* Always returns 0, i.e. never reports a kill request; info is unused */
+my_bool mi_killed_standalone(MI_INFO *info __attribute__((unused)))
+{
+ return 0;
+}
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index f64a602e2be..e51534b3180 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -509,15 +509,26 @@ int _mi_read_key_record(MI_INFO *info, my_off_t filepos, uchar *buf)
ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
*/
-int mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record)
+ICP_RESULT mi_check_index_cond(register MI_INFO *info, uint keynr,
+ uchar *record)
{
+ ICP_RESULT res;
if (_mi_put_key_in_record(info, keynr, FALSE, record))
{
+ /* Impossible case; Can only happen if bug in code */
mi_print_error(info->s, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- return ICP_ERROR;
+ info->lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_CRASHED;
+ res= ICP_ERROR;
}
- return info->index_cond_func(info->index_cond_func_arg);
+ else if ((res= info->index_cond_func(info->index_cond_func_arg)) ==
+ ICP_OUT_OF_RANGE)
+ {
+ /* We got beyond the end of scanned range */
+ info->lastpos= HA_OFFSET_ERROR; /* No active record */
+ my_errno= HA_ERR_END_OF_FILE;
+ }
+ return res;
}
/*
diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c
index aa4a01db3bc..4ee1763193e 100644
--- a/storage/myisam/mi_locking.c
+++ b/storage/myisam/mi_locking.c
@@ -328,7 +328,14 @@ void mi_update_status(void* param)
(long) info->s->state.state.data_file_length));
#endif
info->s->state.state= *info->state;
+#ifdef HAVE_QUERY_CACHE
+ DBUG_PRINT("info", ("invalidator... '%s' (status update)",
+ info->filename));
+ DBUG_ASSERT(info->s->chst_invalidator != NULL);
+ (*info->s->chst_invalidator)((const char *)info->filename);
+#endif
}
+
info->state= &info->s->state.state;
info->append_insert_at_end= 0;
@@ -642,3 +649,11 @@ int _mi_decrement_open_count(MI_INFO *info)
}
return test(lock_error || write_error);
}
+
+/* Crash-report function that does nothing; all arguments are ignored */
+void _mi_report_crashed_ignore(MI_INFO *file __attribute__((unused)),
+ const char *message __attribute__((unused)),
+ const char *sfile __attribute__((unused)),
+ uint sline __attribute__((unused)))
+{
+}
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 3c52016a1ba..53ecdaeda21 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -72,7 +72,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
{
int lock_error,kfile,open_mode,save_errno,have_rtree=0, realpath_err;
uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys,
- key_parts,unique_key_parts,fulltext_keys,uniques;
+ key_parts,unique_key_parts,base_key_parts,fulltext_keys,uniques;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
uchar *disk_cache, *disk_pos, *end_pos;
@@ -109,7 +109,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
dflt_key_cache);
DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open",
- if (strstr(name, "/crashed"))
+ if (strstr(name, "crashed"))
{
my_errno= HA_ERR_CRASHED;
goto err;
@@ -197,7 +197,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
keys= (uint) share->state.header.keys;
uniques= (uint) share->state.header.uniques;
fulltext_keys= (uint) share->state.header.fulltext_keys;
- key_parts= mi_uint2korr(share->state.header.key_parts);
+ base_key_parts= key_parts= mi_uint2korr(share->state.header.key_parts);
unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
if (len != MI_STATE_INFO_SIZE)
{
@@ -276,7 +276,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
if (!my_multi_malloc(MY_WME,
&share,sizeof(*share),
- &share->state.rec_per_key_part,sizeof(long)*key_parts,
+ &share->state.rec_per_key_part,
+ sizeof(long)*base_key_parts,
&share->keyinfo,keys*sizeof(MI_KEYDEF),
&share->uniqueinfo,uniques*sizeof(MI_UNIQUEDEF),
&share->keyparts,
@@ -298,7 +299,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
errpos=4;
*share=share_buff;
memcpy((char*) share->state.rec_per_key_part,
- (char*) rec_per_key_part, sizeof(long)*key_parts);
+ (char*) rec_per_key_part, sizeof(long)*base_key_parts);
memcpy((char*) share->state.key_root,
(char*) key_root, sizeof(my_off_t)*keys);
memcpy((char*) share->state.key_del,
diff --git a/storage/myisam/mi_panic.c b/storage/myisam/mi_panic.c
index e6a1d54a516..93be70d2af3 100644
--- a/storage/myisam/mi_panic.c
+++ b/storage/myisam/mi_panic.c
@@ -72,8 +72,8 @@ int mi_panic(enum ha_panic_function flag)
if (info->dfile >= 0 && mysql_file_close(info->dfile, MYF(0)))
error = my_errno;
info->s->kfile=info->dfile= -1; /* Files aren't open anymore */
- break;
#endif
+ break;
case HA_PANIC_READ: /* Restore to before WRITE */
#ifdef CANT_OPEN_FILES_TWICE
{ /* Open closed files */
diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c
index 3ca6cdd257f..c88a81962d8 100644
--- a/storage/myisam/mi_rkey.c
+++ b/storage/myisam/mi_rkey.c
@@ -88,6 +88,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
my_errno=HA_ERR_CRASHED;
if (share->concurrent_insert)
mysql_rwlock_unlock(&share->key_root_lock[inx]);
+ fast_mi_writeinfo(info);
goto err;
}
break;
@@ -131,7 +132,10 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
info->lastkey_length,
myisam_readnext_vec[search_flag],
info->s->state.key_root[inx]))
+ {
+ info->lastpos= HA_OFFSET_ERROR;
break;
+ }
/*
Check that the found key does still match the search.
_mi_search_next() delivers the next key regardless of its
@@ -145,13 +149,22 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
info->lastpos= HA_OFFSET_ERROR;
break;
}
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
+ {
+ buf= 0; /* Aborted by user: fast abort, don't read a row */
+ break; /* Leave the skip loop; lastpos is HA_OFFSET_ERROR */
+ }
}
if (res == ICP_OUT_OF_RANGE)
{
- info->lastpos= HA_OFFSET_ERROR;
- if (share->concurrent_insert)
- mysql_rwlock_unlock(&share->key_root_lock[inx]);
- DBUG_RETURN((my_errno= HA_ERR_KEY_NOT_FOUND));
+ /* Change error from HA_ERR_END_OF_FILE */
+ DBUG_ASSERT(info->lastpos == HA_OFFSET_ERROR);
+ my_errno= HA_ERR_KEY_NOT_FOUND;
}
/*
Error if no row found within the data file. (Bug #29838)
@@ -164,29 +177,43 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
my_errno= HA_ERR_KEY_NOT_FOUND;
}
}
+ else
+ {
+ DBUG_ASSERT(info->lastpos == HA_OFFSET_ERROR);
+ }
}
if (share->concurrent_insert)
mysql_rwlock_unlock(&share->key_root_lock[inx]);
- /* Calculate length of the found key; Used by mi_rnext_same */
- if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg &&
- info->lastpos != HA_OFFSET_ERROR)
- info->last_rkey_length= _mi_keylength_part(keyinfo, info->lastkey,
- last_used_keyseg);
- else
- info->last_rkey_length= pack_key_length;
-
- /* Check if we don't want to have record back, only error message */
- if (!buf)
- DBUG_RETURN(info->lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ info->last_rkey_length= pack_key_length;
- if (!(*info->read_record)(info,info->lastpos,buf))
+ if (info->lastpos == HA_OFFSET_ERROR) /* No such record */
{
- info->update|= HA_STATE_AKTIV; /* Record is read */
- DBUG_RETURN(0);
+ fast_mi_writeinfo(info);
+ if (!buf)
+ DBUG_RETURN(my_errno);
}
+ else
+ {
+ /* Calculate length of the found key; Used by mi_rnext_same */
+ if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg)
+ info->last_rkey_length= _mi_keylength_part(keyinfo, info->lastkey,
+ last_used_keyseg);
- info->lastpos = HA_OFFSET_ERROR; /* Didn't find key */
+ /* Check if we don't want to have record back, only error message */
+ if (!buf)
+ {
+ fast_mi_writeinfo(info);
+ DBUG_RETURN(0);
+ }
+ if (!(*info->read_record)(info,info->lastpos,buf))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("error", ("Didn't find row. Error %d", my_errno));
+ info->lastpos= HA_OFFSET_ERROR; /* Didn't find row */
+ }
/* Store last used key as a base for read next */
memcpy(info->lastkey,key_buff,pack_key_length);
@@ -199,3 +226,36 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key,
err:
DBUG_RETURN(my_errno);
} /* _mi_rkey */
+
+
+/*
+ Yield to possible other writers during an index scan.
+ Check also if we got killed by the user and if yes, set
+ info->lastpos= HA_OFFSET_ERROR and my_errno= HA_ERR_ABORTED_BY_USER
+
+ return 0 ok
+ return 1 Query has been requested to be killed
+*/
+
+my_bool mi_yield_and_check_if_killed(MI_INFO *info, int inx)
+{
+ MYISAM_SHARE *share;
+ if (mi_killed(info))
+ {
+ /* purecov: begin tested */
+ info->lastpos= HA_OFFSET_ERROR;
+ /* Set error that we were aborted by kill from application */
+ my_errno= HA_ERR_ABORTED_BY_USER;
+ return 1;
+ /* purecov: end */
+
+ }
+
+ if ((share= info->s)->concurrent_insert)
+ {
+ /* Give writers a chance to access index: drop and re-take our read lock */
+ mysql_rwlock_unlock(&share->key_root_lock[inx]);
+ mysql_rwlock_rdlock(&share->key_root_lock[inx]);
+ }
+ return 0;
+}
diff --git a/storage/myisam/mi_rnext.c b/storage/myisam/mi_rnext.c
index 29de0e98d3d..51a60a76f79 100644
--- a/storage/myisam/mi_rnext.c
+++ b/storage/myisam/mi_rnext.c
@@ -28,7 +28,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
{
int error,changed;
uint flag;
- ICP_RESULT res= 0;
+ ICP_RESULT icp_res= ICP_MATCH;
uint update_mask= HA_STATE_NEXT_FOUND;
DBUG_ENTER("mi_rnext");
@@ -102,8 +102,19 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
while ((info->s->concurrent_insert &&
info->lastpos >= info->state->data_file_length) ||
(info->index_cond_func &&
- (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
+ (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
+
/*
Skip rows that are either inserted by other threads since
we got a lock or do not match pushed index conditions
@@ -115,13 +126,6 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
info->s->state.key_root[inx])))
break;
}
- if (!error && res == ICP_OUT_OF_RANGE)
- {
- if (info->s->concurrent_insert)
- mysql_rwlock_unlock(&info->s->key_root_lock[inx]);
- info->lastpos= HA_OFFSET_ERROR;
- DBUG_RETURN(my_errno= HA_ERR_END_OF_FILE);
- }
}
if (info->s->concurrent_insert)
@@ -131,13 +135,15 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= update_mask;
- if (error)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_rnext_same.c b/storage/myisam/mi_rnext_same.c
index 54de367016b..ea1449f2c98 100644
--- a/storage/myisam/mi_rnext_same.c
+++ b/storage/myisam/mi_rnext_same.c
@@ -29,6 +29,7 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
int error;
uint inx,not_used[2];
MI_KEYDEF *keyinfo;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("mi_rnext_same");
if ((int) (inx=info->lastinx) < 0 || info->lastpos == HA_OFFSET_ERROR)
@@ -63,6 +64,17 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
}
for (;;)
{
+ /*
+ If we are at the last key on the key page, allow writers to
+ access the index.
+ */
+ if (info->int_keypos >= info->int_maxpos &&
+ mi_yield_and_check_if_killed(info, inx))
+ {
+ error=1;
+ break;
+ }
+
if ((error=_mi_search_next(info,keyinfo,info->lastkey,
info->lastkey_length,SEARCH_BIGGER,
info->s->state.key_root[inx])))
@@ -78,26 +90,31 @@ int mi_rnext_same(MI_INFO *info, uchar *buf)
/*
Skip
- rows that are inserted by other threads since we got a lock
- - rows that don't match index condition */
+ - rows that don't match index condition
+ */
if (info->lastpos < info->state->data_file_length &&
(!info->index_cond_func ||
- mi_check_index_cond(info, inx, buf) != ICP_NO_MATCH))
+ (icp_res= mi_check_index_cond(info, inx, buf)) != ICP_NO_MATCH))
break;
}
}
if (info->s->concurrent_insert)
mysql_rwlock_unlock(&info->s->key_root_lock[inx]);
+
+
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
- if (error)
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_rprev.c b/storage/myisam/mi_rprev.c
index 95aa8069e1d..a0b4ec4d927 100644
--- a/storage/myisam/mi_rprev.c
+++ b/storage/myisam/mi_rprev.c
@@ -27,6 +27,7 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
int error,changed;
register uint flag;
MYISAM_SHARE *share=info->s;
+ ICP_RESULT icp_res= ICP_MATCH;
DBUG_ENTER("mi_rprev");
if ((inx = _mi_check_index(info,inx)) < 0)
@@ -53,12 +54,26 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
if (!error)
{
- int res= 0;
+ my_off_t cur_keypage= info->last_keypage;
while ((share->concurrent_insert &&
info->lastpos >= info->state->data_file_length) ||
(info->index_cond_func &&
- !(res= mi_check_index_cond(info, inx, buf))))
+ (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
+ /*
+ If the scan has moved to a different key page since the last check,
+ allow writers to access the index.
+ */
+ if (info->last_keypage != cur_keypage)
+ {
+ cur_keypage= info->last_keypage;
+ if (mi_yield_and_check_if_killed(info, inx))
+ {
+ error= 1;
+ break;
+ }
+ }
+
/*
Skip rows that are either inserted by other threads since
we got a lock or do not match pushed index conditions
@@ -69,13 +84,6 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
share->state.key_root[inx])))
break;
}
- if (!error && res == 2)
- {
- if (share->concurrent_insert)
- mysql_rwlock_unlock(&share->key_root_lock[inx]);
- info->lastpos= HA_OFFSET_ERROR;
- DBUG_RETURN(my_errno= HA_ERR_END_OF_FILE);
- }
}
if (share->concurrent_insert)
@@ -83,13 +91,16 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx)
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
info->update|= HA_STATE_PREV_FOUND;
- if (error)
+
+ if (error || icp_res != ICP_MATCH)
{
+ fast_mi_writeinfo(info);
if (my_errno == HA_ERR_KEY_NOT_FOUND)
my_errno=HA_ERR_END_OF_FILE;
}
else if (!buf)
{
+ fast_mi_writeinfo(info);
DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0);
}
else if (!(*info->read_record)(info,info->lastpos,buf))
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 89d1b801695..c49fddf0f3b 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -89,7 +89,10 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag,
&keypos,lastkey, &last_key);
if (flag == MI_FOUND_WRONG_KEY)
- DBUG_RETURN(-1);
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
nod_flag=mi_test_if_nod(buff);
maxpos=buff+mi_getint(buff)-1;
diff --git a/storage/myisam/mi_static.c b/storage/myisam/mi_static.c
index 711287ca16f..9d480cb414d 100644
--- a/storage/myisam/mi_static.c
+++ b/storage/myisam/mi_static.c
@@ -40,6 +40,7 @@ ulong myisam_concurrent_insert= 0;
ulonglong myisam_max_temp_length= MAX_FILE_SIZE;
ulong myisam_data_pointer_size=4;
ulonglong myisam_mmap_size= SIZE_T_MAX, myisam_mmap_used= 0;
+my_bool (*mi_killed)(MI_INFO *)= mi_killed_standalone;
static int always_valid(const char *filename __attribute__((unused)))
{
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index 79c4ef03a56..72be3d2f810 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -670,7 +670,8 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
- enum_handler_stats_method UNINIT_VAR(method_conv);
+ enum_handler_stats_method method_conv;
+ LINT_INIT(method_conv);
myisam_stats_method_str= argument;
if ((method= find_type(argument, &myisam_stats_method_typelib,
FIND_TYPE_BASIC)) <= 0)
@@ -1003,8 +1004,10 @@ static int myisamchk(HA_CHECK *param, char * filename)
#ifndef TO_BE_REMOVED
if (param->out_flag & O_NEW_DATA)
{ /* Change temp file to org file */
- (void) my_close(info->dfile,MYF(MY_WME)); /* Close new file */
- error|=change_to_newfile(filename, MI_NAME_DEXT, DATA_TMP_EXT, MYF(0));
+ (void) mysql_file_close(info->dfile,
+ MYF(MY_WME)); /* Close new file */
+ error|=change_to_newfile(filename, MI_NAME_DEXT, DATA_TMP_EXT,
+ 0, MYF(0));
if (mi_open_datafile(info,info->s, NULL, -1))
error=1;
param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */
@@ -1136,10 +1139,9 @@ end2:
{
if (param->out_flag & O_NEW_DATA)
error|=change_to_newfile(filename,MI_NAME_DEXT,DATA_TMP_EXT,
+ param->backup_time,
((param->testflag & T_BACKUP_DATA) ?
MYF(MY_REDEL_MAKE_BACKUP) : MYF(0)));
- if (param->out_flag & O_NEW_INDEX)
- error|=change_to_newfile(filename, MI_NAME_IEXT, INDEX_TMP_EXT, MYF(0));
}
(void) fflush(stdout); (void) fflush(stderr);
if (param->error_printed)
@@ -1211,7 +1213,8 @@ static void descript(HA_CHECK *param, register MI_INFO *info, char * name)
}
pos=buff;
if (share->state.changed & STATE_CRASHED)
- strmov(buff,"crashed");
+ strmov(buff, share->state.changed & STATE_CRASHED_ON_REPAIR ?
+ "crashed on repair" : "crashed");
else
{
if (share->state.open_count)
@@ -1508,11 +1511,12 @@ static int mi_sort_records(HA_CHECK *param,
goto err;
}
fn_format(param->temp_filename,name,"", MI_NAME_DEXT,2+4+32);
- new_file= my_create(fn_format(param->temp_filename,
- param->temp_filename, "",
- DATA_TMP_EXT, 2+4),
- 0, param->tmpfile_createflag,
- MYF(0));
+ new_file= mysql_file_create(mi_key_file_datatmp,
+ fn_format(param->temp_filename,
+ param->temp_filename, "",
+ DATA_TMP_EXT, 2+4),
+ 0, param->tmpfile_createflag,
+ MYF(0));
if (new_file < 0)
{
mi_check_print_error(param,"Can't create new tempfile: '%s'",
@@ -1529,10 +1533,10 @@ static int mi_sort_records(HA_CHECK *param,
for (key=0 ; key < share->base.keys ; key++)
share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME;
- if (my_pread(share->kfile,(uchar*) temp_buff,
- (uint) keyinfo->block_length,
- share->state.key_root[sort_key],
- MYF(MY_NABP+MY_WME)))
+ if (mysql_file_pread(share->kfile,(uchar*) temp_buff,
+ (uint) keyinfo->block_length,
+ share->state.key_root[sort_key],
+ MYF(MY_NABP+MY_WME)))
{
mi_check_print_error(param,"Can't read indexpage from filepos: %s",
(ulong) share->state.key_root[sort_key]);
@@ -1564,7 +1568,7 @@ static int mi_sort_records(HA_CHECK *param,
goto err;
}
- (void) my_close(info->dfile,MYF(MY_WME));
+ (void) mysql_file_close(info->dfile,MYF(MY_WME));
param->out_flag|=O_NEW_DATA; /* Data in new file */
info->dfile=new_file; /* Use new datafile */
info->state->del=0;
@@ -1586,8 +1590,9 @@ err:
if (got_error && new_file >= 0)
{
(void) end_io_cache(&info->rec_cache);
- (void) my_close(new_file,MYF(MY_WME));
- (void) my_delete(param->temp_filename, MYF(MY_WME));
+ (void) mysql_file_close(new_file,MYF(MY_WME));
+ (void) mysql_file_delete(mi_key_file_dfile, param->temp_filename,
+ MYF(MY_WME));
}
if (temp_buff)
{
@@ -1639,9 +1644,9 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info,
if (nod_flag)
{
next_page=_mi_kpos(nod_flag,keypos);
- if (my_pread(info->s->kfile,(uchar*) temp_buff,
- (uint) keyinfo->block_length, next_page,
- MYF(MY_NABP+MY_WME)))
+ if (mysql_file_pread(info->s->kfile,(uchar*) temp_buff,
+ (uint) keyinfo->block_length, next_page,
+ MYF(MY_NABP+MY_WME)))
{
mi_check_print_error(param,"Can't read keys from filepos: %s",
llstr(next_page,llbuff));
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index e400089b881..8f49ce5a5ed 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -15,17 +15,14 @@
/* This file is included by all internal myisam files */
-#include "myisam.h" /* Structs & some defines */
-#include "myisampack.h" /* packing of keys */
+#include <myisam.h> /* Structs & some defines */
+#include <myisampack.h> /* packing of keys */
#include <my_tree.h>
#include <my_pthread.h>
#include <thr_lock.h>
#include <mysql/psi/mysql_file.h>
-/* undef map from my_nosys; We need test-if-disk full */
-#if defined(my_write)
-#undef my_write
-#endif
+C_MODE_START
typedef struct st_mi_status_info
{
@@ -188,6 +185,8 @@ typedef struct st_mi_isam_share
size_t (*file_read) (MI_INFO *, uchar *, size_t, my_off_t, myf);
size_t (*file_write) (MI_INFO *, const uchar *, size_t, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */
+ /* query cache invalidator for changing state */
+ invalidator_by_filename chst_invalidator;
ulong this_process; /* processid */
ulong last_process; /* For table-change-check */
ulong last_version; /* Version on start */
@@ -225,6 +224,8 @@ typedef struct st_mi_isam_share
mysql_rwlock_t mmap_lock;
} MYISAM_SHARE;
+//typedef ICP_RESULT (*index_cond_func_t)(void *param);
+
struct st_myisam_info
{
MYISAM_SHARE *s; /* Shared between open:s */
@@ -236,6 +237,7 @@ struct st_myisam_info
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ void *external_ref; /* For MariaDB TABLE */
LIST in_use; /* Thread using this table */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
@@ -420,7 +422,9 @@ extern uint myisam_read_vec[], myisam_readnext_vec[];
extern uint myisam_quick_table_bits;
extern File myisam_log_file;
extern ulong myisam_pid;
-
+extern my_bool (*mi_killed)(MI_INFO *);
+extern void _mi_report_crashed(MI_INFO *file, const char *message,
+ const char *sfile, uint sline);
/* This is used by _mi_calc_xxx_key_length och _mi_store_key */
typedef struct st_mi_s_param
@@ -493,6 +497,8 @@ extern int _mi_writeinfo(MI_INFO *info, uint options);
extern int _mi_test_if_changed(MI_INFO *info);
extern int _mi_mark_file_changed(MI_INFO *info);
extern int _mi_decrement_open_count(MI_INFO *info);
+void _mi_report_crashed_ignore(MI_INFO *file, const char *message,
+ const char *sfile, uint sline);
extern int _mi_check_index(MI_INFO *info, int inx);
extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
uint key_len, uint nextflag, my_off_t pos);
@@ -576,6 +582,8 @@ extern ulonglong mi_safe_mul(ulonglong a, ulonglong b);
extern int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
const uchar *oldrec, const uchar *newrec,
my_off_t pos);
+extern my_bool mi_yield_and_check_if_killed(MI_INFO *info, int inx);
+extern my_bool mi_killed_standalone(MI_INFO *);
struct st_sort_info;
@@ -634,7 +642,6 @@ enum myisam_log_commands
#define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0)
#define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1)
-C_MODE_START
extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t);
extern uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from);
extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
@@ -709,10 +716,8 @@ void mi_setup_functions(register MYISAM_SHARE *share);
my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
int mi_munmap_file(MI_INFO *info);
void mi_remap_file(MI_INFO *info, my_off_t size);
-void _mi_report_crashed(MI_INFO *file, const char *message,
- const char *sfile, uint sline);
-int mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record);
+ICP_RESULT mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record);
/* Functions needed by mi_check */
int killed_ptr(HA_CHECK *param);
void mi_check_print_error(HA_CHECK *param, const char *fmt, ...);
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index b48e95f1171..320954147d1 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -712,7 +712,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table)
(void) my_delete(new_name,MYF(MY_WME));
}
else
- error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
+ error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME));
}
if (! error)
error=save_state(isam_file,mrg,new_length,glob_crc);
diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c
index 888d3c7e56a..48eb48cc5e8 100644
--- a/storage/myisam/rt_index.c
+++ b/storage/myisam/rt_index.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (C) 2002-2006 MySQL AB & Ramil Kalimullin
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 513b144cd30..0db96444451 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -1401,9 +1401,9 @@ THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd,
/*
When MERGE table is open, but not yet attached, other threads
- could flush it, which means call mysql_lock_abort_for_thread()
+ could flush it, which means calling mysql_lock_abort_for_thread()
on this threads TABLE. 'children_attached' is FALSE in this
- situaton. Since the table is not locked, return no lock data.
+ situation. Since the table is not locked, return no lock data.
*/
if (!this->file->children_attached)
goto end; /* purecov: tested */
diff --git a/storage/ndb/include/ndbapi/NdbError.hpp b/storage/ndb/include/ndbapi/NdbError.hpp
index aa27caf78f9..b752e578bc1 100644
--- a/storage/ndb/include/ndbapi/NdbError.hpp
+++ b/storage/ndb/include/ndbapi/NdbError.hpp
@@ -66,7 +66,7 @@ struct NdbError {
/**
* The error code indicates a permanent error.<br>
- * (Includes classificatons: NdbError::PermanentError,
+ * (Includes classifications: NdbError::PermanentError,
* NdbError::ApplicationError, NdbError::NoDataFound,
* NdbError::ConstraintViolation, NdbError::SchemaError,
* NdbError::UserDefinedError, NdbError::InternalError, and,
diff --git a/storage/ndb/include/util/File.hpp b/storage/ndb/include/util/File.hpp
index b9d348683ec..bbddc24583a 100644
--- a/storage/ndb/include/util/File.hpp
+++ b/storage/ndb/include/util/File.hpp
@@ -31,7 +31,7 @@ public:
* Returns time for last contents modification of a file.
*
* @param aFileName a filename to check.
- * @return the time for last contents modificaton of the file.
+ * @return the time for last contents modification of the file.
*/
static time_t mtime(const char* aFileName);
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
index 4f2cb877bc2..87b31c18d68 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
@@ -2253,7 +2253,7 @@ int Dbtup::interpreterNextLab(Signal* signal,
if(AttributeOffset::getCharsetFlag(TattrDesc2))
{
Uint32 pos = AttributeOffset::getCharsetPos(TattrDesc2);
- cs = tabptr.p->charsetArray[pos];
+ cs = (void*) tabptr.p->charsetArray[pos];
}
const NdbSqlUtil::Type& sqlType = NdbSqlUtil::getType(typeId);
diff --git a/storage/oqgraph/CMakeLists.txt b/storage/oqgraph/CMakeLists.txt
index caba9ce3481..c791081d4ca 100644
--- a/storage/oqgraph/CMakeLists.txt
+++ b/storage/oqgraph/CMakeLists.txt
@@ -1,22 +1,27 @@
-CHECK_CXX_SOURCE_COMPILES(
-"#include <boost/version.hpp>
-#if BOOST_VERSION >= 104000
-#else
-#error oops
-#endif
-int main() { return 0; }" BOOST_OK)
-
-IF(BOOST_OK)
- ADD_DEFINITIONS(-DHAVE_OQGRAPH)
- IF(MSVC)
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
- ENDIF(MSVC)
- IF(CMAKE_CXX_FLAGS)
- STRING(REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
- STRING(REPLACE "-fno-implicit-templates" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
- ENDIF()
-
- MYSQL_ADD_PLUGIN(oqgraph ha_oqgraph.cc graphcore.cc STORAGE_ENGINE
- MODULE_ONLY)
-ENDIF(BOOST_OK)
+CHECK_CXX_SOURCE_COMPILES(
+"#include <boost/version.hpp>
+#if BOOST_VERSION >= 104000
+#else
+#error oops
+#endif
+int main() { return 0; }" BOOST_OK)
+
+# lp:756966 OQGRAPH on Win64 does not compile
+IF(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET(BOOST_OK 0)
+ENDIF()
+
+IF(BOOST_OK)
+ ADD_DEFINITIONS(-DHAVE_OQGRAPH)
+ IF(MSVC)
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
+ ENDIF(MSVC)
+ IF(CMAKE_CXX_FLAGS)
+ STRING(REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+ STRING(REPLACE "-fno-implicit-templates" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
+ ENDIF()
+
+ MYSQL_ADD_PLUGIN(oqgraph ha_oqgraph.cc graphcore.cc STORAGE_ENGINE
+ MODULE_ONLY)
+ENDIF(BOOST_OK)
diff --git a/storage/oqgraph/ha_oqgraph.cc b/storage/oqgraph/ha_oqgraph.cc
index a46269f8883..0352fee8293 100644
--- a/storage/oqgraph/ha_oqgraph.cc
+++ b/storage/oqgraph/ha_oqgraph.cc
@@ -750,19 +750,19 @@ int ha_oqgraph::fill_record(byte *record, const open_query::row &row)
if (row.latch_indicator)
{
field[0]->set_notnull();
- field[0]->store((longlong) row.latch);
+ field[0]->store((longlong) row.latch, 0);
}
if (row.orig_indicator)
{
field[1]->set_notnull();
- field[1]->store((longlong) row.orig);
+ field[1]->store((longlong) row.orig, 0);
}
if (row.dest_indicator)
{
field[2]->set_notnull();
- field[2]->store((longlong) row.dest);
+ field[2]->store((longlong) row.dest, 0);
}
if (row.weight_indicator)
@@ -774,13 +774,13 @@ int ha_oqgraph::fill_record(byte *record, const open_query::row &row)
if (row.seq_indicator)
{
field[4]->set_notnull();
- field[4]->store((longlong) row.seq);
+ field[4]->store((longlong) row.seq, 0);
}
if (row.link_indicator)
{
field[5]->set_notnull();
- field[5]->store((longlong) row.link);
+ field[5]->store((longlong) row.link, 0);
}
if (ptrdiff)
diff --git a/storage/pbxt/src/cache_xt.cc b/storage/pbxt/src/cache_xt.cc
index 24e42d9e984..090250dd802 100644
--- a/storage/pbxt/src/cache_xt.cc
+++ b/storage/pbxt/src/cache_xt.cc
@@ -717,6 +717,11 @@ xtPublic void xt_ind_exit(XTThreadPtr self)
ind_handle_exit(self);
if (ind_cac_globals.cg_blocks) {
+ XTIndBlockPtr block = ind_cac_globals.cg_blocks;
+ for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
+ XT_IPAGE_FREE_LOCK(self, &block->cb_lock);
+ block++;
+ }
xt_free(self, ind_cac_globals.cg_blocks);
ind_cac_globals.cg_blocks = NULL;
xt_free_mutex(&ind_cac_globals.cg_lock);
diff --git a/storage/pbxt/src/filesys_xt.cc b/storage/pbxt/src/filesys_xt.cc
index 31e2cf961b6..ebe0ed146b0 100644
--- a/storage/pbxt/src/filesys_xt.cc
+++ b/storage/pbxt/src/filesys_xt.cc
@@ -369,8 +369,7 @@ xtPublic xtBool xt_fs_stat(XTThreadPtr self, char *path, off_t *size, struct tim
CloseHandle(fh);
if (size)
*size = (off_t) info.nFileSizeLow | (((off_t) info.nFileSizeHigh) << 32);
- if (mod_time)
- mod_time->tv.ft = info.ftLastWriteTime;
+ memset(mod_time, 0, sizeof(*mod_time));
#else
struct stat sb;
diff --git a/storage/pbxt/src/ha_pbxt.cc b/storage/pbxt/src/ha_pbxt.cc
index 1b0e0f0f5ff..7305f80f0fb 100644
--- a/storage/pbxt/src/ha_pbxt.cc
+++ b/storage/pbxt/src/ha_pbxt.cc
@@ -110,6 +110,9 @@ static int pbxt_end(void *p);
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
static void pbxt_drop_database(handlerton *hton, char *path);
static int pbxt_close_connection(handlerton *hton, THD* thd);
+#ifdef MARIADB_BASE_VERSION
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all);
+#endif
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
@@ -1149,6 +1152,9 @@ static int pbxt_init(void *p)
pbxt_hton->state = SHOW_OPTION_YES;
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
+#ifdef MARIADB_BASE_VERSION
+ pbxt_hton->commit_ordered = pbxt_commit_ordered;
+#endif
pbxt_hton->commit = pbxt_commit; /* commit */
pbxt_hton->rollback = pbxt_rollback; /* rollback */
if (pbxt_support_xa) {
@@ -1486,6 +1492,29 @@ static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
return err;
}
+#ifdef MARIADB_BASE_VERSION
+/*
+ * Quickly commit the transaction to memory and make it visible to others.
+ * The remaining part of commit will happen later, in pbxt_commit().
+ */
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all)
+{
+ XTThreadPtr self;
+
+ if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
+ XT_PRINT2(self, "%s pbxt_commit_ordered all=%d\n", all ? "END CONN XACT" : "END STAT", all);
+
+ if (self->st_xact_data) {
+ if (all || self->st_auto_commit) {
+ self->st_commit_ordered = TRUE;
+ self->st_writer = self->st_xact_writer;
+ self->st_delayed_error= !xt_xn_commit_fast(self, self->st_writer);
+ }
+ }
+ }
+}
+#endif
+
/*
* Commit the PBXT transaction of the given thread.
* thd is the MySQL thread structure.
@@ -1514,7 +1543,13 @@ static int pbxt_commit(handlerton *hton, THD *thd, bool all)
if (all || self->st_auto_commit) {
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
- if (!xt_xn_commit(self))
+ if (self->st_commit_ordered) {
+ self->st_commit_ordered = FALSE;
+ err = !xt_xn_commit_slow(self, self->st_writer) || self->st_delayed_error;
+ } else {
+ err = !xt_xn_commit(self);
+ }
+ if (err)
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
}
}
@@ -1617,7 +1652,7 @@ static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, const char *thread_name, int *err)
{
THD *thd;
- XTThreadPtr self = NULL;
+ XTThreadPtr volatile self = NULL;
*temp_thread = 0;
if ((thd = current_thd))
@@ -6042,7 +6077,7 @@ static MYSQL_SYSVAR_INT(max_threads, pbxt_max_threads,
NULL, NULL, 0, 0, 20000, 1);
#endif
-#ifndef DEBUG
+#if !defined(DEBUG) || defined(MARIADB_BASE_VERSION)
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
PLUGIN_VAR_OPCMDARG,
"Enable PBXT support for the XA two-phase commit, default is enabled",
diff --git a/storage/pbxt/src/heap_xt.cc b/storage/pbxt/src/heap_xt.cc
index a4e3fec1611..11331f6489f 100644
--- a/storage/pbxt/src/heap_xt.cc
+++ b/storage/pbxt/src/heap_xt.cc
@@ -109,6 +109,7 @@ xtPublic void xt_heap_release(XTThreadPtr self, XTHeapPtr hp)
if (hp->h_finalize)
(*hp->h_finalize)(self, hp);
xt_spinlock_unlock(&hp->h_lock);
+ xt_spinlock_free(NULL, &hp->h_lock);
xt_free(self, hp);
return;
}
diff --git a/storage/pbxt/src/lock_xt.cc b/storage/pbxt/src/lock_xt.cc
index 0e9af277c7b..e4a38716845 100644
--- a/storage/pbxt/src/lock_xt.cc
+++ b/storage/pbxt/src/lock_xt.cc
@@ -726,11 +726,15 @@ xtBool xt_init_row_locks(XTRowLocksPtr rl)
rl->rl_groups[i].lg_list_in_use = 0;
rl->rl_groups[i].lg_list = NULL;
}
+ rl->valid = 1;
return OK;
}
void xt_exit_row_locks(XTRowLocksPtr rl)
{
+ if (!rl->valid)
+ return;
+
for (int i=0; i<XT_ROW_LOCK_GROUP_COUNT; i++) {
xt_spinlock_free(NULL, &rl->rl_groups[i].lg_lock);
rl->rl_groups[i].lg_wait_queue = NULL;
@@ -741,6 +745,7 @@ void xt_exit_row_locks(XTRowLocksPtr rl)
rl->rl_groups[i].lg_list = NULL;
}
}
+ rl->valid = 0;
}
/*
@@ -1424,6 +1429,7 @@ xtPublic void xt_spinxslock_init(struct XTThread *XT_UNUSED(self), XTSpinXSLockP
#endif
{
sxs->sxs_xlocked = 0;
+ sxs->sxs_xwaiter = 0;
sxs->sxs_rlock_count = 0;
sxs->sxs_wait_count = 0;
#ifdef DEBUG
@@ -2058,11 +2064,12 @@ static void lck_free_thread_data(XTThreadPtr XT_UNUSED(self), void *XT_UNUSED(da
static void lck_do_job(XTThreadPtr self, int job, XSLockTestPtr data, xtBool reader)
{
- char b1[2048], b2[2048];
+ char b1[1024], b2[1024];
switch (job) {
case JOB_MEMCPY:
- memcpy(b1, b2, 2048);
+ memset(b1, 0, sizeof(b1));
+ memset(b2, 1, sizeof(b2));
data->xs_inc++;
break;
case JOB_SLEEP:
diff --git a/storage/pbxt/src/lock_xt.h b/storage/pbxt/src/lock_xt.h
index 4e5af648c37..28737478d48 100644
--- a/storage/pbxt/src/lock_xt.h
+++ b/storage/pbxt/src/lock_xt.h
@@ -658,6 +658,7 @@ typedef struct XTLockGroup {
struct XTLockWait;
typedef struct XTRowLocks {
+ int valid;
XTLockGroupRec rl_groups[XT_ROW_LOCK_GROUP_COUNT];
void xt_cancel_temp_lock(XTLockWaitPtr lw);
diff --git a/storage/pbxt/src/memory_xt.h b/storage/pbxt/src/memory_xt.h
index 1785cd0bd51..bfc7990f914 100644
--- a/storage/pbxt/src/memory_xt.h
+++ b/storage/pbxt/src/memory_xt.h
@@ -29,8 +29,21 @@
struct XTThread;
-#ifdef DEBUG
-#define DEBUG_MEMORY
+#if (defined DEBUG)
+/*
+ Disable PBXT debug malloc on Windows, as it is not properly aligned.
+ malloc() alignment requirement on x64 is documented as 16 bytes. PBXT debug
+ malloc is only 8-byte aligned. Improper alignment will lead to a crash if
+ e.g. SSE instructions access heap memory.
+
+ This might be a general problem; however, crashes have so far been seen only
+ on Windows (crash during setjmp() on memory allocated with pbxt debug malloc).
+
+ Besides, on Windows the C runtime already provides a debug malloc.
+*/
+#ifndef _WIN32
+ #define DEBUG_MEMORY
+#endif
#endif
#ifdef DEBUG_MEMORY
diff --git a/storage/pbxt/src/pthread_xt.cc b/storage/pbxt/src/pthread_xt.cc
index e7f0632e9ae..d704e977c21 100755
--- a/storage/pbxt/src/pthread_xt.cc
+++ b/storage/pbxt/src/pthread_xt.cc
@@ -396,48 +396,7 @@ xtPublic int xt_p_cond_wait(xt_cond_type *cond, xt_mutex_type *mutex)
xtPublic int xt_p_cond_timedwait(xt_cond_type *cond, xt_mutex_type *mt, struct timespec *abstime)
{
- pthread_mutex_t *mutex = &mt->mt_cs;
- int result;
- long timeout;
- union ft64 now;
-
- if (abstime != NULL) {
- GetSystemTimeAsFileTime(&now.ft);
-
- timeout = (long)((abstime->tv.i64 - now.i64) / 10000);
- if (timeout < 0)
- timeout = 0L;
- if (timeout > abstime->max_timeout_msec)
- timeout = abstime->max_timeout_msec;
- }
- else
- timeout= INFINITE;
-
- WaitForSingleObject(cond->broadcast_block_event, INFINITE);
-
- EnterCriticalSection(&cond->lock_waiting);
- cond->waiting++;
- LeaveCriticalSection(&cond->lock_waiting);
-
- LeaveCriticalSection(mutex);
-
- result= WaitForMultipleObjects(2, cond->events, FALSE, timeout);
-
- EnterCriticalSection(&cond->lock_waiting);
- cond->waiting--;
-
- if (cond->waiting == 0) {
- /* The last waiter must reset the broadcast
- * state (whther there was a broadcast or not)!
- */
- ResetEvent(cond->events[xt_cond_type::BROADCAST]);
- SetEvent(cond->broadcast_block_event);
- }
- LeaveCriticalSection(&cond->lock_waiting);
-
- EnterCriticalSection(mutex);
-
- return result == WAIT_TIMEOUT ? ETIMEDOUT : 0;
+ return pthread_cond_timedwait(cond, &mt->mt_cs, abstime);
}
xtPublic int xt_p_join(pthread_t thread, void **value)
@@ -547,42 +506,23 @@ xtPublic void xt_p_init_threading(void)
xtPublic int xt_p_set_low_priority(pthread_t thr)
{
- if (pth_min_priority == pth_max_priority) {
- /* Under Linux the priority of normal (non-runtime)
- * threads are set using the standard methods
- * for setting process priority.
- */
-
- /* We could set who == 0 because it should have the same affect
- * as using the PID.
- */
-
- /* -20 = highest, 20 = lowest */
- if (setpriority(PRIO_PROCESS, getpid(), 20) == -1)
- return errno;
- return 0;
- }
- return pth_set_priority(thr, pth_min_priority);
+ if (pth_min_priority != pth_max_priority)
+ return pth_set_priority(thr, pth_min_priority);
+ return 0;
}
xtPublic int xt_p_set_normal_priority(pthread_t thr)
{
- if (pth_min_priority == pth_max_priority) {
- if (setpriority(PRIO_PROCESS, getpid(), 0) == -1)
- return errno;
- return 0;
- }
- return pth_set_priority(thr, pth_normal_priority);
+ if (pth_min_priority != pth_max_priority)
+ return pth_set_priority(thr, pth_normal_priority);
+ return 0;
}
xtPublic int xt_p_set_high_priority(pthread_t thr)
{
- if (pth_min_priority == pth_max_priority) {
- if (setpriority(PRIO_PROCESS, getpid(), -20) == -1)
- return errno;
- return 0;
- }
- return pth_set_priority(thr, pth_max_priority);
+ if (pth_min_priority != pth_max_priority)
+ return pth_set_priority(thr, pth_max_priority);
+ return 0;
}
#ifdef DEBUG_LOCKING
diff --git a/storage/pbxt/src/table_xt.cc b/storage/pbxt/src/table_xt.cc
index c6eaeeba2a1..443fc3ee193 100644
--- a/storage/pbxt/src/table_xt.cc
+++ b/storage/pbxt/src/table_xt.cc
@@ -726,7 +726,7 @@ xtPublic void xt_check_tables(XTThreadPtr self)
{
u_int edx;
XTTableEntryPtr te_ptr;
- volatile XTTableHPtr tab;
+ volatile XTTableHPtr tab= 0;
char path[PATH_MAX];
enter_();
@@ -1132,7 +1132,7 @@ static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr d
XTOpenFilePtr of_rec, of_ind;
XTTableEntryPtr te_ptr;
size_t tab_format_offset;
- size_t tab_head_size;
+ size_t tab_head_size= 0;
enter_();
@@ -1755,6 +1755,8 @@ xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop
tab_close_mapped_files(self, tab);
tab_delete_table_files(self, tab_name, tab_id);
+ /* Remove table from "repair-pending" */
+ xt_tab_table_repaired(tab);
ASSERT(xt_get_self() == self);
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
@@ -1822,8 +1824,8 @@ xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool
}
if (free_count != tab->tab_rec_fnum) {
if (correct_count) {
- tab->tab_rec_fnum = free_count;
- tab->tab_head_rec_fnum = free_count;
+ tab->tab_rec_fnum = (uint) free_count;
+ tab->tab_head_rec_fnum = (uint) free_count;
tab->tab_flush_pending = TRUE;
xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
}
@@ -1875,8 +1877,8 @@ xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool
* The correct way to do this at run time would be to add the change to the
* transaction log, so that it is applied by the writer.
*/
- tab->tab_row_fnum = free_count;
- tab->tab_head_row_fnum = free_count;
+ tab->tab_row_fnum = (uint) free_count;
+ tab->tab_head_row_fnum = (uint) free_count;
tab->tab_flush_pending = TRUE;
xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
}
@@ -4450,10 +4452,10 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact
xtXactID rec_xn_id = 0;
xtBool wait = FALSE;
xtXactID wait_xn_id = 0;
- xtRowID row_id;
+ xtRowID row_id= 0;
xtRecordID var_rec_id;
xtXactID xn_id;
- register XTTableHPtr tab;
+ register XTTableHPtr tab = 0;
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
char t_buf[500];
int len;
@@ -4628,7 +4630,8 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact
return FALSE;
failed:
- XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
+ if (tab)
+ XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
return XT_ERR;
}
diff --git a/storage/pbxt/src/thread_xt.cc b/storage/pbxt/src/thread_xt.cc
index 52c2c6c29c5..eb9941f13fb 100644
--- a/storage/pbxt/src/thread_xt.cc
+++ b/storage/pbxt/src/thread_xt.cc
@@ -54,6 +54,9 @@ void xt_db_exit_thread(XTThreadPtr self);
static void thr_accumulate_statistics(XTThreadPtr self);
+#ifdef _WIN32
+#include <my_sys.h>
+#endif
/*
* -----------------------------------------------------------------------
* THREAD GLOBALS
@@ -1962,18 +1965,7 @@ xtPublic xtBool xt_timed_wait_cond(XTThreadPtr self, xt_cond_type *cond, xt_mute
XTThreadPtr me = self ? self : xt_get_self();
#ifdef XT_WIN
- union ft64 now;
-
- GetSystemTimeAsFileTime(&now.ft);
-
- /* System time is measured in 100ns units.
- * This calculation will be reversed by the Windows implementation
- * of pthread_cond_timedwait(), in order to extract the
- * milli-second timeout!
- */
- abstime.tv.i64 = now.i64 + (milli_sec * 10000);
-
- abstime.max_timeout_msec = milli_sec;
+ set_timespec_nsec(abstime, 1000000ULL* milli_sec);
#else
struct timeval now;
u_llong micro_sec;
diff --git a/storage/pbxt/src/thread_xt.h b/storage/pbxt/src/thread_xt.h
index a07f7b7ae01..282df46a5d5 100644
--- a/storage/pbxt/src/thread_xt.h
+++ b/storage/pbxt/src/thread_xt.h
@@ -299,6 +299,9 @@ typedef struct XTThread {
xtBool st_stat_ended; /* TRUE if the statement was ended. */
xtBool st_stat_trans; /* TRUE if a statement transaction is running (started on UPDATE). */
xtBool st_stat_modify; /* TRUE if the statement is an INSERT/UPDATE/DELETE */
+ xtBool st_commit_ordered; /* TRUE if we have run commit_ordered() */
+ xtBool st_delayed_error; /* TRUE if we got an error in commit_ordered() */
+ xtBool st_writer; /* Copy of thread->st_xact_writer (which is clobbered by xlog_append()) */
#ifdef XT_IMPLEMENT_NO_ACTION
XTBasicListRec st_restrict_list; /* These records have been deleted and should have no reference. */
#endif
diff --git a/storage/pbxt/src/xaction_xt.cc b/storage/pbxt/src/xaction_xt.cc
index 48abc5d2b66..fd7ae88a4ae 100644
--- a/storage/pbxt/src/xaction_xt.cc
+++ b/storage/pbxt/src/xaction_xt.cc
@@ -1123,7 +1123,6 @@ xtPublic void xt_xn_init_db(XTThreadPtr self, XTDatabaseHPtr db)
*/
for (u_int i=0; i<XT_XN_NO_OF_SEGMENTS; i++) {
seg = &db->db_xn_idx[i];
- XT_XACT_INIT_LOCK(self, &seg->xs_tab_lock);
seg->xs_last_xn_id = db->db_xn_curr_id;
}
@@ -1287,27 +1286,61 @@ xtPublic xtBool xt_xn_begin(XTThreadPtr self)
return OK;
}
-static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
+static void xn_end_release_locks(XTThreadPtr thread)
+{
+ XTXactDataPtr xact = thread->st_xact_data;
+ XTDatabaseHPtr db = thread->st_database;
+ ASSERT_NS(xact);
+
+ /* {REMOVE-LOCKS} Drop locks if you have any: */
+ thread->st_lock_list.xt_remove_all_locks(db, thread);
+
+ /* Do this afterwards to make sure the sweeper
+ * does not start cleaning up transactions
+ * before any transactions that were waiting for
+ * this transaction have completed!
+ */
+ xact->xd_end_xn_id = db->db_xn_curr_id;
+
+ /* Now you can sweep! */
+ xact->xd_flags |= XT_XN_XAC_SWEEP;
+}
+
+/* The commit is split into two phases: one "fast" for MariaDB commit_ordered(),
+ * and one "slow" for commit(). When not using internal 2pc, there is only one
+ * call combining both phases.
+ */
+
+enum {
+ XN_END_PHASE_FAST = 1,
+ XN_END_PHASE_SLOW = 2,
+ XN_END_PHASE_BOTH = 3
+};
+
+static xtBool xn_end_xact(XTThreadPtr thread, u_int status, xtBool writer, int phase)
{
XTXactDataPtr xact;
xtBool ok = TRUE;
+ xtBool err;
ASSERT_NS(thread->st_xact_data);
if ((xact = thread->st_xact_data)) {
XTDatabaseHPtr db = thread->st_database;
xtXactID xn_id = xact->xd_start_xn_id;
- xtBool writer;
- if ((writer = thread->st_xact_writer)) {
+ if (writer) {
/* The transaction wrote something: */
XTXactEndEntryDRec entry;
xtWord4 sum;
- sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
- entry.xe_status_1 = status;
- entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
- XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
- XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ if (phase & XN_END_PHASE_FAST)
+ {
+ sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
+ entry.xe_status_1 = status;
+ entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
+ XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
+ XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ }
#ifdef XT_IMPLEMENT_NO_ACTION
/* This will check any resticts that have been delayed to the end of the statement. */
@@ -1319,20 +1352,35 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
#endif
- /* Flush the data log: */
- if (!thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
+ /* Flush the data log (in the "fast" case we already did it in prepare): */
+ if ((phase & XN_END_PHASE_SLOW) && !thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
}
/* Write and flush the transaction log: */
- if (!xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit)) {
+ if (phase == XN_END_PHASE_FAST) {
+ /* Fast phase, delay any write or flush to later. */
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, XT_XLOG_NO_WRITE_NO_FLUSH);
+ } else if (phase == XN_END_PHASE_SLOW) {
+ /* We already appended the commit record in the fast phase.
+ * Now just call with empty record to ensure we write/flush
+ * the log as needed for this commit.
+ */
+ err = !xt_xlog_log_data(thread, 0, NULL, xt_db_flush_log_at_trx_commit);
+ } else /* phase == XN_END_PHASE_BOTH */ {
+ /* Both phases at once, append commit record and write/flush normally. */
+ ASSERT_NS(phase == XN_END_PHASE_BOTH);
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit);
+ }
+
+ if (err) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
/* Make sure this is done, if we failed to log
* the transction end!
*/
- if (thread->st_xact_writer) {
+ if (writer) {
/* Adjust this in case of error, but don't forget
* to lock!
*/
@@ -1347,46 +1395,46 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
}
- /* Setting this flag completes the transaction,
- * Do this before we release the locks, because
- * the unlocked transactions expect the
- * transaction they are waiting for to be
- * gone!
- */
- xact->xd_end_time = ++db->db_xn_end_time;
- if (status == XT_LOG_ENT_COMMIT) {
- thread->st_statistics.st_commits++;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- }
- else {
- thread->st_statistics.st_rollbacks++;
- xact->xd_flags |= XT_XN_XAC_ENDED;
+ if (phase & XN_END_PHASE_FAST) {
+ /* Setting this flag completes the transaction,
+ * Do this before we release the locks, because
+ * the unlocked transactions expect the
+ * transaction they are waiting for to be
+ * gone!
+ */
+ xact->xd_end_time = ++db->db_xn_end_time;
+ if (status == XT_LOG_ENT_COMMIT) {
+ thread->st_statistics.st_commits++;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
+ }
+ else {
+ thread->st_statistics.st_rollbacks++;
+ xact->xd_flags |= XT_XN_XAC_ENDED;
+ }
}
- /* {REMOVE-LOCKS} Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
-
- /* Do this afterwards to make sure the sweeper
- * does not cleanup transactions start cleaning up
- * before any transactions that were waiting for
- * this transaction have completed!
+ /* Return early in the "fast" path, as we want to be as
+ * fast as possible here (we will release slow locks immediately
+ * after in the "slow" part).
+ * ToDo: If we ran the fast part, the slow part could release locks
+ * _before_ fsync(), rather than after.
*/
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
- /* Now you can sweep! */
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
}
else {
/* Read-only transaction can be removed, immediately */
- xact->xd_end_time = ++db->db_xn_end_time;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
-
- /* Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
+ if (phase & XN_END_PHASE_FAST) {
+ xact->xd_end_time = ++db->db_xn_end_time;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
+ }
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
if (xt_xn_delete_xact(db, xn_id, thread)) {
if (db->db_xn_min_ram_id == xn_id)
@@ -1478,12 +1526,22 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
xtPublic xtBool xt_xn_commit(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_COMMIT);
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, thread->st_xact_writer, XN_END_PHASE_BOTH);
+}
+
+xtPublic xtBool xt_xn_commit_fast(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_FAST);
+}
+
+xtPublic xtBool xt_xn_commit_slow(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_SLOW);
}
xtPublic xtBool xt_xn_rollback(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_ABORT);
+ return xn_end_xact(thread, XT_LOG_ENT_ABORT, thread->st_xact_writer, XN_END_PHASE_BOTH);
}
xtPublic xtBool xt_xn_log_tab_id(XTThreadPtr self, xtTableID tab_id)
diff --git a/storage/pbxt/src/xaction_xt.h b/storage/pbxt/src/xaction_xt.h
index e679a0f38f0..cd350200506 100644
--- a/storage/pbxt/src/xaction_xt.h
+++ b/storage/pbxt/src/xaction_xt.h
@@ -193,6 +193,8 @@ void xt_wakeup_sweeper(struct XTDatabase *db);
xtBool xt_xn_begin(struct XTThread *self);
xtBool xt_xn_commit(struct XTThread *self);
+xtBool xt_xn_commit_fast(struct XTThread *self, xtBool writer);
+xtBool xt_xn_commit_slow(struct XTThread *self, xtBool writer);
xtBool xt_xn_rollback(struct XTThread *self);
xtBool xt_xn_log_tab_id(struct XTThread *self, xtTableID tab_id);
int xt_xn_status(struct XTOpenTable *ot, xtXactID xn_id, xtRecordID rec_id);
diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc
index 5a810e032b4..3d3c0da8496 100644
--- a/storage/sphinx/ha_sphinx.cc
+++ b/storage/sphinx/ha_sphinx.cc
@@ -2680,7 +2680,7 @@ int ha_sphinx::get_rec ( byte * buf, const byte *, uint )
if ( pCur < sBuf+sizeof(sBuf)-16 ) // 10 chars per 32bit value plus some safety bytes
{
sprintf ( pCur, "%u", uEntry );
- while ( *pCur ) *pCur++;
+ while ( *pCur ) pCur++;
if ( uValue>1 )
*pCur++ = ','; // non-trailing commas
}
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt
index 68b0e0cacc4..ecca0d05e63 100644
--- a/storage/xtradb/CMakeLists.txt
+++ b/storage/xtradb/CMakeLists.txt
@@ -190,6 +190,13 @@ ENDIF()
IF(MSVC)
ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS)
+
+ # Avoid "unreferenced label" (C4102) and "not enough macro parameters" (C4003) warnings in generated files
+ GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH)
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c
+ PROPERTIES COMPILE_FLAGS "/wd4102")
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c
+ PROPERTIES COMPILE_FLAGS "/wd4003")
ENDIF()
diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c
index c5bd84f84ad..0ec810430a4 100644
--- a/storage/xtradb/dict/dict0load.c
+++ b/storage/xtradb/dict/dict0load.c
@@ -43,7 +43,6 @@ Created 4/24/1996 Heikki Tuuri
#include "ha_prototypes.h" /* innobase_casedn_str() */
#include "trx0sys.h"
-
/** Following are six InnoDB system tables */
static const char* SYSTEM_TABLE_NAME[] = {
"SYS_TABLES",
@@ -54,6 +53,7 @@ static const char* SYSTEM_TABLE_NAME[] = {
"SYS_FOREIGN_COLS",
"SYS_STATS"
};
+
/****************************************************************//**
Compare the name of an index column.
@return TRUE if the i'th column of index is 'name'. */
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index e4208e8d0ea..0964f4925f3 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -61,6 +61,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <log_event.h> // rpl_get_position_info
#endif /* MYSQL_SERVER */
+#ifdef _WIN32
+#include <io.h>
+#endif
/** @file ha_innodb.cc */
/* Include necessary InnoDB headers */
@@ -113,8 +116,6 @@ extern ib_int64_t trx_sys_mysql_relay_log_pos;
/** to protect innobase_open_files */
static mysql_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static mysql_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static mysql_mutex_t commit_threads_m;
static mysql_cond_t commit_cond;
@@ -122,7 +123,7 @@ static mysql_mutex_t commit_cond_m;
static bool innodb_inited = 0;
C_MODE_START
-static int index_cond_func_innodb(void *arg);
+static xtradb_icp_result_t index_cond_func_innodb(void *arg);
C_MODE_END
@@ -249,7 +250,6 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
static mysql_pfs_key_t innobase_share_mutex_key;
-static mysql_pfs_key_t prepare_commit_mutex_key;
static mysql_pfs_key_t commit_threads_m_key;
static mysql_pfs_key_t commit_cond_mutex_key;
static mysql_pfs_key_t commit_cond_key;
@@ -257,8 +257,7 @@ static mysql_pfs_key_t commit_cond_key;
static PSI_mutex_info all_pthread_mutexes[] = {
{&commit_threads_m_key, "commit_threads_m", 0},
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&prepare_commit_mutex_key, "prepare_commit_mutex", 0}
+ {&innobase_share_mutex_key, "innobase_share_mutex", 0}
};
static PSI_cond_info all_innodb_conds[] = {
@@ -376,6 +375,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -632,6 +632,17 @@ bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
+/* Enable / disable checkpoints */
+static int innobase_checkpoint_state(handlerton *hton, bool disable)
+{
+ if (disable)
+ (void) log_disable_checkpoint();
+ else
+ log_enable_checkpoint();
+ return 0;
+}
+
+
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -1104,6 +1115,9 @@ convert_error_code_to_mysql(
case DB_RECORD_NOT_FOUND:
return(HA_ERR_NO_ACTIVE_RECORD);
+ case DB_SEARCH_ABORTED_BY_USER:
+ return(HA_ERR_ABORTED_BY_USER);
+
case DB_DEADLOCK:
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
@@ -1675,7 +1689,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1734,8 +1747,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1787,15 +1798,15 @@ trx_is_registered_for_2pc(
}
/*********************************************************************//**
-Note that a transaction owns the prepare_commit_mutex. */
+Note that innobase_commit_ordered() was run. */
static inline
void
-trx_owns_prepare_commit_mutex_set(
+trx_set_active_commit_ordered(
/*==============================*/
trx_t* trx) /* in: transaction */
{
ut_a(trx_is_registered_for_2pc(trx));
- trx->owns_prepare_mutex = 1;
+ trx->active_commit_ordered = 1;
}
/*********************************************************************//**
@@ -1807,7 +1818,7 @@ trx_register_for_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 1;
- ut_ad(trx->owns_prepare_mutex == 0);
+ ut_ad(trx->active_commit_ordered == 0);
}
/*********************************************************************//**
@@ -1819,19 +1830,18 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->owns_prepare_mutex = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
-Check whether atransaction owns the prepare_commit_mutex.
-@return true if transaction owns the prepare commit mutex */
+Check whether a transaction has active_commit_ordered set */
static inline
bool
-trx_has_prepare_commit_mutex(
+trx_is_active_commit_ordered(
/*=========================*/
const trx_t* trx) /* in: transaction */
{
- return(trx->owns_prepare_mutex == 1);
+ return(trx->active_commit_ordered == 1);
}
/*********************************************************************//**
@@ -1852,7 +1862,7 @@ UNIV_INTERN
ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg),
int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY |
+ HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
HA_CAN_INDEX_BLOBS |
HA_CAN_SQL_HANDLER |
HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
@@ -2386,12 +2396,14 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@@ -2958,8 +2970,6 @@ skip_overwrite:
mysql_mutex_init(innobase_share_mutex_key,
&innobase_share_mutex,
MY_MUTEX_INIT_FAST);
- mysql_mutex_init(prepare_commit_mutex_key,
- &prepare_commit_mutex, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_threads_m_key,
&commit_threads_m, MY_MUTEX_INIT_FAST);
mysql_mutex_init(commit_cond_mutex_key,
@@ -3010,7 +3020,6 @@ innobase_end(
srv_free_paths_and_sizes();
my_free(internal_innobase_data_file_path);
mysql_mutex_destroy(&innobase_share_mutex);
- mysql_mutex_destroy(&prepare_commit_mutex);
mysql_mutex_destroy(&commit_threads_m);
mysql_mutex_destroy(&commit_cond_m);
mysql_cond_destroy(&commit_cond);
@@ -3135,6 +3144,108 @@ innobase_start_trx_and_assign_read_view(
DBUG_RETURN(0);
}
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered_2"); /* own name, so traces differ from the caller's */
+
+ /* Fast part of the commit, run from innobase_commit_ordered() or
+ innobase_commit(). ibbackup needs the current binlog position; it is current
+ because commit_ordered is guaranteed to be called in binlog write order. */
+
+retry:
+ if (innobase_commit_concurrency > 0) { /* otherwise no throttling is done */
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--; /* over the limit: undo, wait, then try again */
+ mysql_cond_wait(&commit_cond,
+ &commit_cond_m);
+ mysql_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ mysql_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ mysql_cond_signal(&commit_cond); /* let one waiting committer retry */
+ mysql_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx_set_active_commit_ordered(trx);
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -3160,7 +3271,7 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch && !trx_is_active_commit_ordered(trx)) {
trx_search_latch_release_if_reserved(trx);
}
@@ -3178,68 +3289,18 @@ innobase_commit(
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- mysql_cond_wait(&commit_cond,
- &commit_cond_m);
- mysql_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- mysql_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads--;
- mysql_cond_signal(&commit_cond);
- mysql_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if (!trx_is_active_commit_ordered(trx)) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx_has_prepare_commit_mutex(trx)) {
-
- mysql_mutex_unlock(&prepare_commit_mutex);
- }
-
- trx_deregister_from_2pc(trx);
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered().
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
+ trx_deregister_from_2pc(trx);
} else {
/* We just mark the SQL statement ended and do not do a
transaction commit */
@@ -3615,12 +3676,15 @@ UNIV_INTERN
ulong
ha_innobase::index_flags(
/*=====================*/
- uint,
- uint,
- bool)
+ uint index,
+ uint part,
+ bool all_parts)
const
{
- return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ ulong extra_flag= 0;
+ if (table && index == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | extra_flag
| HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
@@ -4273,6 +4337,7 @@ retry:
of length ref_length! */
if (!row_table_got_default_clust_index(ib_table)) {
+
prebuilt->clust_index_was_generated = FALSE;
if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
@@ -4678,12 +4743,24 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_YEAR:
case MYSQL_TYPE_NEWDATE:
+ return(DATA_INT);
+
case MYSQL_TYPE_TIME:
+ case MYSQL_TYPE_DATETIME:
case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
+ /*
+ XtraDB should ideally just check field->key_type() and never
+ field->type(). The following check is here to only
+ change the new hires datetime/timestamp/time fields to
+ use DATA_FIXBINARY. We can't convert this function to
+ just test for field->key_type() as then the check if a
+ table is compatible will fail for old tables.
+ */
+ if (field->key_type() == HA_KEYTYPE_BINARY)
+ return(DATA_FIXBINARY);
+ return(DATA_INT);
case MYSQL_TYPE_FLOAT:
return(DATA_FLOAT);
case MYSQL_TYPE_DOUBLE:
@@ -4697,10 +4774,7 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_LONG_BLOB:
return(DATA_BLOB);
case MYSQL_TYPE_NULL:
- /* MySQL currently accepts "NULL" datatype, but will
- reject such datatype in the next release. We will cope
- with it and not trigger assertion failure in 5.1 */
- break;
+ return(DATA_FIXBINARY);
default:
ut_error;
}
@@ -9387,6 +9461,7 @@ ha_innobase::extra(
pushed_idx_cond= FALSE;
pushed_idx_cond_keyno= MAX_KEY;
prebuilt->idx_cond_func= NULL;
+ in_range_check_pushed_down= FALSE;
break;
case HA_EXTRA_NO_KEYREAD:
prebuilt->read_just_key = 0;
@@ -9437,6 +9512,7 @@ ha_innobase::reset()
/* Reset index condition pushdown state */
pushed_idx_cond_keyno= MAX_KEY;
pushed_idx_cond= NULL;
+ in_range_check_pushed_down= FALSE;
ds_mrr.dsmrr_close();
prebuilt->idx_cond_func= NULL;
@@ -10881,33 +10957,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
- && (all
- || !thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- mysql_mutex_lock(&prepare_commit_mutex);
- trx_owns_prepare_commit_mutex_set(trx);
- }
-
return(error);
}
@@ -12230,7 +12279,7 @@ static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
"except for the deletion.",
NULL, NULL, 0, &corrupt_table_action_typelib);
-static MYSQL_SYSVAR_ULONG(lazy_drop_table, srv_lazy_drop_table,
+static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
PLUGIN_VAR_RQCMDARG,
"At deleting tablespace, only miminum needed processes at the time are done. "
"e.g. for http://bugs.mysql.com/51325",
@@ -12563,12 +12612,13 @@ test_innobase_convert_name()
*/
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int ha_innobase::multi_range_read_next(char **range_info)
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
{
return ds_mrr.dsmrr_next(range_info);
}
@@ -12590,16 +12640,29 @@ ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
return res;
}
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
- uint keys, uint *bufsz,
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
return res;
}
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+
+/*
+ A helper function used only in index_cond_func_innodb
+*/
+bool ha_innobase::is_thd_killed()
+{
+ return thd_killed(user_thd);
+}
/**
* Index Condition Pushdown interface implementation
@@ -12612,15 +12675,18 @@ C_MODE_START
See note on ICP_RESULT for return values description.
*/
-static int index_cond_func_innodb(void *arg)
+static xtradb_icp_result_t index_cond_func_innodb(void *arg)
{
ha_innobase *h= (ha_innobase*)arg;
+ if (h->is_thd_killed())
+ return XTRADB_ICP_ABORTED_BY_USER;
+
if (h->end_range)
{
if (h->compare_key2(h->end_range) > 0)
- return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
}
- return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH;
+ return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH;
}
C_MODE_END
@@ -12628,7 +12694,7 @@ C_MODE_END
Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
{
- if ((keyno_arg != primary_key) && (prebuilt->select_lock_type == LOCK_NONE))
+ if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X)
{
pushed_idx_cond_keyno= keyno_arg;
pushed_idx_cond= idx_cond_arg;
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index d2da3df6422..f368d08f954 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -233,16 +233,21 @@ public:
*/
int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- int multi_range_read_next(char **range_info);
+ int multi_range_read_next(range_id_t *range_info);
ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param,
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
DsMrr_impl ds_mrr;
Item *idx_cond_push(uint keyno, Item* idx_cond);
+
+ /* A helper function for index_cond_func_innodb: */
+ bool is_thd_killed();
};
/* Some accessor functions which the InnoDB plugin needs, but which
@@ -257,16 +262,6 @@ extern "C" {
struct charset_info_st *thd_charset(MYSQL_THD thd);
LEX_STRING *thd_query_string(MYSQL_THD thd);
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -313,6 +308,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 6154ddb3cb4..4f19debdd0d 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -164,7 +164,7 @@ field_store_time_t(
my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
#endif
- return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME));
+ return(field->store_time(&my_time));
}
/*******************************************************************//**
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
index 74a2354bce3..18ff74a025e 100644
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@@ -120,7 +120,8 @@ enum db_err {
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
+ DB_END_OF_INDEX,
+ DB_SEARCH_ABORTED_BY_USER= 1533
};
#endif
diff --git a/storage/xtradb/include/fsp0types.h b/storage/xtradb/include/fsp0types.h
index 43e385b7eb0..6678dacb547 100644
--- a/storage/xtradb/include/fsp0types.h
+++ b/storage/xtradb/include/fsp0types.h
@@ -42,7 +42,7 @@ fseg_alloc_free_page) */
/* @} */
/** File space extent size (one megabyte) in pages */
-#define FSP_EXTENT_SIZE ((ulint)1 << (20 - UNIV_PAGE_SIZE_SHIFT))
+#define FSP_EXTENT_SIZE (1ULL << (20 - UNIV_PAGE_SIZE_SHIFT))
/** On a page of any file segment, data may be put starting from this
offset */
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index e0bffe6f725..281dd61a105 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -249,12 +249,15 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always); /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore);/*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoints are disabled */
+
/****************************************************************//**
Makes a checkpoint at a given lsn or later. */
UNIV_INTERN
@@ -272,6 +275,18 @@ log_make_checkpoint_at(
physical write will always be made to
log files */
/****************************************************************//**
+Disable checkpoints. This is used when doing a volume snapshot
+to ensure that we don't get a checkpoint between snapshotting two
+different volumes */
+UNIV_INTERN
+ibool log_disable_checkpoint();
+
+/****************************************************************//**
+Enable checkpoints that were disabled with log_disable_checkpoint() */
+UNIV_INTERN
+void log_enable_checkpoint();
+
+/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index b778adaa809..c476d14d51d 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -147,8 +147,8 @@ log. */
#define OS_FILE_LOG 256 /* This can be ORed to type */
/* @} */
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
- than 64 */
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 256 /*!< Windows might be able to handle
+more */
/** Modes for aio operations @{ */
#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index ff00f031e91..a75aea1d046 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -596,7 +596,16 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
-typedef int (*idx_cond_func_t)(void *param);
+
+typedef enum xtradb_icp_result {
+ XTRADB_ICP_ERROR=-1,
+ XTRADB_ICP_NO_MATCH=0,
+ XTRADB_ICP_MATCH=1,
+ XTRADB_ICP_OUT_OF_RANGE=2,
+ XTRADB_ICP_ABORTED_BY_USER=3,
+} xtradb_icp_result_t;
+
+typedef xtradb_icp_result_t (*idx_cond_func_t)(void *param);
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index ebd83043040..1c00e219cd3 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -57,9 +57,6 @@ extern const char srv_mysql50_table_name_prefix[9];
thread starts running */
extern os_event_t srv_lock_timeout_thread_event;
-/* This event is set to tell the purge thread to shut down */
-extern os_event_t srv_purge_thread_event;
-
/* The monitor thread waits on this event. */
extern os_event_t srv_monitor_event;
@@ -69,6 +66,9 @@ extern os_event_t srv_timeout_event;
/* The error monitor thread waits on this event. */
extern os_event_t srv_error_event;
+/* This event is set at shutdown to wakeup threads from sleep */
+extern os_event_t srv_shutdown_event;
+
/* If the last data file is auto-extended, we add this many pages to it
at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
@@ -239,7 +239,7 @@ extern ulong srv_ibuf_active_contract;
extern ulong srv_ibuf_accel_rate;
extern ulint srv_checkpoint_age_target;
extern ulint srv_flush_neighbor_pages;
-extern ulint srv_enable_unsafe_group_commit;
+extern ulint srv_deprecated_enable_unsafe_group_commit;
extern ulint srv_read_ahead;
extern ulint srv_adaptive_flushing_method;
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index 32a20807ba0..2418162aca1 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-#ifdef HAVE_WINDOWS_ATOMICS
+#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index ab7404c5eff..4d3ef8f3e57 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -490,7 +490,7 @@ struct trx_struct{
transaction has been registered with
the coordinator using the XA API, and
is set to 0 after commit or rollback. */
- unsigned owns_prepare_mutex:1;/* 1 if owns prepare mutex, if
+ unsigned active_commit_ordered:1;/* 1 if owns prepare mutex, if
this is set to 1 then registered should
also be set to 1. This is used in the
XA code */
@@ -829,6 +829,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 17cc21946a1..0287ea40509 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -57,7 +57,6 @@ Created 1/20/1994 Heikki Tuuri
#define PERCONA_INNODB_VERSION 20.1
#endif
-
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
calculated in make_version_string() in sql/sql_show.cc like this:
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index 80449e1c23a..d9676707cfd 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -110,6 +110,8 @@ archive */
UNIV_INTERN byte log_archive_io;
#endif /* UNIV_LOG_ARCHIVE */
+UNIV_INTERN ulint log_disable_checkpoint_active= 0;
+
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
@@ -187,7 +189,7 @@ log_fsp_current_free_limit_set_and_checkpoint(
success = FALSE;
while (!success) {
- success = log_checkpoint(TRUE, TRUE);
+ success = log_checkpoint(TRUE, TRUE, FALSE);
}
}
@@ -2007,12 +2009,14 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always) /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoints are disabled */
{
ib_uint64_t oldest_lsn;
@@ -2043,14 +2047,27 @@ log_checkpoint(
mutex_enter(&(log_sys->mutex));
+ /* Return if this is not a forced checkpoint and either there is no
+ need for a checkpoint or if checkpoints are disabled */
if (!write_always
- && log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ && (log_sys->last_checkpoint_lsn >= oldest_lsn ||
+ (safe_to_ignore && log_disable_checkpoint_active)))
+ {
mutex_exit(&(log_sys->mutex));
return(TRUE);
}
+ if (log_disable_checkpoint_active)
+ {
+ /* Wait until we are allowed to do a checkpoint */
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+
ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
@@ -2111,7 +2128,73 @@ log_make_checkpoint_at(
while (!log_preflush_pool_modified_pages(lsn, TRUE));
- while (!log_checkpoint(TRUE, write_always));
+ while (!log_checkpoint(TRUE, write_always, FALSE));
+}
+
+/****************************************************************//**
+Disable checkpoints. This is used when doing a volume snapshot
+to ensure that we don't get a checkpoint between snapshotting two
+different volumes */
+
+UNIV_INTERN
+ibool log_disable_checkpoint()
+{
+ mutex_enter(&(log_sys->mutex));
+
+ /*
+ Wait if a checkpoint write is running.
+ This is the same code that is used in log_checkpoint() to ensure
+ that two checkpoints are not happening at the same time.
+ */
+ while (log_sys->n_pending_checkpoint_writes > 0)
+ {
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+ /*
+ The following should never be true; it is here just in case of
+ wrong usage of this function. (Better safe than sorry).
+ */
+
+ if (log_disable_checkpoint_active)
+ {
+ mutex_exit(&(log_sys->mutex));
+ return 1; /* Already disabled */
+ }
+ /*
+ Take the checkpoint lock to ensure we will not get any checkpoints
+ running
+ */
+ rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ log_disable_checkpoint_active= 1;
+ mutex_exit(&(log_sys->mutex));
+ return 0;
+}
+
+
+/****************************************************************//**
+Enable checkpoints that were disabled with log_disable_checkpoint().
+This function is called by MariaDB, and only after an earlier call
+to log_disable_checkpoint().
+
+Note: We can't take the log_sys->mutex here, as a running log_checkpoint()
+that is waiting for log_sys->checkpoint_lock may already hold it.
+This is however safe to do without a mutex, as log_disable_checkpoint
+is protected by log_sys->checkpoint_lock.
+*/
+
+UNIV_INTERN
+void log_enable_checkpoint()
+{
+ ut_ad(log_disable_checkpoint_active);
+ /* Test variable, mostly to protect against wrong usage */
+ if (log_disable_checkpoint_active)
+ {
+ log_disable_checkpoint_active= 0;
+ rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ }
}
/****************************************************************//**
@@ -2208,7 +2291,7 @@ loop:
}
if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE);
+ log_checkpoint(checkpoint_sync, FALSE, FALSE);
if (checkpoint_sync) {
@@ -3121,11 +3204,16 @@ logs_empty_and_mark_files_at_shutdown(void)
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Starting shutdown...\n");
}
+
+ /* Enable checkpoints if someone had turned them off */
+ if (log_disable_checkpoint_active)
+ log_enable_checkpoint();
+
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
- os_event_set(srv_purge_thread_event);
+ os_event_set(srv_shutdown_event);
loop:
os_thread_sleep(100000);
diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c
index e0952a1ed0b..0c0b83e34b7 100644
--- a/storage/xtradb/log/log0recv.c
+++ b/storage/xtradb/log/log0recv.c
@@ -2980,9 +2980,10 @@ recv_recovery_from_checkpoint_start_func(
#endif /* UNIV_LOG_ARCHIVE */
byte* buf;
byte* log_hdr_buf;
- byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
+ byte *log_hdr_buf_base;
ulint err;
+ log_hdr_buf_base= alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
#ifdef UNIV_LOG_ARCHIVE
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 835210140f8..909c72fcdbb 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -62,6 +62,10 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif
+#ifdef _WIN32
+#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1
+#endif
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
@@ -161,6 +165,12 @@ typedef struct os_aio_slot_struct os_aio_slot_t;
/** The asynchronous i/o array slot structure */
struct os_aio_slot_struct{
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED control; /*!< Windows control block for the
+ aio request, MUST be first element in the structure*/
+ void *arr; /*!< Array this slot belongs to*/
+#endif
+
ibool is_read; /*!< TRUE if a read operation */
ulint pos; /*!< index of the slot in the aio
array */
@@ -186,12 +196,7 @@ struct os_aio_slot_struct{
and which can be used to identify
which pending aio operation was
completed */
-#ifdef WIN_ASYNC_IO
- HANDLE handle; /*!< handle object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /*!< Windows control block for the
- aio request */
-#elif defined(LINUX_NATIVE_AIO)
+#ifdef LINUX_NATIVE_AIO
struct iocb control; /* Linux control block for aio */
int n_bytes; /* bytes written/read. */
int ret; /* AIO return code */
@@ -228,15 +233,6 @@ struct os_aio_array_struct{
/*!< Number of reserved slots in the
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
-#ifdef __WIN__
- HANDLE* handles;
- /*!< Pointer to an array of OS native
- event handles where we copied the
- handles from slots, in the same
- order. This can be used in
- WaitForMultipleObjects; used only in
- Windows */
-#endif
#if defined(LINUX_NATIVE_AIO)
io_context_t* aio_ctx;
@@ -335,6 +331,13 @@ os_aio_validate_skip(void)
}
#endif /* UNIV_DEBUG */
+#ifdef _WIN32
+/** IO completion port used by background io threads */
+static HANDLE completion_port;
+/** Thread local storage index for the per-thread event used for synchronous IO */
+static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
+#endif
+
#ifdef __WIN__
/***********************************************************************//**
Gets the operating system version. Currently works only on Windows.
@@ -376,6 +379,86 @@ os_get_os_version(void)
}
#endif /* __WIN__ */
+
+#ifdef _WIN32
+/*
+Windows : Handling synchronous IO on files opened asynchronously.
+
+If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to
+a completion port, then every IO on this file would normally be enqueued to the
+completion port. Sometimes however we would like to do a synchronous IO. This is
+possible if we initialize overlapped.hEvent with a valid event and set its
+lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info)
+
+We'll create this special event once for each thread and store in thread local
+storage.
+*/
+
+
+/***********************************************************************//**
+Initialize the TLS index for the event handle used for synchronous IO on files that
+might be opened with FILE_FLAG_OVERLAPPED.
+*/
+static void win_init_syncio_event()
+{
+ tls_sync_io = TlsAlloc();
+ ut_a(tls_sync_io != TLS_OUT_OF_INDEXES);
+}
+
+/***********************************************************************//**
+Retrieve per-thread event for doing synchronous io on asynchronously opened files
+*/
+static HANDLE win_get_syncio_event()
+{
+ HANDLE h;
+ if(tls_sync_io == TLS_OUT_OF_INDEXES){
+ win_init_syncio_event();
+ }
+
+ h = (HANDLE)TlsGetValue(tls_sync_io);
+ if (h)
+ return h;
+ h = CreateEventA(NULL, FALSE, FALSE, NULL);
+ ut_a(h);
+ h = (HANDLE)((uintptr_t)h | 1);
+ TlsSetValue(tls_sync_io, h);
+ return h;
+}
+
+/*
+ TLS destructor, inspired by Chromium code
+ http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc
+*/
+
+static void win_free_syncio_event()
+{
+ HANDLE h = win_get_syncio_event();
+ if (h) {
+ CloseHandle(h);
+ }
+}
+
+static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) {
+ if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason)
+ win_free_syncio_event();
+}
+
+#ifdef _WIN64
+#pragma comment(linker, "/INCLUDE:_tls_used")
+#pragma comment(linker, "/INCLUDE:p_thread_callback_base")
+#pragma const_seg(".CRT$XLB")
+extern const PIMAGE_TLS_CALLBACK p_thread_callback_base;
+const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit;
+#pragma data_seg()
+#else
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback_base")
+#pragma data_seg(".CRT$XLB")
+PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit;
+#pragma data_seg()
+#endif
+#endif /*_WIN32 */
+
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
@@ -716,6 +799,9 @@ os_io_init_simple(void)
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create();
}
+#ifdef _WIN32
+ win_init_syncio_event();
+#endif
}
/***********************************************************************//**
@@ -1461,6 +1547,16 @@ try_again:
ut_error;
}
+ if (type == OS_LOG_FILE) {
+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
+ /* Map O_DSYNC to WRITE_THROUGH */
+ attributes |= FILE_FLAG_WRITE_THROUGH;
+ } else if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
+ /* Open log file without buffering */
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
+ }
+
file = CreateFile((LPCTSTR) name,
GENERIC_READ | GENERIC_WRITE, /* read and write
access */
@@ -1505,6 +1601,9 @@ try_again:
}
} else {
*success = TRUE;
+ if (os_aio_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) {
+ ut_a(CreateIoCompletionPort(file, completion_port, 0, 0));
+ }
}
return(file);
@@ -2514,13 +2613,9 @@ os_file_read_func(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
+
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2535,41 +2630,21 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+ overlapped.hEvent = win_get_syncio_event();
+ ret = ReadFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
}
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ }
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
@@ -2597,9 +2672,6 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
@@ -2643,13 +2715,11 @@ os_file_read_no_error_handling_func(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2664,41 +2734,21 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
+ overlapped.hEvent = win_get_syncio_event();
+ ret = ReadFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
}
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
+ }
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
@@ -2720,9 +2770,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
@@ -2777,14 +2824,9 @@ os_file_write_func(
#ifdef __WIN__
BOOL ret;
DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
ulint n_retries = 0;
ulint err;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
+ OVERLAPPED overlapped;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
@@ -2797,64 +2839,30 @@ os_file_write_func(
ut_ad(buf);
ut_ad(n > 0);
retry:
- low = (DWORD) offset;
- high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: File pointer positioning to"
- " file %s failed at\n"
- "InnoDB: offset %lu %lu. Operating system"
- " error number %lu.\n"
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN "operating-system-error-codes.html\n",
- name, (ulong) offset_high, (ulong) offset,
- (ulong) GetLastError());
+ memset (&overlapped, 0, sizeof (overlapped));
+ overlapped.Offset = (DWORD)offset;
+ overlapped.OffsetHigh = (DWORD)offset_high;
- return(FALSE);
+ overlapped.hEvent = win_get_syncio_event();
+ ret = WriteFile(file, buf, n, NULL, &overlapped);
+ if (ret) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
+ }
+ else if(GetLastError() == ERROR_IO_PENDING) {
+ ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
}
-
- ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
-
- /* Always do fsync to reduce the probability that when the OS crashes,
- a database page is only partially physically written to disk. */
# ifdef UNIV_DO_FLUSH
if (!os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(file, TRUE));
+ ut_a(TRUE == os_file_flush(file));
}
# endif /* UNIV_DO_FLUSH */
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
@@ -3330,9 +3338,7 @@ os_aio_array_create(
os_aio_array_t* array;
ulint i;
os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- OVERLAPPED* over;
-#elif defined(LINUX_NATIVE_AIO)
+#ifdef LINUX_NATIVE_AIO
struct io_event* io_event = NULL;
#endif
ut_a(n > 0);
@@ -3351,9 +3357,6 @@ os_aio_array_create(
array->n_reserved = 0;
array->cur_seg = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
-#ifdef __WIN__
- array->handles = ut_malloc(n * sizeof(HANDLE));
-#endif
#if defined(LINUX_NATIVE_AIO)
array->aio_ctx = NULL;
@@ -3392,19 +3395,9 @@ skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
-
slot->pos = i;
slot->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
- slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
-
- over = &(slot->control);
-
- over->hEvent = slot->handle;
-
- *((array->handles) + i) = over->hEvent;
-
-#elif defined(LINUX_NATIVE_AIO)
+#ifdef LINUX_NATIVE_AIO
memset(&slot->control, 0x0, sizeof(slot->control));
slot->n_bytes = 0;
@@ -3423,18 +3416,6 @@ os_aio_array_free(
/*==============*/
os_aio_array_t* array) /*!< in, own: array to free */
{
-#ifdef WIN_ASYNC_IO
- ulint i;
-
- for (i = 0; i < array->n_slots; i++) {
- os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
- CloseHandle(slot->handle);
- }
-#endif /* WIN_ASYNC_IO */
-
-#ifdef __WIN__
- ut_free(array->handles);
-#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
os_event_free(array->is_empty);
@@ -3536,11 +3517,16 @@ os_aio_init(
os_last_printout = time(NULL);
+#ifdef _WIN32
+ ut_a(completion_port == 0);
+ completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+ ut_a(completion_port);
+#endif
+
return(TRUE);
err_exit:
return(FALSE);
-
}
/***********************************************************************
@@ -3582,11 +3568,9 @@ os_aio_array_wake_win_aio_at_shutdown(
/*==================================*/
os_aio_array_t* array) /*!< in: aio array */
{
- ulint i;
-
- for (i = 0; i < array->n_slots; i++) {
-
- SetEvent((array->slots + i)->handle);
+ if(completion_port)
+ {
+ PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
}
}
#endif
@@ -3831,7 +3815,8 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
- ResetEvent(slot->handle);
+ control->hEvent = 0;
+ slot->arr = array;
#elif defined(LINUX_NATIVE_AIO)
@@ -3901,11 +3886,7 @@ os_aio_array_free_slot(
os_event_set(array->is_empty);
}
-#ifdef WIN_ASYNC_IO
-
- ResetEvent(slot->handle);
-
-#elif defined(LINUX_NATIVE_AIO)
+#ifdef LINUX_NATIVE_AIO
if (srv_use_native_aio) {
memset(&slot->control, 0x0, sizeof(slot->control));
@@ -4119,13 +4100,9 @@ os_aio_func(
os_aio_array_t* array;
os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
- ibool retval;
- BOOL ret = TRUE;
DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
-#endif /* WIN_ASYNC_IO */
+ BOOL ret;
+#endif
ibool retry;
ulint wake_later;
@@ -4142,33 +4119,23 @@ os_aio_func(
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !srv_use_native_aio
-#endif /* WIN_ASYNC_IO */
- ) {
+ if (mode == OS_AIO_SYNC)
+ {
+ ibool ret;
/* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case.
- Also note that the Performance Schema instrumentation has
- been performed by current os_aio_func()'s wrapper function
- pfs_os_aio_func(). So we would no longer need to call
- Performance Schema instrumented os_file_read() and
- os_file_write(). Instead, we should use os_file_read_func()
- and os_file_write_func() */
+ no need to use an i/o-handler thread */
if (type == OS_FILE_READ) {
- return(os_file_read_trx(file, buf, offset,
- offset_high, n, trx));
+ ret = os_file_read_func(file, buf, offset,
+ offset_high, n, trx);
}
+ else {
+ ut_a(type == OS_FILE_WRITE);
- ut_a(type == OS_FILE_WRITE);
-
- return(os_file_write_func(name, file, buf, offset,
- offset_high, n));
+ ret = os_file_write(name, file, buf, offset, offset_high, n);
+ }
+ ut_a(ret);
+ return ret;
}
try_again:
@@ -4217,6 +4184,8 @@ try_again:
#ifdef WIN_ASYNC_IO
ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control));
+ if(!ret && GetLastError() != ERROR_IO_PENDING)
+ goto err_exit;
#elif defined(LINUX_NATIVE_AIO)
if (!os_aio_linux_dispatch(array, slot)) {
@@ -4237,6 +4206,8 @@ try_again:
ret = WriteFile(file, buf, (DWORD)n, &len,
&(slot->control));
+ if(!ret && GetLastError() != ERROR_IO_PENDING)
+ goto err_exit;
#elif defined(LINUX_NATIVE_AIO)
if (!os_aio_linux_dispatch(array, slot)) {
goto err_exit;
@@ -4253,33 +4224,6 @@ try_again:
ut_error;
}
-#ifdef WIN_ASYNC_IO
- if (srv_use_native_aio) {
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
-
- return(retval);
- }
-
- return(TRUE);
- }
-
- goto err_exit;
- }
-#endif /* WIN_ASYNC_IO */
/* aio was queued successfully! */
return(TRUE);
@@ -4332,42 +4276,15 @@ os_aio_windows_handle(
ulint* space_id)
{
ulint orig_seg = segment;
- os_aio_array_t* array;
os_aio_slot_t* slot;
- ulint n;
- ulint i;
ibool ret_val;
BOOL ret;
DWORD len;
BOOL retry = FALSE;
+ ULONG_PTR key;
- if (segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
- segment = 0;
- } else {
- segment = os_aio_get_array_and_local_segment(&array, segment);
- }
-
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- ut_ad(os_aio_validate_skip());
- ut_ad(segment < array->n_segments);
-
- n = array->n_slots / array->n_segments;
-
- if (array == os_aio_sync_array) {
- WaitForSingleObject(
- os_aio_array_get_nth_slot(array, pos)->handle,
- INFINITE);
- i = pos;
- } else {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
- i = WaitForMultipleObjects((DWORD) n,
- array->handles + segment * n,
- FALSE,
- INFINITE);
- }
+ ret = GetQueuedCompletionStatus(completion_port, &len, &key,
+ (OVERLAPPED **)&slot, INFINITE);
if (srv_recovery_stats && recv_recovery_is_on() && n_consecutive) {
mutex_enter(&(recv_sys->mutex));
@@ -4381,29 +4298,16 @@ os_aio_windows_handle(
mutex_exit(&(recv_sys->mutex));
}
- os_mutex_enter(array->mutex);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
- && array->n_reserved == 0) {
- *message1 = NULL;
- *message2 = NULL;
- os_mutex_exit(array->mutex);
- return(TRUE);
+ /* If shutdown key was received, repost the shutdown message and exit */
+ if (ret && (key == IOCP_SHUTDOWN_KEY)) {
+ PostQueuedCompletionStatus(completion_port, 0, key, NULL);
+ os_thread_exit(NULL);
}
- ut_a(i >= WAIT_OBJECT_0 && i <= WAIT_OBJECT_0 + n);
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- ut_a(slot->reserved);
-
- if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg,
- "get windows aio return value");
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ os_thread_exit(NULL);
}
- ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
-
*message1 = slot->message1;
*message2 = slot->message2;
@@ -4429,8 +4333,6 @@ os_aio_windows_handle(
ret_val = FALSE;
}
- os_mutex_exit(array->mutex);
-
if (retry) {
/* retry failed read/write operation synchronously.
No need to hold array->mutex. */
@@ -4451,16 +4353,12 @@ os_aio_windows_handle(
switch (slot->type) {
case OS_FILE_WRITE:
- ret = WriteFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
-
+ ret_val = os_file_write(slot->name, slot->file, slot->buf,
+ slot->control.Offset, slot->control.OffsetHigh, slot->len);
break;
case OS_FILE_READ:
- ret = ReadFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
-
+ ret_val = os_file_read(slot->file, slot->buf,
+ slot->control.Offset, slot->control.OffsetHigh, slot->len);
break;
default:
ut_error;
@@ -4485,7 +4383,7 @@ os_aio_windows_handle(
ret_val = ret && len == slot->len;
}
- os_aio_array_free_slot(array, slot);
+ os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot);
return(ret_val);
}
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index fe8f33b7b4d..1d91b1f4b53 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -3233,7 +3233,8 @@ row_sel_push_cache_row_for_mysql(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
prebuilt,
- rec, rec_clust,
+ rec,
+ rec_clust,
offsets,
start_field_no,
prebuilt->n_template))) {
@@ -3352,7 +3353,8 @@ and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, DB_SEARCH_ABORTED_BY_USER or
+DB_TOO_BIG_RECORD */
UNIV_INTERN
ulint
row_search_for_mysql(
@@ -3459,6 +3461,12 @@ row_search_for_mysql(
ut_error;
}
+ /* Initialize the null bytes with default values: they might be
+ left uninitialized in some cases, and those uninitialized bytes
+ could then be copied into the MySQL record buffer, which leads
+ to valgrind warnings */
+ memcpy(buf, prebuilt->default_rec, prebuilt->null_bitmap_len);
+
#if 0
/* August 19, 2005 by Heikki: temporarily disable this error
print until the cursor lock count is done correctly.
@@ -3708,7 +3716,8 @@ row_search_for_mysql(
ut_ad(!rec_get_deleted_flag(rec, comp));
if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, FALSE, offsets, 0,
+ rec, FALSE,
+ offsets, 0,
prebuilt->n_template)) {
/* Only fresh inserts may contain
incomplete externally stored
@@ -4447,7 +4456,7 @@ idx_cond_check:
ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE,
- offsets, 0, prebuilt->n_index_fields);
+ offsets, 0, prebuilt->n_index_fields);
/*
The above call will fail and return FALSE when requested to
store an "externally stored column" (afaiu, a blob). Index
@@ -4456,12 +4465,15 @@ idx_cond_check:
*/
ut_ad(ib_res);
res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
- if (res == 0)
+ if (res == XTRADB_ICP_NO_MATCH)
goto next_rec;
- if (res == 2) {
- err = DB_RECORD_NOT_FOUND;
+ else if (res != XTRADB_ICP_MATCH) {
+ err= (res == XTRADB_ICP_ABORTED_BY_USER ?
+ DB_SEARCH_ABORTED_BY_USER :
+ DB_RECORD_NOT_FOUND);
goto idx_cond_failed;
}
+ /* res == XTRADB_ICP_MATCH */
}
/* Get the clustered index record if needed, if we did not do the
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index 01fb44f42de..2c7f3056329 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -1283,7 +1283,7 @@ row_upd_changes_ord_field_binary_func(
const upd_field_t* upd_field;
const dfield_t* dfield;
dfield_t dfield_ext;
- ulint dfield_len;
+ ulint dfield_len= 0;
const byte* buf;
ind_field = dict_index_get_nth_field(index, i);
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index f6482813c2a..d82e1cda416 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+#ifdef __WIN__
+/* Windows native condition variables. We use runtime loading / function
+pointers, because they are not available on Windows Server 2003 and
+Windows XP/2000.
+
+We use condition variables for events on Windows if possible, even though
+os_event resembles the Windows kernel event object well API-wise. The reason
+is performance: kernel objects are heavyweight, and WaitForSingleObject() is a
+performance killer that causes the calling thread to context switch. Besides,
+InnoDB preallocates a large number (often millions) of os_events. With kernel
+event objects this takes a big chunk out of the non-paged pool, which is better
+suited for tasks like IO than for storing idle event objects. */
+UNIV_INTERN ibool srv_use_native_conditions = FALSE;
+#endif /* __WIN__ */
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -433,7 +447,7 @@ UNIV_INTERN ulong srv_ibuf_accel_rate = 100;
UNIV_INTERN ulint srv_checkpoint_age_target = 0;
UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
-UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+UNIV_INTERN ulint srv_deprecated_enable_unsafe_group_commit = 0;
UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
@@ -751,7 +765,7 @@ UNIV_INTERN os_event_t srv_error_event;
UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
-UNIV_INTERN os_event_t srv_purge_thread_event;
+UNIV_INTERN os_event_t srv_shutdown_event;
UNIV_INTERN srv_sys_t* srv_sys = NULL;
@@ -1090,7 +1104,7 @@ srv_init(void)
srv_monitor_event = os_event_create(NULL);
srv_lock_timeout_thread_event = os_event_create(NULL);
- srv_purge_thread_event = os_event_create(NULL);
+ srv_shutdown_event = os_event_create(NULL);
for (i = 0; i < SRV_MASTER + 1; i++) {
srv_n_threads_active[i] = 0;
@@ -1208,7 +1222,7 @@ retry:
enter_innodb_with_tickets(trx);
return;
}
- os_atomic_increment_lint(&srv_conc_n_threads, -1);
+ (void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
}
if (!has_yielded)
{
@@ -1238,7 +1252,7 @@ retry:
static void
srv_conc_exit_innodb_timer_based(trx_t* trx)
{
- os_atomic_increment_lint(&srv_conc_n_threads, -1);
+ (void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
trx->declared_to_be_inside_innodb = FALSE;
trx->n_tickets_to_enter_innodb = 0;
return;
@@ -1460,7 +1474,7 @@ srv_conc_force_enter_innodb(
ut_ad(srv_conc_n_threads >= 0);
#ifdef HAVE_ATOMIC_BUILTINS
if (srv_thread_concurrency_timer_based) {
- os_atomic_increment_lint(&srv_conc_n_threads, 1);
+ (void) os_atomic_increment_lint(&srv_conc_n_threads, 1);
trx->declared_to_be_inside_innodb = TRUE;
trx->n_tickets_to_enter_innodb = 1;
return;
@@ -3110,6 +3124,7 @@ srv_master_thread(
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
+ memset(&prev_flush_info, 0, sizeof(prev_flush_info));
mutex_enter(&kernel_mutex);
slot = srv_table_reserve_slot(SRV_MASTER);
@@ -3131,6 +3146,8 @@ loop:
buf_get_total_stat(&buf_stat);
n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
+ buf_stat.n_pages_written;
+ n_pages_flushed= 0;
+
mutex_enter(&kernel_mutex);
/* Store the user activity counter at the start of this loop */
@@ -3441,8 +3458,8 @@ retry_flush_batch:
blocks_sum += blocks_num;
}
- n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
- if (flushed_blocks_sum > n_pages_flushed_prev) {
+ n_flush = (lint) (blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async);
+ if ((ulint) flushed_blocks_sum > n_pages_flushed_prev) {
n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
}
@@ -3575,7 +3592,7 @@ retry_flush_batch:
/* Make a new checkpoint about once in 10 seconds */
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
srv_main_thread_op_info = "reserving kernel mutex";
@@ -3684,7 +3701,7 @@ flush_loop:
srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
@@ -3857,9 +3874,9 @@ srv_purge_thread(
srv_sync_log_buffer_in_background();
cur_time = ut_time_ms();
- os_event_reset(srv_purge_thread_event);
+ os_event_reset(srv_shutdown_event);
if (next_itr_time > cur_time) {
- os_event_wait_time(srv_purge_thread_event,
+ os_event_wait_time(srv_shutdown_event,
ut_min(1000000,
(next_itr_time - cur_time)
* 1000));
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index 71364cb5ded..4df3b158019 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -1304,6 +1304,7 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
+ srv_use_native_conditions = FALSE;
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
@@ -1314,9 +1315,10 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN2000:
case OS_WINXP:
- /* On 2000 and XP, async IO is available. */
+ /* On 2000 and XP, async IO is available, but no condition variables. */
srv_use_native_aio = TRUE;
- break;
+ srv_use_native_conditions = FALSE;
+ break;
default:
/* Vista and later have both async IO and condition variables */
@@ -1346,7 +1348,6 @@ innobase_start_or_create_for_mysql(void)
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
@@ -1364,7 +1365,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#else
+#ifdef _WIN32
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
srv_use_native_aio = FALSE;
@@ -1376,6 +1377,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+ os_aio_use_native_aio = TRUE;
#endif
} else {
ut_print_timestamp(stderr);
diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c
index 548f383742f..e0663ca5f87 100644
--- a/storage/xtradb/trx/trx0sys.c
+++ b/storage/xtradb/trx/trx0sys.c
@@ -1709,7 +1709,7 @@ trx_sys_print_mysql_binlog_offset_from_page(
/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
- (This code duplicaton should be fixed at some point!)
+ (This code duplication should be fixed at some point!)
*/
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 2145261c487..f816160df58 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -110,7 +110,7 @@ trx_create(
trx->conc_state = TRX_NOT_STARTED;
trx->is_registered = 0;
- trx->owns_prepare_mutex = 0;
+ trx->active_commit_ordered = 0;
trx->start_time = ut_time();
diff --git a/storage/xtradb/ut/ut0ut.c b/storage/xtradb/ut/ut0ut.c
index a9c0d381e16..c14be50c41c 100644
--- a/storage/xtradb/ut/ut0ut.c
+++ b/storage/xtradb/ut/ut0ut.c
@@ -546,7 +546,7 @@ ut_print_namel(
trx ? trx->mysql_thd : NULL,
table_id);
- fwrite(buf, 1, bufend - buf, f);
+ (void) fwrite(buf, 1, bufend - buf, f);
}
/**********************************************************************//**
@@ -567,7 +567,7 @@ ut_copy_file(
? (size_t) len
: sizeof buf;
size_t size = fread(buf, 1, maxs, src);
- fwrite(buf, 1, size, dest);
+ (void) fwrite(buf, 1, size, dest);
len -= (long) size;
if (size < maxs) {
break;
@@ -716,6 +716,8 @@ ut_strerr(
return("No index on referenced keys in referenced table");
case DB_END_OF_INDEX:
return("End of index");
+ case DB_SEARCH_ABORTED_BY_USER:
+ return("Operation was interrupted by end user");
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */
}