diff options
Diffstat (limited to 'storage/maria')
82 files changed, 3379 insertions, 1760 deletions
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt index 545c8cb9318..3fbddaf4580 100644 --- a/storage/maria/CMakeLists.txt +++ b/storage/maria/CMakeLists.txt @@ -35,14 +35,14 @@ SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c ha_maria.cc trnman.c lockman.c ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c ma_sp_key.c ma_control_file.c ma_loghandler.c - ma_pagecache.c ma_pagecaches.c compat_aliases.cc compat_aliases.h + ma_pagecache.c ma_pagecaches.c ma_checkpoint.c ma_recovery.c ma_commit.c ma_pagecrc.c ha_maria.h maria_def.h ma_recovery_util.c ma_servicethread.c + ma_norec.c ) MYSQL_ADD_PLUGIN(aria ${ARIA_SOURCES} STORAGE_ENGINE - MANDATORY RECOMPILE_FOR_EMBEDDED) TARGET_LINK_LIBRARIES(aria myisam) @@ -56,7 +56,7 @@ TARGET_LINK_LIBRARIES(aria_chk aria) MYSQL_ADD_EXECUTABLE(aria_read_log maria_read_log.c) TARGET_LINK_LIBRARIES(aria_read_log aria) -MYSQL_ADD_EXECUTABLE(aria_dump_log ma_loghandler.c unittest/ma_loghandler_examples.c) +MYSQL_ADD_EXECUTABLE(aria_dump_log maria_dump_log.c unittest/ma_loghandler_examples.c) TARGET_LINK_LIBRARIES(aria_dump_log aria) SET_TARGET_PROPERTIES(aria_dump_log PROPERTIES COMPILE_FLAGS "-DMARIA_DUMP_LOG") diff --git a/storage/maria/compat_aliases.cc b/storage/maria/compat_aliases.cc deleted file mode 100644 index 2d3c67d69a7..00000000000 --- a/storage/maria/compat_aliases.cc +++ /dev/null @@ -1,245 +0,0 @@ -/* Copyright (C) 2010 Monty Program Ab - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - compatibility aliases for system and static variables -*/ -#include <my_global.h> -#include <maria.h> -#include <mysql/plugin.h> -#include "ma_loghandler.h" -#include "compat_aliases.h" - -ulong block_size_alias; -static MYSQL_SYSVAR_ULONG(block_size, block_size_alias, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Deprecated, use --aria-block-size instead", 0, 0, - MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, - MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH); - -ulong checkpoint_interval_alias; -static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-checkpoint-interval instead", - NULL, NULL, 30, 0, UINT_MAX, 1); - -ulong force_start_after_recovery_failures_alias; -static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures, force_start_after_recovery_failures_alias, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Deprecated, use --aria-force-start-after-recovery-failures instead", - NULL, NULL, 0, 0, UINT_MAX8, 1); - -my_bool page_checksum_alias; -static MYSQL_SYSVAR_BOOL(page_checksum, page_checksum_alias, 0, - "Deprecated, use --aria-page-checksum instead", 0, 0, 1); - -char *log_dir_path_alias; -static MYSQL_SYSVAR_STR(log_dir_path, log_dir_path_alias, - PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Deprecated, use --aria-log-dir-path instead", - NULL, NULL, mysql_real_data_home); - -ulong log_file_size_alias; -static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-log-file-size instead", - NULL, NULL, TRANSLOG_FILE_SIZE, - TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE); - -ulong group_commit_alias; -static MYSQL_SYSVAR_ENUM(group_commit, group_commit_alias, - 
PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-group-commit instead", - NULL, NULL, - TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib); - -ulong group_commit_interval_alias; -static MYSQL_SYSVAR_ULONG(group_commit_interval, group_commit_interval_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-group-commit-interval instead", - NULL, NULL, 0, 0, UINT_MAX, 1); - -ulong log_purge_type_alias; -static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-log-purge-type instead", - NULL, NULL, TRANSLOG_PURGE_IMMIDIATE, - &maria_translog_purge_type_typelib); - -ulonglong max_sort_file_size_alias; -static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, max_sort_file_size_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-max-temp-length instead", - 0, 0, MAX_FILE_SIZE, 0, MAX_FILE_SIZE, 1024*1024); - -ulong pagecache_age_threshold_alias; -static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-pagecache-age-threshold instead", - 0, 0, 300, 100, ~0L, 100); - -ulonglong pagecache_buffer_size_alias; -static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size_alias, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Deprecated, use --aria-pagecache-buffer-size instead", - 0, 0, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~0UL, IO_SIZE); - -ulong pagecache_division_limit_alias; -static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-pagecache-division-limit instead", - 0, 0, 100, 1, 100, 1); - -ulong recover_alias; -static MYSQL_SYSVAR_ENUM(recover, recover_alias, PLUGIN_VAR_OPCMDARG, - "Deprecated, use --aria-recover instead", - NULL, NULL, HA_RECOVER_DEFAULT, &maria_recover_typelib); - -ulong repair_threads_alias; -static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-repair-threads instead", - 0, 0, 1, 1, ~0L, 1); - -ulong 
sort_buffer_size_alias; -static MYSQL_THDVAR_ULONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-sort-buffer-size instead", - 0, 0, 128L*1024L*1024L, 4, ~0L, 1); - -ulong stats_method_alias; -static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-stats-method instead", - 0, 0, 0, &maria_stats_method_typelib); - -ulong sync_log_dir_alias; -static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir_alias, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use --aria-sync-log-dir instead", - NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE, - &maria_sync_log_dir_typelib); - -my_bool used_for_temp_tables_alias= 1; -static MYSQL_SYSVAR_BOOL(used_for_temp_tables, - used_for_temp_tables_alias, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT, - NULL, 0, 0, 1); - -static struct st_mysql_show_var status_variables_aliases[]= { - {"Maria", (char*) &status_variables, SHOW_ARRAY}, - {NullS, NullS, SHOW_LONG} -}; - -/* - There is one problem with aliases for command-line options. - Plugin initialization works like this - - for all plugins: - prepare command-line options - initialize command-line option variables to the default values - parse command line, assign values as necessary - - for all plugins: - call the plugin initialization function - - it means, we cannot have maria* and aria* command-line options to use - the same underlying variables - because after assigning maria* values, - MySQL will put there default values again preparing for parsing aria* - values. So, maria* values will be lost. - - So, we create separate set of variables for maria* options, - and take both values into account in ha_maria_init(). - - When the command line was parsed, we patch maria* options - to use the same variables as aria* options so that - set @@maria_some_var would have the same value as @@aria_some_var - without forcing us to copy the values around all the time. 
-*/ - -static struct st_mysql_sys_var* system_variables_aliases[]= { - MYSQL_SYSVAR(block_size), - MYSQL_SYSVAR(checkpoint_interval), - MYSQL_SYSVAR(force_start_after_recovery_failures), - MYSQL_SYSVAR(group_commit), - MYSQL_SYSVAR(group_commit_interval), - MYSQL_SYSVAR(log_dir_path), - MYSQL_SYSVAR(log_file_size), - MYSQL_SYSVAR(log_purge_type), - MYSQL_SYSVAR(max_sort_file_size), - MYSQL_SYSVAR(page_checksum), - MYSQL_SYSVAR(pagecache_age_threshold), - MYSQL_SYSVAR(pagecache_buffer_size), - MYSQL_SYSVAR(pagecache_division_limit), - MYSQL_SYSVAR(recover), - MYSQL_SYSVAR(repair_threads), - MYSQL_SYSVAR(sort_buffer_size), - MYSQL_SYSVAR(stats_method), - MYSQL_SYSVAR(sync_log_dir), - MYSQL_SYSVAR(used_for_temp_tables), - NULL -}; - -#define COPY_SYSVAR(name) \ - memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \ - sizeof(MYSQL_SYSVAR_NAME(name))); \ - if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \ - *MYSQL_SYSVAR_NAME(name).value == MYSQL_SYSVAR_NAME(name).def_val) \ - *MYSQL_SYSVAR_NAME(name).value= name ## _alias; - -#define COPY_THDVAR(name) \ - name ## _alias= THDVAR(0, name); \ - memcpy(&MYSQL_SYSVAR_NAME(name), system_variables[i++], \ - sizeof(MYSQL_SYSVAR_NAME(name))); \ - if (name ## _alias != MYSQL_SYSVAR_NAME(name).def_val && \ - THDVAR(0, name) == MYSQL_SYSVAR_NAME(name).def_val) \ - THDVAR(0, name)= name ## _alias; - -void copy_variable_aliases() -{ - int i= 0; - COPY_SYSVAR(block_size); - COPY_SYSVAR(checkpoint_interval); - COPY_SYSVAR(force_start_after_recovery_failures); - COPY_SYSVAR(group_commit); - COPY_SYSVAR(group_commit_interval); - COPY_SYSVAR(log_dir_path); - COPY_SYSVAR(log_file_size); - COPY_SYSVAR(log_purge_type); - COPY_SYSVAR(max_sort_file_size); - COPY_SYSVAR(page_checksum); - COPY_SYSVAR(pagecache_age_threshold); - COPY_SYSVAR(pagecache_buffer_size); - COPY_SYSVAR(pagecache_division_limit); - COPY_SYSVAR(recover); - COPY_THDVAR(repair_threads); - COPY_THDVAR(sort_buffer_size); - COPY_THDVAR(stats_method); - 
COPY_SYSVAR(sync_log_dir); - COPY_SYSVAR(used_for_temp_tables); -} - -struct st_maria_plugin compat_aliases= { - MYSQL_DAEMON_PLUGIN, - &maria_storage_engine, - "Maria", - "Monty Program Ab", - "Compatibility aliases for the Aria engine", - PLUGIN_LICENSE_GPL, - NULL, - NULL, - 0x0105, - status_variables_aliases, - system_variables_aliases, - "1.5", - MariaDB_PLUGIN_MATURITY_GAMMA -}; - diff --git a/storage/maria/compat_aliases.h b/storage/maria/compat_aliases.h deleted file mode 100644 index 46a4da74eec..00000000000 --- a/storage/maria/compat_aliases.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2010 Monty Program Ab - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -extern struct st_maria_plugin compat_aliases; -extern char mysql_real_data_home[FN_REFLEN]; -extern TYPELIB maria_recover_typelib; -extern TYPELIB maria_stats_method_typelib; -extern TYPELIB maria_translog_purge_type_typelib; -extern TYPELIB maria_sync_log_dir_typelib; -extern TYPELIB maria_group_commit_typelib; -extern struct st_mysql_storage_engine maria_storage_engine; -extern my_bool use_maria_for_temp_tables; -extern struct st_mysql_sys_var* system_variables[]; -extern st_mysql_show_var status_variables[]; -void copy_variable_aliases(); diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index b582ad6577e..50b7bf9f5d1 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -28,7 +28,6 @@ #include "ha_maria.h" #include "trnman_public.h" #include "trnman.h" -#include "compat_aliases.h" C_MODE_START #include "maria_def.h" @@ -219,7 +218,8 @@ static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, maria_max_temp_length, PLUGIN_VAR_RQCMDARG, "Don't use the fast sort index method to created index if the " "temporary file would get bigger than this.", - 0, 0, MAX_FILE_SIZE & ~(1*MB-1), 0, MAX_FILE_SIZE, 1*MB); + 0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)), + 0, MAX_FILE_SIZE, 1*MB); static MYSQL_SYSVAR_ULONG(pagecache_age_threshold, pagecache_age_threshold, PLUGIN_VAR_RQCMDARG, @@ -234,7 +234,7 @@ static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size, "The size of the buffer used for index blocks for Aria tables. 
" "Increase this to get better index handling (for all reads and " "multiple writes) to as much as you can afford.", 0, 0, - KEY_CACHE_SIZE, 0, ~(ulong) 0, 1); + KEY_CACHE_SIZE, 8192*16L, ~(ulong) 0, 1); static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit, PLUGIN_VAR_RQCMDARG, @@ -593,6 +593,8 @@ static int table2maria(TABLE *table_arg, data_file_type row_type, if (found->flags & BLOB_FLAG) recinfo_pos->type= FIELD_BLOB; + else if (found->type() == MYSQL_TYPE_TIMESTAMP) + recinfo_pos->type= FIELD_NORMAL; else if (found->type() == MYSQL_TYPE_VARCHAR) recinfo_pos->type= FIELD_VARCHAR; else if (!(options & HA_OPTION_PACK_RECORD) || @@ -802,6 +804,34 @@ int _ma_killed_ptr(HA_CHECK *param) } +/* + Report progress to mysqld + + This is a bit more complex than what a normal progress report + function normally is. + + The reason is that this is called by enable_index/repair which + is one stage in ALTER TABLE and we can't use the external + stage/max_stage for this. + + thd_progress_init/thd_progress_next_stage is to be called by + high level commands like CHECK TABLE or REPAIR TABLE, not + by sub commands like enable_index(). + + In ma_check.c it's easier to work with stages than with a total + progress, so we use internal stage/max_stage here to keep the + code simple. +*/ + +void _ma_report_progress(HA_CHECK *param, ulonglong progress, + ulonglong max_progress) +{ + thd_progress_report((THD*)param->thd, + progress + max_progress * param->stage, + max_progress * param->max_stage); +} + + void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) { va_list args; @@ -851,7 +881,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
static int maria_create_trn_for_mysql(MARIA_HA *info) { - THD *thd= (THD*) info->external_ptr; + THD *thd= ((TABLE*) info->external_ref)->in_use; TRN *trn= THD_TRN; DBUG_ENTER("maria_create_trn_for_mysql"); @@ -890,6 +920,11 @@ static int maria_create_trn_for_mysql(MARIA_HA *info) DBUG_RETURN(0); } +my_bool ma_killed_in_mariadb(MARIA_HA *info) +{ + return (((TABLE*) (info->external_ref))->in_use->killed != 0); +} + } /* extern "C" */ /** @@ -915,6 +950,7 @@ int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | CANNOT_ROLLBACK_FLAG | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_CAN_REPAIR | + HA_CAN_VIRTUAL_COLUMNS | HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1), bulk_insert_single_undo(BULK_INSERT_NONE) {} @@ -967,7 +1003,7 @@ double ha_maria::scan_time() } /* - We need to be able to store at least two keys on an index page as the + We need to be able to store at least 2 keys on an index page as the splitting algorithms depends on this. (With only one key on a page we also can't use any compression, which may make the index file much larger) @@ -1107,6 +1143,8 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) return (my_errno ? my_errno : -1); file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref; + /* Set external_ref, mainly for temporary tables */ + file->external_ref= (void*) table; // For ma_killed() if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0); @@ -1130,6 +1168,16 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags |= HA_HAS_NEW_CHECKSUM; + /* + For static size rows, tell MariaDB that we will access all bytes + in the record when writing it. 
This signals MariaDB to initialize + the full row to ensure we don't get any errors from valgrind and + that all bytes in the row are properly reset. + */ + if (file->s->data_file_type == STATIC_RECORD && + (file->s->has_varchar_fields | file->s->has_null_fields)) + int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE; + for (i= 0; i < table->s->keys; i++) { plugin_ref parser= table->key_info[i].parser; @@ -1155,8 +1203,6 @@ int ha_maria::close(void) int ha_maria::write_row(uchar * buf) { - ha_statistic_increment(&SSV::ha_write_count); - /* If we have a timestamp column, update it to the current time */ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) table->timestamp_field->set_time(); @@ -1180,7 +1226,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) int error; HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param)); MARIA_SHARE *share= file->s; - const char *old_proc_info= thd_proc_info(thd, "Checking table"); + const char *old_proc_info; TRN *old_trn= file->trn; if (!file || !&param) return HA_ADMIN_INTERNAL_ERROR; @@ -1189,7 +1235,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) param.thd= thd; param.op_name= "check"; param.db_name= table->s->db.str; - param.table_name= table->alias; + param.table_name= table->alias.c_ptr(); param.testflag= check_opt->flags | T_CHECK | T_SILENT; param.stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method); @@ -1199,8 +1245,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) if (!maria_is_crashed(file) && (((param.testflag & T_CHECK_ONLY_CHANGED) && - !(share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | + !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS | STATE_IN_REPAIR)) && share->state.open_count == 0) || ((param.testflag & T_FAST) && (share->state.open_count == @@ -1209,12 +1254,18 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) return HA_ADMIN_ALREADY_DONE; maria_chk_init_for_check(&param, file); + old_proc_info=
thd_proc_info(thd, "Checking status"); + thd_progress_init(thd, 3); (void) maria_chk_status(&param, file); // Not fatal error= maria_chk_size(&param, file); if (!error) error|= maria_chk_del(&param, file, param.testflag); + thd_proc_info(thd, "Checking keys"); + thd_progress_next_stage(thd); if (!error) error= maria_chk_key(&param, file); + thd_proc_info(thd, "Checking data"); + thd_progress_next_stage(thd); if (!error) { if ((!(param.testflag & T_QUICK) && @@ -1238,15 +1289,15 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) if (!error) { if ((share->state.changed & (STATE_CHANGED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR | - STATE_CRASHED | STATE_NOT_ANALYZED)) || + STATE_CRASHED_FLAGS | + STATE_IN_REPAIR | STATE_NOT_ANALYZED)) || (param.testflag & T_STATISTICS) || maria_is_crashed(file)) { file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; mysql_mutex_lock(&share->intern_lock); DBUG_PRINT("info", ("Reseting crashed state")); - share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR); if (!(table->db_stat & HA_READ_ONLY)) error= maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT | @@ -1265,6 +1316,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) /* Reset trn, that may have been set by repair */ _ma_set_trn_for_table(file, old_trn); thd_proc_info(thd, old_proc_info); + thd_progress_end(thd); return error ?
HA_ADMIN_CORRUPT : HA_ADMIN_OK; } @@ -1280,6 +1332,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) int error= 0; HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param)); MARIA_SHARE *share= file->s; + const char *old_proc_info; if (!&param) return HA_ADMIN_INTERNAL_ERROR; @@ -1288,7 +1341,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) param.thd= thd; param.op_name= "analyze"; param.db_name= table->s->db.str; - param.table_name= table->alias; + param.table_name= table->alias.c_ptr(); param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); param.using_global_keycache= 1; @@ -1297,6 +1350,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) if (!(share->state.changed & STATE_NOT_ANALYZED)) return HA_ADMIN_ALREADY_DONE; + old_proc_info= thd_proc_info(thd, "Scanning"); + thd_progress_init(thd, 1); error= maria_chk_key(&param, file); if (!error) { @@ -1306,6 +1361,8 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) } else if (!maria_is_crashed(file) && !thd->killed) maria_mark_crashed(file); + thd_proc_info(thd, old_proc_info); + thd_progress_end(thd); return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; } @@ -1314,6 +1371,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) int error; HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param)); ha_rows start_records; + const char *old_proc_info; if (!file || !&param) return HA_ADMIN_INTERNAL_ERROR; @@ -1325,7 +1383,10 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM | (check_opt->flags & T_EXTEND ?
T_REP : T_REP_BY_SORT)); param.sort_buffer_length= THDVAR(thd, sort_buffer_size); + param.backup_time= check_opt->start_time; start_records= file->state->records; + old_proc_info= thd_proc_info(thd, "Checking table"); + thd_progress_init(thd, 1); while ((error= repair(thd, &param, 0)) && param.retry_repair) { param.retry_repair= 0; @@ -1361,6 +1422,8 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) llstr(start_records, llbuff2), table->s->path.str); } + thd_proc_info(thd, old_proc_info); + thd_progress_end(thd); return error; } @@ -1408,14 +1471,15 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX); param.sort_buffer_length= THDVAR(thd, sort_buffer_size); + thd_progress_init(thd, 1); if ((error= repair(thd, &param, 1)) && param.retry_repair) { sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying", my_errno, param.db_name, param.table_name); param.testflag &= ~T_REP_BY_SORT; - error= repair(thd, &param, 1); + error= repair(thd, &param, 0); } - + thd_progress_end(thd); return error; } @@ -1457,7 +1521,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) _ma_copy_nontrans_state_information(file); param->db_name= table->s->db.str; - param->table_name= table->alias; + param->table_name= table->alias.c_ptr(); param->tmpfile_createflag= O_RDWR | O_TRUNC; param->using_global_keycache= 1; param->thd= thd; @@ -1552,8 +1616,8 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file)) { DBUG_PRINT("info", ("Reseting crashed state")); - share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR); file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; } /* @@ -1577,7 +1641,7 @@ int ha_maria::repair(THD *thd, HA_CHECK
*param, bool do_optimize) llstr(rows, llbuff), llstr(file->state->records, llbuff2)); /* Abort if warning was converted to error */ - if (current_thd->is_error()) + if (table->in_use->is_error()) error= 1; } } @@ -1589,6 +1653,7 @@ int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize) } mysql_mutex_unlock(&share->intern_lock); thd_proc_info(thd, old_proc_info); + thd_progress_end(thd); // Mark done if (!thd->locked_tables_mode) maria_lock_database(file, F_UNLCK); @@ -1812,7 +1877,7 @@ int ha_maria::enable_indexes(uint mode) } else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) { - THD *thd= current_thd; + THD *thd= table->in_use; HA_CHECK &param= *(HA_CHECK*) thd->alloc(sizeof(param)); if (!&param) return HA_ADMIN_INTERNAL_ERROR; @@ -1914,16 +1979,28 @@ int ha_maria::indexes_are_disabled(void) void ha_maria::start_bulk_insert(ha_rows rows) { DBUG_ENTER("ha_maria::start_bulk_insert"); - THD *thd= current_thd; - ulong size= min(thd->variables.read_buff_size, - (ulong) (table->s->avg_row_length * rows)); + THD *thd= table->in_use; MARIA_SHARE *share= file->s; - DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu", - (ulong) rows, size)); + DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows)); /* don't enable row cache if too few rows */ if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE)) - maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size); + { + ulonglong size= thd->variables.read_buff_size, tmp; + if (rows) + { + if (file->state->records) + { + MARIA_INFO maria_info; + maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE); + set_if_smaller(size, maria_info.mean_reclength * rows); + } + else if (table->s->avg_row_length) + set_if_smaller(size, (size_t) (table->s->avg_row_length * rows)); + } + tmp= (ulong) size; // Safe because of limits + maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp); + } can_enable_indexes= (maria_is_all_keys_active(share->state.key_map, share->base.keys)); @@ -1938,25 +2015,34 @@ void
ha_maria::start_bulk_insert(ha_rows rows) we don't want to update the key statistics based of only a few rows. Index file rebuild requires an exclusive lock, so if versioning is on don't do it (see how ha_maria::store_lock() tries to predict repair). - We can repair index only if we have an exclusive (TL_WRITE) lock. To - see if table is empty, we shouldn't rely on the old records' count from - our transaction's start (if that old count is 0 but now there are - records in the table, we would wrongly destroy them). - So we need to look at share->state.state.records. - As a safety net for now, we don't remove the test of - file->state->records, because there is uncertainty on what will happen - during repair if the two states disagree. + We can repair index only if we have an exclusive (TL_WRITE) lock or + if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK. + + To see if table is empty, we shouldn't rely on the old record + count from our transaction's start (if that old count is 0 but + now there are records in the table, we would wrongly destroy + them). So we need to look at share->state.state.records. As a + safety net for now, we don't remove the test of + file->state->records, because there is uncertainty on what will + happen during repair if the two states disagree. */ if ((file->state->records == 0) && (share->state.state.records == 0) && can_enable_indexes && (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) && - (file->lock.type == TL_WRITE)) + (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK)) { /** @todo for a single-row INSERT SELECT, we will go into repair, which is more costly (flushes, syncs) than a row write. 
*/ - maria_disable_non_unique_index(file, rows); + if (file->open_flags & HA_OPEN_INTERNAL_TABLE) + { + /* Internal table; If we get a duplicate something is very wrong */ + file->update|= HA_STATE_CHANGED; + maria_clear_all_keys_active(file->s->state.key_map); + } + else + maria_disable_non_unique_index(file, rows); if (share->now_transactional) { bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR; @@ -2030,10 +2116,10 @@ bool ha_maria::check_and_repair(THD *thd) DBUG_ENTER("ha_maria::check_and_repair"); check_opt.init(); + check_opt.flags= T_MEDIUM | T_AUTO_REPAIR; error= 1; - if ((file->s->state.changed & - (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_MOVED)) == + if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) == STATE_MOVED) { sql_print_information("Zerofilling moved table: '%s'", @@ -2050,7 +2136,6 @@ bool ha_maria::check_and_repair(THD *thd) DBUG_RETURN(error); error= 0; - check_opt.flags= T_MEDIUM | T_AUTO_REPAIR; // Don't use quick if deleted rows if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK)) check_opt.flags |= T_QUICK; @@ -2081,7 +2166,7 @@ bool ha_maria::check_and_repair(THD *thd) bool ha_maria::is_crashed() const { - return (file->s->state.changed & (STATE_CRASHED | STATE_MOVED) || + return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) || (my_disable_locking && file->s->state.open_count)); } @@ -2097,7 +2182,6 @@ bool ha_maria::is_crashed() const int ha_maria::update_row(const uchar * old_data, uchar * new_data) { CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT"); - ha_statistic_increment(&SSV::ha_update_count); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) table->timestamp_field->set_time(); return maria_update(file, old_data, new_data); @@ -2107,7 +2191,6 @@ int ha_maria::update_row(const uchar * old_data, uchar * new_data) int ha_maria::delete_row(const uchar * buf) { CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE 
CONCURRENT"); - ha_statistic_increment(&SSV::ha_delete_count); return maria_delete(file, buf); } @@ -2131,7 +2214,6 @@ int ha_maria::index_read_map(uchar * buf, const uchar * key, enum ha_rkey_function find_flag) { DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_key_count); int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag); table->status= error ? STATUS_NOT_FOUND : 0; return error; @@ -2142,8 +2224,15 @@ int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key, key_part_map keypart_map, enum ha_rkey_function find_flag) { - ha_statistic_increment(&SSV::ha_read_key_count); - int error= maria_rkey(file, buf, index, key, keypart_map, find_flag); + int error; + /* Use the pushed index condition if it matches the index we're scanning */ + end_range= NULL; + if (index == pushed_idx_cond_keyno) + ma_set_index_cond_func(file, index_cond_func_maria, this); + + error= maria_rkey(file, buf, index, key, keypart_map, find_flag); + + ma_set_index_cond_func(file, NULL, 0); table->status= error ? STATUS_NOT_FOUND : 0; return error; } @@ -2154,7 +2243,6 @@ int ha_maria::index_read_last_map(uchar * buf, const uchar * key, { DBUG_ENTER("ha_maria::index_read_last_map"); DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_key_count); int error= maria_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); table->status= error ? STATUS_NOT_FOUND : 0; @@ -2165,7 +2253,6 @@ int ha_maria::index_read_last_map(uchar * buf, const uchar * key, int ha_maria::index_next(uchar * buf) { DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_next_count); int error= maria_rnext(file, buf, active_index); table->status= error ? 
STATUS_NOT_FOUND : 0; return error; @@ -2175,7 +2262,6 @@ int ha_maria::index_next(uchar * buf) int ha_maria::index_prev(uchar * buf) { DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_prev_count); int error= maria_rprev(file, buf, active_index); table->status= error ? STATUS_NOT_FOUND : 0; return error; @@ -2185,7 +2271,6 @@ int ha_maria::index_prev(uchar * buf) int ha_maria::index_first(uchar * buf) { DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_first_count); int error= maria_rfirst(file, buf, active_index); table->status= error ? STATUS_NOT_FOUND : 0; return error; @@ -2195,7 +2280,6 @@ int ha_maria::index_first(uchar * buf) int ha_maria::index_last(uchar * buf) { DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_last_count); int error= maria_rlast(file, buf, active_index); table->status= error ? STATUS_NOT_FOUND : 0; return error; @@ -2208,7 +2292,6 @@ int ha_maria::index_next_same(uchar * buf, { int error; DBUG_ASSERT(inited == INDEX); - ha_statistic_increment(&SSV::ha_read_next_count); /* TODO: Delete this loop in Maria 1.5 as versioning will ensure this never happens @@ -2260,7 +2343,6 @@ int ha_maria::rnd_end() int ha_maria::rnd_next(uchar *buf) { - ha_statistic_increment(&SSV::ha_read_rnd_next_count); int error= maria_scan(file, buf); table->status= error ? STATUS_NOT_FOUND : 0; return error; @@ -2282,7 +2364,6 @@ int ha_maria::restart_rnd_next(uchar *buf) int ha_maria::rnd_pos(uchar *buf, uchar *pos) { - ha_statistic_increment(&SSV::ha_read_rnd_count); int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length)); table->status= error ? 
STATUS_NOT_FOUND : 0; return error; @@ -2401,6 +2482,7 @@ int ha_maria::reset(void) { pushed_idx_cond= NULL; pushed_idx_cond_keyno= MAX_KEY; + in_range_check_pushed_down= FALSE; ma_set_index_cond_func(file, NULL, 0); ds_mrr.dsmrr_close(); if (file->trn) @@ -2424,7 +2506,7 @@ int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size) int ha_maria::delete_all_rows() { - THD *thd= current_thd; + THD *thd= table->in_use; (void) translog_log_debug_info(file->trn, LOGREC_DEBUG_INFO_QUERY, (uchar*) thd->query(), thd->query_length()); if (file->s->now_transactional && @@ -2454,14 +2536,16 @@ int ha_maria::delete_table(const char *name) void ha_maria::drop_table(const char *name) { - (void) close(); - (void) maria_delete_table(name); + DBUG_ASSERT(file->s->temporary); + (void) ha_close(); + (void) maria_delete_table_files(name, 0); } int ha_maria::external_lock(THD *thd, int lock_type) { DBUG_ENTER("ha_maria::external_lock"); + file->external_ref= (void*) table; // For ma_killed() /* We don't test now_transactional because it may vary between lock/unlock and thus confuse our reference counting. @@ -2480,8 +2564,6 @@ int ha_maria::external_lock(THD *thd, int lock_type) /* Transactional table */ if (lock_type != F_UNLCK) { - file->external_ptr= thd; // For maria_register_trn() - if (!file->s->lock_key_trees) // If we don't use versioning { /* @@ -2549,6 +2631,7 @@ int ha_maria::external_lock(THD *thd, int lock_type) { DBUG_PRINT("info", ("locked_tables: %u", trnman_has_locked_tables(trn))); + DBUG_ASSERT(trnman_has_locked_tables(trn) > 0); if (trnman_has_locked_tables(trn) && !trnman_decrement_locked_tables(trn)) { @@ -2678,12 +2761,12 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn) statement assuming they have a trn (see ha_maria::start_stmt()). 
*/ trn= trnman_new_trn(& thd->transaction.wt); - /* This is just a commit, tables stay locked if they were: */ - trnman_reset_locked_tables(trn, locked_tables); THD_TRN= trn; if (unlikely(trn == NULL)) + { error= HA_ERR_OUT_OF_MEM; - + goto end; + } /* Move all locked tables to the new transaction We must do it here as otherwise file->thd and file->state may be @@ -2708,6 +2791,8 @@ int ha_maria::implicit_commit(THD *thd, bool new_trn) } } } + /* This is just a commit, tables stay locked if they were: */ + trnman_reset_locked_tables(trn, locked_tables); } end: DBUG_RETURN(error); @@ -2844,7 +2929,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg, ha_create_info->row_type != ROW_TYPE_PAGE && ha_create_info->row_type != ROW_TYPE_NOT_USED && ha_create_info->row_type != ROW_TYPE_DEFAULT) - push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_ILLEGAL_HA_CREATE_OPTION, "Row format set to PAGE because of TRANSACTIONAL=1 option"); @@ -3112,6 +3197,14 @@ bool maria_flush_logs(handlerton *hton) } +int maria_checkpoint_state(handlerton *hton, bool disabled) +{ + maria_checkpoint_disabled= (my_bool) disabled; + return 0; +} + + + #define SHOW_MSG_LEN (FN_REFLEN + 20) /** @brief show status handler @@ -3291,7 +3384,6 @@ bool ha_maria::is_changed() const static int ha_maria_init(void *p) { int res; - copy_variable_aliases(); const char *log_dir= maria_data_root; #ifdef HAVE_PSI_INTERFACE @@ -3305,6 +3397,10 @@ static int ha_maria_init(void *p) maria_hton->panic= maria_hton_panic; maria_hton->commit= maria_commit; maria_hton->rollback= maria_rollback; + maria_hton->checkpoint_state= maria_checkpoint_state; +#ifdef MARIA_CANNOT_ROLLBACK + maria_hton->commit= 0; +#endif maria_hton->flush_logs= maria_flush_logs; maria_hton->show_status= maria_show_status; /* TODO: decide if we support Maria being used for log tables */ @@ -3329,6 +3425,8 @@ static int ha_maria_init(void *p) 
ma_checkpoint_init(checkpoint_interval); maria_multi_threaded= maria_in_ha_maria= TRUE; maria_create_trn_hook= maria_create_trn_for_mysql; + maria_pagecache->extra_debug= 1; + maria_assert_if_crashed_table= debug_assert_if_crashed_table; #if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH) /* We can only test for sub paths if my_symlink.c is using realpath */ @@ -3336,6 +3434,9 @@ static int ha_maria_init(void *p) #endif if (res) maria_hton= 0; + + ma_killed= ma_killed_in_mariadb; + return res ? HA_ERR_INITIALIZATION : 0; } @@ -3573,13 +3674,13 @@ static struct st_mysql_show_var aria_status_variables[]= { ***************************************************************************/ int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, - HANDLER_BUFFER *buf) + uint n_ranges, uint mode, + HANDLER_BUFFER *buf) { return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); } -int ha_maria::multi_range_read_next(char **range_info) +int ha_maria::multi_range_read_next(range_id_t *range_info) { return ds_mrr.dsmrr_next(range_info); } @@ -3600,13 +3701,18 @@ ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, - COST_VECT *cost) + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost) { ds_mrr.init(this, table); - return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost); + return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost); } +int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str, + size_t size) +{ + return ds_mrr.dsmrr_explain_info(mrr_mode, str, size); +} /* MyISAM MRR implementation ends */ @@ -3630,7 +3736,6 @@ struct st_mysql_storage_engine maria_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; maria_declare_plugin(aria) -compat_aliases, { MYSQL_STORAGE_ENGINE_PLUGIN, 
&maria_storage_engine, diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 53df1d2cfa6..39c23c8d1b6 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -158,7 +158,6 @@ public: int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt); int preload_keys(THD * thd, HA_CHECK_OPT * check_opt); bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); - bool check_if_supported_virtual_columns(void) { return TRUE;} #ifdef HAVE_REPLICATION int dump(THD * thd, int fd); int net_read_dump(NET * net); @@ -180,13 +179,15 @@ public: */ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint mode, HANDLER_BUFFER *buf); - int multi_range_read_next(char **range_info); + int multi_range_read_next(range_id_t *range_info); ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost); ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint *bufsz, uint *flags, COST_VECT *cost); + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost); + int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size); /* Index condition pushdown implementation */ Item *idx_cond_push(uint keyno, Item* idx_cond); diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 56d2e261da4..ae9e83e982a 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -690,12 +690,12 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, } /* yuck. 
waiting */ - deadline= my_getsystime() + lm->lock_timeout * 10000; - set_timespec_nsec(timeout,lm->lock_timeout * 1000000); + deadline= my_hrtime().val*1000 + lm->lock_timeout * 1000000; + set_timespec_time_nsec(timeout, deadline); do { pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout); - } while (!DELETED(blocker->link) && my_getsystime() < deadline); + } while (!DELETED(blocker->link) && my_hrtime().val < deadline/1000); pthread_mutex_unlock(wait_for_lo->mutex); lf_rwlock_by_pins(pins); if (!DELETED(blocker->link)) diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index fb12ecdbe3b..c2cf7d32d48 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -104,10 +104,11 @@ - On checkpoint (Ie: When we do a checkpoint, we have to ensure that all bitmaps are put on disk even if they are not in the page cache). - - When explicitely requested (for example on backup or after recvoery, + - When explicitely requested (for example on backup or after recovery, to simplify things) The flow of writing a row is that: + - Mark the bitmap not flushable (_ma_bitmap_flushable(X, 1)) - Lock the bitmap - Decide which data pages we will write to - Mark them full in the bitmap page so that other threads do not try to @@ -119,6 +120,7 @@ pages (that is, we marked pages full but when we are done we realize we didn't fill them) - Unlock the bitmap. 
+ - Mark the bitmap flushable (_ma_bitmap_flushable(X, -1)) */ #include "maria_def.h" @@ -127,6 +129,12 @@ #define FULL_HEAD_PAGE 4 #define FULL_TAIL_PAGE 7 +const char *bits_to_txt[]= +{ + "empty", "00-30% full", "30-60% full", "60-90% full", "full", + "tail 00-40 % full", "tail 40-80 % full", "tail/blob full" +}; + /*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/ #undef WRONG_BITMAP_FLUSH @@ -136,12 +144,15 @@ static my_bool _ma_read_bitmap_page(MARIA_HA *info, static my_bool _ma_bitmap_create_missing(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, pgcache_page_no_t page); +static void _ma_bitmap_unpin_all(MARIA_SHARE *share); + /* Write bitmap page to key cache */ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap) { + my_bool res; DBUG_ENTER("write_changed_bitmap"); DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); DBUG_ASSERT(bitmap->file.write_callback != 0); @@ -159,18 +170,28 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, #endif ) { - my_bool res= pagecache_write(share->pagecache, + res= pagecache_write(share->pagecache, &bitmap->file, bitmap->page, 0, bitmap->map, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE); + DBUG_ASSERT(!res); DBUG_RETURN(res); } else { + /* + bitmap->non_flushable means that someone has changed the bitmap, + but it's not yet complete so it can't yet be written to disk. + In this case we write the changed bitmap to the disk cache, + but keep it pinned until the change is completed. The page will + be unpinned later by _ma_bitmap_unpin_all() as soon as non_flushable + is set back to 0. 
+ */ MARIA_PINNED_PAGE page_link; - int res= pagecache_write(share->pagecache, + DBUG_PRINT("info", ("Writing pinned bitmap page")); + res= pagecache_write(share->pagecache, &bitmap->file, bitmap->page, 0, bitmap->map, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN, @@ -178,7 +199,8 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, LSN_IMPOSSIBLE); page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED; page_link.changed= 1; - push_dynamic(&bitmap->pinned_pages, (void*) &page_link); + push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link); + DBUG_ASSERT(!res); DBUG_RETURN(res); } } @@ -189,7 +211,10 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, SYNOPSIS _ma_bitmap_init() share Share handler - file data file handler + file Data file handler + last_page Pointer to last page (max_file_size) that needs to be + mapped by the bitmap. This is adjusted to bitmap + alignment. NOTES This is called the first time a file is opened. @@ -199,12 +224,14 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, 1 error */ -my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, + pgcache_page_no_t *last_page) { uint aligned_bit_blocks; uint max_page_size; MARIA_FILE_BITMAP *bitmap= &share->bitmap; uint size= share->block_size; + pgcache_page_no_t first_bitmap_with_space; #ifndef DBUG_OFF /* We want to have a copy of the bitmap to be able to print differences */ size*= 2; @@ -221,13 +248,14 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) /* Size needs to be aligned on 6 */ aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6; - bitmap->total_size= aligned_bit_blocks * 6; + bitmap->max_total_size= bitmap->total_size= aligned_bit_blocks * 6; /* In each 6 bytes, we have 6*8/3 = 16 pages covered The +1 is to add the bitmap page, as this doesn't have to be covered */ bitmap->pages_covered= aligned_bit_blocks * 16 + 1; - 
bitmap->flush_all_requested= 0; + bitmap->flush_all_requested= bitmap->waiting_for_flush_all_requested= + bitmap->waiting_for_non_flushable= 0; bitmap->non_flushable= 0; /* Update size for bits */ @@ -247,13 +275,35 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) mysql_cond_init(key_SHARE_BITMAP_cond, &share->bitmap.bitmap_cond, 0); + first_bitmap_with_space= share->state.first_bitmap_with_space; _ma_bitmap_reset_cache(share); - if (share->state.first_bitmap_with_space == ~(pgcache_page_no_t) 0) + /* + The bitmap used to map the file are aligned on 6 bytes. We now + calculate the max file size that can be used by the bitmap. This + is needed to get ma_info() give a true file size so that the user can + estimate if there is still space free for records in the file. + */ { - /* Start scanning for free space from start of file */ - share->state.first_bitmap_with_space = 0; + pgcache_page_no_t last_bitmap_page; + ulong blocks, bytes; + + last_bitmap_page= *last_page - *last_page % bitmap->pages_covered; + blocks= *last_page - last_bitmap_page; + bytes= (blocks * 3) / 8; /* 3 bit per page / 8 bits per byte */ + /* Size needs to be aligned on 6 */ + bytes/= 6; + bytes*= 6; + bitmap->last_bitmap_page= last_bitmap_page; + bitmap->last_total_size= bytes; + *last_page= ((last_bitmap_page + bytes*8/3)); } + + /* Restore first_bitmap_with_space if it's resonable */ + if (first_bitmap_with_space <= (share->state.state.data_file_length / + share->block_size)) + share->state.first_bitmap_with_space= first_bitmap_with_space; + return 0; } @@ -268,16 +318,63 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) my_bool _ma_bitmap_end(MARIA_SHARE *share) { - my_bool res= _ma_bitmap_flush(share); + my_bool res; mysql_mutex_assert_owner(&share->close_lock); + DBUG_ASSERT(share->bitmap.non_flushable == 0); + DBUG_ASSERT(share->bitmap.flush_all_requested == 0); + DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 && + share->bitmap.waiting_for_flush_all_requested == 
0); + DBUG_ASSERT(share->bitmap.pinned_pages.elements == 0); + + res= _ma_bitmap_flush(share); mysql_mutex_destroy(&share->bitmap.bitmap_lock); mysql_cond_destroy(&share->bitmap.bitmap_cond); delete_dynamic(&share->bitmap.pinned_pages); my_free(share->bitmap.map); share->bitmap.map= 0; + /* + This is to not get an assert in checkpoint. The bitmap will be flushed + at once by _ma_once_end_block_record() as part of the normal flush + of the kfile. + */ + share->bitmap.changed_not_flushed= 0; return res; } +/* + Ensure that we have incremented open count before we try to read/write + a page while we have the bitmap lock. + This is needed to ensure that we don't call _ma_mark_file_changed() as + part of flushing a page to disk, as this locks share->internal_lock + and then mutex lock would happen in the wrong order. +*/ + +static inline void _ma_bitmap_mark_file_changed(MARIA_SHARE *share, + my_bool flush_translog) +{ + /* + It's extremely unlikely that the following test is true as it + only happens once if the table has changed. + */ + if (unlikely(!share->global_changed && + (share->state.changed & STATE_CHANGED))) + { + /* purecov: begin inspected */ + /* unlock mutex as it can't be hold during _ma_mark_file_changed() */ + mysql_mutex_unlock(&share->bitmap.bitmap_lock); + + /* + We have to flush the translog to ensure we have registered that the + table is open. + */ + if (flush_translog && share->now_transactional) + (void) translog_flush(share->state.logrec_file_id); + + _ma_mark_file_changed(share); + mysql_mutex_lock(&share->bitmap.bitmap_lock); + /* purecov: end */ + } +} /* Send updated bitmap to the page cache @@ -314,6 +411,12 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *share) mysql_mutex_lock(&share->bitmap.bitmap_lock); if (share->bitmap.changed) { + /* + We have to mark the file changed here, as otherwise the following + write to pagecache may force a page out from this file, which would + cause _ma_mark_file_changed() to be called with bitmaplock hold! 
+ */ + _ma_bitmap_mark_file_changed(share, 1); res= write_changed_bitmap(share, &share->bitmap); share->bitmap.changed= 0; } @@ -353,12 +456,45 @@ filter_flush_bitmap_pages(enum pagecache_page_type type my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) { my_bool res= 0; + uint send_signal= 0; MARIA_FILE_BITMAP *bitmap= &share->bitmap; DBUG_ENTER("_ma_bitmap_flush_all"); + +#ifdef EXTRA_DEBUG_BITMAP + { + char buff[160]; + uint len= my_sprintf(buff, + (buff, "bitmap_flush: fd: %d id: %u " + "changed: %d changed_not_flushed: %d " + "flush_all_requested: %d", + share->bitmap.file.file, + share->id, + bitmap->changed, + bitmap->changed_not_flushed, + bitmap->flush_all_requested)); + (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, + (uchar*) buff, len); + } +#endif + mysql_mutex_lock(&bitmap->bitmap_lock); + if (!bitmap->changed && !bitmap->changed_not_flushed) + { + mysql_mutex_unlock(&bitmap->bitmap_lock); + DBUG_RETURN(0); + } + + _ma_bitmap_mark_file_changed(share, 0); + + /* + The following should be true as it was tested above. We have to test + this again as _ma_bitmap_mark_file_changed() did temporarly release + the bitmap mutex. + */ if (bitmap->changed || bitmap->changed_not_flushed) { bitmap->flush_all_requested++; + bitmap->waiting_for_non_flushable++; #ifndef WRONG_BITMAP_FLUSH while (bitmap->non_flushable > 0) { @@ -366,6 +502,16 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); } #endif + bitmap->waiting_for_non_flushable--; +#ifdef EXTRA_DEBUG_BITMAP + { + char tmp[MAX_BITMAP_INFO_LENGTH]; + _ma_get_bitmap_description(bitmap, bitmap->map, bitmap->page, tmp); + (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, + (uchar*) tmp, strlen(tmp)); + } +#endif + DBUG_ASSERT(bitmap->flush_all_requested == 1); /* Bitmap is in a flushable state: its contents in memory are reflected by @@ -401,9 +547,12 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) become false, wake them up. 
*/ DBUG_PRINT("info", ("bitmap flusher waking up others")); - mysql_cond_broadcast(&bitmap->bitmap_cond); + send_signal= (bitmap->waiting_for_flush_all_requested | + bitmap->waiting_for_non_flushable); } mysql_mutex_unlock(&bitmap->bitmap_lock); + if (send_signal) + mysql_cond_broadcast(&bitmap->bitmap_cond); DBUG_RETURN(res); } @@ -433,11 +582,13 @@ void _ma_bitmap_lock(MARIA_SHARE *share) mysql_mutex_lock(&bitmap->bitmap_lock); bitmap->flush_all_requested++; + bitmap->waiting_for_non_flushable++; while (bitmap->non_flushable) { DBUG_PRINT("info", ("waiting for bitmap to be flushable")); mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); } + bitmap->waiting_for_non_flushable--; /* Ensure that _ma_bitmap_flush_all() and _ma_bitmap_lock() are blocked. ma_bitmap_flushable() is blocked thanks to 'flush_all_requested'. @@ -457,6 +608,7 @@ void _ma_bitmap_lock(MARIA_SHARE *share) void _ma_bitmap_unlock(MARIA_SHARE *share) { MARIA_FILE_BITMAP *bitmap= &share->bitmap; + uint send_signal; DBUG_ENTER("_ma_bitmap_unlock"); if (!share->now_transactional) @@ -464,10 +616,14 @@ void _ma_bitmap_unlock(MARIA_SHARE *share) DBUG_ASSERT(bitmap->flush_all_requested > 0 && bitmap->non_flushable == 1); mysql_mutex_lock(&bitmap->bitmap_lock); - bitmap->flush_all_requested--; bitmap->non_flushable= 0; + _ma_bitmap_unpin_all(share); + send_signal= bitmap->waiting_for_non_flushable; + if (!--bitmap->flush_all_requested) + send_signal|= bitmap->waiting_for_flush_all_requested; mysql_mutex_unlock(&bitmap->bitmap_lock); - mysql_cond_broadcast(&bitmap->bitmap_cond); + if (send_signal) + mysql_cond_broadcast(&bitmap->bitmap_cond); DBUG_VOID_RETURN; } @@ -494,7 +650,7 @@ static void _ma_bitmap_unpin_all(MARIA_SHARE *share) while (pinned_page-- != page_link) pagecache_unlock_by_link(share->pagecache, pinned_page->link, pinned_page->unlock, PAGECACHE_UNPIN, - LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, TRUE); + LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, TRUE); bitmap->pinned_pages.elements= 0; 
DBUG_VOID_RETURN; } @@ -520,7 +676,7 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share) bzero(bitmap->map, bitmap->block_size); bitmap->changed= 1; bitmap->page= 0; - bitmap->used_size= bitmap->total_size; + bitmap->used_size= bitmap->total_size= bitmap->max_total_size; } DBUG_VOID_RETURN; } @@ -534,7 +690,8 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share) @notes This is called after we have swapped file descriptors and we want - bitmap to forget all cached information + bitmap to forget all cached information. + It's also called directly after we have opened a file. */ void _ma_bitmap_reset_cache(MARIA_SHARE *share) @@ -550,13 +707,20 @@ void _ma_bitmap_reset_cache(MARIA_SHARE *share) We can't read a page yet, as in some case we don't have an active page cache yet. Pretend we have a dummy, full and not changed bitmap page in memory. + + We set bitmap->page to a value so that if we use it in + move_to_next_bitmap() it will point to page 0. + (This can only happen if writing to a bitmap page fails) */ - bitmap->page= ~(ulonglong) 0; - bitmap->used_size= bitmap->total_size; + bitmap->page= ((pgcache_page_no_t) 0) - bitmap->pages_covered; + bitmap->used_size= bitmap->total_size= bitmap->max_total_size; bfill(bitmap->map, share->block_size, 255); #ifndef DBUG_OFF memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); #endif + + /* Start scanning for free space from start of file */ + share->state.first_bitmap_with_space = 0; } } @@ -680,7 +844,7 @@ static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) Print bitmap for debugging SYNOPSIS - _ma_print_bitmap() + _ma_print_bitmap_changes() bitmap Bitmap to print IMPLEMENTATION @@ -691,12 +855,6 @@ static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) #ifndef DBUG_OFF -const char *bits_to_txt[]= -{ - "empty", "00-30% full", "30-60% full", "60-90% full", "full", - "tail 00-40 % full", "tail 40-80 % full", "tail/blob full" -}; - static void 
_ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap) { uchar *pos, *end, *org_pos; @@ -747,12 +905,11 @@ void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, uchar *pos, *end; char llbuff[22]; - end= bitmap->map + bitmap->used_size; DBUG_LOCK_FILE; fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n", llstr(page, llbuff)); page++; /* Skip bitmap page */ - for (pos= data, end= pos + bitmap->total_size; + for (pos= data, end= pos + bitmap->max_total_size; pos < end ; pos+= 6) { @@ -781,6 +938,70 @@ void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, #endif /* DBUG_OFF */ +/* + Return content of bitmap as a printable string +*/ + +void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap, + uchar *bitmap_data, + pgcache_page_no_t page, + char *out) +{ + uchar *pos, *end; + uint count=0, dot_printed= 0, len; + char buff[80], last[80]; + + page++; + last[0]=0; + for (pos= bitmap_data, end= pos+ bitmap->used_size ; pos < end ; pos+= 6) + { + ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + for (i= 0; i < 16 ; i++, bits>>= 3) + { + if (count > 60) + { + if (memcmp(buff, last, count)) + { + memcpy(last, buff, count); + len= sprintf(out, "%8lu: ", (ulong) page - count); + memcpy(out+len, buff, count); + out+= len + count + 1; + out[-1]= '\n'; + dot_printed= 0; + } + else if (!(dot_printed++)) + { + out= strmov(out, "...\n"); + } + count= 0; + } + buff[count++]= '0' + (uint) (bits & 7); + page++; + } + } + len= sprintf(out, "%8lu: ", (ulong) page - count); + memcpy(out+len, buff, count); + out[len + count]= '\n'; + out[len + count + 1]= 0; +} + + +/* + Adjust bitmap->total_size to not go over max_data_file_size +*/ + +static void adjust_total_size(MARIA_HA *info, pgcache_page_no_t page) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + + if (page < bitmap->last_bitmap_page) + bitmap->total_size= bitmap->max_total_size; /* Use all bits in bitmap */ + else + bitmap->total_size= bitmap->last_total_size; +} + 
/*************************************************************************** Reading & writing bitmap pages ***************************************************************************/ @@ -817,12 +1038,16 @@ static my_bool _ma_read_bitmap_page(MARIA_HA *info, DBUG_ASSERT(!bitmap->changed); bitmap->page= page; - if (((page + 1) * bitmap->block_size) > share->state.state.data_file_length) + if ((page + 1) * bitmap->block_size > share->state.state.data_file_length) { /* Inexistent or half-created page */ res= _ma_bitmap_create_missing(info, bitmap, page); + if (!res) + adjust_total_size(info, page); DBUG_RETURN(res); } + + adjust_total_size(info, page); bitmap->used_size= bitmap->total_size; DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); res= pagecache_read(share->pagecache, @@ -871,6 +1096,13 @@ static my_bool _ma_change_bitmap_page(MARIA_HA *info, { DBUG_ENTER("_ma_change_bitmap_page"); + /* + We have to mark the file changed here, as otherwise the following + read/write to pagecache may force a page out from this file, which would + cause _ma_mark_file_changed() to be called with bitmaplock hold! 
+ */ + _ma_bitmap_mark_file_changed(info->s, 1); + if (bitmap->changed) { if (write_changed_bitmap(info->s, bitmap)) @@ -906,14 +1138,18 @@ static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) MARIA_STATE_INFO *state= &info->s->state; DBUG_ENTER("move_to_next_bitmap"); - if (state->first_bitmap_with_space != ~(ulonglong) 0 && + if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 && state->first_bitmap_with_space != page) { page= state->first_bitmap_with_space; - state->first_bitmap_with_space= ~(ulonglong) 0; + state->first_bitmap_with_space= ~(pgcache_page_no_t) 0; + DBUG_ASSERT(page % bitmap->pages_covered == 0); } else + { page+= bitmap->pages_covered; + DBUG_ASSERT(page % bitmap->pages_covered == 0); + } DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); } @@ -1308,10 +1544,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, best_prefix_bits|= tmp; int6store(best_data, best_prefix_bits); if (!(best_area_size-= best_prefix_area_size)) - { - DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap);); - DBUG_RETURN(block->page_count); - } + goto end; best_data+= 6; } best_area_size*= 3; /* Bits to set */ @@ -1329,6 +1562,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, bitmap->used_size= (uint) (best_data - bitmap->map); DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); } +end: bitmap->changed= 1; DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap);); DBUG_RETURN(block->page_count); @@ -1621,7 +1855,7 @@ static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size, find_where_to_split_row() share Maria share row Information of what is in the row (from calc_record_size()) - extents_length Number of bytes needed to store all extents + extents Max number of extents we have to store in header split_size Free size on the page (The head length must be less than this) @@ -1630,7 +1864,7 @@ static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size, */ static uint 
find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, - uint extents_length, uint split_size) + uint extents, uint split_size) { uint *lengths, *lengths_end; /* @@ -1640,19 +1874,20 @@ static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, - One extent */ uint row_length= (row->min_length + - size_to_store_key_length(extents_length) + + size_to_store_key_length(extents) + ROW_EXTENT_SIZE); - DBUG_ASSERT(row_length < split_size); + DBUG_ASSERT(row_length <= split_size); + /* Store first in all_field_lengths the different parts that are written to the row. This needs to be in same order as in ma_block_rec.c::write_block_record() */ - row->null_field_lengths[-3]= extents_length; + row->null_field_lengths[-3]= extents * ROW_EXTENT_SIZE; row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; row->null_field_lengths[-1]= row->field_lengths_length; for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, - lengths_end= (lengths + share->base.pack_fields - share->base.blobs + + lengths_end= (lengths + share->base.fields - share->base.blobs + EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) { if (row_length + *lengths > split_size) @@ -1808,18 +2043,19 @@ my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; /* The first segment size is stored in 'row_length' */ - row_length= find_where_to_split_row(share, row, extents_length, + row_length= find_where_to_split_row(share, row, row->extents_count + + ELEMENTS_RESERVED_FOR_MAIN_PART-1, max_page_size); full_page_size= MAX_TAIL_SIZE(share->block_size); position= 0; - if (head_length - row_length <= full_page_size) + rest_length= head_length - row_length; + if (rest_length <= full_page_size) position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ if (find_head(info, row_length, position)) goto abort; row->space_on_head_page= row_length; - rest_length= head_length - row_length; if 
(write_rest_of_head(info, position, rest_length)) goto abort; @@ -1886,8 +2122,7 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, goto abort; /* Switch bitmap to current head page */ - bitmap_page= page / share->bitmap.pages_covered; - bitmap_page*= share->bitmap.pages_covered; + bitmap_page= page - page % share->bitmap.pages_covered; if (share->bitmap.page != bitmap_page && _ma_change_bitmap_page(info, &share->bitmap, bitmap_page)) @@ -1906,16 +2141,22 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, /* Allocate enough space */ head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; - /* The first segment size is stored in 'row_length' */ - row_length= find_where_to_split_row(share, row, extents_length, free_size); + /* + The first segment size is stored in 'row_length' + We have to add ELEMENTS_RESERVED_FOR_MAIN_PART here as the extent + information may be up to this size when the header splits. + */ + row_length= find_where_to_split_row(share, row, row->extents_count + + ELEMENTS_RESERVED_FOR_MAIN_PART-1, + free_size); position= 0; - if (head_length - row_length < MAX_TAIL_SIZE(share->block_size)) + rest_length= head_length - row_length; + if (rest_length <= MAX_TAIL_SIZE(share->block_size)) position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ use_head(info, page, row_length, position); row->space_on_head_page= row_length; - rest_length= head_length - row_length; if (write_rest_of_head(info, position, rest_length)) goto abort; @@ -2003,7 +2244,7 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, Get bitmap pattern for a given page SYNOPSIS - get_page_bits() + bitmap_get_page_bits() info Maria handler bitmap Bitmap handler page Page number @@ -2013,8 +2254,8 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, ~0 Error (couldn't read page) */ -uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, - pgcache_page_no_t page) +static 
uint bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + pgcache_page_no_t page) { pgcache_page_no_t bitmap_page; uint offset_page, offset, tmp; @@ -2040,6 +2281,19 @@ uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, } +/* As above, but take a lock while getting the data */ + +uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + pgcache_page_no_t page) +{ + uint tmp; + mysql_mutex_lock(&bitmap->bitmap_lock); + tmp= bitmap_get_page_bits(info, bitmap, page); + mysql_mutex_unlock(&bitmap->bitmap_lock); + return tmp; +} + + /* Mark all pages in a region as free @@ -2119,6 +2373,7 @@ my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info, DBUG_RETURN(0); } + /* Set all pages in a region as used @@ -2151,7 +2406,7 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, bitmap_page= page - page % bitmap->pages_covered; if (page == bitmap_page || - page + page_count >= bitmap_page + bitmap->pages_covered) + page + page_count > bitmap_page + bitmap->pages_covered) { DBUG_ASSERT(0); /* Wrong in data */ DBUG_RETURN(1); @@ -2250,7 +2505,7 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) the bitmap's mutex. 
*/ _ma_bitmap_unpin_all(share); - if (unlikely(bitmap->flush_all_requested)) + if (unlikely(bitmap->waiting_for_non_flushable)) { DBUG_PRINT("info", ("bitmap flushable waking up flusher")); mysql_cond_broadcast(&bitmap->bitmap_cond); @@ -2263,6 +2518,8 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) } DBUG_ASSERT(non_flushable_inc == 1); DBUG_ASSERT(info->non_flushable_state == 0); + + bitmap->waiting_for_flush_all_requested++; while (unlikely(bitmap->flush_all_requested)) { /* @@ -2279,6 +2536,7 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) DBUG_PRINT("info", ("waiting for bitmap flusher")); mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); } + bitmap->waiting_for_flush_all_requested--; bitmap->non_flushable++; DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); mysql_mutex_unlock(&bitmap->bitmap_lock); @@ -2352,7 +2610,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) else { DBUG_ASSERT(current_bitmap_value == - _ma_bitmap_get_page_bits(info, bitmap, block->page)); + bitmap_get_page_bits(info, bitmap, block->page)); } /* Handle all full pages and tail pages (for head page and blob) */ @@ -2383,16 +2641,14 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) The page has all bits set; The following test is an optimization to not set the bits to the same value as before. 
*/ + DBUG_ASSERT(current_bitmap_value == + bitmap_get_page_bits(info, bitmap, block->page)); + if (bits != current_bitmap_value) { if (set_page_bits(info, bitmap, block->page, bits)) goto err; } - else - { - DBUG_ASSERT(current_bitmap_value == - _ma_bitmap_get_page_bits(info, bitmap, block->page)); - } } else if (!(block->used & BLOCKUSED_USED) && _ma_bitmap_reset_full_page_bits(info, bitmap, @@ -2408,7 +2664,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) if (--bitmap->non_flushable == 0) { _ma_bitmap_unpin_all(info->s); - if (unlikely(bitmap->flush_all_requested)) + if (unlikely(bitmap->waiting_for_non_flushable)) { DBUG_PRINT("info", ("bitmap flushable waking up flusher")); mysql_cond_broadcast(&bitmap->bitmap_cond); @@ -2448,9 +2704,9 @@ my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents, uint count) { MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + my_bool res; DBUG_ENTER("_ma_bitmap_free_full_pages"); - mysql_mutex_lock(&bitmap->bitmap_lock); for (; count--; extents+= ROW_EXTENT_SIZE) { pgcache_page_no_t page= uint5korr(extents); @@ -2461,15 +2717,15 @@ my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents, if (page == 0 && page_count == 0) continue; /* Not used extent */ if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page, - page_count, PAGECACHE_LOCK_WRITE, 1) || - _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count)) - { - mysql_mutex_unlock(&bitmap->bitmap_lock); + page_count, PAGECACHE_LOCK_WRITE, 1)) + DBUG_RETURN(1); + mysql_mutex_lock(&bitmap->bitmap_lock); + res= _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count); + mysql_mutex_unlock(&bitmap->bitmap_lock); + if (res) DBUG_RETURN(1); - } } } - mysql_mutex_unlock(&bitmap->bitmap_lock); DBUG_RETURN(0); } @@ -2521,17 +2777,15 @@ my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head, page_type What kind of page this is page Adress to page empty_space Empty space on page - 
bitmap_pattern Store here the pattern that was in the bitmap for the - page. This is always updated. + bitmap_pattern Bitmap pattern for page (from bitmap) RETURN 0 ok 1 error */ -my_bool _ma_check_bitmap_data(MARIA_HA *info, - enum en_page_type page_type, pgcache_page_no_t page, - uint empty_space, uint *bitmap_pattern) +my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type, + uint empty_space, uint bitmap_pattern) { uint bits; switch (page_type) { @@ -2552,8 +2806,7 @@ my_bool _ma_check_bitmap_data(MARIA_HA *info, bits= 0; /* to satisfy compiler */ DBUG_ASSERT(0); } - return ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap, - page)) != bits); + return (bitmap_pattern != bits); } @@ -2798,6 +3051,11 @@ static my_bool _ma_bitmap_create_missing(MARIA_HA *info, /* First (in offset order) bitmap page to create */ if (data_file_length < block_size) goto err; /* corrupted, should have first bitmap page */ + if (page * block_size >= share->base.max_data_file_length) + { + my_errno= HA_ERR_RECORD_FILE_FULL; + goto err; + } from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1; from*= bitmap->pages_covered; diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 1c0e6b88d89..669dbe84fdc 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -414,14 +414,29 @@ void _ma_init_block_record_data(void) my_bool _ma_once_init_block_record(MARIA_SHARE *share, File data_file) { + my_bool res; + pgcache_page_no_t last_page; + + /* + First calculate the max file length with can have with a pointer of size + rec_reflength. - share->base.max_data_file_length= - (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) * - share->block_size; + The 'rec_reflength - 1' is because one byte is used for row + position withing the page. + The /2 comes from _ma_transaction_recpos_to_keypos() where we use + the lowest bit to mark if there is a transid following the rownr. 
+ */ + last_page= ((ulonglong) 1 << ((share->base.rec_reflength-1)*8))/2; + if (!last_page) /* Overflow; set max size */ + last_page= ~(pgcache_page_no_t) 0; + + res= _ma_bitmap_init(share, data_file, &last_page); + share->base.max_data_file_length= _ma_safe_mul(last_page + 1, + share->block_size); #if SIZEOF_OFF_T == 4 - set_if_smaller(share->base.max_data_file_length, INT_MAX32); + set_if_smaller(share->base.max_data_file_length, INT_MAX32); #endif - return _ma_bitmap_init(share, data_file); + return res; } @@ -891,8 +906,7 @@ static my_bool extend_area_on_page(MARIA_HA *info, DBUG_PRINT("error", ("Not enough space: " "length: %u request_length: %u", length, request_length)); - my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ - DBUG_ASSERT(0); /* For debugging */ + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); /* Error in block */ } *empty_space= length; /* All space is here */ @@ -1020,7 +1034,7 @@ make_space_for_directory(MARIA_HA *info, UNDO of DELETE (in which case we know the row was on the page before) or if the bitmap told us there was space on page */ - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); return(1); } } @@ -1707,7 +1721,7 @@ struct st_row_pos_info static my_bool get_head_or_tail_page(MARIA_HA *info, - MARIA_BITMAP_BLOCK *block, + const MARIA_BITMAP_BLOCK *block, uchar *buff, uint length, uint page_type, enum pagecache_page_lock lock, struct st_row_pos_info *res) @@ -1777,7 +1791,8 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, DBUG_RETURN(0); crashed: - my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_ASSERT(!maria_assert_if_crashed_table); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */ DBUG_RETURN(1); } @@ -1806,7 +1821,7 @@ crashed: */ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, - MARIA_BITMAP_BLOCK *block, + const MARIA_BITMAP_BLOCK *block, uchar *buff, uint length, uint page_type, enum pagecache_page_lock lock, @@ -1870,7 +1885,8 @@ 
static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, DBUG_RETURN(0); err: - my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_ASSERT(!maria_assert_if_crashed_table); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */ DBUG_RETURN(1); } @@ -2018,6 +2034,7 @@ static my_bool write_tail(MARIA_HA *info, PAGECACHE_WRITE_DELAY, &page_link.link, LSN_IMPOSSIBLE))) { + DBUG_ASSERT(page_link.link); page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; page_link.changed= 1; push_dynamic(&info->pinned_pages, (void*) &page_link); @@ -2094,8 +2111,7 @@ static my_bool write_full_pages(MARIA_HA *info, { if (!--sub_blocks) { - DBUG_ASSERT(0); /* Wrong in bitmap or UNDO */ - my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); } @@ -2241,7 +2257,7 @@ static void store_extent_info(uchar *to, for (block= first_block, end_block= first_block+count ; block < end_block; block++) { - /* The following is only false for marker blocks */ + /* The following is only false for marker (unused) blocks */ if (likely(block->used & BLOCKUSED_USED)) { uint page_count= block->page_count; @@ -2506,7 +2522,7 @@ static my_bool free_full_page_range(MARIA_HA *info, pgcache_page_no_t page, } if (delete_count && pagecache_delete_pages(share->pagecache, &info->dfile, - page, delete_count, PAGECACHE_LOCK_WRITE, 0)) + page, delete_count, PAGECACHE_LOCK_WRITE, 1)) res= 1; if (share->now_transactional) @@ -2756,7 +2772,7 @@ static my_bool write_block_record(MARIA_HA *info, DBUG_ASSERT(length <= column->length); break; default: /* Wrong data */ - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); length=0; break; } @@ -2815,7 +2831,6 @@ static my_bool write_block_record(MARIA_HA *info, DBUG_PRINT("info", ("Used head length on page: %u header_length: %u", head_length, (uint) (flag & ROW_FLAG_TRANSID ? 
TRANSID_SIZE : 0))); - DBUG_ASSERT(data <= end_of_data); if (head_length < share->base.min_block_length) { /* Extend row to be of size min_block_length */ @@ -2824,6 +2839,7 @@ static my_bool write_block_record(MARIA_HA *info, data+= diff_length; head_length= share->base.min_block_length; } + DBUG_ASSERT(data <= end_of_data); /* If this is a redo entry (ie, undo_lsn != LSN_ERROR) then we should have written exactly head_length bytes (same as original record). @@ -3070,9 +3086,10 @@ static my_bool write_block_record(MARIA_HA *info, extent_data= row_extents_second_part + ((last_head_block - head_block) - 2) * ROW_EXTENT_SIZE; } - DBUG_ASSERT(uint2korr(extent_data+5) & TAIL_BIT); + /* Write information for tail block in the reserved space */ page_store(extent_data, head_tail_block->page); - int2store(extent_data + PAGE_STORE_SIZE, head_tail_block->page_count); + pagerange_store(extent_data + PAGE_STORE_SIZE, + head_tail_block->page_count); } } else @@ -3146,6 +3163,7 @@ static my_bool write_block_record(MARIA_HA *info, PAGECACHE_WRITE_DELAY, &page_link.link, LSN_IMPOSSIBLE)) goto disk_err; + DBUG_ASSERT(page_link.link); page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; page_link.changed= 1; push_dynamic(&info->pinned_pages, (void*) &page_link); @@ -3414,8 +3432,9 @@ static my_bool write_block_record(MARIA_HA *info, DBUG_RETURN(0); crashed: + DBUG_ASSERT(!maria_assert_if_crashed_table); /* Something was wrong with data on page */ - my_errno= HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); disk_err: /** @@ -3488,7 +3507,9 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info, /* page will be pinned & locked by get_head_or_tail_page */ if (get_head_or_tail_page(info, blocks->block, info->buff, - row->space_on_head_page, HEAD_PAGE, + max(row->space_on_head_page, + info->s->base.min_block_length), + HEAD_PAGE, PAGECACHE_LOCK_WRITE, &row_pos)) goto err; row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr); @@ -3619,6 +3640,7 @@ 
my_bool _ma_write_abort_block_record(MARIA_HA *info) } } } + _ma_bitmap_unlock(share); if (share->now_transactional) { if (_ma_write_clr(info, info->cur_row.orig_undo_lsn, @@ -3628,7 +3650,6 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) &lsn, (void*) 0)) res= 1; } - _ma_bitmap_unlock(share); _ma_unpin_all_pages_and_finalize_row(info, lsn); DBUG_RETURN(res); } @@ -3806,6 +3827,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, DBUG_RETURN(0); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_PRINT("error", ("errpos: %d", errpos)); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); @@ -3885,7 +3907,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, ("org_empty_size: %u head_length: %u length_on_page: %u", org_empty_size, (uint) cur_row->head_length, length_on_head_page)); - my_errno= HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err; } @@ -3918,8 +3940,11 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, goto err; block= blocks->block; block->empty_space= row_pos.empty_space; - block->org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap, - org_empty_size); + block->org_bitmap_value= + _ma_free_size_to_head_pattern(&share->bitmap, + (enough_free_entries_on_page(share, buff) ? 
+ org_empty_size : 0)); + DBUG_ASSERT(block->org_bitmap_value == _ma_bitmap_get_page_bits(info, &info->s->bitmap, page)); block->used|= BLOCKUSED_USE_ORG_BITMAP; @@ -3943,6 +3968,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, DBUG_RETURN(0); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); @@ -4100,11 +4126,11 @@ static my_bool delete_head_or_tail(MARIA_HA *info, { MARIA_SHARE *share= info->s; uint empty_space; - uint block_size= share->block_size; + int res; + my_bool page_is_empty; uchar *buff; LSN lsn; MARIA_PINNED_PAGE page_link; - int res; enum pagecache_page_lock lock_at_write, lock_at_unpin; DBUG_ENTER("delete_head_or_tail"); DBUG_PRINT("enter", ("id: %lu (%lu:%u)", @@ -4134,13 +4160,14 @@ static my_bool delete_head_or_tail(MARIA_HA *info, lock_at_unpin= PAGECACHE_LOCK_READ_UNLOCK; } - res= delete_dir_entry(buff, block_size, record_number, &empty_space); + res= delete_dir_entry(buff, share->block_size, record_number, &empty_space); if (res < 0) DBUG_RETURN(1); if (res == 0) /* after our deletion, page is still not empty */ { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + page_is_empty= 0; if (share->now_transactional) { /* Log REDO data */ @@ -4161,6 +4188,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, } else /* page is now empty */ { + page_is_empty= 1; if (share->now_transactional) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE]; @@ -4175,6 +4203,13 @@ static my_bool delete_head_or_tail(MARIA_HA *info, log_data, NULL)) DBUG_RETURN(1); } + /* + Mark that this page must be written to disk by page cache, even + if we could call pagecache_delete() on it. + This is needed to ensure that repair finds the empty page on disk + and not old data. 
+ */ + pagecache_set_write_on_delete_by_link(page_link.link); DBUG_ASSERT(empty_space >= share->bitmap.sizes[0]); } @@ -4192,8 +4227,8 @@ static my_bool delete_head_or_tail(MARIA_HA *info, If there is not enough space for all possible tails, mark the page full */ - if (!head && !enough_free_entries(buff, share->block_size, - 1 + share->base.blobs)) + if (!head && !page_is_empty && !enough_free_entries(buff, share->block_size, + 1 + share->base.blobs)) empty_space= 0; DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); @@ -4315,6 +4350,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) DBUG_RETURN(0); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); DBUG_RETURN(1); @@ -4515,7 +4551,8 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, crashed: - my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_ASSERT(!maria_assert_if_crashed_table); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); DBUG_PRINT("error", ("wrong extent information")); DBUG_RETURN(0); } @@ -4660,7 +4697,12 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, { cur_row->trid= transid_korr(data+1); if (!info->trn) - DBUG_RETURN(my_errno= HA_ERR_WRONG_IN_RECORD); /* File crashed */ + { + /* File crashed */ + DBUG_ASSERT(!maria_assert_if_crashed_table); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); + } if (!trnman_can_read_from(info->trn, cur_row->trid)) DBUG_RETURN(my_errno= HA_ERR_ROW_NOT_VISIBLE); } @@ -4928,7 +4970,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, goto err; } #ifdef EXTRA_DEBUG - if (share->calc_checksum) + if (share->calc_checksum && !info->in_check_table) { /* Esnure that row checksum is correct */ DBUG_ASSERT(((share->calc_checksum)(info, record) & 255) == @@ -4939,9 +4981,11 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, DBUG_RETURN(0); 
err: + DBUG_ASSERT(!maria_assert_if_crashed_table); /* Something was wrong with data on record */ DBUG_PRINT("error", ("Found record with wrong data")); - DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } @@ -5077,6 +5121,7 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record, DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE); if (!(data= get_record_position(buff, block_size, offset, &end_of_data))) { + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_PRINT("error", ("Wrong directory entry in data block")); my_errno= HA_ERR_RECORD_DELETED; /* File crashed */ DBUG_RETURN(HA_ERR_RECORD_DELETED); @@ -5154,7 +5199,7 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info) (uchar *) my_malloc(share->block_size * 2, MYF(MY_WME)))))) DBUG_RETURN(1); info->scan.page_buff= info->scan.bitmap_buff + share->block_size; - info->scan.bitmap_end= info->scan.bitmap_buff + share->bitmap.total_size; + info->scan.bitmap_end= info->scan.bitmap_buff + share->bitmap.max_total_size; /* Set scan variables to get _ma_scan_block() to start with reading bitmap */ info->scan.number_of_rows= 0; @@ -5307,7 +5352,7 @@ restart_record_read: #ifdef SANITY_CHECKS if (info->scan.dir < info->scan.dir_end) { - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); goto err; } #endif @@ -5391,7 +5436,8 @@ restart_bitmap_scan: (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0) { DBUG_PRINT("error", ("Wrong page header")); - DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } DBUG_PRINT("info", ("Page %lu has %u rows", (ulong) page, info->scan.number_of_rows)); @@ -5418,7 +5464,7 @@ restart_bitmap_scan: /* Read next bitmap */ info->scan.bitmap_page+= share->bitmap.pages_covered; filepos= (my_off_t) info->scan.bitmap_page * block_size; - if (unlikely(filepos >= 
share->state.state.data_file_length)) + if (unlikely(info->scan.bitmap_page >= info->scan.max_page)) { DBUG_PRINT("info", ("Found end of file")); DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); @@ -5436,8 +5482,10 @@ restart_bitmap_scan: goto restart_bitmap_scan; err: + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_PRINT("error", ("Wrong data on page")); - DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } @@ -6319,6 +6367,12 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, empty_space-= (uint) data_length; int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + /* Fix bitmap */ + if (!enough_free_entries_on_page(share, buff)) + empty_space= 0; /* Page is full */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + goto err; + /* If page was not read before, write it but keep it pinned. We don't update its LSN When we have processed all REDOs for this page @@ -6336,12 +6390,6 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, LSN_IMPOSSIBLE)) result= my_errno; - /* Fix bitmap */ - if (!enough_free_entries_on_page(share, buff)) - empty_space= 0; /* Page is full */ - if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) - goto err; - page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; page_link.changed= 1; push_dynamic(&info->pinned_pages, (void*) &page_link); @@ -6355,7 +6403,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, DBUG_RETURN(result); crashed_file: - my_errno= HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); err: error= my_errno; if (unlock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED) @@ -6364,7 +6412,7 @@ err: PAGECACHE_UNPIN, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE); _ma_mark_file_crashed(share); - DBUG_ASSERT(0); /* catch recovery errors early */ + DBUG_ASSERT(!maria_assert_if_crashed_table); /* catch recovery error early */ 
DBUG_RETURN((my_errno= error)); } @@ -6443,7 +6491,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0) { - my_errno= HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err; } @@ -6467,7 +6515,7 @@ err: PAGECACHE_UNPIN, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE); _ma_mark_file_crashed(share); - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_RETURN((my_errno= error)); } @@ -6479,7 +6527,13 @@ err: @param info Maria handler @param header Header (without FILEID) - @note It marks the pages free in the bitmap + Mark the pages free in the bitmap. + + We have to check against _ma_redo_not_needed_for_page() + to guard against the case where we first clear a block and after + that insert new data into the blocks. If we would unconditionally + clear the bitmap here, future changes would be ignored for the page + if it's not in the dirty list (ie, it would be flushed). @return Operation status @retval 0 OK @@ -6488,19 +6542,25 @@ err: uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn __attribute__((unused)), + LSN redo_lsn, const uchar *header) { MARIA_SHARE *share= info->s; uint ranges; + uint16 sid; DBUG_ENTER("_ma_apply_redo_free_blocks"); share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED | STATE_NOT_MOVABLE); + sid= fileid_korr(header); + header+= FILEID_STORE_SIZE; ranges= pagerange_korr(header); header+= PAGERANGE_STORE_SIZE; DBUG_ASSERT(ranges > 0); + /** @todo leave bitmap lock to the bitmap code... */ + mysql_mutex_lock(&share->bitmap.bitmap_lock); while (ranges--) { my_bool res; @@ -6517,18 +6577,22 @@ uint _ma_apply_redo_free_blocks(MARIA_HA *info, DBUG_PRINT("info", ("page: %lu pages: %u", (long) page, page_range)); - /** @todo leave bitmap lock to the bitmap code... 
*/ - mysql_mutex_lock(&share->bitmap.bitmap_lock); - res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page, - page_range); - mysql_mutex_unlock(&share->bitmap.bitmap_lock); - if (res) + for ( ; page_range-- ; start_page++) { - _ma_mark_file_crashed(share); - DBUG_ASSERT(0); - DBUG_RETURN(res); + if (_ma_redo_not_needed_for_page(sid, redo_lsn, start_page, FALSE)) + continue; + res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page, + 1); + if (res) + { + mysql_mutex_unlock(&share->bitmap.bitmap_lock); + _ma_mark_file_crashed(share); + DBUG_ASSERT(!maria_assert_if_crashed_table); + DBUG_RETURN(res); + } } } + mysql_mutex_unlock(&share->bitmap.bitmap_lock); DBUG_RETURN(0); } @@ -6609,7 +6673,7 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn, err: _ma_mark_file_crashed(share); - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_RETURN(1); } @@ -6681,21 +6745,23 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, uint page_range; pgcache_page_no_t page, start_page; uchar *buff; + uint data_on_page= data_size; start_page= page= page_korr(header); header+= PAGE_STORE_SIZE; page_range= pagerange_korr(header); header+= PAGERANGE_STORE_SIZE; - for (i= page_range; i-- > 0 ; page++) + for (i= page_range; i-- > 0 ; page++, data+= data_on_page) { MARIA_PINNED_PAGE page_link; enum pagecache_page_lock unlock_method; enum pagecache_page_pin unpin_method; - uint length; set_if_smaller(first_page2, page); set_if_bigger(last_page2, page); + if (i == 0 && sub_ranges == 0) + data_on_page= data_size - empty_space; /* data on last page */ if (_ma_redo_not_needed_for_page(sid, redo_lsn, page, FALSE)) continue; @@ -6758,7 +6824,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE); - continue; + goto fix_bitmap; } DBUG_ASSERT((found_page_type == (uchar) BLOB_PAGE) || (found_page_type == (uchar) UNALLOCATED_PAGE)); @@ -6774,33 
+6840,32 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, lsn_store(buff, lsn); buff[PAGE_TYPE_OFFSET]= BLOB_PAGE; - length= data_size; - if (i == 0 && sub_ranges == 0) + if (data_on_page != data_size) { /* Last page may be only partly filled. We zero the rest, like write_full_pages() does. */ - length-= empty_space; bzero(buff + share->block_size - PAGE_SUFFIX_SIZE - empty_space, empty_space); } - memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, length); - data+= length; + memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, data_on_page); if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, PAGECACHE_PLAIN_PAGE, unlock_method, unpin_method, PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE)) goto err; - } + + fix_bitmap: /** @todo leave bitmap lock to the bitmap code... */ - mysql_mutex_lock(&share->bitmap.bitmap_lock); - res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, start_page, - page_range); - mysql_mutex_unlock(&share->bitmap.bitmap_lock); - if (res) - goto err; + mysql_mutex_lock(&share->bitmap.bitmap_lock); + res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, page, + 1); + mysql_mutex_unlock(&share->bitmap.bitmap_lock); + if (res) + goto err; + } } } *first_page= first_page2; @@ -6809,7 +6874,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, err: _ma_mark_file_crashed(share); - DBUG_ASSERT(0); + DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_RETURN(1); } @@ -6879,6 +6944,7 @@ end: DBUG_RETURN(res); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); res= 1; _ma_mark_file_crashed(share); goto end; @@ -7117,6 +7183,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, DBUG_RETURN(0); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); @@ -7292,6 +7359,7 @@ end: DBUG_RETURN(error); err: + DBUG_ASSERT(!maria_assert_if_crashed_table); error= 1; _ma_mark_file_crashed(share); goto end; diff --git a/storage/maria/ma_blockrec.h 
b/storage/maria/ma_blockrec.h index a5858880dd0..45f5613bb60 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -59,7 +59,6 @@ /* Minimum header size needed for a new row */ #define BASE_ROW_HEADER_SIZE FLAG_SIZE -#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE #define PAGE_TYPE_MASK 7 enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE }; @@ -78,6 +77,10 @@ enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_ #define ROW_FLAG_EXTENTS 128 #define ROW_FLAG_ALL (1+2+4+8+128) +/* Size for buffer to hold information about bitmap */ +#define MAX_BITMAP_INFO_LENGTH ((MARIA_MAX_KEY_BLOCK_LENGTH*8/3)*(61*11/60)+10) + + /******** Variables that affects how data pages are utilized ********/ /* Minium size of tail segment */ @@ -181,7 +184,10 @@ TRANSLOG_ADDRESS maria_page_get_lsn(uchar *page, pgcache_page_no_t page_no, uchar* data_ptr); /* ma_bitmap.c */ -my_bool _ma_bitmap_init(MARIA_SHARE *share, File file); +extern const char *bits_to_txt[]; + +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, + pgcache_page_no_t *last_page); my_bool _ma_bitmap_end(MARIA_SHARE *share); my_bool _ma_bitmap_flush(MARIA_SHARE *share); my_bool _ma_bitmap_flush_all(MARIA_SHARE *share); @@ -206,8 +212,7 @@ my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row, MARIA_BITMAP_BLOCKS *result_blocks); my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type, - pgcache_page_no_t page, - uint empty_space, uint *bitmap_pattern); + uint empty_space, uint bitmap_pattern); my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, enum en_page_type page_type, pgcache_page_no_t page, @@ -225,6 +230,10 @@ void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, pgcache_page_no_t page); #endif +void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap, + uchar *bitmap_data, + pgcache_page_no_t page, + char *out); uint 
_ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, uint page_type, @@ -235,7 +244,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, uint page_type, const uchar *header); -uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn, +uint _ma_apply_redo_free_blocks(MARIA_HA *info, LSN lsn, LSN rec_lsn, const uchar *header); uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn, const uchar *header); diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c index 36dfe7cbd54..829189baeed 100644 --- a/storage/maria/ma_cache.c +++ b/storage/maria/ma_cache.c @@ -35,8 +35,8 @@ #include "maria_def.h" -my_bool _ma_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, - size_t length, uint flag) +my_bool _ma_read_cache(MARIA_HA *handler, IO_CACHE *info, uchar *buff, + my_off_t pos, size_t length, uint flag) { size_t read_length,in_buff_length; my_off_t offset; @@ -98,7 +98,12 @@ my_bool _ma_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, ("Error %d reading next-multi-part block (Got %d bytes)", my_errno, (int) read_length)); if (!my_errno || my_errno == HA_ERR_FILE_TOO_SHORT) - my_errno= HA_ERR_WRONG_IN_RECORD; + { + if (!handler->in_check_table) + _ma_set_fatal_error(handler->s, HA_ERR_WRONG_IN_RECORD); + else + my_errno= HA_ERR_WRONG_IN_RECORD; + } DBUG_RETURN(1); } bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length - diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index c6cff8ecd68..e183e715a6e 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -100,6 +100,9 @@ static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param, static TrID max_trid_in_system(void); static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid); void retry_if_quick(MARIA_SORT_PARAM *param, int error); +static void print_bitmap_description(MARIA_SHARE *share, + pgcache_page_no_t page, + uchar 
*buff); /* Initialize check param with default values */ @@ -122,6 +125,7 @@ void maria_chk_init(HA_CHECK *param) param->max_record_length= LONGLONG_MAX; param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE; param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; + param->max_stage= 1; } @@ -231,14 +235,14 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, { if (test_flag & T_VERBOSE) puts(""); _ma_check_print_error(param,"Can't read delete-link at filepos: %s", - llstr(next_link,buff)); + llstr(next_link,buff)); DBUG_RETURN(1); } if (*buff != '\0') { if (test_flag & T_VERBOSE) puts(""); _ma_check_print_error(param,"Record at pos: %s is not remove-marked", - llstr(next_link,buff)); + llstr(next_link,buff)); goto wrong; } if (share->options & HA_OPTION_PACK_RECORD) @@ -247,7 +251,9 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, if (empty && prev_link != old_link) { if (test_flag & T_VERBOSE) puts(""); - _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2)); + _ma_check_print_error(param, + "Deleted block at %s doesn't point back at previous delete link", + llstr(next_link,buff2)); goto wrong; } old_link=next_link; @@ -266,23 +272,23 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, if (empty != share->state.state.empty) { _ma_check_print_warning(param, - "Found %s deleted space in delete link chain. Should be %s", - llstr(empty,buff2), - llstr(share->state.state.empty,buff)); + "Found %s deleted space in delete link chain. 
Should be %s", + llstr(empty,buff2), + llstr(share->state.state.empty,buff)); } if (next_link != HA_OFFSET_ERROR) { _ma_check_print_error(param, - "Found more than the expected %s deleted rows in delete link chain", - llstr(share->state.state.del, buff)); + "Found more than the expected %s deleted rows in delete link chain", + llstr(share->state.state.del, buff)); goto wrong; } if (i != 0) { _ma_check_print_error(param, - "Found %s deleted rows in delete link chain. Should be %s", - llstr(share->state.state.del - i, buff2), - llstr(share->state.state.del, buff)); + "Found %s deleted rows in delete link chain. Should be %s", + llstr(share->state.state.del - i, buff2), + llstr(share->state.state.del, buff)); goto wrong; } } @@ -402,26 +408,34 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE)); if ((skr=(my_off_t) share->state.state.key_file_length) != size) { - /* Don't give error if file generated by mariapack */ + /* Don't give error if file generated by maria_pack */ if (skr > size && maria_is_any_key_active(share->state.key_map)) { error=1; _ma_check_print_error(param, - "Size of indexfile is: %-8s Should be: %s", + "Size of indexfile is: %-8s Expected: %s", llstr(size,buff), llstr(skr,buff2)); + share->state.state.key_file_length= size; } else if (!(param->testflag & T_VERY_SILENT)) _ma_check_print_warning(param, - "Size of indexfile is: %-8s Should be: %s", + "Size of indexfile is: %-8s Expected: %s", llstr(size,buff), llstr(skr,buff2)); } - if (!(param->testflag & T_VERY_SILENT) && - ! 
(share->options & HA_OPTION_COMPRESS_RECORD) && - ulonglong2double(share->state.state.key_file_length) > - ulonglong2double(share->base.margin_key_file_length)*0.9) + if (size > share->base.max_key_file_length) + { + _ma_check_print_warning(param, + "Size of indexfile is: %-8s which is bigger than max indexfile size: %s", + ullstr(size,buff), + ullstr(share->base.max_key_file_length, buff2)); + } + else if (!(param->testflag & T_VERY_SILENT) && + ! (share->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(share->state.state.key_file_length) > + ulonglong2double(share->base.margin_key_file_length)*0.9) _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used", - llstr(share->state.state.key_file_length,buff), - llstr(share->base.max_key_file_length-1,buff)); + llstr(share->state.state.key_file_length,buff), + llstr(share->base.max_key_file_length,buff)); size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)); skr=(my_off_t) share->state.state.data_file_length; @@ -434,28 +448,34 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) #endif if (skr != size) { + share->state.state.data_file_length=size; /* Skip other errors */ if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN) { - share->state.state.data_file_length=size; /* Skip other errors */ error=1; - _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s", + _ma_check_print_error(param,"Size of datafile is: %-9s Expected: %s", llstr(size,buff), llstr(skr,buff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; } else { _ma_check_print_warning(param, - "Size of datafile is: %-9s Should be: %s", - llstr(size,buff), llstr(skr,buff2)); + "Size of datafile is: %-9s Expected: %s", + llstr(size,buff), llstr(skr,buff2)); } } - if (!(param->testflag & T_VERY_SILENT) && - !(share->options & HA_OPTION_COMPRESS_RECORD) && - ulonglong2double(share->state.state.data_file_length) > - (ulonglong2double(share->base.max_data_file_length)*0.9)) + if (size > 
share->base.max_data_file_length) + { + _ma_check_print_warning(param, + "Size of datafile is: %-8s which is bigger than max datafile size: %s", + ullstr(size,buff), + ullstr(share->base.max_data_file_length, buff2)); + } else if (!(param->testflag & T_VERY_SILENT) && + !(share->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(share->state.state.data_file_length) > + (ulonglong2double(share->base.max_data_file_length)*0.9)) _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used", - llstr(share->state.state.data_file_length,buff), - llstr(share->base.max_data_file_length-1,buff2)); + llstr(share->state.state.data_file_length,buff), + llstr(share->base.max_data_file_length,buff2)); DBUG_RETURN(error); } /* maria_chk_size */ @@ -511,6 +531,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) continue; } found_keys++; + _ma_report_progress(param, key, share->base.keys); param->record_checksum=init_checksum; @@ -878,6 +899,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, } info->last_key.keyinfo= tmp_key.keyinfo= keyinfo; + info->lastinx= ~0; /* Safety */ tmp_key.data= tmp_key_buff; for ( ;; ) { @@ -993,10 +1015,12 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* fall through */ } if ((share->data_file_type != BLOCK_RECORD && + share->data_file_type != NO_RECORD && record >= share->state.state.data_file_length) || (share->data_file_type == BLOCK_RECORD && ma_recordpos_to_page(record) * share->base.min_block_length >= - share->state.state.data_file_length)) + share->state.state.data_file_length) || + (share->data_file_type == NO_RECORD && record != 0)) { #ifndef DBUG_OFF char llbuff2[22], llbuff3[22]; @@ -1114,10 +1138,14 @@ static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend, param->tmp_record_checksum+= (ha_checksum) start_recpos; param->records++; - if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0) + if 
(param->records % WRITE_COUNT == 0) { - printf("%s\r", llstr(param->records, llbuff)); - fflush(stdout); + if (param->testflag & T_WRITE_LOOP) + { + printf("%s\r", llstr(param->records, llbuff)); + fflush(stdout); + } + _ma_report_progress(param, param->records, share->state.state.records); } /* Check if keys match the record */ @@ -1131,6 +1159,7 @@ static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend, { (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record, start_recpos, 0); + info->last_key.keyinfo= key.keyinfo; if (extend) { /* We don't need to lock the key tree here as we don't allow @@ -1242,7 +1271,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, block_info.next_filepos=pos; do { - if (_ma_read_cache(¶m->read_cache, block_info.header, + if (_ma_read_cache(info, ¶m->read_cache, block_info.header, (start_block=block_info.next_filepos), sizeof(block_info.header), (flag ? 0 : READING_NEXT) | READING_HEADER)) @@ -1260,7 +1289,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, llstr(start_block,llbuff)); DBUG_RETURN(1); } - b_type= _ma_get_block_info(&block_info,-1,start_block); + b_type= _ma_get_block_info(info, &block_info,-1,start_block); if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) { @@ -1356,7 +1385,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, got_error=1; break; } - if (_ma_read_cache(¶m->read_cache, to, block_info.filepos, + if (_ma_read_cache(info, ¶m->read_cache, to, block_info.filepos, (uint) block_info.data_len, flag == 1 ? 
READING_NEXT : 0)) { @@ -1459,7 +1488,7 @@ static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, if (_ma_killed_ptr(param)) DBUG_RETURN(-1); - if (_ma_read_cache(¶m->read_cache, block_info.header, pos, + if (_ma_read_cache(info, ¶m->read_cache, block_info.header, pos, share->pack.ref_length, READING_NEXT)) { _ma_check_print_error(param, @@ -1484,7 +1513,7 @@ static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, got_error=1; goto end; } - if (_ma_read_cache(¶m->read_cache, info->rec_buff, + if (_ma_read_cache(info, ¶m->read_cache, info->rec_buff, block_info.filepos, block_info.rec_len, READING_NEXT)) { _ma_check_print_error(param, @@ -1794,7 +1823,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, char llbuff[22], llbuff2[22]; uint block_size= share->block_size; ha_rows full_page_count, tail_count; - my_bool full_dir; + my_bool full_dir, now_transactional; uint offset_page, offset, free_count; LINT_INIT(full_dir); @@ -1805,6 +1834,10 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, my_errno); return 1; } + + now_transactional= info->s->now_transactional; + info->s->now_transactional= 0; /* Don't log changes */ + bitmap_buff= info->scan.bitmap_buff; page_buff= info->scan.page_buff; full_page_count= tail_count= 0; @@ -1817,13 +1850,15 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, pos+= block_size, page++) { uint row_count, real_row_count, empty_space, page_type, bitmap_pattern; + uint bitmap_for_page; LINT_INIT(row_count); LINT_INIT(empty_space); if (_ma_killed_ptr(param)) { _ma_scan_end_block_record(info); - return -1; + info->s->now_transactional= now_transactional; + return -1; /* Interrupted */ } if ((page % share->bitmap.pages_covered) == 0) { @@ -1842,6 +1877,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, } param->used+= block_size; param->link_used+= block_size; + if (param->verbose > 2) + 
print_bitmap_description(share, page, bitmap_buff); continue; } /* Skip pages marked as empty in bitmap */ @@ -1849,7 +1886,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, offset= offset_page & 7; data= bitmap_buff + offset_page / 8; bitmap_pattern= uint2korr(data); - if (!((bitmap_pattern >> offset) & 7)) + if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7))) { param->empty+= block_size; param->del_blocks++; @@ -1872,8 +1909,9 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE) { _ma_check_print_error(param, - "Page: %9s Found wrong page type %d", - llstr(page, llbuff), page_type); + "Page: %9s Found wrong page type %d. Bitmap: %d '%s'", + llstr(page, llbuff), page_type, + bitmap_for_page, bits_to_txt[bitmap_for_page]); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; continue; @@ -1920,20 +1958,17 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, param->used+= block_size; break; } - if (_ma_check_bitmap_data(info, page_type, page, + if (_ma_check_bitmap_data(info, page_type, full_dir ? 0 : empty_space, - &bitmap_pattern)) + bitmap_for_page)) { - if (bitmap_pattern == ~(uint) 0) - _ma_check_print_error(param, - "Page %9s: Wrong bitmap for data on page", - llstr(page, llbuff)); - else _ma_check_print_error(param, "Page %9s: Wrong data in bitmap. 
Page_type: " - "%d full: %d empty_space: %u Bitmap-bits: %d", + "%d full: %d empty_space: %u Bitmap-bits: %d " + "'%s'", llstr(page, llbuff), page_type, full_dir, - empty_space, bitmap_pattern); + empty_space, bitmap_for_page, + bits_to_txt[bitmap_for_page]); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; } @@ -1956,14 +1991,22 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, { /* Not at end of bitmap */ uint bitmap_pattern; + uint byte_offset; + offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3; offset= offset_page & 7; - data= bitmap_buff + offset_page / 8; + byte_offset= offset_page / 8; + data= bitmap_buff + byte_offset; bitmap_pattern= uint2korr(data); + if (byte_offset + 1 == share->bitmap.max_total_size) + { + /* On last byte of bitmap; Remove possible checksum */ + bitmap_pattern&= 0xff; + } if (((bitmap_pattern >> offset)) || - (data + 2 < bitmap_buff + share->bitmap.total_size && - _ma_check_if_zero(data+2, bitmap_buff + share->bitmap.total_size - - data - 2))) + (byte_offset + 2 < share->bitmap.max_total_size && + _ma_check_if_zero(data+2, share->bitmap.max_total_size - + byte_offset - 2))) { ulonglong bitmap_page; bitmap_page= page / share->bitmap.pages_covered; @@ -1991,10 +2034,12 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, llstr(param->tail_count, llbuff), llstr(tail_count, llbuff2)); + info->s->now_transactional= now_transactional; return param->error_printed != 0; err: _ma_scan_end_block_record(info); + info->s->now_transactional= now_transactional; return 1; } @@ -2034,6 +2079,8 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend) bzero((char*) param->tmp_key_crc, share->base.keys * sizeof(param->tmp_key_crc[0])); + info->in_check_table= 1; /* Don't assert on checksum errors */ + switch (share->data_file_type) { case BLOCK_RECORD: error= check_block_record(param, info, extend, record); @@ -2047,8 +2094,16 @@ int 
maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend) case COMPRESSED_RECORD: error= check_compressed_record(param, info, extend, record); break; + case NO_RECORD: + param->records= share->state.state.records; + param->record_checksum= 0; + extend= 1; /* No row checksums */ + /* no data, nothing to do */ + break; } /* switch */ + info->in_check_table= 0; + if (error) goto err; @@ -2065,23 +2120,23 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend) llstr(share->state.state.records,llbuff2)); error=1; } - else if (param->record_checksum && + if (param->record_checksum && param->record_checksum != param->tmp_record_checksum) { _ma_check_print_error(param, "Key pointers and record positions doesn't match"); error=1; } - else if (param->glob_crc != share->state.state.checksum && - (share->options & - (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))) + if (param->glob_crc != share->state.state.checksum && + (share->options & + (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))) { _ma_check_print_warning(param, "Record checksum is not the same as checksum " "stored in the index file"); error=1; } - else if (!extend) + if (!extend) { uint key; for (key=0 ; key < share->base.keys; key++) @@ -2178,12 +2233,17 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend) llstr(param->del_length, llbuff2)); printf("Empty space: %12s Linkdata: %10s\n", llstr(param->empty, llbuff),llstr(param->link_used, llbuff2)); - if (param->lost) - printf("Lost space: %12s", llstr(param->lost, llbuff)); - if (param->max_found_trid) + if (share->data_file_type == BLOCK_RECORD) { - printf("Max trans. id: %11s\n", - llstr(param->max_found_trid, llbuff)); + printf("Full pages: %12s Tail count: %12s\n", + llstr(param->full_page_count, llbuff), + llstr(param->tail_count, llbuff2)); + printf("Lost space: %12s\n", llstr(param->lost, llbuff)); + if (param->max_found_trid) + { + printf("Max trans. 
id: %11s\n", + llstr(param->max_found_trid, llbuff)); + } } } my_free(record); @@ -2278,7 +2338,14 @@ static int initialize_variables_for_repair(HA_CHECK *param, { MARIA_SHARE *share= info->s; - /* Ro allow us to restore state and check how state changed */ + if (share->data_file_type == NO_RECORD) + { + _ma_check_print_error(param, + "Can't repair tables with record type NO_DATA"); + return 1; + } + + /* Allow us to restore state and check how state changed */ memcpy(org_share, share, sizeof(*share)); /* Repair code relies on share->state.state so we have to update it here */ @@ -2315,7 +2382,8 @@ static int initialize_variables_for_repair(HA_CHECK *param, return 1; /* calculate max_records */ - sort_info->filelength= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)); + sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)); + param->max_progress= sort_info->filelength; if ((param->testflag & T_CREATE_MISSING_KEYS) || sort_info->org_data_file_type == COMPRESSED_RECORD) sort_info->max_records= share->state.state.records; @@ -2338,6 +2406,8 @@ static int initialize_variables_for_repair(HA_CHECK *param, maria_ignore_trids(info); /* Don't write transid's during repair */ maria_versioning(info, 0); + /* remember original number of rows */ + *info->state= info->s->state.state; return 0; } @@ -2536,11 +2606,12 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file= mysql_file_create(key_file_dfile, fn_format(param->temp_filename, - share->data_file_name.str, "", - DATA_TMP_EXT, 2+4), - 0,param->tmpfile_createflag, - MYF(0))) < 0) + if ((new_file= mysql_file_create(key_file_tmp, + fn_format(param->temp_filename, + share->data_file_name.str, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -2617,6 +2688,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA 
*info, maria_lock_memory(param); /* Everything is alloced */ + sort_param.sort_info->info->in_check_table= 1; /* Re-create all keys, which are set in key_map. */ while (!(error=sort_get_next_record(&sort_param))) { @@ -2745,7 +2817,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, new_file= -1; change_data_file_descriptor(info, -1); if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT, - DATA_TMP_EXT, + DATA_TMP_EXT, param->backup_time, (param->testflag & T_BACKUP_DATA ? MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) | sync_dir) || @@ -2785,6 +2857,7 @@ err: end_io_cache(&sort_info.new_info->rec_cache); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + sort_param.sort_info->info->in_check_table= 0; /* this below could fail, shouldn't we detect error? */ if (got_error) { @@ -2800,7 +2873,7 @@ err: if (new_file >= 0) { mysql_file_close(new_file,MYF(0)); - mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME)); + mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME)); } maria_mark_crashed_on_repair(info); } @@ -3061,7 +3134,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name) mysql_mutex_unlock(&share->intern_lock); mysql_file_close(new_file, MYF(MY_WME)); if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT, - INDEX_TMP_EXT, sync_dir) || + INDEX_TMP_EXT, 0, sync_dir) || _ma_open_keyfile(share)) goto err2; info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */ @@ -3094,7 +3167,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name) err: mysql_file_close(new_file, MYF(MY_WME)); err2: - mysql_file_delete(key_file_dfile, param->temp_filename,MYF(MY_WME)); + mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME)); DBUG_RETURN(-1); } /* maria_sort_index */ @@ -3135,7 +3208,8 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info, key.keyinfo= keyinfo; if 
(!(buff= (uchar*) my_alloca((uint) keyinfo->block_length + - keyinfo->maxlength))) + keyinfo->maxlength + + MARIA_INDEX_OVERHEAD_SIZE))) { _ma_check_print_error(param,"Not enough memory for key block"); DBUG_RETURN(-1); @@ -3234,6 +3308,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, uint block_size= share->block_size; my_bool zero_lsn= (share->base.born_transactional && !(param->testflag & T_ZEROFILL_KEEP_LSN)); + int error= 1; DBUG_ENTER("maria_zerofill_index"); if (!(param->testflag & T_SILENT)) @@ -3258,7 +3333,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, _ma_check_print_error(param, "Page %9s: Got error %d when reading index file", llstr(pos, llbuff), my_errno); - DBUG_RETURN(1); + goto end; } if (zero_lsn) bzero(buff, LSN_SIZE); @@ -3266,7 +3341,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, if (share->base.born_transactional) { uint keynr= _ma_get_keynr(share, buff); - if (keynr != MARIA_DELETE_KEY_NR) + if (keynr < share->base.keys) { MARIA_PAGE page; DBUG_ASSERT(keynr < share->base.keys); @@ -3278,7 +3353,7 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, "Page %9s: Got error %d when reading index " "file", llstr(pos, llbuff), my_errno); - DBUG_RETURN(1); + goto end; } } } @@ -3292,10 +3367,13 @@ static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info, PAGECACHE_UNPIN, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 1, FALSE); } + error= 0; /* ok */ + +end: if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_FORCE_WRITE)) DBUG_RETURN(1); - DBUG_RETURN(0); + DBUG_RETURN(error); } @@ -3458,7 +3536,7 @@ int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name) _ma_tmp_disable_logging_for_table(info, 0); if (!(error= (maria_zerofill_index(param, info, name) || maria_zerofill_data(param, info, name) || - _ma_set_uuid(info, 0)))) + _ma_set_uuid(info->s, 0)))) { /* Mark that we have done zerofill of data and index. 
If we zeroed pages' @@ -3494,20 +3572,15 @@ int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name) */ int maria_change_to_newfile(const char * filename, const char * old_ext, - const char * new_ext, myf MyFlags) + const char * new_ext, time_t backup_time, + myf MyFlags) { char old_filename[FN_REFLEN],new_filename[FN_REFLEN]; -#ifdef USE_RAID - if (raid_chunks) - return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4), - fn_format(new_filename,filename,"",new_ext,2+4), - raid_chunks, - MYF(MY_WME | MY_LINK_WARNING | MyFlags)); -#endif /* Get real path to filename */ (void) fn_format(old_filename,filename,"",old_ext,2+4+32); return my_redel(old_filename, fn_format(new_filename,old_filename,"",new_ext,2+4), + backup_time, MYF(MY_WME | MY_LINK_WARNING | MyFlags)); } /* maria_change_to_newfile */ @@ -3569,7 +3642,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, const char * name, my_bool rep_quick) { int got_error; - uint i; + uint i, keys_to_repair; ha_rows start_records; my_off_t new_header_length, org_header_length, del; File new_file; @@ -3610,11 +3683,12 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file=mysql_file_create(key_file_dfile, fn_format(param->temp_filename, - share->data_file_name.str, "", - DATA_TMP_EXT, 2+4), - 0,param->tmpfile_createflag, - MYF(0))) < 0) + if ((new_file=mysql_file_create(key_file_tmp, + fn_format(param->temp_filename, + share->data_file_name.str, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -3695,6 +3769,17 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, del=share->state.state.del; + /* Calculate number of keys to repair */ + keys_to_repair= 0; + for (sort_param.key=0 ; sort_param.key < share->base.keys ; + sort_param.key++) + { + if 
(maria_is_key_active(key_map, sort_param.key)) + keys_to_repair++; + } + /* For each key we scan and merge sort the keys */ + param->max_stage= keys_to_repair*2; + rec_per_key_part= param->new_rec_per_key_part; for (sort_param.key=0 ; sort_param.key < share->base.keys ; rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++) @@ -3815,6 +3900,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, /* Set for next loop */ sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records; + param->stage++; /* Next stage */ + param->progress= 0; + if (param->testflag & T_STATISTICS) maria_update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, @@ -3842,11 +3930,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (param->testflag & T_SAFE_REPAIR) { /* Don't repair if we loosed more than one row */ - if (share->state.state.records+1 < start_records) + if (sort_info.new_info->s->state.state.records+1 < start_records) { _ma_check_print_error(param, - "Rows lost; Aborting because safe repair was " - "requested"); + "Rows lost (Found %lu of %lu); Aborting " + "because safe repair was requested", + (ulong) share->state.state.records, + (ulong) start_records); share->state.state.records=start_records; goto err; } @@ -3877,7 +3967,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, } change_data_file_descriptor(info, -1); if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT, - DATA_TMP_EXT, + DATA_TMP_EXT, param->backup_time, (param->testflag & T_BACKUP_DATA ? 
MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) | sync_dir) || @@ -3893,6 +3983,10 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_info.org_data_file_type= share->data_file_type; sort_info.filelength= share->state.state.data_file_length; sort_param.fix_datafile=0; + + /* Offsets are now in proportion to the new file length */ + param->max_progress= sort_info.filelength; + } else share->state.state.data_file_length=sort_param.max_pos; @@ -3981,7 +4075,7 @@ err: if (new_file >= 0) { mysql_file_close(new_file, MYF(0)); - mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME)); + mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME)); } maria_mark_crashed_on_repair(info); } @@ -4166,12 +4260,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file= mysql_file_create(key_file_dfile, fn_format(param->temp_filename, - share->data_file_name.str, "", - DATA_TMP_EXT, - 2+4), - 0,param->tmpfile_createflag, - MYF(0))) < 0) + if ((new_file= mysql_file_create(key_file_tmp, + fn_format(param->temp_filename, + share->data_file_name.str, "", + DATA_TMP_EXT, + 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -4403,8 +4498,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (param->testflag & T_SAFE_REPAIR) { /* Don't repair if we loosed more than one row */ - if (share->state.state.records+1 < start_records) + if (sort_info.new_info->s->state.state.records+1 < start_records) { + _ma_check_print_error(param, + "Rows lost (Found %lu of %lu); Aborting " + "because safe repair was requested", + (ulong) share->state.state.records, + (ulong) start_records); share->state.state.records=start_records; goto err; } @@ -4500,7 +4600,7 @@ err: mysql_file_close(new_file,MYF(0)); info->dfile.file= new_file= -1; if 
(maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT, - DATA_TMP_EXT, + DATA_TMP_EXT, param->backup_time, MYF((param->testflag & T_BACKUP_DATA ? MY_REDEL_MAKE_BACKUP : 0) | sync_dir)) || @@ -4516,7 +4616,7 @@ err: if (new_file >= 0) { mysql_file_close(new_file,MYF(0)); - mysql_file_delete(key_file_dfile, param->temp_filename, MYF(MY_WME)); + mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME)); if (info->dfile.file == new_file) info->dfile.file= -1; } @@ -4685,6 +4785,11 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (_ma_killed_ptr(param)) DBUG_RETURN(1); + if (param->progress_counter++ >= WRITE_COUNT) + { + param->progress_counter= 0; + _ma_report_progress(param, param->progress, param->max_progress); + } switch (sort_info->org_data_file_type) { case BLOCK_RECORD: @@ -4725,6 +4830,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) flag= HA_ERR_ROW_NOT_VISIBLE; } } + param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)* + share->block_size); + share->page_type= save_page_type; if (!flag) { @@ -4757,7 +4865,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(-1); } /* Retry only if wrong record, not if disk error */ - if (flag != HA_ERR_WRONG_IN_RECORD) + if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC) { retry_if_quick(sort_param, flag); DBUG_RETURN(flag); @@ -4777,6 +4885,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(-1); } sort_param->start_recpos=sort_param->pos; + param->progress= sort_param->pos; if (!sort_param->fix_datafile) { sort_param->current_filepos= sort_param->pos; @@ -4804,6 +4913,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) LINT_INIT(to); pos=sort_param->pos; + param->progress= pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); parallel_flag= (sort_param->read_cache.file < 0) ? 
READING_NEXT : 0; for (;;) @@ -4832,7 +4942,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) _ma_check_print_info(param,"Block: %s used by record at %s", llstr(param->search_after_block,llbuff), llstr(sort_param->start_recpos,llbuff2)); - if (_ma_read_cache(&sort_param->read_cache, + if (_ma_read_cache(info, &sort_param->read_cache, block_info.header, pos, MARIA_BLOCK_INFO_HEADER_LENGTH, (! found_record ? READING_NEXT : 0) | @@ -4854,7 +4964,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) param->testflag|=T_RETRY_WITHOUT_QUICK; DBUG_RETURN(1); /* Something wrong with data */ } - b_type= _ma_get_block_info(&block_info,-1,pos); + b_type= _ma_get_block_info(info, &block_info,-1,pos); if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) || ((b_type & BLOCK_FIRST) && (block_info.rec_len < (uint) share->base.min_pack_length || @@ -5045,7 +5155,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } if (block_info.data_len && - _ma_read_cache(&sort_param->read_cache,to,block_info.filepos, + _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos, block_info.data_len, (found_record == 1 ? 
READING_NEXT : 0) | parallel_flag)) @@ -5113,9 +5223,10 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } case COMPRESSED_RECORD: + param->progress= sort_param->pos; for (searching=0 ;; searching=1, sort_param->pos++) { - if (_ma_read_cache(&sort_param->read_cache, block_info.header, + if (_ma_read_cache(info, &sort_param->read_cache, block_info.header, sort_param->pos, share->pack.ref_length,READING_NEXT)) DBUG_RETURN(-1); @@ -5147,7 +5258,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) llstr(sort_param->pos,llbuff)); continue; } - if (_ma_read_cache(&sort_param->read_cache, sort_param->rec_buff, + if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff, block_info.filepos, block_info.rec_len, READING_NEXT)) { @@ -5187,8 +5298,10 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } DBUG_RETURN(0); } + case NO_RECORD: + DBUG_RETURN(1); /* Impossible */ } - DBUG_RETURN(1); /* Impossible */ + DBUG_RETURN(1); /* Impossible */ } @@ -5231,7 +5344,10 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) if ((sort_param->current_filepos= (*share->write_record_init)(info, sort_param->record)) == HA_OFFSET_ERROR) + { + _ma_check_print_error(param, "%d when writing to datafile", my_errno); DBUG_RETURN(1); + } /* Pointer to end of file */ sort_param->filepos= share->state.state.data_file_length; break; @@ -5308,6 +5424,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) sort_param->filepos+=reclength+length; share->state.split++; break; + case NO_RECORD: + DBUG_RETURN(1); /* Impossible */ } } if (sort_param->master) @@ -5851,6 +5969,9 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) MARIA_CREATE_INFO create_info; DBUG_ENTER("maria_recreate_table"); + if ((!(param->testflag & T_SILENT))) + printf("Recreating table '%s'\n", param->isam_file_name); + error=1; /* Default error */ info= **org_info; status_info= (*org_info)->state[0]; @@ -5996,7 +6117,7 @@ int 
maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) (*org_info)->s->state.state.records= info.state->records; if (share.state.create_time) (*org_info)->s->state.create_time=share.state.create_time; -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique; #endif (*org_info)->s->state.state.checksum= info.state->checksum; @@ -6051,6 +6172,7 @@ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) { share->state.open_count=0; share->global_changed=0; + share->changed= 1; } if (update & UPDATE_STAT) { @@ -6078,7 +6200,6 @@ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET | MA_STATE_INFO_WRITE_FULL_INFO)) goto err; - share->changed=0; } { /* Force update of status */ int error; @@ -6447,6 +6568,9 @@ static void change_data_file_descriptor(MARIA_HA *info, File new_file) static void unuse_data_file_descriptor(MARIA_HA *info) { + (void) flush_pagecache_blocks(info->s->pagecache, + &info->s->bitmap.file, + FLUSH_IGNORE_CHANGED); info->dfile.file= info->s->bitmap.file.file= -1; _ma_bitmap_reset_cache(info->s); } @@ -6473,6 +6597,17 @@ static void copy_data_file_state(MARIA_STATE_INFO *to, } +/* Return 1 if block is full of zero's */ + +static my_bool zero_filled_block(uchar *tmp, uint length) +{ + while (length--) + if (*(tmp++) != 0) + return 0; + return 1; +} + + /* Read 'safely' next record while scanning table. @@ -6574,9 +6709,21 @@ read_next_page: { if (my_errno == HA_ERR_WRONG_CRC) { - _ma_check_print_info(sort_info->param, - "Wrong CRC on datapage at %s", - llstr(page, llbuff)); + /* + Don't give errors for zero filled blocks. These can + sometimes be found at end of a bitmap when we wrote a big + record last that was moved to the next bitmap. 
+ */ + if (!zero_filled_block(info->scan.page_buff, share->block_size) || + _ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0, + _ma_bitmap_get_page_bits(info, + &share->bitmap, + page))) + { + _ma_check_print_info(sort_info->param, + "Wrong CRC on datapage at %s", + llstr(page, llbuff)); + } continue; } DBUG_RETURN(my_errno); @@ -6802,3 +6949,17 @@ void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error) param->testflag|=T_RETRY_WITHOUT_QUICK; } } + +/* Print information about bitmap page */ + +static void print_bitmap_description(MARIA_SHARE *share, + pgcache_page_no_t page, + uchar *bitmap_data) +{ + char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME)); + if (!tmp) + return; + _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp); + printf("Bitmap page %lu\n%s", (ulong) page, tmp); + my_free(tmp); +} diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h index d692b2de94c..3ac8cdb5e38 100644 --- a/storage/maria/ma_check_standalone.h +++ b/storage/maria/ma_check_standalone.h @@ -45,6 +45,13 @@ int _ma_killed_ptr(HA_CHECK *param __attribute__((unused))) return 0; } + +void _ma_report_progress(HA_CHECK *param __attribute__((unused)), + ulonglong progress __attribute__((unused)), + ulonglong max_progress __attribute__((unused))) +{ +} + /* print warnings and errors */ /* VARARGS */ diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 6ced2976c29..71f03f4db16 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -130,6 +130,9 @@ int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait) /* from then on, we are sure to be and stay the only checkpointer */ result= really_execute_checkpoint(); + DBUG_EXECUTE_IF("maria_crash_after_checkpoint", + { DBUG_PRINT("maria_crash", ("now")); DBUG_ABORT(); }); + mysql_cond_broadcast(&COND_checkpoint); end: DBUG_RETURN(result); @@ -531,10 +534,12 @@ filter_flush_file_evenly(enum pagecache_page_type type, risk could be 
that while a checkpoint happens no LRD flushing happens. */ +static uint maria_checkpoint_min_activity= 2*1024*1024; + + pthread_handler_t ma_checkpoint_background(void *arg) { /** @brief At least this of log/page bytes written between checkpoints */ - const uint checkpoint_min_activity= 2*1024*1024; /* If the interval could be changed by the user while we are in this thread, it could be annoying: for example it could cause "case 2" to be executed @@ -574,6 +579,12 @@ pthread_handler_t ma_checkpoint_background(void *arg) switch (sleeps % interval) { case 0: + /* If checkpoints are disabled, wait 1 second and try again */ + if (maria_checkpoint_disabled) + { + sleep_time= 1; + break; + } /* With background flushing evenly distributed over the time between two checkpoints, we should have only little flushing to do @@ -586,12 +597,13 @@ pthread_handler_t ma_checkpoint_background(void *arg) would decrease the amount of read pages in recovery). In case of one short statement per minute (very low load), we don't want to checkpoint every minute, hence the positive - checkpoint_min_activity. + maria_checkpoint_min_activity. */ + if (((translog_get_horizon() - log_horizon_at_last_checkpoint) + (maria_pagecache->global_cache_write - pagecache_flushes_at_last_checkpoint) * - maria_pagecache->block_size) < checkpoint_min_activity) + maria_pagecache->block_size) < maria_checkpoint_min_activity) { /* don't take checkpoint, so don't know what to flush */ pages_to_flush_before_next_checkpoint= 0; @@ -1009,17 +1021,25 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) possible that Recovery does not start from before the REDO and thus the state is not recovered. A solution may be to set share->changed=1 under log mutex when writing log records. - But as anyway we have another problem below, this optimization would - be of little use. + + The current solution is to keep a copy the last saved state and + not write the state if it was same as last time. 
It's ok if + is_of_horizon would be different on disk if all other data is + the same. */ - /** @todo flush state only if changed since last checkpoint */ DBUG_ASSERT(share->last_version != 0); state_copy->state.is_of_horizon= share->state.is_of_horizon= - state_copies_horizon; - if (kfile.file >= 0) + share->checkpoint_state.is_of_horizon= state_copies_horizon; + if (kfile.file >= 0 && memcmp(&share->checkpoint_state, + &state_copy->state, + sizeof(state_copy->state))) + { sync_error|= _ma_state_info_write_sub(kfile.file, &state_copy->state, MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET); + memcpy(&share->checkpoint_state, + &state_copy->state, sizeof(state_copy->state)); + } /* We don't set share->changed=0 because it may interfere with a concurrent _ma_writeinfo() doing share->changed=1 (cancel its @@ -1028,6 +1048,14 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) */ } } +#ifdef EXTRA_DEBUG_BITMAP + else + { + DBUG_ASSERT(share->bitmap.changed == 0 && + share->bitmap.changed_not_flushed == 0); + } +#endif + /* _ma_bitmap_flush_all() may wait, so don't keep intern_lock as otherwise this would deadlock with allocate_and_write_block_record() diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 88d63252693..a29fe607d6e 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -28,7 +28,8 @@ int maria_close(register MARIA_HA *info) my_bool share_can_be_freed= FALSE; MARIA_SHARE *share= info->s; DBUG_ENTER("maria_close"); - DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", + DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u", + share->open_file_name.str, (long) info, (uint) share->reopen, (uint) share->tot_locks)); @@ -39,9 +40,6 @@ int maria_close(register MARIA_HA *info) if (info->lock_type == F_EXTRA_LCK) info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */ - if (share->reopen == 1 && share->kfile.file >= 0) - _ma_decrement_open_count(info); - if (info->lock_type != F_UNLCK) { if 
(maria_lock_database(info,F_UNLCK)) @@ -76,6 +74,11 @@ int maria_close(register MARIA_HA *info) if (share->kfile.file >= 0) { + my_bool save_global_changed= share->global_changed; + + /* Avoid _ma_mark_file_changed() when flushing pages */ + share->global_changed= 1; + if ((*share->once_end)(share)) error= my_errno; if (flush_pagecache_blocks(share->pagecache, &share->kfile, @@ -97,6 +100,16 @@ int maria_close(register MARIA_HA *info) if (((share->changed && share->base.born_transactional) || maria_is_crashed(info))) { + if (save_global_changed) + { + /* + Reset effect of _ma_mark_file_changed(). Better to do it + here than in _ma_decrement_open_count(), as + _ma_state_info_write() will write the open_count. + */ + save_global_changed= 0; + share->state.open_count--; + } /* State must be written to file as it was not done at table's unlocking. @@ -104,6 +117,19 @@ int maria_close(register MARIA_HA *info) if (_ma_state_info_write(share, MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)) error= my_errno; } + DBUG_ASSERT(maria_is_crashed(info) || !share->base.born_transactional || + share->state.open_count == 0 || + share->open_count_not_zero_on_open); + + /* Ensure that open_count is zero on close */ + share->global_changed= save_global_changed; + _ma_decrement_open_count(info, 0); + + /* Ensure that open_count really is zero */ + DBUG_ASSERT(maria_is_crashed(info) || share->temporary || + share->state.open_count == 0 || + share->open_count_not_zero_on_open); + /* File must be synced as it is going out of the maria_open_list and so becoming unknown to future Checkpoints. 
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 467c977da07..c89700be29a 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -204,7 +204,8 @@ int maria_create(const char *name, enum data_file_type datafile_type, pack_reclength++; not_block_record_extra_length++; max_field_lengths++; - packed++; + if (datafile_type != DYNAMIC_RECORD) + packed++; column->fill_length= 1; options|= HA_OPTION_NULL_FIELDS; /* Use ma_checksum() */ @@ -250,10 +251,16 @@ int maria_create(const char *name, enum data_file_type datafile_type, datafile_type= BLOCK_RECORD; } + if (datafile_type == NO_RECORD && uniques) + { + /* Can't do unique without data, revert to block records */ + datafile_type= BLOCK_RECORD; + } + if (datafile_type == DYNAMIC_RECORD) options|= HA_OPTION_PACK_RECORD; /* Must use packed records */ - if (datafile_type == STATIC_RECORD) + if (datafile_type == STATIC_RECORD || datafile_type == NO_RECORD) { /* We can't use checksum with static length rows */ flags&= ~HA_CREATE_CHECKSUM; @@ -319,7 +326,15 @@ int maria_create(const char *name, enum data_file_type datafile_type, (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength) ci->data_file_length= ~(ulonglong) 0; else - ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength; + { + ci->data_file_length= _ma_safe_mul(ci->max_rows, pack_reclength); + if (datafile_type == BLOCK_RECORD) + { + /* Assume that blocks are only half full (very pessimistic!) 
*/ + ci->data_file_length= _ma_safe_mul(ci->data_file_length, 2); + set_if_bigger(ci->data_file_length, maria_block_size*2); + } + } } else if (!ci->max_rows) { @@ -331,7 +346,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, ulonglong data_file_length= ci->data_file_length; if (!data_file_length) data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) * - 8)) -1) * maria_block_size); + 8))/2 -1) * maria_block_size); if (rows_per_page > 0) { set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE); @@ -353,11 +368,11 @@ int maria_create(const char *name, enum data_file_type datafile_type, { /* The + 1 is for record position withing page - The / 2 is because we need one bit for knowing if there is transid's + The * 2 is because we need one bit for knowing if there is transid's after the row pointer */ pointer= maria_get_pointer_length((ci->data_file_length / - (maria_block_size * 2)), 3) + 1; + maria_block_size) * 2, 3) + 1; set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE); if (!max_rows) @@ -366,7 +381,9 @@ int maria_create(const char *name, enum data_file_type datafile_type, } else { - if (datafile_type != STATIC_RECORD) + if (datafile_type == NO_RECORD) + pointer= 0; + else if (datafile_type != STATIC_RECORD) pointer= maria_get_pointer_length(ci->data_file_length, maria_data_pointer_size); else @@ -676,7 +693,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, share.state.dellink = HA_OFFSET_ERROR; share.state.first_bitmap_with_space= 0; -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING share.state.process= (ulong) getpid(); #endif share.state.version= (ulong) time((time_t*) 0); @@ -1392,7 +1409,13 @@ int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid, share->state.skip_redo_lsn= share->state.is_of_horizon= lsn; share->state.create_trid= create_trid; mi_int8store(trid_buff, create_trid); - if (update_create_rename_lsn) + + /* + Update create_rename_lsn if update was requested or 
if the old one had an + impossible value. + */ + if (update_create_rename_lsn || + (share->state.create_rename_lsn > lsn && lsn != LSN_IMPOSSIBLE)) { share->state.create_rename_lsn= lsn; if (share->id != 0) diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 22ffb05af38..fa2ee166b7c 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -63,7 +63,7 @@ int maria_delete(MARIA_HA *info,const uchar *record) if ((*share->compare_record)(info,record)) goto err; /* Error on read-check */ - if (_ma_mark_file_changed(info)) + if (_ma_mark_file_changed(share)) goto err; /* Ensure we don't change the autoincrement value */ @@ -134,17 +134,12 @@ err: save_errno= HA_ERR_INTERNAL_ERROR; /* Should never happen */ mi_sizestore(lastpos, info->cur_row.lastpos); - if (save_errno != HA_ERR_RECORD_CHANGED) - { - maria_print_error(share, HA_ERR_CRASHED); - maria_mark_crashed(info); /* mark table crashed */ - } - _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); + (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE); info->update|=HA_STATE_WRITTEN; /* Buffer changed */ - if (save_errno == HA_ERR_KEY_NOT_FOUND) + if (save_errno != HA_ERR_RECORD_CHANGED) { - maria_print_error(share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); + save_errno= HA_ERR_CRASHED; } DBUG_RETURN(my_errno= save_errno); } /* maria_delete */ @@ -209,7 +204,7 @@ my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key, if ((old_root=*root) == HA_OFFSET_ERROR) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(info->s, HA_ERR_CRASHED); DBUG_RETURN(1); } if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ @@ -344,7 +339,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, if (!(tmp_key_length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &kpos))) { - my_errno= HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(-1); } root= _ma_row_pos_from_key(&tmp_key); @@ -406,8 
+401,9 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, { if (!nod_flag) { + /* This should newer happend */ DBUG_PRINT("error",("Didn't find key")); - my_errno=HA_ERR_CRASHED; /* This should newer happend */ + _ma_set_fatal_error(share, HA_ERR_CRASHED); goto err; } save_flag=0; @@ -571,6 +567,7 @@ static int del(MARIA_HA *info, MARIA_KEY *key, endpos= leaf_page->buff + leaf_length; tmp_key.keyinfo= keyinfo; tmp_key.data= keybuff; + next_buff= 0; if (!(key_start= _ma_get_last_key(&tmp_key, leaf_page, endpos))) DBUG_RETURN(-1); @@ -597,9 +594,11 @@ static int del(MARIA_HA *info, MARIA_KEY *key, /* underflow writes "next_page" to disk */ ret_value= underflow(info, keyinfo, leaf_page, &next_page, endpos); - if (ret_value == 0 && leaf_page->size > - share->max_index_block_size) + if (ret_value < 0) + goto err; + if (leaf_page->size > share->max_index_block_size) { + DBUG_ASSERT(ret_value == 0); ret_value= (_ma_split_page(info, key, leaf_page, share->max_index_block_size, (uchar*) 0, 0, 0, @@ -632,6 +631,7 @@ static int del(MARIA_HA *info, MARIA_KEY *key, goto err; } my_afree(next_buff); + DBUG_ASSERT(leaf_page->size <= share->max_index_block_size); DBUG_RETURN(ret_value); } @@ -709,10 +709,14 @@ static int del(MARIA_HA *info, MARIA_KEY *key, KEY_OP_DEBUG_LOG_ADD_2)) goto err; + DBUG_ASSERT(leaf_page->size <= share->max_index_block_size); DBUG_RETURN(new_leaf_length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : (uint) keyinfo->underflow_block_length)); err: + if (next_buff) + my_afree(next_buff); + DBUG_RETURN(-1); } /* del */ @@ -731,9 +735,18 @@ err: leaf_page is saved to disk Caller must save anc_buff + For the algoritm to work, we have to ensure for packed keys that + key_length + (underflow_length + max_block_length + key_length) / 2 + <= block_length. 
+ From which follows that underflow_length <= block_length - key_length *3 + For not packed keys we have: + (underflow_length + max_block_length + key_length) / 2 <= block_length + From which follows that underflow_length < block_length - key_length + This is ensured by setting of underflow_block_length. + @return @retval 0 ok - @retval 1 ok, but anc_buff did underflow + @retval 1 ok, but anc_page did underflow @retval -1 error */ @@ -1153,7 +1166,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, _ma_kpointer(info,leaf_key.data + leaf_key.data_length + leaf_key.ref_length, leaf_page->pos); - /* Save key in anc_page */ + /* Save parting key found by _ma_find_half_pos() in anc_page */ DBUG_DUMP("anc_buff", anc_buff, new_anc_length); DBUG_DUMP_KEY("key_to_anc", &leaf_key); anc_end_pos= anc_buff + new_anc_length; @@ -1191,6 +1204,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, bmove(leaf_buff+p_length+t_length, half_pos, tmp_length); (*keyinfo->store_key)(keyinfo,leaf_buff+p_length, &key_inserted); new_leaf_length= tmp_length + t_length + p_length; + DBUG_ASSERT(new_leaf_length <= share->max_index_block_size); leaf_page->size= new_leaf_length; leaf_page->flag= page_flag; @@ -1232,7 +1246,6 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Log changes to next page This contains original data with some suffix data deleted - */ DBUG_ASSERT(new_buff_length <= buff_length); if (_ma_log_suffix(&next_page, buff_length, new_buff_length)) diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 3716e679bb1..b5bb9d3ddf5 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -52,8 +52,6 @@ int maria_delete_all_rows(MARIA_HA *info) if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); log_record= share->now_transactional && !share->temporary; - if (_ma_mark_file_changed(info)) - goto err; if (log_record) { @@ -75,14 +73,19 @@ int maria_delete_all_rows(MARIA_HA *info) If we fail in 
this function after this point, log and table will be inconsistent. */ + if (_ma_mark_file_changed(share)) + goto err; } else { + if (_ma_mark_file_changed(share)) + goto err; /* Other branch called function below when writing log record, in hook */ _ma_reset_status(info); } /* Remove old history as the table is now empty for everyone */ _ma_reset_state(info); + share->state.changed= 0; /* If we are using delayed keys or if the user has done changes to the tables @@ -178,6 +181,10 @@ void _ma_reset_status(MARIA_HA *info) state->state.data_file_length= 0; state->state.empty= state->state.key_empty= 0; state->state.checksum= 0; + share->state.open_count= 0; + share->global_changed= 0; + + share->changed= 1; /* We must write state */ *info->state= state->state; diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index f8b7eefb4ad..9e91638fa27 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -28,10 +28,6 @@ int maria_delete_table(const char *name) { - char from[FN_REFLEN]; -#ifdef USE_RAID - uint raid_type=0,raid_chunks=0; -#endif MARIA_HA *info; myf sync_dir; DBUG_ENTER("maria_delete_table"); @@ -53,17 +49,10 @@ int maria_delete_table(const char *name) */ if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) { -#ifdef USE_RAID - raid_type= 0; -#endif sync_dir= 0; } else { -#ifdef USE_RAID - raid_type= info->s->base.raid_type; - raid_chunks= info->s->base.raid_chunks; -#endif sync_dir= (info->s->now_transactional && !info->s->temporary && !maria_in_recovery) ? 
MY_SYNC_DIR : 0; @@ -93,6 +82,15 @@ int maria_delete_table(const char *name) DBUG_RETURN(1); } + DBUG_RETURN(maria_delete_table_files(name, sync_dir)); +} + + +int maria_delete_table_files(const char *name, myf sync_dir) +{ + char from[FN_REFLEN]; + DBUG_ENTER("maria_delete_table_files"); + fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); if (mysql_file_delete_with_symlink(key_file_kfile, from, MYF(MY_WME | sync_dir))) diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 60423507792..cc03d621a26 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -389,12 +389,12 @@ static int _ma_find_writepos(MARIA_HA *info, *filepos=info->s->state.dellink; block_info.second_read=0; info->rec_cache.seek_not_done=1; - if (!(_ma_get_block_info(&block_info, info->dfile.file, + if (!(_ma_get_block_info(info, &block_info, info->dfile.file, info->s->state.dellink) & BLOCK_DELETED)) { DBUG_PRINT("error",("Delete link crashed")); - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(-1); } info->s->state.dellink=block_info.next_filepos; @@ -450,7 +450,8 @@ static my_bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO tmp; tmp.second_read=0; /* Unlink block from the previous block */ - if (!(_ma_get_block_info(&tmp, info->dfile.file, block_info->prev_filepos) + if (!(_ma_get_block_info(info, &tmp, info->dfile.file, + block_info->prev_filepos) & BLOCK_DELETED)) DBUG_RETURN(1); /* Something is wrong */ mi_sizestore(tmp.header+4,block_info->next_filepos); @@ -460,7 +461,7 @@ static my_bool unlink_deleted_block(MARIA_HA *info, /* Unlink block from next block */ if (block_info->next_filepos != HA_OFFSET_ERROR) { - if (!(_ma_get_block_info(&tmp, info->dfile.file, + if (!(_ma_get_block_info(info, &tmp, info->dfile.file, block_info->next_filepos) & BLOCK_DELETED)) DBUG_RETURN(1); /* Something is wrong */ @@ -512,7 +513,7 @@ static my_bool update_backward_delete_link(MARIA_HA 
*info, if (delete_block != HA_OFFSET_ERROR) { block_info.second_read=0; - if (_ma_get_block_info(&block_info, info->dfile.file, delete_block) + if (_ma_get_block_info(info, &block_info, info->dfile.file, delete_block) & BLOCK_DELETED) { uchar buff[8]; @@ -522,7 +523,7 @@ static my_bool update_backward_delete_link(MARIA_HA *info, } else { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); /* Wrong delete link */ } } @@ -548,19 +549,21 @@ static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, do { /* Remove block at 'filepos' */ - if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos)) + if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file, + filepos)) & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR) || (length=(uint) (block_info.filepos-filepos) +block_info.block_len) < MARIA_MIN_BLOCK_LENGTH) { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); } /* Check if next block is a delete block */ del_block.second_read=0; remove_next_block=0; - if (_ma_get_block_info(&del_block, info->dfile.file, filepos + length) & + if (_ma_get_block_info(info, &del_block, info->dfile.file, + filepos + length) & BLOCK_DELETED && del_block.block_len+length < MARIA_DYN_MAX_BLOCK_LENGTH) { @@ -720,7 +723,7 @@ int _ma_write_part_record(MARIA_HA *info, if (next_block < info->state->data_file_length && info->s->state.dellink != HA_OFFSET_ERROR) { - if ((_ma_get_block_info(&del_block, info->dfile.file, next_block) + if ((_ma_get_block_info(info, &del_block, info->dfile.file, next_block) & BLOCK_DELETED) && res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH) { @@ -832,13 +835,14 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, if (filepos != info->s->state.dellink) { block_info.next_filepos= HA_OFFSET_ERROR; - if ((error= _ma_get_block_info(&block_info, 
info->dfile.file, filepos)) + if ((error= _ma_get_block_info(info, &block_info, info->dfile.file, + filepos)) & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) { DBUG_PRINT("error",("Got wrong block info")); if (!(error & BLOCK_FATAL_ERROR)) - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); goto err; } length=(ulong) (block_info.filepos-filepos) + block_info.block_len; @@ -873,7 +877,7 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, MARIA_BLOCK_INFO del_block; del_block.second_read=0; - if (_ma_get_block_info(&del_block, info->dfile.file, + if (_ma_get_block_info(info, &del_block, info->dfile.file, block_info.filepos + block_info.block_len) & BLOCK_DELETED) { @@ -1344,7 +1348,7 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from, DBUG_RETURN(found_length); err: - my_errno= HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx", (long) to, (long) to_end, (long) from, (long) from_end)); DBUG_DUMP("from", info->rec_buff, info->s->base.min_pack_length); @@ -1471,7 +1475,7 @@ int _ma_read_dynamic_record(MARIA_HA *info, uchar *buf, flush_io_cache(&info->rec_cache)) goto err; info->rec_cache.seek_not_done=1; - if ((b_type= _ma_get_block_info(&block_info, file, filepos)) & + if ((b_type= _ma_get_block_info(info, &block_info, file, filepos)) & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) { @@ -1543,7 +1547,7 @@ err: DBUG_RETURN(my_errno); panic: - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); goto err; } @@ -1622,7 +1626,7 @@ my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, block_info.next_filepos=filepos; while (reclength > 0) { - if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, + if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file, 
block_info.next_filepos)) & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) @@ -1641,7 +1645,7 @@ my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, } } else if (reclength < block_info.data_len) { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); goto err; } reclength-= block_info.data_len; @@ -1759,6 +1763,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, { if (filepos >= info->state->data_file_length) { +#ifdef MARIA_EXTERNAL_LOCKING if (!info_read) { /* Check if changed */ info_read=1; @@ -1771,15 +1776,19 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, my_errno= HA_ERR_END_OF_FILE; goto err; } +#else + my_errno= HA_ERR_END_OF_FILE; + goto err; +#endif } if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache, block_info.header, filepos, + if (_ma_read_cache(info, &info->rec_cache, block_info.header, filepos, sizeof(block_info.header), (!block_of_record && skip_deleted_blocks ? READING_NEXT : 0) | READING_HEADER)) goto panic; - b_type= _ma_get_block_info(&block_info,-1,filepos); + b_type= _ma_get_block_info(info, &block_info,-1,filepos); } else { @@ -1788,7 +1797,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, flush_io_cache(&info->rec_cache)) DBUG_RETURN(my_errno); info->rec_cache.seek_not_done=1; - b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos); + b_type= _ma_get_block_info(info, &block_info, info->dfile.file, filepos); } if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | @@ -1850,7 +1859,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, { if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache, to,filepos, + if (_ma_read_cache(info, &info->rec_cache, to,filepos, block_info.data_len, (!block_of_record && skip_deleted_blocks) ? 
READING_NEXT : 0)) @@ -1867,7 +1876,10 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, if (mysql_file_read(info->dfile.file, to, block_info.data_len, MYF(MY_NABP))) { if (my_errno == HA_ERR_FILE_TOO_SHORT) - my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */ + { + /* Unexpected end of file */ + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + } goto err; } } @@ -1894,7 +1906,8 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, DBUG_RETURN(my_errno); /* Wrong record */ panic: - my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */ + /* Something is fatal wrong */ + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); err: fast_ma_writeinfo(info); DBUG_RETURN(my_errno); @@ -1903,7 +1916,8 @@ err: /* Read and process header from a dynamic-record-file */ -uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) +uint _ma_get_block_info(MARIA_HA *handler, MARIA_BLOCK_INFO *info, File file, + my_off_t filepos) { uint return_val=0; uchar *header=info->header; @@ -1918,7 +1932,14 @@ uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) mysql_file_seek(file,filepos,MY_SEEK_SET,MYF(0)); if (mysql_file_read(file, header, sizeof(info->header),MYF(0)) != sizeof(info->header)) - goto err; + { + /* + This is either an error or just reading at end of file. + Don't give a fatal error for this case. 
+ */ + my_errno= HA_ERR_WRONG_IN_RECORD; + return BLOCK_ERROR; + } } DBUG_DUMP("header",header,MARIA_BLOCK_INFO_HEADER_LENGTH); if (info->second_read) @@ -2032,6 +2053,10 @@ uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) } err: - my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */ + if (!handler->in_check_table) + { + /* We may be scanning the table for new rows; Don't give an error */ + _ma_set_fatal_error(handler->s, HA_ERR_WRONG_IN_RECORD); + } return BLOCK_ERROR; } diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index d5c698e2087..c8b969363fa 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -50,7 +50,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, switch (function) { case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ - info->lastinx= 0; /* Use first index as def */ + info->lastinx= ~0; /* Detect index changes */ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed= 1; /* Next/prev gives first/last */ @@ -143,7 +143,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) && !share->state.header.uniques) if (!(init_io_cache(&info->rec_cache, info->dfile.file, cache_size, - WRITE_CACHE,share->state.state.data_file_length, + WRITE_CACHE, info->state->data_file_length, (pbool) (info->lock_type != F_UNLCK), MYF(share->write_flag & MY_WAIT_IF_FULL)))) { @@ -175,8 +175,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, { if ((error= flush_io_cache(&info->rec_cache))) { - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); /* Fatal error found */ + /* Fatal error found */ + _ma_set_fatal_error(share, HA_ERR_CRASHED); } } break; @@ -254,8 +254,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (!share->changed) { - share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; share->changed= 1; /* Update on close */ 
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; if (!share->global_changed) { share->global_changed= 1; @@ -291,14 +291,15 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (!error && share->changed) { mysql_mutex_lock(&share->intern_lock); - if (!(error= _ma_state_info_write(share, - MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET| - MA_STATE_INFO_WRITE_FULL_INFO))) - share->changed= 0; + error= _ma_state_info_write(share, + MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET| + MA_STATE_INFO_WRITE_FULL_INFO); mysql_mutex_unlock(&share->intern_lock); } mysql_mutex_lock(&THR_LOCK_maria); mysql_mutex_lock(&share->intern_lock); /* protect against Checkpoint */ + /* Safety against assert in checkpoint */ + share->bitmap.changed_not_flushed= 0; /* this makes the share not be re-used next time the table is opened */ share->last_version= 0L; /* Impossible version */ mysql_mutex_unlock(&share->intern_lock); @@ -309,13 +310,15 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, share->deleting= TRUE; share->global_changed= FALSE; /* force writing changed flag */ /* To force repair if reopened */ - _ma_mark_file_changed(info); + share->state.open_count= 1; + share->changed= 1; + _ma_mark_file_changed_now(share); /* Fall trough */ case HA_EXTRA_PREPARE_FOR_RENAME: { my_bool do_flush= test(function != HA_EXTRA_PREPARE_FOR_DROP); + my_bool save_global_changed; enum flush_type type; - mysql_mutex_lock(&THR_LOCK_maria); /* This share, to have last_version=0, needs to save all its data/index blocks to disk if this is not for a DROP TABLE. 
Otherwise they would be @@ -338,7 +341,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, */ mysql_mutex_lock(&share->intern_lock); if (share->kfile.file >= 0 && function != HA_EXTRA_PREPARE_FOR_DROP) - _ma_decrement_open_count(info); + _ma_decrement_open_count(info, 0); if (info->trn) { _ma_remove_table_from_trnman(share, info->trn); @@ -347,12 +350,17 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, } type= do_flush ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED; + save_global_changed= share->global_changed; + share->global_changed= 1; /* Don't increment open count */ + mysql_mutex_unlock(&share->intern_lock); if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, type, type)) { error=my_errno; share->changed= 1; } + mysql_mutex_lock(&share->intern_lock); + share->global_changed= save_global_changed; if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) { info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); @@ -370,25 +378,27 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, MA_STATE_INFO_WRITE_FULL_INFO)) || mysql_file_sync(share->kfile.file, MYF(0))) error= my_errno; - else - share->changed= 0; } else { /* be sure that state is not tried for write as file may be closed */ share->changed= 0; + share->global_changed= 0; + share->state.open_count= 0; } } if (share->data_file_type == BLOCK_RECORD && share->bitmap.file.file >= 0) { - if (do_flush && mysql_file_sync(share->bitmap.file.file, MYF(0))) + DBUG_ASSERT(share->bitmap.non_flushable == 0 && + share->bitmap.changed == 0); + if (do_flush && my_sync(share->bitmap.file.file, MYF(0))) error= my_errno; + share->bitmap.changed_not_flushed= 0; } - /* For protection against Checkpoint, we set under intern_lock: */ + /* last_version must be protected by intern_lock; See collect_tables() */ share->last_version= 0L; /* Impossible version */ mysql_mutex_unlock(&share->intern_lock); - mysql_mutex_unlock(&THR_LOCK_maria); break; } case 
HA_EXTRA_PREPARE_FOR_FORCED_CLOSE: @@ -405,9 +415,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (!share->temporary) error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_KEEP, FLUSH_KEEP); -#ifdef HAVE_PWRITE - _ma_decrement_open_count(info); -#endif + + _ma_decrement_open_count(info, 1); if (share->not_flushed) { share->not_flushed= 0; @@ -415,9 +424,9 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, error= my_errno; if (error) { + /* Fatal error found */ share->changed= 1; - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); /* Fatal error found */ + _ma_set_fatal_error(share, HA_ERR_CRASHED); } } break; @@ -553,7 +562,7 @@ int maria_reset(MARIA_HA *info) #endif info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); info->quick_mode= 0; - info->lastinx= 0; /* Use first index as def */ + info->lastinx= ~0; /* detect index changes */ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed= 1; info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | @@ -568,6 +577,12 @@ int _ma_sync_table_files(const MARIA_HA *info) mysql_file_sync(info->s->kfile.file, MYF(MY_WME))); } +uint _ma_file_callback_to_id(void *callback_data) +{ + MARIA_SHARE *share= (MARIA_SHARE*) callback_data; + return share ? 
share->id : 0; +} + /** @brief flushes the data and/or index file of a table @@ -598,6 +613,8 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, { int error= 0; MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_flush_table_files"); + /* flush data file first because it's more critical */ if (flush_data_or_index & MARIA_FLUSH_DATA) { @@ -616,6 +633,7 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, { mysql_mutex_lock(&share->bitmap.bitmap_lock); share->bitmap.changed= 0; + share->bitmap.changed_not_flushed= 0; mysql_mutex_unlock(&share->bitmap.bitmap_lock); } if (flush_pagecache_blocks(share->pagecache, &info->dfile, @@ -628,10 +646,15 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, flush_type_for_index)) error= 1; if (!error) - return 0; + DBUG_RETURN(0); - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); - return 1; + _ma_set_fatal_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(1); +} + + +my_bool ma_killed_standalone(MARIA_HA *info __attribute__((unused))) +{ + return 0; } diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c index 10df277510d..ce0dca9e75e 100644 --- a/storage/maria/ma_ft_boolean_search.c +++ b/storage/maria/ma_ft_boolean_search.c @@ -356,7 +356,8 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) { ftbw->key_root=info->s->state.key_root[ftb->keynr]; ftbw->keyinfo=info->s->keyinfo+ftb->keynr; - key.keyinfo= ftbw->keyinfo; + info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo; + info->lastinx= ~0; /* Safety */ key.data= ftbw->word; key.data_length= ftbw->len; key.ref_length= 0; @@ -380,7 +381,8 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) max_docid); } - key.keyinfo= ftbw->keyinfo; + info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo; + info->lastinx= ~0; /* Safety */ key.data= lastkey_buf; key.data_length= USE_WHOLE_KEY; key.ref_length= 0; diff --git a/storage/maria/ma_init.c 
b/storage/maria/ma_init.c index c3d52fc64e0..78ca7ed9bf8 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -103,7 +103,7 @@ void maria_end(void) trid, recovery_failures); } trnman_destroy(); - if (translog_status == TRANSLOG_OK) + if (translog_status == TRANSLOG_OK || translog_status == TRANSLOG_READONLY) translog_destroy(); end_pagecache(maria_log_pagecache, TRUE); end_pagecache(maria_pagecache, TRUE); diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index 0e1891fb249..f62ffcc49a0 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -644,8 +644,7 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) { /* Read only key */ if (_ma_put_key_in_record(info, (uint)info->lastinx, TRUE, buf)) { - maria_print_error(info->s, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(info->s, HA_ERR_CRASHED); return -1; } info->update|= HA_STATE_AKTIV; /* We should find a record */ @@ -669,25 +668,39 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) will look for column values there) RETURN - ICP_ERROR Error + ICP_ERROR Error ; my_errno set to HA_ERR_CRASHED ICP_NO_MATCH Index condition is not satisfied, continue scanning ICP_MATCH Index condition is satisfied - ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan. + ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan. + my_errno set to HA_ERR_END_OF_FILE + + info->cur_row.lastpos is set to HA_OFFSET_ERROR in case of ICP_ERROR or + ICP_OUT_OF_RANGE to indicate that we don't have any active row. 
*/ -int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record) +ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr, + uchar *record) { + ICP_RESULT res= ICP_MATCH; if (info->index_cond_func) { if (_ma_put_key_in_record(info, keynr, FALSE, record)) { + /* Impossible case; Can only happen if bug in code */ maria_print_error(info->s, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; - return -1; + info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ + my_errno= HA_ERR_CRASHED; + res= ICP_ERROR; + } + else if ((res= info->index_cond_func(info->index_cond_func_arg)) == + ICP_OUT_OF_RANGE) + { + /* We got beyond the end of scanned range */ + info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ + my_errno= HA_ERR_END_OF_FILE; } - return info->index_cond_func(info->index_cond_func_arg); } - return 1; + return res; } diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index bc85ad025ff..920f5a08013 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -66,7 +66,7 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) #ifdef EXTRA_DEBUG DBUG_ASSERT((!pinned_page->changed || undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional) || - (info->s->state.changed & STATE_CRASHED)); + (info->s->state.changed & STATE_CRASHED_FLAGS)); #endif pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, pinned_page->unlock, PAGECACHE_UNPIN, @@ -1027,7 +1027,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, insert_length, changed_length)); DBUG_ASSERT(insert_length <= changed_length && - page_length + changed_length <= max_page_size); + page_length + insert_length <= max_page_size); bmove_upp(buff + page_length + insert_length, buff + page_length, page_length - keypage_header); diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c index ef893d076bd..e3c57801410 100644 --- a/storage/maria/ma_keycache.c +++ b/storage/maria/ma_keycache.c @@ -79,8 +79,8 @@ int 
maria_assign_to_pagecache(MARIA_HA *info, if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE)) { error= my_errno; - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); /* Mark that table must be checked */ + /* Mark that table must be checked */ + _ma_set_fatal_error(share, error); } /* diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 8d2d3c0ad6e..9bab4cdfe0e 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -80,9 +80,8 @@ int maria_lock_database(MARIA_HA *info, int lock_type) { if (end_io_cache(&info->rec_cache)) { - error=my_errno; - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); + error= my_errno; + _ma_set_fatal_error(share, error); } } if (!count) @@ -104,7 +103,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) mysql_rwlock_unlock(&share->mmap_lock); } #endif -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING share->state.process= share->last_process=share->this_process; share->state.unique= info->last_unique= info->this_unique; share->state.update_count= info->last_loop= ++info->this_loop; @@ -129,10 +128,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) else share->not_flushed=1; if (error) - { - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); - } + _ma_set_fatal_error(share, error); } } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); @@ -307,7 +303,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) { /* Two threads can't be here */ olderror= my_errno; /* Remember last error */ -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING /* The following only makes sense if we want to be allow two different processes access the same table at the same time @@ -345,7 +341,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) int _ma_test_if_changed(register MARIA_HA *info) { -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING MARIA_SHARE *share= info->s; if 
(share->state.process != share->last_process || share->state.unique != info->last_unique || @@ -390,12 +386,39 @@ int _ma_test_if_changed(register MARIA_HA *info) #define _MA_ALREADY_MARKED_FILE_CHANGED \ ((share->state.changed & STATE_CHANGED) && share->global_changed) -int _ma_mark_file_changed(MARIA_HA *info) +int _ma_mark_file_changed(register MARIA_SHARE *share) +{ + if (!share->base.born_transactional) + { + if (!_MA_ALREADY_MARKED_FILE_CHANGED) + return _ma_mark_file_changed_now(share); + } + else + { + /* + For transactional tables, the table is marked changed when the first page + is written. Here we just mark the state to be updated so that caller + can do 'analyze table' and find that it has changed before any pages + are written. + */ + if (! test_all_bits(share->state.changed, + (STATE_CHANGED | STATE_NOT_ANALYZED | + STATE_NOT_OPTIMIZED_KEYS))) + { + mysql_mutex_lock(&share->intern_lock); + share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | + STATE_NOT_OPTIMIZED_KEYS); + mysql_mutex_unlock(&share->intern_lock); + } + } + return 0; +} + +int _ma_mark_file_changed_now(register MARIA_SHARE *share) { uchar buff[3]; - register MARIA_SHARE *share= info->s; int error= 1; - DBUG_ENTER("_ma_mark_file_changed"); + DBUG_ENTER("_ma_mark_file_changed_now"); if (_MA_ALREADY_MARKED_FILE_CHANGED) DBUG_RETURN(0); @@ -406,7 +429,7 @@ int _ma_mark_file_changed(MARIA_HA *info) STATE_NOT_OPTIMIZED_KEYS); if (!share->global_changed) { - share->global_changed=1; + share->changed= share->global_changed= 1; share->state.open_count++; } /* @@ -434,7 +457,7 @@ int _ma_mark_file_changed(MARIA_HA *info) !(share->state.changed & STATE_NOT_MOVABLE)) { /* Lock table to current installation */ - if (_ma_set_uuid(info, 0) || + if (_ma_set_uuid(share, 0) || (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS && _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, trnman_get_min_trid(), @@ -476,22 +499,31 @@ my_bool _ma_check_if_zero(uchar *pos, size_t length) call.
In these context the following code should be safe! */ -int _ma_decrement_open_count(MARIA_HA *info) +int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables) { uchar buff[2]; register MARIA_SHARE *share= info->s; int lock_error=0,write_error=0; + DBUG_ENTER("_ma_decrement_open_count"); + if (share->global_changed) { uint old_lock=info->lock_type; share->global_changed=0; - lock_error= my_disable_locking ? 0 : maria_lock_database(info, F_WRLCK); + lock_error= (my_disable_locking || ! lock_tables ? 0 : + maria_lock_database(info, F_WRLCK)); /* Its not fatal even if we couldn't get the lock ! */ if (share->state.open_count > 0) { share->state.open_count--; share->changed= 1; /* We have to update state */ - if (!share->temporary) + /* + For temporary tables that will just be deleted, we don't have + to decrement state. For transactional tables the state will be + updated in maria_close(). + */ + + if (!share->temporary && !share->now_transactional) { mi_int2store(buff,share->state.open_count); write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff), @@ -500,10 +532,10 @@ int _ma_decrement_open_count(MARIA_HA *info) MYF(MY_NABP)); } } - if (!lock_error && !my_disable_locking) + if (!lock_error && !my_disable_locking && lock_tables) lock_error=maria_lock_database(info,old_lock); } - return test(lock_error || write_error); + DBUG_RETURN(test(lock_error || write_error)); } @@ -528,17 +560,40 @@ void _ma_mark_file_crashed(MARIA_SHARE *share) DBUG_VOID_RETURN; } +/* + Handle a fatal error + + - Mark the table as crashed + - Print an error message, if we had not issued an error message before + that the table had been crashed. + - set my_errno to error + - If 'maria_assert_if_crashed_table is set, then assert. 
+*/ + +void _ma_set_fatal_error(MARIA_SHARE *share, int error) +{ + DBUG_PRINT("error", ("error: %d", error)); + maria_mark_crashed_share(share); + if (!(share->state.changed & STATE_CRASHED_PRINTED)) + { + share->state.changed|= STATE_CRASHED_PRINTED; + maria_print_error(share, error); + } + my_errno= error; + DBUG_ASSERT(!maria_assert_if_crashed_table); +} + /** @brief Set uuid of for a Maria file @fn _ma_set_uuid() - @param info Maria handler + @param share Maria share @param reset_uuid Instead of setting file to maria_uuid, set it to 0 to mark it as movable */ -my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid) +my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid) { uchar buff[MY_UUID_SIZE], *uuid; @@ -548,7 +603,7 @@ my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid) bzero(buff, sizeof(buff)); uuid= buff; } - return (my_bool) my_pwrite(info->s->kfile.file, uuid, MY_UUID_SIZE, - mi_uint2korr(info->s->state.header.base_pos), + return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE, + mi_uint2korr(share->state.header.base_pos), MYF(MY_NABP)); } diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 98818e9f4f1..18a6179d056 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007 MySQL AB & Sanja Belkin +/* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -458,7 +458,9 @@ void translog_lock_handler_assert_owner() @param num how many records should be filled */ -static void check_translog_description_table(int num) +static uint max_allowed_translog_type= 0; + +void check_translog_description_table(int num) { int i; DBUG_ENTER("check_translog_description_table"); @@ -467,6 +469,7 @@ static void check_translog_description_table(int num) /* last is reserved for extending the table */ DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1); DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED); + max_allowed_translog_type= num; for (i= 0; i <= num; i++) { @@ -973,7 +976,7 @@ static File open_logfile_by_number_no_cache(uint32 file_no) DBUG_ENTER("open_logfile_by_number_no_cache"); /* TODO: add O_DIRECT to open flags (when buffer is aligned) */ - /* TODO: use my_create() */ + /* TODO: use mysql_file_create() */ if ((file= mysql_file_open(key_file_translog, translog_filename_by_fileno(file_no, path), log_descriptor.open_flags, @@ -1080,7 +1083,7 @@ static my_bool translog_write_file_header() memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic)); page+= sizeof(maria_trans_file_magic); /* timestamp */ - timestamp= my_getsystime(); + timestamp= my_hrtime().val; int8store(page, timestamp); page+= 8; /* maria version */ @@ -1151,34 +1154,14 @@ static my_bool translog_max_lsn_to_header(File file, LSN lsn) /* - Information from transaction log file header -*/ - -typedef struct st_loghandler_file_info -{ - /* - LSN_IMPOSSIBLE for current file (not finished file). - Maximum LSN of the record which parts stored in the - file. 
- */ - LSN max_lsn; - ulonglong timestamp; /* Time stamp */ - ulong maria_version; /* Version of maria loghandler */ - ulong mysql_version; /* Version of mysql server */ - ulong server_id; /* Server ID */ - ulong page_size; /* Loghandler page size */ - ulong file_number; /* Number of the file (from the file header) */ -} LOGHANDLER_FILE_INFO; - -/* @brief Extract hander file information from loghandler file page @param desc header information descriptor to be filled with information @param page_buff buffer with the page content */ -static void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc, - uchar *page_buff) +void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc, + uchar *page_buff) { uchar *ptr; @@ -2560,24 +2543,13 @@ my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer) LSN_IN_PARTS(buffer->prev_sent_to_disk), LSN_IN_PARTS(buffer->prev_buffer_offset))); translog_buffer_lock_assert_owner(buffer); - /* - if prev_sent_to_disk == LSN_IMPOSSIBLE then - prev_buffer_offset should be LSN_IMPOSSIBLE - because it means that this buffer was never used - */ - DBUG_ASSERT((buffer->prev_sent_to_disk == LSN_IMPOSSIBLE && - buffer->prev_buffer_offset == LSN_IMPOSSIBLE) || - buffer->prev_sent_to_disk != LSN_IMPOSSIBLE); if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk) { do { mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex); if (buffer->file != file || buffer->offset != offset || buffer->ver != ver) - { - translog_buffer_unlock(buffer); DBUG_RETURN(1); /* some the thread flushed the buffer already */ - } } while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk); } DBUG_RETURN(0); @@ -2624,11 +2596,10 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) { /* some other flush in progress */ translog_wait_for_closing(buffer); + if (buffer->file != file || buffer->offset != offset || buffer->ver != ver) + DBUG_RETURN(0); /* some the thread flushed the buffer already */ } - if 
(buffer->file != file || buffer->offset != offset || buffer->ver != ver) - DBUG_RETURN(0); /* some the thread flushed the buffer already */ - if (buffer->overlay && translog_prev_buffer_flush_wait(buffer)) DBUG_RETURN(0); /* some the thread flushed the buffer already */ @@ -3525,7 +3496,7 @@ my_bool translog_walk_filenames(const char *directory, @brief Fills table of dependence length of page header from page flags */ -static void translog_fill_overhead_table() +void translog_fill_overhead_table() { uint i; for (i= 0; i < TRANSLOG_FLAGS_NUM; i++) @@ -3620,6 +3591,7 @@ my_bool translog_init_with_table(const char *directory, log_descriptor.flush_no= 0; log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE; + /* Normally in Aria this calls translog_table_init() */ (*init_table_func)(); compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >= TRANSLOG_BUFFERS_NO); @@ -6262,13 +6234,15 @@ my_bool translog_write_record(LSN *lsn, (uint) short_trid, (ulong) rec_len)); DBUG_ASSERT(translog_status == TRANSLOG_OK || translog_status == TRANSLOG_READONLY); + DBUG_ASSERT(type != 0); + DBUG_ASSERT((uint)type <= max_allowed_translog_type); if (unlikely(translog_status != TRANSLOG_OK)) { DBUG_PRINT("error", ("Transaction log is write protected")); DBUG_RETURN(1); } - if (tbl_info) + if (tbl_info && type != LOGREC_FILE_ID) { MARIA_SHARE *share= tbl_info->s; DBUG_ASSERT(share->now_transactional); @@ -6360,9 +6334,9 @@ my_bool translog_write_record(LSN *lsn, /* process this parts */ if (!(rc= (log_record_type_descriptor[type].prewrite_hook && - (*log_record_type_descriptor[type].prewrite_hook) (type, trn, - tbl_info, - hook_arg)))) + (*log_record_type_descriptor[type].prewrite_hook)(type, trn, + tbl_info, + hook_arg)))) { switch (log_record_type_descriptor[type].rclass) { case LOGRECTYPE_VARIABLE_LENGTH: @@ -6375,6 +6349,7 @@ my_bool translog_write_record(LSN *lsn, short_trid, &parts, trn, hook_arg); break; case LOGRECTYPE_NOT_ALLOWED: + DBUG_ASSERT(0); default:
DBUG_ASSERT(0); rc= 1; @@ -7748,7 +7723,7 @@ static my_bool translog_sync_files(uint32 min, uint32 max, flush_interval= group_commit_wait; if (flush_interval) - flush_start= my_micro_time(); + flush_start= microsecond_interval_timer(); for (fn= min; fn <= max; fn++) { TRANSLOG_FILE *file= get_logfile_by_number(fn); @@ -7796,6 +7771,7 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn, uint i; uint8 last_buffer_no, start_buffer_no; DBUG_ENTER("translog_flush_buffers"); + LINT_INIT(last_buffer_no); /* We will recheck information when will lock buffers one by @@ -7816,7 +7792,6 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn, (uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no, LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn))); - /* if LSN up to which we have to flush bigger then maximum LSN of previous buffer and at least one LSN was saved in the current buffer (last_lsn != @@ -7828,18 +7803,28 @@ void translog_flush_buffers(TRANSLOG_ADDRESS *lsn, struct st_translog_buffer *buffer= log_descriptor.bc.buffer; *lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */ DBUG_PRINT("info", ("LSN to flush fixed to last lsn: (%lu,0x%lx)", - LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn))); + LSN_IN_PARTS(*lsn))); last_buffer_no= log_descriptor.bc.buffer_no; log_descriptor.is_everything_flushed= 1; translog_force_current_buffer_to_finish(); translog_buffer_unlock(buffer); } - else + else if (log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE) { + /* fix lsn if it was horizon */ + *lsn= log_descriptor.bc.buffer->prev_last_lsn; + DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: (%lu,0x%lx)", + LSN_IN_PARTS(*lsn))); last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) % TRANSLOG_BUFFERS_NO); translog_unlock(); } + else if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE) + { + DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing")); + translog_unlock(); + DBUG_VOID_RETURN; + } /* flush 
buffers */ *sent_to_disk= translog_get_sent_to_disk(); @@ -8005,7 +7990,8 @@ retest: /* We do not check time here because mysql_mutex_lock rarely takes a lot of time so we can sacrifice a bit precision to performance - (taking into account that my_micro_time() might be expensive call). + (taking into account that microsecond_interval_timer() might be + expensive call). */ if (flush_interval == 0) break; /* flush pass is ended */ @@ -8014,7 +8000,8 @@ retest: if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE) { if (flush_interval == 0 || - (time_spent= (my_micro_time() - flush_start)) >= flush_interval) + (time_spent= (microsecond_interval_timer() - flush_start)) >= + flush_interval) { mysql_mutex_unlock(&log_descriptor.log_flush_lock); break; @@ -8116,6 +8103,7 @@ out: int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn) { + uint16 id; MARIA_SHARE *share= tbl_info->s; /* If you give an id to a non-BLOCK_RECORD table, you also need to release @@ -8131,6 +8119,7 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn) uchar log_data[FILEID_STORE_SIZE]; /* Inspired by set_short_trid() of trnman.c */ uint i= share->kfile.file % SHARE_ID_MAX + 1; + id= 0; do { my_atomic_rwlock_wrlock(&LOCK_id_to_share); @@ -8140,14 +8129,15 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn) if (id_to_share[i] == NULL && my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) { - share->id= (uint16)i; + id= (uint16) i; break; } } my_atomic_rwlock_wrunlock(&LOCK_id_to_share); i= 1; /* scan the whole array */ - } while (share->id == 0); - DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id)); + } while (id == 0); + DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, id)); + fileid_store(log_data, id); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); /* @@ -8169,11 +8159,18 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn) 
log_array[TRANSLOG_INTERNAL_PARTS + 1].length), sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data, NULL))) + log_array, NULL, NULL))) { mysql_mutex_unlock(&share->intern_lock); return 1; } + /* + Now when translog record is done, we can set share->id. + If we set it before, then translog_write_record may pick up the id + before it's written to the log. + */ + share->id= id; + share->state.logrec_file_id= lsn; } mysql_mutex_unlock(&share->intern_lock); return 0; @@ -8799,7 +8796,7 @@ ma_soft_sync_background( void *arg __attribute__((unused))) DBUG_ENTER("ma_soft_sync_background"); for(;;) { - ulonglong prev_loop= my_micro_time(); + ulonglong prev_loop= microsecond_interval_timer(); ulonglong time, sleep; uint32 min, max, sync_request; min= soft_sync_min; @@ -8811,7 +8808,7 @@ ma_soft_sync_background( void *arg __attribute__((unused))) sleep= group_commit_wait; if (sync_request) translog_sync_files(min, max, FALSE); - time= my_micro_time() - prev_loop; + time= microsecond_interval_timer() - prev_loop; if (time > sleep) sleep= 0; else @@ -8869,116 +8866,6 @@ void translog_soft_sync_end(void) } -#ifdef MARIA_DUMP_LOG -#include <my_getopt.h> -extern void translog_example_table_init(); -static const char *load_default_groups[]= { "aria_dump_log",0 }; -static void get_options(int *argc,char * * *argv); -#ifndef DBUG_OFF -#if defined(__WIN__) -const char *default_dbug_option= "d:t:i:O,\\aria_dump_log.trace"; -#else -const char *default_dbug_option= "d:t:i:o,/tmp/aria_dump_log.trace"; -#endif -#endif -static ulonglong opt_offset; -static ulong opt_pages; -static const char *opt_file= NULL; -static File handler= -1; -static my_bool opt_unit= 0; -static struct my_option my_long_options[] = -{ -#ifdef IMPLTMENTED - {"body", 'b', - "Print chunk body dump", - (uchar **) &opt_body, (uchar **) &opt_body, 0, - GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, -#endif -#ifndef DBUG_OFF - {"debug", '#', "Output debug log. 
Often the argument is 'd:t:o,filename'.", - 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, -#endif - {"file", 'f', "Path to file which will be read", - (uchar**) &opt_file, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"help", '?', "Display this help and exit.", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - { "offset", 'o', "Start reading log from this offset", - (uchar**) &opt_offset, (uchar**) &opt_offset, - 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, - { "pages", 'n', "Number of pages to read", - (uchar**) &opt_pages, (uchar**) &opt_pages, 0, - GET_ULONG, REQUIRED_ARG, (long) ~(ulong) 0, - (long) 1, (long) ~(ulong) 0, (long) 0, - (long) 1, 0}, - {"unit-test", 'U', - "Use unit test record table (for logs created by unittests", - (uchar **) &opt_unit, (uchar **) &opt_unit, 0, - GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"version", 'V', "Print version and exit.", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} -}; - - -static void print_version(void) -{ - printf("%s Ver 1.0 for %s on %s\n", - my_progname_short, SYSTEM_TYPE, MACHINE_TYPE); -} - - -static void usage(void) -{ - print_version(); - puts("Copyright (C) 2008 MySQL AB"); - puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); - puts("and you are welcome to modify and redistribute it under the GPL license\n"); - - puts("Dump content of aria log pages."); - printf("\nUsage: %s -f file OPTIONS\n", my_progname_short); - my_print_help(my_long_options); - print_defaults("my", load_default_groups); - my_print_variables(my_long_options); -} - - -static my_bool -get_one_option(int optid __attribute__((unused)), - const struct my_option *opt __attribute__((unused)), - char *argument __attribute__((unused))) -{ - switch (optid) { - case '?': - usage(); - exit(0); - case 'V': - print_version(); - exit(0); -#ifndef DBUG_OFF - case '#': - DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); - break; -#endif - } - return 0; -} - - -static void get_options(int *argc,char ***argv) -{ - int ho_error; - - if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) - exit(ho_error); - - if (opt_file == NULL) - { - usage(); - exit(1); - } -} - - /** @brief Dump information about file header page. */ @@ -8987,7 +8874,6 @@ static void dump_header_page(uchar *buff) { LOGHANDLER_FILE_INFO desc; char strbuff[21]; - translog_interpret_file_header(&desc, buff); printf(" This can be header page:\n" " Timestamp: %s\n" @@ -9164,7 +9050,7 @@ static uchar *dump_chunk(uchar *buffer, uchar *ptr) @brief Dump information about page with data. */ -static void dump_datapage(uchar *buffer) +static void dump_datapage(uchar *buffer, File handler) { uchar *ptr; ulong offset; @@ -9245,82 +9131,12 @@ static void dump_datapage(uchar *buffer) @brief Dump information about page. */ -static void dump_page(uchar *buffer) +void dump_page(uchar *buffer, File handler) { - printf("Page by offset %llu (0x%llx)\n", opt_offset, opt_offset); if (strncmp((char*)maria_trans_file_magic, (char*)buffer, sizeof(maria_trans_file_magic)) == 0) { dump_header_page(buffer); } - dump_datapage(buffer); -} - - -/** - @brief maria_dump_log main function. 
-*/ - -int main(int argc, char **argv) -{ - char **default_argv; - uchar buffer[TRANSLOG_PAGE_SIZE]; - MY_INIT(argv[0]); - - load_defaults("my", load_default_groups, &argc, &argv); - default_argv= argv; - get_options(&argc, &argv); - - if (opt_unit) - translog_example_table_init(); - else - translog_table_init(); - translog_fill_overhead_table(); - - maria_data_root= (char *)"."; - - if ((handler= my_open(opt_file, O_RDONLY, MYF(MY_WME))) < 0) - { - fprintf(stderr, "Can't open file: '%s' errno: %d\n", - opt_file, my_errno); - goto err; - } - if (mysql_file_seek(handler, opt_offset, SEEK_SET, MYF(MY_WME)) != - opt_offset) - { - fprintf(stderr, "Can't set position %lld file: '%s' errno: %d\n", - opt_offset, opt_file, my_errno); - goto err; - } - for (; - opt_pages; - opt_offset+= TRANSLOG_PAGE_SIZE, opt_pages--) - { - if (mysql_file_pread(handler, buffer, TRANSLOG_PAGE_SIZE, opt_offset, - MYF(MY_NABP))) - { - if (my_errno == HA_ERR_FILE_TOO_SHORT) - goto end; - fprintf(stderr, "Can't read page at position %lld file: '%s' " - "errno: %d\n", opt_offset, opt_file, my_errno); - goto err; - } - dump_page(buffer); - } - -end: - my_close(handler, MYF(0)); - free_defaults(default_argv); - exit(0); - return 0; /* No compiler warning */ - -err: - my_close(handler, MYF(0)); - fprintf(stderr, "%s: FAILED\n", my_progname_short); - free_defaults(default_argv); - exit(1); + dump_datapage(buffer, handler); } - -#include "ma_check_standalone.h" -#endif - diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 698a8ead7b6..5ac6d67413a 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -312,6 +312,9 @@ extern my_bool translog_init_with_table(const char *directory, my_bool readonly, void (*init_table_func)(), my_bool no_error); +#ifndef DBUG_OFF +void check_translog_description_table(int num); +#endif extern my_bool translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn, @@ -360,6 +363,7 @@ 
translog_assign_id_to_share_from_recovery(struct st_maria_share *share, extern my_bool translog_walk_filenames(const char *directory, my_bool (*callback)(const char *, const char *)); +extern void dump_page(uchar *buffer, File handler); extern my_bool translog_log_debug_info(TRN *trn, enum translog_debug_info_type type, uchar *info, size_t length); @@ -386,8 +390,31 @@ void translog_set_group_commit_interval(uint32 interval); ma_loghandler_for_recovery.h ? */ +/* + Information from transaction log file header +*/ + +typedef struct st_loghandler_file_info +{ + /* + LSN_IMPOSSIBLE for current file (not finished file). + Maximum LSN of the record which parts stored in the + file. + */ + LSN max_lsn; + ulonglong timestamp; /* Time stamp */ + ulong maria_version; /* Version of maria loghandler */ + ulong mysql_version; /* Version of mysql server */ + ulong server_id; /* Server ID */ + ulong page_size; /* Loghandler page size */ + ulong file_number; /* Number of the file (from the file header) */ +} LOGHANDLER_FILE_INFO; + #define SHARE_ID_MAX 65535 /* array's size */ +extern void translog_fill_overhead_table(); +extern void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc, + uchar *page_buff); extern LSN translog_first_lsn_in_log(); extern LSN translog_first_theoretical_lsn(); extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon); diff --git a/storage/maria/ma_norec.c b/storage/maria/ma_norec.c new file mode 100644 index 00000000000..6d4f37e34fd --- /dev/null +++ b/storage/maria/ma_norec.c @@ -0,0 +1,66 @@ +/* Copyright (C) 2010 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions to handle tables with no row data (only index) + This is useful when you just want to do key reads or want to use + the index to check against duplicates. +*/ + +#include "maria_def.h" + +my_bool _ma_write_no_record(MARIA_HA *info __attribute__((unused)), + const uchar *record __attribute__((unused))) +{ + return 0; +} + +my_bool _ma_update_no_record(MARIA_HA *info __attribute__((unused)), + MARIA_RECORD_POS pos __attribute__((unused)), + const uchar *oldrec __attribute__((unused)), + const uchar *record __attribute__((unused))) +{ + return HA_ERR_WRONG_COMMAND; +} + + +my_bool _ma_delete_no_record(MARIA_HA *info __attribute__((unused)), + const uchar *record __attribute__((unused))) +{ + return HA_ERR_WRONG_COMMAND; +} + + +int _ma_read_no_record(MARIA_HA *info __attribute__((unused)), + uchar *record __attribute__((unused)), + MARIA_RECORD_POS pos __attribute__((unused))) +{ + return HA_ERR_WRONG_COMMAND; +} + + +int _ma_read_rnd_no_record(MARIA_HA *info __attribute__((unused)), + uchar *buf __attribute__((unused)), + MARIA_RECORD_POS filepos __attribute__((unused)), + my_bool skip_deleted_blocks __attribute__((unused))) +{ + return HA_ERR_WRONG_COMMAND; +} + +my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share __attribute__ ((unused)), + my_off_t pos __attribute__ ((unused))) +{ + return 0; +} diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 0784a567b45..d545ed76592 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -13,7 +13,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* open a isam-database */ +/* open an Aria table */ #include "ma_fulltext.h" 
#include "ma_sp_defs.h" @@ -41,10 +41,10 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state); pos+=size;} -#define disk_pos_assert(pos, end_pos) \ +#define disk_pos_assert(share, pos, end_pos) \ if (pos > end_pos) \ { \ - my_errno=HA_ERR_CRASHED; \ + _ma_set_fatal_error(share, HA_ERR_CRASHED); \ goto err; \ } @@ -130,10 +130,12 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name, info.s=share; info.cur_row.lastpos= HA_OFFSET_ERROR; + /* Impossible first index to force initialization in _ma_check_index() */ + info.lastinx= ~0; info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND); info.opt_flag=READ_CHECK_USED; info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */ -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING if (share->data_file_type == COMPRESSED_RECORD) info.this_unique= share->state.unique; info.this_loop=0; /* Update counter */ @@ -201,6 +203,10 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name, *m_info=info; thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info); + + if (share->options & HA_OPTION_TMP_TABLE) + m_info->lock.type= TL_WRITE; + m_info->open_list.data=(void*) m_info; maria_open_list=list_add(maria_open_list,&m_info->open_list); @@ -385,7 +391,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) errpos= 3; if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP))) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); goto err; } len=mi_uint2korr(share->state.header.state_info_length); @@ -411,9 +417,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base); share->state.state_length=base_pos; + /* For newly opened tables we reset the error-has-been-printed flag */ + share->state.changed&= ~STATE_CRASHED_PRINTED; if (!(open_flags & HA_OPEN_FOR_REPAIR) && - ((share->state.changed & STATE_CRASHED) || + 
((share->state.changed & STATE_CRASHED_FLAGS) || ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && (my_disable_locking && share->state.open_count)))) { @@ -425,6 +433,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); goto err; } + if (share->state.open_count) + share->open_count_not_zero_on_open= 1; /* We can ignore testing uuid if STATE_NOT_MOVABLE is set, as in this @@ -454,7 +464,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) /* sanity check */ if (share->base.keystart > 65535 || share->base.rec_reflength > 8) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); goto err; } @@ -485,6 +495,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) (uint) share->base.block_size, (uint) maria_block_size)); my_errno=HA_ERR_UNSUPPORTED; + my_printf_error(my_errno, "Wrong block size %u; Expected %u", + MYF(0), + (uint) share->base.block_size, + (uint) maria_block_size); goto err; } @@ -496,7 +510,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) (ulonglong) 1 << (share->base.rec_reflength*8))-1); max_key_file_length= - _ma_safe_mul(maria_block_size, + _ma_safe_mul(share->base.block_size, ((ulonglong) 1 << (share->base.key_reflength*8))-1); #if SIZEOF_OFF_T == 4 set_if_smaller(max_data_file_length, INT_MAX32); @@ -557,20 +571,40 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->block_size= share->base.block_size; /* Convenience */ share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE; + share->keypage_header= ((share->base.born_transactional ? 
+ LSN_STORE_SIZE + TRANSID_SIZE : + 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + + KEYPAGE_USED_SIZE); { HA_KEYSEG *pos=share->keyparts; uint32 ftkey_nr= 1; for (i=0 ; i < keys ; i++) { - share->keyinfo[i].share= share; - disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]); - share->keyinfo[i].key_nr= i; - disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, + MARIA_KEYDEF *keyinfo= &share->keyinfo[i]; + keyinfo->share= share; + disk_pos=_ma_keydef_read(disk_pos, keyinfo); + keyinfo->key_nr= i; + + /* See ma_delete.cc::underflow() */ + if (!(keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY))) + keyinfo->underflow_block_length= keyinfo->block_length/3; + else + { + /* Packed key, ensure we don't get overflow in underflow() */ + keyinfo->underflow_block_length= + max((int) (share->max_index_block_size - keyinfo->maxlength * 3), + (int) (share->keypage_header + share->base.key_reflength)); + set_if_smaller(keyinfo->underflow_block_length, + keyinfo->block_length/3); + } + + disk_pos_assert(share, + disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE, end_pos); - if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) share->have_rtree= 1; - share->keyinfo[i].seg=pos; - for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) + keyinfo->seg=pos; + for (j=0 ; j < keyinfo->keysegs; j++,pos++) { disk_pos=_ma_keyseg_read(disk_pos, pos); if (pos->type == HA_KEYTYPE_TEXT || @@ -588,32 +622,32 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) else if (pos->type == HA_KEYTYPE_BINARY) pos->charset= &my_charset_bin; } - if (share->keyinfo[i].flag & HA_SPATIAL) + if (keyinfo->flag & HA_SPATIAL) { #ifdef HAVE_SPATIAL uint sp_segs=SPDIMS*2; - share->keyinfo[i].seg=pos-sp_segs; - share->keyinfo[i].keysegs--; + keyinfo->seg=pos-sp_segs; + keyinfo->keysegs--; versioning= 0; #else my_errno=HA_ERR_UNSUPPORTED; goto err; #endif } - else if (share->keyinfo[i].flag & HA_FULLTEXT) + else if (keyinfo->flag & HA_FULLTEXT) { 
versioning= 0; DBUG_ASSERT(fulltext_keys); { uint k; - share->keyinfo[i].seg=pos; + keyinfo->seg=pos; for (k=0; k < FT_SEGS; k++) { *pos= ft_keysegs[k]; pos[0].language= pos[-1].language; if (!(pos[0].charset= pos[-1].charset)) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); goto err; } pos++; @@ -621,8 +655,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } if (!share->ft2_keyinfo.seg) { - memcpy(&share->ft2_keyinfo, &share->keyinfo[i], - sizeof(MARIA_KEYDEF)); + memcpy(&share->ft2_keyinfo, keyinfo, sizeof(MARIA_KEYDEF)); share->ft2_keyinfo.keysegs=1; share->ft2_keyinfo.flag=0; share->ft2_keyinfo.keylength= @@ -632,10 +665,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->ft2_keyinfo.end=pos; setup_key_functions(& share->ft2_keyinfo); } - share->keyinfo[i].ftkey_nr= ftkey_nr++; + keyinfo->ftkey_nr= ftkey_nr++; } - setup_key_functions(share->keyinfo+i); - share->keyinfo[i].end=pos; + setup_key_functions(keyinfo); + keyinfo->end=pos; pos->type=HA_KEYTYPE_END; /* End */ pos->length=share->base.rec_reflength; pos->null_bit=0; @@ -645,7 +678,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) for (i=0 ; i < uniques ; i++) { disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]); - disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs * + disk_pos_assert(share, + disk_pos + share->uniqueinfo[i].keysegs * HA_KEYSEG_SIZE, end_pos); share->uniqueinfo[i].seg=pos; for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) @@ -678,10 +712,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.null_bytes + share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); - share->keypage_header= ((share->base.born_transactional ? 
- LSN_STORE_SIZE + TRANSID_SIZE : - 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + - KEYPAGE_USED_SIZE); share->kfile.file= kfile; if (open_flags & HA_OPEN_COPY) @@ -749,7 +779,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.extra_rec_buff_size, share->base.max_key_length); - disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, + disk_pos_assert(share, + disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, end_pos); for (i= j= 0 ; i < share->base.fields ; i++) { @@ -763,6 +794,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->blobs[j].offset= share->columndef[i].offset; j++; } + if (share->columndef[i].type == FIELD_VARCHAR) + share->has_varchar_fields= 1; + if (share->columndef[i].null_bit) + share->has_null_fields= 1; } share->columndef[i].type= FIELD_LAST; /* End marker */ disk_pos= _ma_column_nr_read(disk_pos, share->column_nr, @@ -783,7 +818,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->options|= HA_OPTION_READ_ONLY_DATA; share->is_log_table= FALSE; - if (open_flags & HA_OPEN_TMP_TABLE) + if (open_flags & HA_OPEN_TMP_TABLE || + (share->options & HA_OPTION_TMP_TABLE)) { share->options|= HA_OPTION_TMP_TABLE; share->temporary= share->delay_key_write= 1; @@ -794,7 +830,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) _ma_set_index_pagecache_callbacks(&share->kfile, share); share->this_process=(ulong) getpid(); -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING share->last_process= share->state.process; #endif share->base.key_parts=key_parts; @@ -805,7 +841,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.margin_key_file_length=(share->base.max_key_file_length - (keys ? 
MARIA_INDEX_BLOCK_MARGIN * share->block_size * keys : 0)); - share->block_size= share->base.block_size; my_free(disk_cache); _ma_setup_functions(share); if ((*share->once_init)(share, info.dfile.file)) @@ -909,6 +944,19 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->lock.start_trans= _ma_block_start_trans_no_versioning; } } +#ifdef SAFE_MUTEX + if (share->data_file_type == BLOCK_RECORD) + { + /* + We must have internal_lock before bitmap_lock because we call + _ma_flush_table_files() with internal_lock locked. + */ + mysql_mutex_lock(&share->intern_lock); + mysql_mutex_lock(&share->bitmap.bitmap_lock); + mysql_mutex_unlock(&share->bitmap.bitmap_lock); + mysql_mutex_unlock(&share->intern_lock); + } +#endif /* Memory mapping can only be requested after initializing intern_lock. */ @@ -933,6 +981,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->state.changed)); mysql_mutex_unlock(&THR_LOCK_maria); + + m_info->open_flags= open_flags; DBUG_RETURN(m_info); err: @@ -1074,6 +1124,20 @@ void _ma_setup_functions(register MARIA_SHARE *share) else share->calc_checksum= _ma_checksum; break; + case NO_RECORD: + share->read_record= _ma_read_no_record; + share->scan= _ma_read_rnd_no_record; + share->delete_record= _ma_delete_no_record; + share->update_record= _ma_update_no_record; + share->write_record= _ma_write_no_record; + share->recpos_to_keypos= _ma_no_keypos_to_recpos; + share->keypos_to_recpos= _ma_no_keypos_to_recpos; + + /* Abort if following functions are called */ + share->compare_record= 0; + share->compare_unique= 0; + share->calc_checksum= 0; + break; case BLOCK_RECORD: share->once_init= _ma_once_init_block_record; share->once_end= _ma_once_end_block_record; @@ -1244,7 +1308,8 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite) res= _ma_state_info_write_sub(share->kfile.file, &share->state, pWrite); if (pWrite & MA_STATE_INFO_WRITE_LOCK) mysql_mutex_unlock(&share->intern_lock); - share->changed= 0; + 
/* If open_count != 0 we have to write the state again at close */ + share->changed= share->state.open_count != 0; return res; } @@ -1419,7 +1484,7 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state) uint _ma_state_info_read_dsk(File file __attribute__((unused)), MARIA_STATE_INFO *state __attribute__((unused))) { -#ifdef EXTERNAL_LOCKING +#ifdef MARIA_EXTERNAL_LOCKING uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; /* trick to detect transactional tables */ @@ -1556,7 +1621,6 @@ uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef) keydef->keylength = mi_uint2korr(ptr); ptr+= 2; keydef->minlength = mi_uint2korr(ptr); ptr+= 2; keydef->maxlength = mi_uint2korr(ptr); ptr+= 2; - keydef->underflow_block_length=keydef->block_length/3; keydef->version = 0; /* Not saved */ keydef->parser = &ft_default_parser; keydef->ftkey_nr = 0; @@ -1874,7 +1938,7 @@ int maria_enable_indexes(MARIA_HA *info) DBUG_PRINT("error", ("data_file_length: %lu key_file_length: %lu", (ulong) share->state.state.data_file_length, (ulong) share->state.state.key_file_length)); - maria_print_error(info->s, HA_ERR_CRASHED); + _ma_set_fatal_error(share, HA_ERR_CRASHED); error= HA_ERR_CRASHED; } else diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index ed4ecd0258d..025787f4a10 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -193,7 +193,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, /* Only the first three bytes of magic number are independent of version. 
*/ if (memcmp(header, maria_pack_file_magic, 3)) { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err0; } share->pack.version= header[3]; /* fourth uchar of magic number */ @@ -330,7 +330,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, DBUG_RETURN(0); err3: - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); err2: my_free(share->decode_tables); err1: @@ -759,7 +759,7 @@ int _ma_read_pack_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf, info->rec_buff, block_info.rec_len)); panic: - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); err: DBUG_RETURN(my_errno); } @@ -794,7 +794,8 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) DBUG_RETURN(0); info->update&= ~HA_STATE_AKTIV; - DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } /* _ma_pack_rec_unpack */ @@ -1358,7 +1359,7 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, file= info->dfile.file; if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache, block_info.header, + if (_ma_read_cache(info, &info->rec_cache, block_info.header, filepos, share->pack.ref_length, skip_deleted_blocks ? READING_NEXT : 0)) goto err; @@ -1371,14 +1372,14 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, #ifndef DBUG_OFF if (block_info.rec_len > share->max_pack_length) { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err; } #endif if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache, info->rec_buff, + if (_ma_read_cache(info, &info->rec_cache, info->rec_buff, block_info.filepos, block_info.rec_len, skip_deleted_blocks ? 
READING_NEXT : 0)) goto err; @@ -1644,7 +1645,7 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info, #ifndef DBUG_OFF if (block_info.rec_len > info->s->max_pack_length) { - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err; } #endif diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index 6cca2fed559..ed62a80e4f7 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -127,8 +127,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, { DBUG_PRINT("error",("Got errno: %d from pagecache_read",my_errno)); info->last_keypage=HA_OFFSET_ERROR; - maria_print_error(share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(1); } info->last_keypage= pos; @@ -159,8 +158,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, _ma_get_keynr(share, tmp))); DBUG_DUMP("page", tmp, page_size); info->last_keypage = HA_OFFSET_ERROR; - maria_print_error(share, HA_ERR_CRASHED); - my_errno= HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(1); } } @@ -195,6 +193,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, nod_flag= _ma_test_if_nod(share, buff); DBUG_ASSERT(page->size == page_length); + DBUG_ASSERT(page->size <= share->max_index_block_size); DBUG_ASSERT(page->flag == _ma_get_keypage_flag(share, buff)); if (page->pos < share->base.keystart || @@ -552,8 +551,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { DBUG_PRINT("error",("Couldn't find last key: page_pos: 0x%lx", (long) page)); - maria_print_error(share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(1); } if (key_has_transid(page-1)) diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 02d98cf1e66..2618d6a5b50 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -1,4 +1,4 @@ -/* Copyright (C) 
2000-2008 MySQL AB +/* Copyright (C) 2000-2008 MySQL AB, 2008-2011 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,8 +62,8 @@ accessing it; to set this number equal to <N> add #define MAX_THREADS <N> - - to substitute calls of pthread_cond_wait for calls of - pthread_cond_timedwait (wait with timeout set up); + - to substitute calls of mysql_cond_wait for calls of + mysql_cond_timedwait (wait with timeout set up); this setting should be used only when you want to trap a deadlock situation, which theoretically should not happen; to set timeout equal to <T> seconds add @@ -97,9 +97,9 @@ #define PCBLOCK_INFO(B) \ DBUG_PRINT("info", \ - ("block: 0x%lx fd: %lu page: %lu s: %0x hshL: " \ - " 0x%lx req: %u/%u wrlocks: %u rdlocks %u " \ - "rdlocks_q: %u pins: %u status: %u type: %s", \ + ("block: 0x%lx fd: %lu page: %lu status: 0x%x " \ + "hshL: 0x%lx requests: %u/%u wrlocks: %u rdlocks: %u " \ + "rdlocks_q: %u pins: %u type: %s", \ (ulong)(B), \ (ulong)((B)->hash_link ? \ (B)->hash_link->file.file : \ @@ -107,14 +107,14 @@ (ulong)((B)->hash_link ? \ (B)->hash_link->pageno : \ 0), \ - (B)->status, \ + (uint) (B)->status, \ (ulong)(B)->hash_link, \ (uint) (B)->requests, \ (uint)((B)->hash_link ? 
\ (B)->hash_link->requests : \ 0), \ - block->wlocks, block->rlocks, block->rlocks_queue, \ - (uint)(B)->pins, (uint)(B)->status, \ + (B)->wlocks, (B)->rlocks, (B)->rlocks_queue, \ + (uint)(B)->pins, \ page_cache_page_type_str[(B)->type])) /* TODO: put it to my_static.c */ @@ -129,6 +129,8 @@ my_bool my_disable_flush_pagecache_blocks= 0; #define COND_FOR_WRLOCK 2 /* queue of write lock */ #define COND_SIZE 3 /* number of COND_* queues */ +typedef mysql_cond_t KEYCACHE_CONDVAR; + /* descriptor of the page in the page cache block buffer */ struct st_pagecache_page { @@ -151,11 +153,27 @@ struct st_pagecache_hash_link /* simple states of a block */ #define PCBLOCK_ERROR 1 /* an error occurred when performing disk i/o */ #define PCBLOCK_READ 2 /* the is page in the block buffer */ -#define PCBLOCK_IN_SWITCH 4 /* block is preparing to read new page */ -#define PCBLOCK_REASSIGNED 8 /* block does not accept requests for old page */ + +/* + A tread is reading the data to the page. + If the page contained old changed data, it will be written out with + this state set on the block. + The page is not yet ready to be used for reading. +*/ +#define PCBLOCK_IN_SWITCH 4 +/* + Block does not accept new requests for old page that would cause + the page to be pinned or written to. + (Reads that copies the block can still continue). + This state happens when another thread is waiting for readers to finish + to read data to the block (after the block, if it was changed, has been + flushed out to disk). 
+*/ +#define PCBLOCK_REASSIGNED 8 #define PCBLOCK_IN_FLUSH 16 /* block is in flush operation */ #define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */ +#define PCBLOCK_DEL_WRITE 128 /* should be written on delete */ /* page status, returned by find_block */ #define PAGE_READ 0 @@ -303,7 +321,7 @@ struct st_pagecache_block_link PAGECACHE_PIN_INFO *pin_list; PAGECACHE_LOCK_INFO *lock_list; #endif - mysql_cond_t *condvar; /* condition variable for 'no readers' event */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ uchar *buffer; /* buffer for the block page */ pthread_t write_locker; @@ -476,6 +494,7 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); +static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link); #ifndef DBUG_OFF static void test_key_cache(PAGECACHE *pagecache, const char *where, my_bool lock); @@ -513,6 +532,7 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...)); #endif /* defined(PAGECACHE_DEBUG_LOG) */ #if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#define KEYCACHE_PRINT(l, m) KEYCACHE_DBUG_PRINT(l,m) #define KEYCACHE_DBUG_PRINT(l, m) \ { if (pagecache_debug_log) \ fprintf(pagecache_debug_log, "%s: ", l); \ @@ -521,8 +541,9 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...)); #define KEYCACHE_DBUG_ASSERT(a) \ { if (! 
(a) && pagecache_debug_log) \ fclose(pagecache_debug_log); \ - assert(a); } + DBUG_ASSERT(a); } #else +#define KEYCACHE_PRINT(l, m) #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) #endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ @@ -540,6 +561,7 @@ static long pagecache_thread_id; #define KEYCACHE_THREAD_TRACE_END(l) \ KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) #else +#define KEYCACHE_PRINT(l,m) #define KEYCACHE_THREAD_TRACE_BEGIN(l) #define KEYCACHE_THREAD_TRACE_END(l) #define KEYCACHE_THREAD_TRACE(l) @@ -552,16 +574,16 @@ static long pagecache_thread_id; sizeof(PAGECACHE_HASH_LINK))) #if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex); +static int pagecache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex); #else -#define pagecache_pthread_cond_wait pthread_cond_wait +#define pagecache_pthread_cond_wait mysql_cond_wait #endif #if defined(PAGECACHE_DEBUG) -static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); -static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); -static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); +static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex); +static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex); +static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond); #define pagecache_pthread_mutex_lock(M) \ { DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \ ___pagecache_pthread_mutex_lock(M);} @@ -572,9 +594,9 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); { DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ ___pagecache_pthread_cond_signal(M);} #else -#define pagecache_pthread_mutex_lock pthread_mutex_lock -#define pagecache_pthread_mutex_unlock pthread_mutex_unlock -#define 
pagecache_pthread_cond_signal pthread_cond_signal +#define pagecache_pthread_mutex_lock mysql_mutex_lock +#define pagecache_pthread_mutex_unlock mysql_mutex_unlock +#define pagecache_pthread_cond_signal mysql_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ extern my_bool translog_flush(TRANSLOG_ADDRESS lsn); @@ -606,6 +628,26 @@ static my_bool pagecache_fwrite(PAGECACHE *pagecache, DBUG_ENTER("pagecache_fwrite"); DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); +#ifdef EXTRA_DEBUG_BITMAP + /* + This code is very good when debugging changes in bitmaps or dirty lists + The above define should be defined for all Aria files if you want to + debug either of the above issues. + */ + + if (pagecache->extra_debug) + { + char buff[80]; + uint len= my_sprintf(buff, + (buff, "fwrite: fd: %d id: %u page: %lu", + filedesc->file, + _ma_file_callback_to_id(filedesc->callback_data), + (ulong) pageno)); + (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, + (uchar*) buff, len); + } +#endif + /* Todo: Integrate this with write_callback so we have only one callback */ if ((*filedesc->flush_log_callback)(buffer, pageno, filedesc->callback_data)) DBUG_RETURN(1); @@ -723,9 +765,9 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem, if (mysql_mutex_init(key_PAGECACHE_cache_lock, &pagecache->cache_lock, MY_MUTEX_INIT_FAST) || my_hash_init(&pagecache->files_in_flush, &my_charset_bin, 32, - offsetof(struct st_file_in_flush, file), - sizeof(((struct st_file_in_flush *)NULL)->file), - NULL, NULL, 0)) + offsetof(struct st_file_in_flush, file), + sizeof(((struct st_file_in_flush *)NULL)->file), + NULL, NULL, 0)) goto err; pagecache->inited= 1; pagecache->in_init= 0; @@ -752,6 +794,8 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem, { if (blocks < 8) { + my_message(ENOMEM, "Not enough memory to allocate 8 pagecache pages", + MYF(0)); my_errno= ENOMEM; goto err; } @@ -953,7 +997,7 @@ ulong resize_pagecache(PAGECACHE *pagecache, DBUG_RETURN(pagecache->disk_blocks); 
} - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); wqueue= &pagecache->resize_queue; thread= my_thread_var; @@ -961,7 +1005,7 @@ ulong resize_pagecache(PAGECACHE *pagecache, while (wqueue->last_thread->next != thread) { - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } pagecache->resize_in_flush= 1; @@ -977,9 +1021,8 @@ ulong resize_pagecache(PAGECACHE *pagecache, pagecache->can_be_used= 0; while (pagecache->cnt_for_resize_op) { - KEYCACHE_DBUG_PRINT("resize_pagecache: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } end_pagecache(pagecache, 0); /* Don't free mutex */ @@ -993,11 +1036,12 @@ finish: /* Signal for the next resize request to proceeed if any */ if (wqueue->last_thread) { - KEYCACHE_DBUG_PRINT("resize_pagecache: signal", - ("thread %ld", wqueue->last_thread->next->id)); - mysql_cond_signal(&wqueue->last_thread->next->suspend); + DBUG_PRINT("signal", + ("thread %s %ld", wqueue->last_thread->next->name, + wqueue->last_thread->next->id)); + pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend); } - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(blocks); } #endif /* 0 */ @@ -1008,6 +1052,7 @@ finish: */ static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) { + mysql_mutex_assert_owner(&pagecache->cache_lock); pagecache->cnt_for_resize_op++; } @@ -1016,15 +1061,18 @@ static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) Decrement counter blocking resize key cache operation; Signal the operation to proceed when counter becomes equal zero */ + static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) { struct 
st_my_thread_var *last_thread; + mysql_mutex_assert_owner(&pagecache->cache_lock); if (!--pagecache->cnt_for_resize_op && (last_thread= pagecache->resize_queue.last_thread)) { - KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", - ("thread %ld", last_thread->next->id)); - mysql_cond_signal(&last_thread->next->suspend); + DBUG_PRINT("signal", + ("thread %s %ld", last_thread->next->name, + last_thread->next->id)); + pagecache_pthread_cond_signal(&last_thread->next->suspend); } } @@ -1051,16 +1099,47 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, { DBUG_ENTER("change_pagecache_param"); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (division_limit) pagecache->min_warm_blocks= (pagecache->disk_blocks * division_limit / 100 + 1); if (age_threshold) pagecache->age_threshold= (pagecache->disk_blocks * age_threshold / 100); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_VOID_RETURN; +} + + +/* + Check that pagecache was used and cleaned up properly. +*/ + +#ifndef DBUG_OFF +void check_pagecache_is_cleaned_up(PAGECACHE *pagecache) +{ + DBUG_ENTER("check_pagecache_is_cleaned_up"); + /* + Ensure we called inc_counter_for_resize_op and dec_counter_for_resize_op + the same number of times. (If not, a resize() could never happen. 
+ */ + DBUG_ASSERT(pagecache->cnt_for_resize_op == 0); + + if (pagecache->disk_blocks > 0) + { + if (pagecache->block_mem) + { + uint i; + for (i=0 ; i < pagecache->blocks_used ; i++) + { + DBUG_ASSERT(pagecache->block_root[i].status == 0); + DBUG_ASSERT(pagecache->block_root[i].type == PAGECACHE_EMPTY_PAGE); + } + } + } DBUG_VOID_RETURN; } +#endif /* @@ -1085,6 +1164,10 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) if (pagecache->disk_blocks > 0) { +#ifndef DBUG_OFF + check_pagecache_is_cleaned_up(pagecache); +#endif + if (pagecache->block_mem) { my_large_free(pagecache->block_mem); @@ -1157,7 +1240,7 @@ static void link_to_file_list(PAGECACHE *pagecache, link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); if (block->status & PCBLOCK_CHANGED) { - block->status&= ~PCBLOCK_CHANGED; + block->status&= ~(PCBLOCK_CHANGED | PCBLOCK_DEL_WRITE); block->rec_lsn= LSN_MAX; pagecache->blocks_changed--; pagecache->global_blocks_changed--; @@ -1223,6 +1306,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, { PAGECACHE_BLOCK_LINK *ins; PAGECACHE_BLOCK_LINK **ptr_ins; + DBUG_ENTER("link_block"); PCBLOCK_INFO(block); KEYCACHE_DBUG_ASSERT(! 
(block->hash_link && block->hash_link->requests)); @@ -1236,6 +1320,11 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, PAGECACHE_HASH_LINK *hash_link= (PAGECACHE_HASH_LINK *) first_thread->opt_info; struct st_my_thread_var *thread; + + DBUG_ASSERT(block->requests + block->wlocks + block->rlocks + + block->pins == 0); + DBUG_ASSERT(block->next_used == NULL); + do { thread= next_thread; @@ -1246,22 +1335,25 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, */ if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) { - KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id)); - mysql_cond_signal(&thread->suspend); + DBUG_PRINT("signal", ("thread: %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); block->requests++; } } while (thread != last_thread); hash_link->block= block; - KEYCACHE_THREAD_TRACE("link_block: after signaling"); + /* Ensure that no other thread tries to use this block */ + block->status|= PCBLOCK_REASSIGNED; + + DBUG_PRINT("signal", ("after signal")); #if defined(PAGECACHE_DEBUG) KEYCACHE_DBUG_PRINT("link_block", ("linked,unlinked block: %u status: %x #requests: %u #available: %u", PCBLOCK_NUMBER(pagecache, block), block->status, block->requests, pagecache->blocks_available)); #endif - return; + DBUG_VOID_RETURN; } ptr_ins= hot ? 
&pagecache->used_ins : &pagecache->used_last; ins= *ptr_ins; @@ -1290,6 +1382,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= pagecache->blocks_used); #endif + DBUG_VOID_RETURN; } @@ -1298,7 +1391,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, SYNOPSIS unlink_block() - pagecache pointer to a page cache data structure + pagecache pointer to a page cache data structure block pointer to the block to unlink from the LRU chain RETURN VALUE @@ -1311,7 +1404,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) { DBUG_ENTER("unlink_block"); - DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); + DBUG_PRINT("pagecache", ("unlink 0x%lx", (ulong)block)); DBUG_ASSERT(block->next_used != NULL); if (block->next_used == block) { @@ -1335,7 +1428,7 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) #if defined(PAGECACHE_DEBUG) KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); pagecache->blocks_available--; - KEYCACHE_DBUG_PRINT("unlink_block", + KEYCACHE_DBUG_PRINT("pagecache", ("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u", (ulong)block, PCBLOCK_NUMBER(pagecache, block), block->status, @@ -1363,9 +1456,6 @@ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, int count) { DBUG_ENTER("reg_requests"); - DBUG_PRINT("enter", ("block: 0x%lx (%u) status: %x reqs: %u", - (ulong)block, PCBLOCK_NUMBER(pagecache, block), - block->status, block->requests)); PCBLOCK_INFO(block); if (! 
block->requests) /* First request for the block unlinks it */ @@ -1408,7 +1498,7 @@ static void unreg_request(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, int at_end) { DBUG_ENTER("unreg_request"); - DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x reqs: %u", + DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x requests: %u", (ulong)block, PCBLOCK_NUMBER(pagecache, block), block->status, block->requests)); PCBLOCK_INFO(block); @@ -1461,7 +1551,7 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) PCBLOCK_INFO(block); DBUG_ASSERT(block->hash_link->requests > 0); if (! --block->hash_link->requests && block->condvar) - mysql_cond_signal(block->condvar); + pagecache_pthread_cond_signal(block->condvar); DBUG_VOID_RETURN; } @@ -1473,22 +1563,51 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) static inline void wait_for_readers(PAGECACHE *pagecache __attribute__((unused)), - PAGECACHE_BLOCK_LINK *block) + PAGECACHE_BLOCK_LINK *block + __attribute__((unused))) { struct st_my_thread_var *thread= my_thread_var; + DBUG_ASSERT(block->condvar == NULL); while (block->hash_link->requests) { - KEYCACHE_DBUG_PRINT("wait_for_readers: wait", - ("suspend thread: %ld block: %u", - thread->id, PCBLOCK_NUMBER(pagecache, block))); + DBUG_ENTER("wait_for_readers"); + DBUG_PRINT("wait", + ("suspend thread: %s %ld block: %u", + thread->name, thread->id, + PCBLOCK_NUMBER(pagecache, block))); block->condvar= &thread->suspend; - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); block->condvar= NULL; + DBUG_VOID_RETURN; } } /* + Wait until the flush of the page is done. 
+*/ + +static void wait_for_flush(PAGECACHE *pagecache + __attribute__((unused)), + PAGECACHE_BLOCK_LINK *block + __attribute__((unused))) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("wait_for_flush"); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); + DBUG_VOID_RETURN; +} + + +/* Add a hash link to a bucket in the hash_table */ @@ -1509,10 +1628,14 @@ static inline void link_hash(PAGECACHE_HASH_LINK **start, static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) { - KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", - (uint) hash_link->file.file, (ulong) hash_link->pageno, - hash_link->requests)); - KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); + DBUG_ENTER("unlink_hash"); + DBUG_PRINT("enter", ("hash_link: %p fd: %u pos: %lu requests: %u", + hash_link, (uint) hash_link->file.file, + (ulong) hash_link->pageno, + hash_link->requests)); + DBUG_ASSERT(hash_link->requests == 0); + DBUG_ASSERT(!hash_link->block || hash_link->block->pins == 0); + if ((*hash_link->prev= hash_link->next)) hash_link->next->prev= hash_link->prev; hash_link->block= NULL; @@ -1542,20 +1665,29 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) if (page->file.file == hash_link->file.file && page->pageno == hash_link->pageno) { - KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); - mysql_cond_signal(&thread->suspend); + DBUG_PRINT("signal", ("thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); } } while (thread != last_thread); + + /* + Add this to the hash, so that the waiting threads can find it + when they retry the call to get_hash_link(). 
This entry is special + in that it has no associated block. + */ link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache, hash_link->file, hash_link->pageno)], hash_link); - return; + DBUG_VOID_RETURN; } + + /* Add hash to free hash list */ hash_link->next= pagecache->free_hash_list; pagecache->free_hash_list= hash_link; + DBUG_VOID_RETURN; } @@ -1585,9 +1717,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, int cnt; #endif DBUG_ENTER("get_present_hash_link"); - - KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", - (uint) file->file, (ulong) pageno)); + DBUG_PRINT("enter", ("fd: %u pos: %lu", (uint) file->file, (ulong) pageno)); /* Find the bucket in the hash table for the pair (file, pageno); @@ -1622,6 +1752,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, } if (hash_link) { + DBUG_PRINT("exit", ("hash_link: %p", hash_link)); /* Register the request for the page */ hash_link->requests++; } @@ -1643,9 +1774,7 @@ static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, { reg1 PAGECACHE_HASH_LINK *hash_link; PAGECACHE_HASH_LINK **start; - - KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", - (uint) file->file, (ulong) pageno)); + DBUG_ENTER("get_hash_link"); restart: /* try to find the page in the cache */ @@ -1656,6 +1785,9 @@ restart: /* There is no hash link in the hash table for the pair (file, pageno) */ if (pagecache->free_hash_list) { + DBUG_PRINT("info", ("free_hash_list: %p free_hash_list->next: %p", + pagecache->free_hash_list, + pagecache->free_hash_list->next)); hash_link= pagecache->free_hash_list; pagecache->free_hash_list= hash_link->next; } @@ -1668,16 +1800,16 @@ restart: /* Wait for a free hash link */ struct st_my_thread_var *thread= my_thread_var; PAGECACHE_PAGE page; - KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); page.file= *file; page.pageno= pageno; thread->opt_info= (void *) &page; wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); - 
KEYCACHE_DBUG_PRINT("get_hash_link: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); thread->opt_info= NULL; - DBUG_PRINT("info", ("restarting...")); + DBUG_PRINT("thread", ("restarting...")); goto restart; } hash_link->file= *file; @@ -1686,9 +1818,20 @@ restart: link_hash(start, hash_link); /* Register the request for the page */ hash_link->requests++; + DBUG_ASSERT(hash_link->block == 0); + DBUG_ASSERT(hash_link->requests == 1); } - - return hash_link; + else + { + /* + We have to copy the flush_log callback, as it may change if the table + goes from non_transactional to transactional during recovery + */ + hash_link->file.flush_log_callback= file->flush_log_callback; + } + DBUG_PRINT("exit", ("hash_link: %p block: %p", hash_link, + hash_link->block)); + DBUG_RETURN(hash_link); } @@ -1705,7 +1848,12 @@ restart: pageno number of the page in the file init_hits_left how initialize the block counter for the page wrmode <-> get for writing - reg_req Register request to thye page + block_is_copied 1 if block will be copied from page cache under + the pagelock mutex. + reg_req Register request to the page. Normally all pages + should be registered; The only time it's ok to + not register a page is when the page is already + pinned (and thus registered) by the same thread. 
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} RETURN VALUE @@ -1734,6 +1882,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, pgcache_page_no_t pageno, int init_hits_left, my_bool wrmode, + my_bool block_is_copied, my_bool reg_req, int *page_st) { @@ -1741,14 +1890,12 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block; int error= 0; int page_status; - DBUG_ENTER("find_block"); - KEYCACHE_THREAD_TRACE("find_block:begin"); - DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", - file->file, (ulong) pageno, wrmode)); - KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d", - file->file, (ulong) pageno, - wrmode)); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d block_is_copied: %d", + file->file, (ulong) pageno, wrmode, block_is_copied)); + KEYCACHE_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, + wrmode)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", test_key_cache(pagecache, "start of find_block", 0);); @@ -1796,18 +1943,10 @@ restart: /* Wait until the page is flushed on disk */ DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; - { - struct st_my_thread_var *thread= my_thread_var; - wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); - do - { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); - } - while(thread->next); - } + wait_for_flush(pagecache, block); + /* Invalidate page in the block if it has not been done yet */ + DBUG_ASSERT(block->status); /* Should always be true */ if (block->status) free_block(pagecache, block); return 0; @@ -1827,7 +1966,7 @@ restart: Only reading requests can proceed until the old dirty page is flushed, all others are to be suspended, then resubmitted */ - if (!wrmode && !(block->status & PCBLOCK_REASSIGNED)) + if (!wrmode && block_is_copied && !(block->status & 
PCBLOCK_REASSIGNED)) { if (reg_req) reg_requests(pagecache, block, 1); @@ -1845,9 +1984,10 @@ restart: /* Wait until the request can be resubmitted */ do { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); } while(thread->next); } @@ -1892,6 +2032,7 @@ restart: #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; #endif + DBUG_ASSERT(reg_req); block->requests= 1; block->temperature= PCBLOCK_COLD; block->hits_left= init_hits_left; @@ -1901,59 +2042,75 @@ restart: block->hash_link= hash_link; hash_link->block= block; page_status= PAGE_TO_BE_READ; - DBUG_PRINT("info", ("page to be read set for page 0x%lx", - (ulong)block)); - KEYCACHE_DBUG_PRINT("find_block", - ("got free or never used block %u", - PCBLOCK_NUMBER(pagecache, block))); + DBUG_PRINT("info", ("page to be read set for page 0x%lx (%u)", + (ulong) block, PCBLOCK_NUMBER(pagecache, block))); + KEYCACHE_PRINT("find_block", + ("got free or never used block %u", + PCBLOCK_NUMBER(pagecache, block))); } else { /* There are no never used blocks, use a block from the LRU chain */ /* - Wait until a new block is added to the LRU chain; - several threads might wait here for the same page, - all of them must get the same block + Ensure that we are going to register the block. + (This should be true as a new block could not have been + pinned by caller). */ + DBUG_ASSERT(reg_req); if (! pagecache->used_last) { + /* + Wait until a new block is added to the LRU chain; + several threads might wait here for the same page, + all of them must get the same block. + + The block is given to us by the next thread executing + link_block(). 
+ */ + struct st_my_thread_var *thread= my_thread_var; thread->opt_info= (void *) hash_link; wqueue_link_into_queue(&pagecache->waiting_for_block, thread); do { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); } while (thread->next); thread->opt_info= NULL; + block= hash_link->block; + /* Ensure that the block is registered */ + DBUG_ASSERT(block->requests >= 1); } - block= hash_link->block; - if (! block) + else { /* Take the first block from the LRU chain unlinking it from the chain */ block= pagecache->used_last->next_used; - block->hits_left= init_hits_left; - block->last_hit_time= 0; if (reg_req) reg_requests(pagecache, block, 1); hash_link->block= block; + DBUG_ASSERT(block->requests == 1); } + PCBLOCK_INFO(block); - DBUG_ASSERT(block->wlocks == 0); - DBUG_ASSERT(block->rlocks == 0); - DBUG_ASSERT(block->rlocks_queue == 0); - DBUG_ASSERT(block->pins == 0); + + DBUG_ASSERT(block->hash_link == hash_link || + !(block->status & PCBLOCK_IN_SWITCH)); if (block->hash_link != hash_link && ! (block->status & PCBLOCK_IN_SWITCH) ) { + /* If another thread is flushing the block, wait for it. 
*/ + if (block->status & PCBLOCK_IN_FLUSH) + wait_for_flush(pagecache, block); + /* this is a primary request for a new page */ DBUG_ASSERT(block->wlocks == 0); DBUG_ASSERT(block->rlocks == 0); @@ -1971,19 +2128,19 @@ restart: KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); - mysql_mutex_unlock(&pagecache->cache_lock); /* The call is thread safe because only the current thread might change the block->hash_link value */ DBUG_ASSERT(block->pins == 0); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); error= pagecache_fwrite(pagecache, &block->hash_link->file, block->buffer, block->hash_link->pageno, block->type, pagecache->readwrite_flags); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); pagecache->global_cache_write++; } @@ -2000,21 +2157,25 @@ restart: /* Remove the hash link for this page from the hash table */ unlink_hash(pagecache, block->hash_link); + /* All pending requests for this page must be resubmitted */ if (block->wqueue[COND_FOR_SAVED].last_thread) wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); + + block->hash_link= hash_link; PCBLOCK_INFO(block); + block->hits_left= init_hits_left; + block->last_hit_time= 0; block->status= error ? PCBLOCK_ERROR : 0; - block->error= (int16) my_errno; + block->error= error ? (int16) my_errno : 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; if (error) my_debug_put_break_here(); #endif - block->hash_link= hash_link; page_status= PAGE_TO_BE_READ; DBUG_PRINT("info", ("page to be read set for page 0x%lx", (ulong)block)); @@ -2037,12 +2198,24 @@ restart: } else { + /* + The block was found in the cache. It's either a already read + block or a block waiting to be read by another thread. 
+ */ if (reg_req) reg_requests(pagecache, block, 1); KEYCACHE_DBUG_PRINT("find_block", ("block->hash_link: %p hash_link: %p " "block->status: %u", block->hash_link, hash_link, block->status )); + /* + block->hash_link != hash_link can only happen when + the block is in PCBLOCK_IN_SWITCH above (is flushed out + to be replaced by another block). The SWITCH code will change + block->hash_link to point to hash_link. + */ + KEYCACHE_DBUG_ASSERT(block->hash_link == hash_link || + block->status & PCBLOCK_IN_SWITCH); page_status= (((block->hash_link == hash_link) && (block->status & PCBLOCK_READ)) ? PAGE_READ : PAGE_WAIT_TO_BE_READ); @@ -2055,11 +2228,11 @@ restart: ("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u", (ulong) block, (uint) file->file, (ulong) pageno, block->status, (uint) page_status)); - KEYCACHE_DBUG_PRINT("find_block", - ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", - (ulong) block, - file->file, (ulong) pageno, block->status, - page_status)); + KEYCACHE_PRINT("find_block", + ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", + (ulong) block, + file->file, (ulong) pageno, block->status, + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", @@ -2175,21 +2348,26 @@ static my_bool pagecache_wait_lock(PAGECACHE *pagecache, dec_counter_for_resize_op(pagecache); do { - KEYCACHE_DBUG_PRINT("get_wrlock: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); } while(thread->next); + inc_counter_for_resize_op(pagecache); PCBLOCK_INFO(block); if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) || + !block->hash_link || file.file != block->hash_link->file.file || pageno != block->hash_link->pageno) { DBUG_PRINT("info", ("the block 0x%lx changed => need retry " 
"status: %x files %d != %d or pages %lu != %lu", (ulong)block, block->status, - file.file, block->hash_link->file.file, - (ulong) pageno, (ulong) block->hash_link->pageno)); + file.file, + block->hash_link ? block->hash_link->file.file : -1, + (ulong) pageno, + (ulong) (block->hash_link ? block->hash_link->pageno : 0))); DBUG_RETURN(1); } DBUG_RETURN(0); @@ -2396,25 +2574,17 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache, my_bool any) { DBUG_ENTER("make_lock_and_pin"); + DBUG_PRINT("enter", ("block: 0x%lx (%u) lock: %s pin: %s any %d", + (ulong)block, PCBLOCK_NUMBER(pagecache, block), + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin], (int)any)); + PCBLOCK_INFO(block); - DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block)); -#ifndef DBUG_OFF - if (block) - { - DBUG_PRINT("enter", ("block: 0x%lx (%u) wrlocks: %u rdlocks: %u " - "rdlocks_q: %u pins: %u lock: %s pin: %s any %d", - (ulong)block, PCBLOCK_NUMBER(pagecache, block), - block->wlocks, block->rlocks, block->rlocks_queue, - block->pins, - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin], (int)any)); - PCBLOCK_INFO(block); - } -#endif - + DBUG_ASSERT(block); DBUG_ASSERT(!any || ((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) && (pin == PAGECACHE_UNPIN))); + DBUG_ASSERT(block->hash_link->block == block); switch (lock) { case PAGECACHE_LOCK_WRITE: /* free -> write */ @@ -2479,17 +2649,13 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache, DBUG_ASSERT(0); /* Never should happened */ } -#ifndef DBUG_OFF - if (block) - PCBLOCK_INFO(block); -#endif + PCBLOCK_INFO(block); DBUG_RETURN(0); retry: DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); PCBLOCK_INFO(block); DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; - PCBLOCK_INFO(block); DBUG_RETURN(1); } @@ -2521,7 +2687,6 @@ static void read_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, my_bool primary) { - DBUG_ENTER("read_block"); DBUG_PRINT("enter", ("read block: 0x%lx primary: %d", 
(ulong)block, primary)); @@ -2534,19 +2699,20 @@ static void read_block(PAGECACHE *pagecache, */ pagecache->global_cache_read++; - /* Page is not in buffer yet, is to be read from disk */ - mysql_mutex_unlock(&pagecache->cache_lock); /* + Page is not in buffer yet, is to be read from disk Here other threads may step in and register as secondary readers. They will register in block->wqueue[COND_FOR_REQUESTED]. */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); error= pagecache_fread(pagecache, &block->hash_link->file, block->buffer, block->hash_link->pageno, pagecache->readwrite_flags); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (error) { + DBUG_ASSERT(maria_in_recovery || !maria_assert_if_crashed_table); block->status|= PCBLOCK_ERROR; block->error= (int16) my_errno; my_debug_put_break_here(); @@ -2583,9 +2749,10 @@ static void read_block(PAGECACHE *pagecache, wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); do { - DBUG_PRINT("read_block: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, &pagecache->cache_lock); + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); } while (thread->next); DBUG_PRINT("read_block", @@ -2675,10 +2842,9 @@ void pagecache_unlock(PAGECACHE *pagecache, page_cache_page_pin_str[pin])); /* we do not allow any lock/pin increasing here */ DBUG_ASSERT(pin != PAGECACHE_PIN); - DBUG_ASSERT(lock != PAGECACHE_LOCK_READ); - DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE); + DBUG_ASSERT(lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* As soon as we keep lock cache can be used, and we have lock because want to unlock. 
@@ -2687,7 +2853,7 @@ void pagecache_unlock(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); /* See NOTE for pagecache_unlock about registering requests */ - block= find_block(pagecache, file, pageno, 0, 0, + block= find_block(pagecache, file, pageno, 0, 0, 0, pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st); PCBLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); @@ -2738,7 +2904,7 @@ void pagecache_unlock(PAGECACHE *pagecache, dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_VOID_RETURN; } @@ -2767,7 +2933,7 @@ void pagecache_unpin(PAGECACHE *pagecache, DBUG_ENTER("pagecache_unpin"); DBUG_PRINT("enter", ("fd: %u page: %lu", (uint) file->file, (ulong) pageno)); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* As soon as we keep lock cache can be used, and we have lock bacause want aunlock. @@ -2776,7 +2942,7 @@ void pagecache_unpin(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); /* See NOTE for pagecache_unlock about registering requests */ - block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, 0, &page_st); DBUG_ASSERT(block != 0); DBUG_ASSERT(page_st == PAGE_READ); /* we can't unpin such page without unlock */ @@ -2805,7 +2971,7 @@ void pagecache_unpin(PAGECACHE *pagecache, dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_VOID_RETURN; } @@ -2856,13 +3022,13 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED); DBUG_ASSERT(lock != PAGECACHE_LOCK_READ); DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (pin == PAGECACHE_PIN_LEFT_UNPINNED && lock == PAGECACHE_LOCK_READ_UNLOCK) { 
if (make_lock_and_pin(pagecache, block, lock, pin, FALSE)) DBUG_ASSERT(0); /* should not happend */ - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_VOID_RETURN; } @@ -2931,7 +3097,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_VOID_RETURN; } @@ -2960,7 +3126,7 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, (uint) block->hash_link->file.file, (ulong) block->hash_link->pageno)); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* As soon as we keep lock cache can be used, and we have lock because want unlock. @@ -2993,7 +3159,7 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_VOID_RETURN; } @@ -3207,10 +3373,10 @@ restart: uint status; int page_st; - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (!pagecache->can_be_used) { - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); goto no_key_cache; } @@ -3220,7 +3386,7 @@ restart: reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) || (new_pin == PAGECACHE_PIN)); block= find_block(pagecache, file, pageno, level, - lock == PAGECACHE_LOCK_WRITE, + lock == PAGECACHE_LOCK_WRITE, buff != 0, reg_request, &page_st); DBUG_PRINT("info", ("Block type: %s current type %s", page_cache_page_type_str[block->type], @@ -3262,7 +3428,7 @@ restart: */ if (reg_request) unreg_request(pagecache, block, 1); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_PRINT("info", ("restarting...")); goto restart; } @@ -3273,8 +3439,7 @@ restart: buff= block->buffer; /* 
possibly we will write here (resolved on unlock) */ if ((lock == PAGECACHE_LOCK_WRITE || - lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) && - !(block->status & PCBLOCK_CHANGED)) + lock == PAGECACHE_LOCK_LEFT_WRITELOCKED)) { block->status|= PCBLOCK_DIRECT_W; DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx", @@ -3283,10 +3448,10 @@ restart: } else { - if (!(status & PCBLOCK_ERROR)) + if (status & PCBLOCK_READ) { #if !defined(SERIALIZED_READ_FROM_CACHE) - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); #endif DBUG_ASSERT((pagecache->block_size & 511) == 0); @@ -3294,10 +3459,10 @@ restart: memcpy(buff, block->buffer, pagecache->block_size); #if !defined(SERIALIZED_READ_FROM_CACHE) - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); #endif } - else + if (status & PCBLOCK_ERROR) my_errno= block->error; } @@ -3307,7 +3472,10 @@ restart: if (make_lock_and_pin(pagecache, block, lock_to_read[lock].unlock_lock, unlock_pin, FALSE)) + { DBUG_ASSERT(0); + return (uchar*) 0; + } } /* Link the block into the LRU chain if it's the last submitted request @@ -3322,7 +3490,7 @@ restart: dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); if (status & PCBLOCK_ERROR) { @@ -3347,6 +3515,31 @@ no_key_cache: /* Key cache is not used */ /* + @brief Set/reset flag that page always should be flushed on delete + + @param pagecache pointer to a page cache data structure + @param link direct link to page (returned by read or write) + @param write write on delete flag value + +*/ + +void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_set_write_on_delete_by_link"); + DBUG_PRINT("enter", ("fd: %d block 0x%lx %d -> TRUE", + block->hash_link->file.file, + (ulong) block, + (int) block->status & PCBLOCK_DEL_WRITE)); + DBUG_ASSERT(block->pins); /* should be 
pinned */ + DBUG_ASSERT(block->wlocks); /* should be write locked */ + + block->status|= PCBLOCK_DEL_WRITE; + + DBUG_VOID_RETURN; +} + + +/* @brief Delete page from the buffer (common part for link and file/page) @param pagecache pointer to a page cache data structure @@ -3365,27 +3558,36 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache, my_bool flush) { my_bool error= 0; + if (block->status & PCBLOCK_IN_FLUSH) + { + /* + this call is just 'hint' for the cache to free the page so we will + not interferes with flushing process but must return success + */ + goto out; + } if (block->status & PCBLOCK_CHANGED) { + flush= (flush || (block->status & PCBLOCK_DEL_WRITE)); if (flush) { /* The block contains a dirty page - push it out of the cache */ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); - mysql_mutex_unlock(&pagecache->cache_lock); /* The call is thread safe because only the current thread might change the block->hash_link value */ DBUG_ASSERT(block->pins == 1); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); error= pagecache_fwrite(pagecache, &block->hash_link->file, block->buffer, block->hash_link->pageno, block->type, pagecache->readwrite_flags); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); pagecache->global_cache_write++; if (error) @@ -3393,7 +3595,26 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache, block->status|= PCBLOCK_ERROR; block->error= (int16) my_errno; my_debug_put_break_here(); - goto err; + goto out; + } + } + else + { + PAGECACHE_FILE *filedesc= &block->hash_link->file; + /* We are not going to write the page but have to call callbacks */ + DBUG_PRINT("info", ("flush_callback :0x%lx" + "write_callback: 0x%lx data: 0x%lx", + (ulong) filedesc->flush_log_callback, + (ulong) filedesc->write_callback, + (ulong) filedesc->callback_data)); + if ((*filedesc->flush_log_callback) + (block->buffer, block->hash_link->pageno, filedesc->callback_data) || + 
(*filedesc->write_callback) + (block->buffer, block->hash_link->pageno, filedesc->callback_data)) + { + DBUG_PRINT("error", ("flush or write callback problem")); + error= 1; + goto out; } } pagecache->blocks_changed--; @@ -3410,10 +3631,19 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache, DBUG_ASSERT(0); DBUG_ASSERT(block->hash_link->requests > 0); page_link->requests--; - /* See NOTE for pagecache_unlock about registering requests. */ + /* See NOTE for pagecache_unlock() about registering requests. */ free_block(pagecache, block); + dec_counter_for_resize_op(pagecache); + return 0; -err: +out: + /* Cache is locked, so we can relese page before freeing it */ + if (make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, FALSE)) + DBUG_ASSERT(0); + page_link->requests--; + unreg_request(pagecache, block, 1); dec_counter_for_resize_op(pagecache); return error; } @@ -3454,7 +3684,7 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache, if (pagecache->can_be_used) { - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (!pagecache->can_be_used) goto end; @@ -3464,6 +3694,8 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache, */ DBUG_ASSERT((block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)) == 0); + + inc_counter_for_resize_op(pagecache); /* make_lock_and_pin() can't fail here, because we are keeping pin on the block and it can't be evicted (which is cause of lock fail and retry) @@ -3480,7 +3712,7 @@ my_bool pagecache_delete_by_link(PAGECACHE *pagecache, error= pagecache_delete_internal(pagecache, block, block->hash_link, flush); end: - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); } DBUG_RETURN(error); @@ -3571,7 +3803,7 @@ restart: reg1 PAGECACHE_BLOCK_LINK *block; PAGECACHE_HASH_LINK **unused_start, *page_link; - mysql_mutex_lock(&pagecache->cache_lock); + 
pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (!pagecache->can_be_used) goto end; @@ -3580,7 +3812,8 @@ restart: if (!page_link) { DBUG_PRINT("info", ("There is no such page in the cache")); - mysql_mutex_unlock(&pagecache->cache_lock); + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(0); } block= page_link->block; @@ -3592,12 +3825,12 @@ restart: "reassigned" : "in switch"))); PCBLOCK_INFO(block); page_link->requests--; + dec_counter_for_resize_op(pagecache); goto end; } /* See NOTE for pagecache_unlock about registering requests. */ if (pin == PAGECACHE_PIN) reg_requests(pagecache, block, 1); - DBUG_ASSERT(block != 0); if (make_lock_and_pin(pagecache, block, lock, pin, FALSE)) { /* @@ -3606,7 +3839,7 @@ restart: */ if (pin == PAGECACHE_PIN) unreg_request(pagecache, block, 1); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_PRINT("info", ("restarting...")); goto restart; } @@ -3616,7 +3849,7 @@ restart: error= pagecache_delete_internal(pagecache, block, page_link, flush); end: - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); } DBUG_RETURN(error); @@ -3763,27 +3996,30 @@ restart: int page_st; my_bool need_page_ready_signal= FALSE; - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (!pagecache->can_be_used) { - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); goto no_key_cache; } inc_counter_for_resize_op(pagecache); pagecache->global_cache_w_requests++; - /* See NOTE for pagecache_unlock about registering requests. */ + /* + Here we register a request if the page was not already pinned. + See NOTE for pagecache_unlock about registering requests. 
+ */ reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) || (pin == PAGECACHE_PIN)); block= find_block(pagecache, file, pageno, level, - TRUE, + TRUE, FALSE, reg_request, &page_st); if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); /* It happens only for requests submitted during resize operation */ dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* Write to the disk key cache is in resize at the moment*/ goto no_key_cache; } @@ -3827,7 +4063,7 @@ restart: */ if (reg_request) unreg_request(pagecache, block, 1); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_PRINT("info", ("restarting...")); goto restart; } @@ -3911,7 +4147,10 @@ restart: block->hash_link->requests--; /* See NOTE for pagecache_unlock about registering requests. */ if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) + { unreg_request(pagecache, block, 1); + DBUG_ASSERT(page_link == &fake_link); + } else *page_link= block; @@ -3923,7 +4162,7 @@ restart: dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); goto end; } @@ -3987,11 +4226,13 @@ end: static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) { + uint status= block->status; KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", ("block: %u hash_link 0x%lx", PCBLOCK_NUMBER(pagecache, block), (long) block->hash_link)); + mysql_mutex_assert_owner(&pagecache->cache_lock); if (block->hash_link) { /* @@ -4010,27 +4251,44 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) DBUG_ASSERT(block->rlocks == 0); DBUG_ASSERT(block->rlocks_queue == 0); DBUG_ASSERT(block->pins == 0); + DBUG_ASSERT((block->status & ~(PCBLOCK_ERROR | PCBLOCK_READ | PCBLOCK_IN_FLUSH | PCBLOCK_CHANGED | PCBLOCK_REASSIGNED | PCBLOCK_DEL_WRITE)) == 0); + 
DBUG_ASSERT(block->requests >= 1); + DBUG_ASSERT(block->next_used == NULL); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; #endif block->rec_lsn= LSN_MAX; + block->hash_link= NULL; + if (block->temperature == PCBLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= PCBLOCK_COLD; KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", ("block is freed")); unreg_request(pagecache, block, 0); - block->hash_link= NULL; - /* Remove the free block from the LRU ring. */ - unlink_block(pagecache, block); - if (block->temperature == PCBLOCK_WARM) - pagecache->warm_blocks--; - block->temperature= PCBLOCK_COLD; - /* Insert the free block in the free list. */ - block->next_used= pagecache->free_block_list; - pagecache->free_block_list= block; - /* Keep track of the number of currently unused blocks. */ - pagecache->blocks_unused++; + /* + Block->requests is != 0 if unreg_requests()/link_block() gave the block + to a waiting thread + */ + if (!block->requests) + { + DBUG_ASSERT(block->next_used != 0); + + /* Remove the free block from the LRU ring. */ + unlink_block(pagecache, block); + /* Insert the free block in the free list. */ + block->next_used= pagecache->free_block_list; + pagecache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + pagecache->blocks_unused++; + } + else + { + /* keep flag set by link_block() */ + block->status= status & PCBLOCK_REASSIGNED; + } /* All pending requests for this page must be resubmitted. 
*/ if (block->wqueue[COND_FOR_SAVED].last_thread) @@ -4078,14 +4336,14 @@ static int flush_cached_blocks(PAGECACHE *pagecache, *first_errno= 0; /* Don't lock the cache during the flush */ - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH we are guaranteed that no thread will change them */ qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); for (; cache != end; cache++) { PAGECACHE_BLOCK_LINK *block= *cache; @@ -4120,16 +4378,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache, PAGECACHE_LOCK_READ, PAGECACHE_PIN, FALSE)) DBUG_ASSERT(0); - KEYCACHE_DBUG_PRINT("flush_cached_blocks", - ("block: %u (0x%lx) to be flushed", - PCBLOCK_NUMBER(pagecache, block), (ulong)block)); - DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed", + KEYCACHE_PRINT("flush_cached_blocks", + ("block: %u (0x%lx) to be flushed", + PCBLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed", PCBLOCK_NUMBER(pagecache, block), (ulong)block)); PCBLOCK_INFO(block); - mysql_mutex_unlock(&pagecache->cache_lock); - DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u", - PCBLOCK_NUMBER(pagecache, block), (ulong)block, - block->pins)); + /** @todo IO If page is contiguous with next page to flush, group flushes in one single my_pwrite(). @@ -4140,12 +4395,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache, content (see StaleFilePointersInFlush in ma_checkpoint.c). @todo change argument of functions to be File. 
*/ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); error= pagecache_fwrite(pagecache, &block->hash_link->file, block->buffer, block->hash_link->pageno, block->type, pagecache->readwrite_flags); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (make_lock_and_pin(pagecache, block, PAGECACHE_LOCK_READ_UNLOCK, @@ -4285,9 +4541,10 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache, wqueue_add_to_queue(&other_flusher->flush_queue, thread); do { - KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait1", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, + DBUG_PRINT("wait", + ("(1) suspend thread %s %ld", + thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } while (thread->next); @@ -4307,11 +4564,11 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache, and thus require a table check. */ DBUG_ASSERT(0); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); if (my_thread_var->abort) DBUG_RETURN(1); /* End if aborted by user */ sleep(10); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); } if (type != FLUSH_IGNORE_CHANGED) @@ -4330,6 +4587,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache, KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used); } } + count++; /* Allocate one extra for easy end-of-buffer test */ /* Allocate a new buffer only if its bigger than the one we have */ if (count > FLUSH_CACHE && !(cache= @@ -4367,22 +4625,24 @@ restart: DBUG_ASSERT(filter_res == FLUSH_FILTER_OK); } { + DBUG_ASSERT(!(block->status & PCBLOCK_IN_FLUSH)); /* - Mark the block with BLOCK_IN_FLUSH in order not to let - other threads to use it for new pages and interfere with - our sequence of flushing dirty file pages + We care only for the blocks for which flushing was not + initiated by other threads as a result of page 
swapping */ - block->status|= PCBLOCK_IN_FLUSH; - if (! (block->status & PCBLOCK_IN_SWITCH)) { - /* - We care only for the blocks for which flushing was not - initiated by other threads as a result of page swapping + /* + Mark the block with BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence of flushing dirty file pages */ + block->status|= PCBLOCK_IN_FLUSH; + reg_requests(pagecache, block, 1); if (type != FLUSH_IGNORE_CHANGED) { + *pos++= block; /* It's not a temporary file */ if (pos == end) { @@ -4402,7 +4662,6 @@ restart: */ goto restart; } - *pos++= block; } else { @@ -4443,9 +4702,10 @@ restart: wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); do { - KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait2", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, + DBUG_PRINT("wait", + ("(2) suspend thread %s %ld", + thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } while (thread->next); @@ -4539,11 +4799,11 @@ int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache, if (pagecache->disk_blocks <= 0) DBUG_RETURN(0); - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); inc_counter_for_resize_op(pagecache); res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg); dec_counter_for_resize_op(pagecache); - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(res); } @@ -4620,7 +4880,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, We lock the entire cache but will be quick, just reading/writing a few MBs of memory at most. 
*/ - mysql_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); for (;;) { struct st_file_in_flush *other_flusher; @@ -4647,9 +4907,9 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, wqueue_add_to_queue(&other_flusher->flush_queue, thread); do { - KEYCACHE_DBUG_PRINT("pagecache_collect_changed_blocks_with_lsn: wait", - ("suspend thread %ld", thread->id)); - mysql_cond_wait(&thread->suspend, + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } while (thread->next); @@ -4727,7 +4987,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, } } end: - mysql_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); *min_rec_lsn= minimum_rec_lsn; DBUG_RETURN(error); @@ -4791,7 +5051,7 @@ static void pagecache_dump(PAGECACHE *pagecache) PAGECACHE_PAGE *page; uint i; - fprintf(pagecache_dump_file, "thread:%u\n", thread->id); + fprintf(pagecache_dump_file, "thread: %s %ld\n", thread->name, thread->id); i=0; thread=last=waiting_for_hash_link.last_thread; @@ -4802,8 +5062,9 @@ static void pagecache_dump(PAGECACHE *pagecache) thread= thread->next; page= (PAGECACHE_PAGE *) thread->opt_info; fprintf(pagecache_dump_file, - "thread:%u, (file,pageno)=(%u,%lu)\n", - thread->id,(uint) page->file.file,(ulong) page->pageno); + "thread: %s %ld, (file,pageno)=(%u,%lu)\n", + thread->name, thread->id, + (uint) page->file.file,(ulong) page->pageno); if (++i == MAX_QUEUE_LEN) break; } @@ -4818,8 +5079,9 @@ static void pagecache_dump(PAGECACHE *pagecache) thread=thread->next; hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; fprintf(pagecache_dump_file, - "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", - thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), + "thread: %s %u hash_link:%u (file,pageno)=(%u,%lu)\n", + thread->name, thread->id, + (uint) 
PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), (uint) hash_link->file.file,(ulong) hash_link->pageno); if (++i == MAX_QUEUE_LEN) break; @@ -4848,7 +5110,7 @@ static void pagecache_dump(PAGECACHE *pagecache) { thread=thread->next; fprintf(pagecache_dump_file, - "thread:%u\n", thread->id); + "thread: %s %ld\n", thread->name, thread->id); if (++i == MAX_QUEUE_LEN) break; } @@ -4878,8 +5140,8 @@ static void pagecache_dump(PAGECACHE *pagecache) #if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) +static int pagecache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex) { int rc; struct timeval now; /* time when we started waiting */ @@ -4906,7 +5168,7 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond, fprintf(pagecache_debug_log, "waiting...\n"); fflush(pagecache_debug_log); #endif - rc= pthread_cond_timedwait(cond, mutex, &timeout); + rc= mysql_cond_timedwait(cond, mutex, &timeout); KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); if (rc == ETIMEDOUT || rc == ETIME) { @@ -4927,12 +5189,12 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond, } #else #if defined(PAGECACHE_DEBUG) -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) +static int pagecache_pthread_cond_wait(mysql_cond_t *cond, + mysql_mutex_t *mutex) { int rc; KEYCACHE_THREAD_TRACE_END("started waiting"); - rc= pthread_cond_wait(cond, mutex); + rc= mysql_cond_wait(cond, mutex); KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); return rc; } @@ -4940,27 +5202,27 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond, #endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ #if defined(PAGECACHE_DEBUG) -static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) +static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex) { int rc; - rc= pthread_mutex_lock(mutex); + rc= mysql_mutex_lock(mutex); KEYCACHE_THREAD_TRACE_BEGIN(""); 
return rc; } -static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) +static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex) { KEYCACHE_THREAD_TRACE_END(""); - pthread_mutex_unlock(mutex); + mysql_mutex_unlock(mutex); } -static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) +static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond) { int rc; KEYCACHE_THREAD_TRACE("signal"); - rc= pthread_cond_signal(cond); + rc= mysql_cond_signal(cond); return rc; } diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 648f93d5c74..8460eaddc57 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -173,6 +173,7 @@ typedef struct st_pagecache my_bool resize_in_flush; /* true during flush of resize operation */ my_bool can_be_used; /* usage of cache for read/write is allowed */ my_bool in_init; /* Set to 1 in MySQL during init/resize */ + my_bool extra_debug; /* set to 1 if one wants extra logging */ HASH files_in_flush; /**< files in flush_pagecache_blocks_int() */ } PAGECACHE; @@ -251,6 +252,7 @@ extern void pagecache_unpin(PAGECACHE *pagecache, extern void pagecache_unpin_by_link(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *link, LSN lsn); +extern void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block); /* Results of flush operation (bit field in fact) */ diff --git a/storage/maria/ma_pagecrc.c b/storage/maria/ma_pagecrc.c index 640bb8880f4..58e3b4b203d 100644 --- a/storage/maria/ma_pagecrc.c +++ b/storage/maria/ma_pagecrc.c @@ -355,9 +355,7 @@ my_bool maria_flush_log_for_page(uchar *page, uchar *data_ptr __attribute__((unused))) { LSN lsn; -#ifndef DBUG_OFF - const MARIA_SHARE *share= (MARIA_SHARE*) data_ptr; -#endif + MARIA_SHARE *share= (MARIA_SHARE*) data_ptr; DBUG_ENTER("maria_flush_log_for_page"); /* share is 0 here only in unittest */ DBUG_ASSERT(!share || (share->page_type == PAGECACHE_LSN_PAGE && @@ -365,6 +363,12 @@ my_bool maria_flush_log_for_page(uchar *page, 
lsn= lsn_korr(page); if (translog_flush(lsn)) DBUG_RETURN(1); + /* + Now when log is written, it's safe to incremented 'open' counter for + the table so that we know it was not closed properly. + */ + if (share && !share->global_changed) + _ma_mark_file_changed_now(share); DBUG_RETURN(0); } diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c index f3380e9b68e..8ccb17af81d 100644 --- a/storage/maria/ma_panic.c +++ b/storage/maria/ma_panic.c @@ -67,8 +67,8 @@ int maria_panic(enum ha_panic_function flag) if (info->s->options & HA_OPTION_READ_ONLY_DATA) break; #endif - if (flush_pagecache_blocks(info->s->pagecache, &info->s->kfile, - FLUSH_RELEASE)) + if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, + FLUSH_RELEASE, FLUSH_RELEASE)) error=my_errno; if (info->opt_flag & WRITE_CACHE_USED) if (flush_io_cache(&info->rec_cache)) @@ -92,8 +92,8 @@ int maria_panic(enum ha_panic_function flag) if (info->dfile.file >= 0 && mysql_file_close(info->dfile.file, MYF(0))) error = my_errno; info->s->kfile.file= info->dfile.file= -1;/* Files aren't open anymore */ - break; #endif + break; case HA_PANIC_READ: /* Restore to before WRITE */ #ifdef CANT_OPEN_FILES_TWICE { /* Open closed files */ diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 4e1e3dd0608..d773c4fc343 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1,5 +1,5 @@ /* Copyright (C) 2006, 2007 MySQL AB - Copyright (C) 2010 Monty Program Ab + Copyright (C) 2010-2011 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include "trnman.h" #include "ma_key_recover.h" #include "ma_recovery_util.h" +#include "hash.h" struct st_trn_for_recovery /* used only in the REDO phase */ { @@ -58,6 +59,8 @@ static ulonglong now; /**< for tracking execution time of phases */ static void (*save_error_handler_hook)(uint, const char *,myf); 
static uint recovery_warnings; /**< count of warnings */ static uint recovery_found_crashed_tables; +HASH tables_to_redo; /* For maria_read_log */ +ulong maria_recovery_force_crash_counter; #define prototype_redo_exec_hook(R) \ static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) @@ -184,6 +187,21 @@ static void print_preamble() } +static my_bool table_is_part_of_recovery_set(LEX_STRING *file_name) +{ + uint offset =0; + if (!tables_to_redo.records) + return 1; /* Default, recover table */ + + /* Skip base directory */ + if (file_name->str[0] == '.' && + (file_name->str[1] == '/' || file_name->str[1] == '\\')) + offset= 2; + /* Only recover if table is in hash */ + return my_hash_search(&tables_to_redo, (uchar*) file_name->str + offset, + file_name->length - offset) != 0; +} + /** @brief Recovers from the last checkpoint. @@ -302,25 +320,32 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn, skip_DDLs= skip_DDLs_arg; skipped_undo_phase= 0; + trnman_init(max_trid_in_control_file); + if (from_lsn == LSN_IMPOSSIBLE) { if (last_checkpoint_lsn == LSN_IMPOSSIBLE) { from_lsn= translog_first_lsn_in_log(); if (unlikely(from_lsn == LSN_ERROR)) + { + trnman_destroy(); goto err; + } } else { from_lsn= parse_checkpoint_record(last_checkpoint_lsn); if (from_lsn == LSN_ERROR) + { + trnman_destroy(); goto err; + } } } - now= my_getsystime(); + now= microsecond_interval_timer(); in_redo_phase= TRUE; - trnman_init(max_trid_in_control_file); if (run_redo_phase(from_lsn, end_lsn, apply)) { ma_message_no_user(0, "Redo phase failed"); @@ -349,10 +374,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn, in_redo_phase= FALSE; old_now= now; - now= my_getsystime(); + now= microsecond_interval_timer(); if (recovery_message_printed == REC_MSG_REDO) { - double phase_took= (now - old_now)/10000000.0; + double phase_took= (now - old_now)/1000000.0; /* Detailed progress info goes to stderr, because ma_message_no_user() cannot put several messages on one line. 
@@ -418,10 +443,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn, } old_now= now; - now= my_getsystime(); + now= microsecond_interval_timer(); if (recovery_message_printed == REC_MSG_UNDO) { - double phase_took= (now - old_now)/10000000.0; + double phase_took= (now - old_now)/1000000.0; procent_printed= 1; fprintf(stderr, " (%.1f seconds); ", phase_took); fflush(stderr); @@ -438,10 +463,10 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn, } old_now= now; - now= my_getsystime(); + now= microsecond_interval_timer(); if (recovery_message_printed == REC_MSG_FLUSH) { - double phase_took= (now - old_now)/10000000.0; + double phase_took= (now - old_now)/1000000.0; procent_printed= 1; fprintf(stderr, " (%.1f seconds); ", phase_took); fflush(stderr); @@ -625,6 +650,7 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, prototype_redo_exec_hook_dummy(CHECKPOINT) { /* the only checkpoint we care about was found via control file, ignore */ + tprint(tracef, "CHECKPOINT found\n"); return 0; } @@ -1276,6 +1302,22 @@ prototype_redo_exec_hook(FILE_ID) { tprint(tracef, " Closing table '%s'\n", info->s->open_file_name.str); prepare_table_for_close(info, rec->lsn); + + /* + Ensure that open count is 1 on close. This is needed as the + table may initially had an open_count > 0 when we initially + opened it as the server may have crashed without closing it + properly. As we now have applied all redo's for the table up to + now, we know the table is ok, so it's safe to reset the open + count to 0. 
+ */ + if (info->s->state.open_count != 0 && info->s->reopen == 1) + { + /* let ma_close() mark the table properly closed */ + info->s->state.open_count= 1; + info->s->global_changed= 1; + info->s->changed= 1; + } if (maria_close(info)) { eprint(tracef, "Failed to close table"); @@ -1645,8 +1687,8 @@ prototype_redo_exec_hook(REDO_FREE_BLOCKS) } buff= log_record_buffer.str; - if (_ma_apply_redo_free_blocks(info, current_group_end_lsn, - buff + FILEID_STORE_SIZE)) + if (_ma_apply_redo_free_blocks(info, current_group_end_lsn, rec->lsn, + buff)) goto end; error= 0; end: @@ -2907,6 +2949,12 @@ static int run_undo_phase(uint uncommitted) translog_free_record_header(&rec); } + /* Force a crash to test recovery of recovery */ + if (maria_recovery_force_crash_counter) + { + DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); + } + if (trnman_rollback_trn(trn)) DBUG_RETURN(1); /* We could want to span a few threads (4?) instead of 1 */ @@ -3017,10 +3065,11 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const page= page_korr(rec->header + FILEID_STORE_SIZE); llstr(page, llbuf); break; + case LOGREC_REDO_FREE_BLOCKS: /* - For REDO_FREE_BLOCKS, no need to look at dirty pages list: it does not - read data pages, only reads/modifies bitmap page(s) which is cheap. + We are checking against the dirty pages in _ma_apply_redo_free_blocks() */ + break; default: break; } @@ -3038,6 +3087,12 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const share= info->s; tprint(tracef, ", '%s'", share->open_file_name.str); DBUG_ASSERT(in_redo_phase); + if (!table_is_part_of_recovery_set(&share->open_file_name)) + { + tprint(tracef, ", skipped by user\n"); + return NULL; + } + if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0) { /* @@ -3071,7 +3126,6 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const REDO_INSERT_ROW_BLOBS will consult list by itself, as it covers several pages. 
*/ - tprint(tracef, " page %s", llbuf); if (_ma_redo_not_needed_for_page(sid, rec->lsn, page, index_page_redo_entry)) return NULL; @@ -3108,6 +3162,13 @@ static MARIA_HA *get_MARIA_HA_from_UNDO_record(const } share= info->s; tprint(tracef, ", '%s'", share->open_file_name.str); + + if (!table_is_part_of_recovery_set(&share->open_file_name)) + { + tprint(tracef, ", skipped by user\n"); + return NULL; + } + if (cmp_translog_addr(rec->lsn, share->lsn_of_file_id) <= 0) { tprint(tracef, ", table's LOGREC_FILE_ID has LSN (%lu,0x%lx) more recent" @@ -3383,13 +3444,20 @@ static int close_all_tables(void) */ if (info->s->state.open_count != 0) { - /* let ma_close() mark the table properly closed */ + /* let maria_close() mark the table properly closed */ info->s->state.open_count= 1; info->s->global_changed= 1; + info->s->changed= 1; } prepare_table_for_close(info, addr); error|= maria_close(info); mysql_mutex_lock(&THR_LOCK_maria); + + /* Force a crash to test recovery of recovery */ + if (maria_recovery_force_crash_counter) + { + DBUG_ASSERT(--maria_recovery_force_crash_counter > 0); + } } end: mysql_mutex_unlock(&THR_LOCK_maria); @@ -3464,7 +3532,7 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, /* Reset state pointers. 
This is needed as in ALTER table we may do - commit fllowed by _ma_renable_logging_for_table and then + commit followed by _ma_renable_logging_for_table and then info->state may point to a state that was deleted by _ma_trnman_end_trans_hook() */ diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index 0bfcdd17d39..45dba0e86b3 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -30,4 +30,7 @@ int maria_apply_log(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply, FILE *trace_file, my_bool execute_undo_phase, my_bool skip_DDLs, my_bool take_checkpoints, uint *warnings_count); +/* Table of tables to recover */ +extern HASH tables_to_redo; +extern ulong maria_recovery_force_crash_counter; C_MODE_END diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c index 53f3c2f94b1..57cb5724561 100644 --- a/storage/maria/ma_recovery_util.c +++ b/storage/maria/ma_recovery_util.c @@ -59,9 +59,11 @@ void tprint(FILE *trace_file __attribute__ ((unused)), va_list args; #ifndef DBUG_OFF { - char buff[1024]; + char buff[1024], *end; va_start(args, format); vsnprintf(buff, sizeof(buff)-1, format, args); + if (*(end= strend(buff)) == '\n') + *end= 0; /* Don't print end \n */ DBUG_PRINT("info", ("%s", buff)); va_end(args); } @@ -129,16 +131,20 @@ my_bool _ma_redo_not_needed_for_page(uint16 shortid, LSN lsn, Next 2 bytes: table's short id Next 5 bytes: page number */ + char llbuf[22]; uint64 file_and_page_id= (((uint64)((index << 16) | shortid)) << 40) | page; struct st_dirty_page *dirty_page= (struct st_dirty_page *) my_hash_search(&all_dirty_pages, (uchar *)&file_and_page_id, sizeof(file_and_page_id)); - DBUG_PRINT("info", ("in dirty pages list: %d", dirty_page != NULL)); + DBUG_PRINT("info", ("page %lld in dirty pages list: %d", + (ulonglong) page, + dirty_page != NULL)); if ((dirty_page == NULL) || cmp_translog_addr(lsn, dirty_page->rec_lsn) < 0) { - tprint(tracef, ", ignoring because of dirty_pages list\n"); + 
tprint(tracef, ", ignoring page %s because of dirty_pages list\n", + llstr((ulonglong) page, llbuf)); return TRUE; } } diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index 3df7f1b9941..06db57dfab7 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -34,7 +34,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, HA_KEYSEG *last_used_keyseg; uint32 nextflag; MARIA_KEY key; - int icp_res= 1; + ICP_RESULT icp_res= ICP_MATCH; DBUG_ENTER("maria_rkey"); DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d", (long) info, (long) buf, inx, search_flag)); @@ -44,7 +44,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); info->last_key_func= search_flag; - keyinfo= share->keyinfo + inx; + keyinfo= info->last_key.keyinfo; key_buff= info->lastkey_buff+info->s->base.max_key_length; @@ -83,17 +83,17 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, mysql_rwlock_rdlock(&keyinfo->root_lock); nextflag= maria_read_vec[search_flag] | key.flag; - if (search_flag != HA_READ_KEY_EXACT || - ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME)) + if (search_flag != HA_READ_KEY_EXACT) + { + /* Assume we will get a read next/previous call after this one */ nextflag|= SEARCH_SAVE_BUFF; - + } switch (keyinfo->key_alg) { #ifdef HAVE_RTREE_KEYS case HA_KEY_ALG_RTREE: if (maria_rtree_find_first(info, &key, nextflag) < 0) { - maria_print_error(info->s, HA_ERR_CRASHED); - my_errno= HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); info->cur_row.lastpos= HA_OFFSET_ERROR; } break; @@ -103,8 +103,6 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, if (!_ma_search(info, &key, nextflag, info->s->state.key_root[inx])) { MARIA_KEY lastkey; - lastkey.keyinfo= keyinfo; - lastkey.data= info->lastkey_buff; /* Found a key, but it might not be usable. 
We cannot use rows that are inserted by other threads after we got our table lock @@ -116,7 +114,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, not satisfied with an out-of-range condition. */ if ((*share->row_is_visible)(info) && - ((icp_res= ma_check_index_cond(info, inx, buf)) != 0)) + ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH)) break; /* The key references a concurrently inserted record. */ @@ -129,6 +127,8 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, break; } + lastkey.keyinfo= keyinfo; + lastkey.data= info->lastkey_buff; do { uint not_used[2]; @@ -144,6 +144,18 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, if (_ma_search_next(info, &lastkey, maria_readnext_vec[search_flag], info->s->state.key_root[inx])) break; /* purecov: inspected */ + + /* + If we are at the last key on the key page, allow writers to + access the index. + */ + if (info->int_keypos >= info->int_maxpos && + ma_yield_and_check_if_killed(info, inx)) + { + DBUG_ASSERT(info->cur_row.lastpos == HA_OFFSET_ERROR); + break; + } + /* Check that the found key does still match the search. 
_ma_search_next() delivers the next key regardless of its @@ -163,15 +175,19 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, } while (!(*share->row_is_visible)(info) || ((icp_res= ma_check_index_cond(info, inx, buf)) == 0)); } + else + { + DBUG_ASSERT(info->cur_row.lastpos); + } } if (share->lock_key_trees) mysql_rwlock_unlock(&keyinfo->root_lock); - if (info->cur_row.lastpos == HA_OFFSET_ERROR || (icp_res != 1)) + if (info->cur_row.lastpos == HA_OFFSET_ERROR) { - if (icp_res == 2) + if (icp_res == ICP_OUT_OF_RANGE) { - info->cur_row.lastpos= HA_OFFSET_ERROR; + /* We don't want HA_ERR_END_OF_FILE in this particular case */ my_errno= HA_ERR_KEY_NOT_FOUND; } fast_ma_writeinfo(info); @@ -213,3 +229,37 @@ err: info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */ DBUG_RETURN(my_errno); } /* _ma_rkey */ + + +/* + Yield to possible other writers during a index scan. + Check also if we got killed by the user and if yes, return + HA_ERR_LOCK_WAIT_TIMEOUT + + return 0 ok + return 1 Query has been requested to be killed +*/ + +my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx) +{ + MARIA_SHARE *share; + if (ma_killed(info)) + { + /* purecov: begin tested */ + /* Mark that we don't have an active row */ + info->cur_row.lastpos= HA_OFFSET_ERROR; + /* Set error that we where aborted by kill from application */ + my_errno= HA_ERR_ABORTED_BY_USER; + return 1; + /* purecov: end */ + } + + if ((share= info->s)->lock_key_trees) + { + /* Give writers a chance to access index */ + mysql_rwlock_unlock(&share->keyinfo[inx].root_lock); + mysql_rwlock_rdlock(&share->keyinfo[inx].root_lock); + } + return 0; +} + diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c index 9142921dbb5..d3fab041d75 100644 --- a/storage/maria/ma_rnext.c +++ b/storage/maria/ma_rnext.c @@ -30,7 +30,8 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx) uint flag; MARIA_SHARE *share= info->s; MARIA_KEYDEF *keyinfo; - int icp_res= 1; + ICP_RESULT 
icp_res= ICP_MATCH; + uint update_mask= HA_STATE_NEXT_FOUND; DBUG_ENTER("maria_rnext"); if ((inx = _ma_check_index(info,inx)) < 0) @@ -62,6 +63,20 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx) error= _ma_search_first(info, keyinfo, share->state.key_root[inx]); break; } + /* + "search first" failed. This means we have no pivot for + "search next", or in other words MI_INFO::lastkey is + likely uninitialized. + + Normally SQL layer would never request "search next" if + "search first" failed. But HANDLER may do anything. + + As mi_rnext() without preceeding mi_rkey()/mi_rfirst() + equals to mi_rfirst(), we must restore original state + as if failing mi_rfirst() was not called. + */ + if (error) + update_mask|= HA_STATE_PREV_FOUND; } else { @@ -92,8 +107,20 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx) if (!error) { while (!(*share->row_is_visible)(info) || - ((icp_res= ma_check_index_cond(info, inx, buf)) == 0)) + ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH)) { + /* + If we are at the last key on the key page, allow writers to + access the index. 
+ */ + if (info->int_keypos >= info->int_maxpos && + ma_yield_and_check_if_killed(info, inx)) + { + /* my_errno is set by ma_yield_and_check_if_killed() */ + error= 1; + break; + } + /* Skip rows inserted by other threads since we got a lock */ if ((error= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, @@ -106,18 +133,17 @@ int maria_rnext(MARIA_HA *info, uchar *buf, int inx) /* Don't clear if database-changed */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - info->update|= HA_STATE_NEXT_FOUND; + info->update|= update_mask; - if (icp_res == 2) - my_errno=HA_ERR_END_OF_FILE; /* got beyond the end of scanned range */ - - if (error || icp_res != 1) + if (error || icp_res != ICP_MATCH) { + fast_ma_writeinfo(info); if (my_errno == HA_ERR_KEY_NOT_FOUND) - my_errno=HA_ERR_END_OF_FILE; + my_errno= HA_ERR_END_OF_FILE; } else if (!buf) { + fast_ma_writeinfo(info); DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c index 5822e8787e1..353d06adaf4 100644 --- a/storage/maria/ma_rnext_same.c +++ b/storage/maria/ma_rnext_same.c @@ -30,7 +30,7 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf) int error; uint inx,not_used[2]; MARIA_KEYDEF *keyinfo; - int icp_res= 1; + ICP_RESULT icp_res= ICP_MATCH; DBUG_ENTER("maria_rnext_same"); if ((int) (inx= info->lastinx) < 0 || @@ -80,9 +80,19 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf) info->cur_row.lastpos= HA_OFFSET_ERROR; break; } + /* + If we are at the last key on the key page, allow writers to + access the index. 
+ */ + if (info->int_keypos >= info->int_maxpos && + ma_yield_and_check_if_killed(info, inx)) + { + error= 1; + break; + } /* Skip rows that are inserted by other threads since we got a lock */ if ((info->s->row_is_visible)(info) && - ((icp_res= ma_check_index_cond(info, inx, buf)) != 0)) + ((icp_res= ma_check_index_cond(info, inx, buf)) != ICP_NO_MATCH)) break; } } @@ -92,16 +102,15 @@ int maria_rnext_same(MARIA_HA *info, uchar *buf) info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME; - if (icp_res == 2) - my_errno=HA_ERR_END_OF_FILE; /* got beyond the end of scanned range */ - - if (error || icp_res != 1) + if (error || icp_res != ICP_MATCH) { + fast_ma_writeinfo(info); if (my_errno == HA_ERR_KEY_NOT_FOUND) - my_errno=HA_ERR_END_OF_FILE; + my_errno= HA_ERR_END_OF_FILE; } else if (!buf) { + fast_ma_writeinfo(info); DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c index f64e875c2ba..f4d25c0f676 100644 --- a/storage/maria/ma_rprev.c +++ b/storage/maria/ma_rprev.c @@ -28,6 +28,7 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx) register uint flag; MARIA_SHARE *share= info->s; MARIA_KEYDEF *keyinfo; + ICP_RESULT icp_res= ICP_MATCH; DBUG_ENTER("maria_rprev"); if ((inx = _ma_check_index(info,inx)) < 0) @@ -55,8 +56,24 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx) if (!error) { - while (!(*share->row_is_visible)(info)) + my_off_t cur_keypage= info->last_keypage; + while (!(*share->row_is_visible)(info) || + ((icp_res= ma_check_index_cond(info, inx, buf)) == ICP_NO_MATCH)) { + /* + If we are at the last (i.e. first?) key on the key page, + allow writers to access the index. 
+ */ + if (info->last_keypage != cur_keypage) + { + cur_keypage= info->last_keypage; + if (ma_yield_and_check_if_killed(info, inx)) + { + error= 1; + break; + } + } + /* Skip rows that are inserted by other threads since we got a lock */ if ((error= _ma_search_next(info, &info->last_key, SEARCH_SMALLER, @@ -68,13 +85,16 @@ int maria_rprev(MARIA_HA *info, uchar *buf, int inx) mysql_rwlock_unlock(&keyinfo->root_lock); info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); info->update|= HA_STATE_PREV_FOUND; - if (error) + + if (error || icp_res != ICP_MATCH) { + fast_ma_writeinfo(info); if (my_errno == HA_ERR_KEY_NOT_FOUND) - my_errno=HA_ERR_END_OF_FILE; + my_errno= HA_ERR_END_OF_FILE; } else if (!buf) { + fast_ma_writeinfo(info); DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c index c20faf965b4..0f29cb71370 100644 --- a/storage/maria/ma_rsame.c +++ b/storage/maria/ma_rsame.c @@ -19,7 +19,7 @@ Find current row with read on position or read on key @notes - If inx >= 0 find record using key + If inx >= 0 find record using key else re-read row on last position @warning This function is not row version safe. @@ -29,6 +29,7 @@ @retval 0 Ok @retval HA_ERR_KEY_NOT_FOUND Row is deleted @retval HA_ERR_END_OF_FILE End of file + @retval HA_ERR_WRONG_INDEX Wrong inx argument */ @@ -36,10 +37,10 @@ int maria_rsame(MARIA_HA *info, uchar *record, int inx) { DBUG_ENTER("maria_rsame"); - if (inx != -1 && ! 
maria_is_key_active(info->s->state.key_map, inx)) + if (inx >= 0 && _ma_check_index(info, inx) < 0) { DBUG_PRINT("error", ("wrong index usage")); - DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + DBUG_RETURN(my_errno); } if (info->cur_row.lastpos == HA_OFFSET_ERROR || info->update & HA_STATE_DELETED) @@ -55,8 +56,7 @@ int maria_rsame(MARIA_HA *info, uchar *record, int inx) if (inx >= 0) { - MARIA_KEYDEF *keyinfo= info->s->keyinfo + inx; - info->lastinx= inx; + MARIA_KEYDEF *keyinfo= info->last_key.keyinfo; (*keyinfo->make_key)(info, &info->last_key, (uint) inx, info->lastkey_buff, record, info->cur_row.lastpos, diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c index 8feac7711e2..2c2090bf343 100644 --- a/storage/maria/ma_rt_index.c +++ b/storage/maria/ma_rt_index.c @@ -134,7 +134,6 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, tmp_key.data_length= key_data_length; info->cur_row.lastpos= _ma_row_pos_from_key(&tmp_key); - info->last_key.keyinfo= keyinfo; info->last_key.data_length= key_data_length; info->last_key.ref_length= share->base.rec_reflength; info->last_key.flag= 0; diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c index 856edc60490..6f32a60c073 100644 --- a/storage/maria/ma_rt_split.c +++ b/storage/maria/ma_rt_split.c @@ -544,8 +544,7 @@ int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page, } DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs)); - my_afree(new_page); - + my_afree(new_page_buff); split_err: my_afree(coord_buf); DBUG_RETURN(err_code); diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c index 4c0ffcf72b1..29244bab6ce 100644 --- a/storage/maria/ma_rt_test.c +++ b/storage/maria/ma_rt_test.c @@ -93,9 +93,10 @@ static enum data_file_type record_type= DYNAMIC_RECORD; int main(int argc, char *argv[]) { + char buff[FN_REFLEN]; MY_INIT(argv[0]); - get_options(argc, argv); maria_data_root= (char *)"."; + get_options(argc, argv); /* Maria 
requires that we always have a page cache */ if (maria_init() || (init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0, @@ -113,7 +114,7 @@ int main(int argc, char *argv[]) exit(1); } - exit(run_test("rt_test")); + exit(run_test(fn_format(buff, "test1", maria_data_root, "", MYF(0)))); } @@ -614,6 +615,8 @@ static struct my_option my_long_options[] = #endif {"help", '?', "Display help and exit", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"datadir", 'h', "Path to the database root.", &maria_data_root, + &maria_data_root, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"row-fixed-size", 'S', "Fixed size records", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"rows-in-block", 'M', "Store rows in block format", diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c index 9e5513f388b..5dd0296e17b 100644 --- a/storage/maria/ma_search.c +++ b/storage/maria/ma_search.c @@ -38,12 +38,18 @@ int _ma_check_index(MARIA_HA *info, int inx) if (info->lastinx != inx) /* Index changed */ { info->lastinx = inx; + info->last_key.keyinfo= info->s->keyinfo + inx; + info->last_key.flag= 0; info->page_changed=1; info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) | HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); } - if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + if ((info->opt_flag & WRITE_CACHE_USED) && flush_io_cache(&info->rec_cache)) + { + if (unlikely(!my_errno)) + my_errno= HA_ERR_INTERNAL_ERROR; /* Impossible */ return(-1); + } return(inx); } /* _ma_check_index */ @@ -95,6 +101,7 @@ int _ma_search(register MARIA_HA *info, MARIA_KEY *key, uint32 nextflag, @note Position to row is stored in info->lastpos + Last used key is stored in info->last_key @return @retval 0 ok (key found) @@ -120,6 +127,7 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, (ulong) (pos / info->s->block_size), nextflag, (ulong) info->cur_row.lastpos)); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, key);); + 
DBUG_ASSERT(info->last_key.keyinfo == key->keyinfo); if (pos == HA_OFFSET_ERROR) { @@ -141,7 +149,11 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, flag= (*keyinfo->bin_search)(key, &page, nextflag, &keypos, lastkey, &last_key_not_used); if (flag == MARIA_FOUND_WRONG_KEY) - DBUG_RETURN(-1); + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + goto err; + } page_flag= page.flag; used_length= page.size; nod_flag= page.node; @@ -180,7 +192,6 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, } } - info->last_key.keyinfo= keyinfo; if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0) { uint not_used[2]; @@ -372,8 +383,7 @@ int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page, length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &page); if (length == 0 || page > end) { - maria_print_error(share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_PRINT("error", ("Found wrong key: length: %u page: 0x%lx end: 0x%lx", length, (long) page, (long) end)); @@ -555,8 +565,7 @@ int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page, if (page > end) { - maria_print_error(share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_PRINT("error", ("Found wrong key: length: %u page: 0x%lx end: %lx", length, (long) page, (long) end)); @@ -785,6 +794,7 @@ MARIA_RECORD_POS _ma_row_pos_from_key(const MARIA_KEY *key) case 4: pos= (my_off_t) mi_uint4korr(after_key); break; case 3: pos= (my_off_t) mi_uint3korr(after_key); break; case 2: pos= (my_off_t) mi_uint2korr(after_key); break; + case 0: /* NO_RECORD */ default: pos=0L; /* Shut compiler up */ } @@ -894,6 +904,7 @@ void _ma_dpointer(MARIA_SHARE *share, uchar *buff, my_off_t pos) case 4: mi_int4store(buff,pos); break; case 3: mi_int3store(buff,pos); break; case 2: mi_int2store(buff,(uint) pos); break; + case 0: break; /* For NO_RECORD 
*/ default: abort(); /* Impossible */ } } /* _ma_dpointer */ @@ -1036,8 +1047,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, { if (length > (uint) keyseg->length) { - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); return 0; /* Error */ } if (length == 0) /* Same key */ @@ -1052,8 +1062,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, ("Found too long null packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); DBUG_DUMP("key", *page_pos, 16); - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); return 0; } continue; @@ -1110,8 +1119,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); DBUG_DUMP("key", *page_pos, 16); - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); return 0; /* Error */ } store_key_length_inc(key,length); @@ -1270,8 +1278,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag, ("Found too long binary packed key: %u of %u at 0x%lx", length, keyinfo->maxlength, (long) *page_pos)); DBUG_DUMP("key", *page_pos, 16); - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); /* Wrong key */ } /* Key is packed against prev key, take prefix from prev key. 
*/ @@ -1362,8 +1369,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag, if (from_end != page_end) { DBUG_PRINT("error",("Error when unpacking key")); - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); /* Error */ } } @@ -1449,8 +1455,7 @@ uchar *_ma_get_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *keypos) { if (!(*keyinfo->get_key)(key, page_flag, nod_flag, &page)) { - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); } } @@ -1500,8 +1505,7 @@ static my_bool _ma_get_prev_key(MARIA_KEY *key, MARIA_PAGE *ma_page, { if (! (*keyinfo->get_key)(key, page_flag, nod_flag, &page)) { - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(1); } } @@ -1554,8 +1558,7 @@ uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *endpos) { DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", (long) page)); - maria_print_error(keyinfo->share, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); } } @@ -1696,7 +1699,7 @@ int _ma_search_next(register MARIA_HA *info, MARIA_KEY *key, } tmp_key.data= lastkey; - info->last_key.keyinfo= tmp_key.keyinfo= keyinfo; + tmp_key.keyinfo= keyinfo; if (nextflag & SEARCH_BIGGER) /* Next key */ { @@ -1778,8 +1781,6 @@ int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo, first_pos= page.buff + share->keypage_header + page.node; } while ((pos= _ma_kpos(page.node, first_pos)) != HA_OFFSET_ERROR); - info->last_key.keyinfo= keyinfo; - if (!(*keyinfo->get_key)(&info->last_key, page.flag, page.node, &first_pos)) DBUG_RETURN(-1); /* Crashed */ @@ -1830,8 +1831,6 @@ int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo, end_of_page= page.buff + 
page.size; } while ((pos= _ma_kpos(page.node, end_of_page)) != HA_OFFSET_ERROR); - info->last_key.keyinfo= keyinfo; - if (!_ma_get_last_key(&info->last_key, &page, end_of_page)) DBUG_RETURN(-1); info->cur_row.lastpos= _ma_row_pos_from_key(&info->last_key); diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index 0a4259d0cb3..88e82d647a8 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -191,6 +191,9 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, &tempfile,&tempfile_for_exceptions)) == HA_POS_ERROR) goto err; /* purecov: tested */ + + info->sort_info->param->stage++; /* Merge stage */ + if (maxbuffer == 0) { if (!no_messages) @@ -275,12 +278,13 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys, idx=error=0; sort_keys[0]= (uchar*) (sort_keys+keys); + info->sort_info->info->in_check_table= 1; while (!(error=(*info->key_read)(info,sort_keys[idx]))) { if (info->real_key_length > info->key_length) { if (write_key(info,sort_keys[idx],tempfile_for_exceptions)) - DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + goto err; /* purecov: inspected */ continue; } @@ -289,7 +293,7 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys, if (info->write_keys(info,sort_keys,idx-1, (BUFFPEK *)alloc_dynamic(buffpek), tempfile)) - DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + goto err; /* purecov: inspected */ sort_keys[0]=(uchar*) (sort_keys+keys); memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); @@ -298,18 +302,23 @@ static ha_rows find_all_keys(MARIA_SORT_PARAM *info, uint keys, sort_keys[idx]=sort_keys[idx-1]+info->key_length; } if (error > 0) - DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */ + goto err; /* purecov: inspected */ if (buffpek->elements) { if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), tempfile)) - DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + goto err; /* purecov: inspected */ 
*maxbuffer=buffpek->elements-1; } else *maxbuffer=0; + info->sort_info->info->in_check_table= 0; DBUG_RETURN((*maxbuffer)*(keys-1)+idx); + +err: + info->sort_info->info->in_check_table= 0; /* purecov: inspected */ + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ } /* find_all_keys */ @@ -761,6 +770,8 @@ static int write_index(MARIA_SORT_PARAM *info, if ((*info->key_write)(info, *sort_keys++)) DBUG_RETURN(-1); /* purecov: inspected */ } + if (info->sort_info->param->max_stage != 1) /* If not parallel */ + _ma_report_progress(info->sort_info->param, 1, 1); DBUG_RETURN(0); } /* write_index */ @@ -771,7 +782,7 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys, BUFFPEK *buffpek, int *maxbuffer, IO_CACHE *t_file) { - register int i; + int tmp, merges, max_merges; IO_CACHE t_file2, *from_file, *to_file, *temp; BUFFPEK *lastbuff; DBUG_ENTER("merge_many_buff"); @@ -783,9 +794,21 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys, DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) DBUG_RETURN(1); /* purecov: inspected */ + /* Calculate how many merges are needed */ + max_merges= 1; /* Count merge_index */ + tmp= *maxbuffer; + while (tmp >= MERGEBUFF2) + { + merges= (tmp-MERGEBUFF*3/2 + 1) / MERGEBUFF + 1; + max_merges+= merges; + tmp= merges; + } + merges= 0; + from_file= t_file ; to_file= &t_file2; while (*maxbuffer >= MERGEBUFF2) { + int i; reinit_io_cache(from_file,READ_CACHE,0L,0,0); reinit_io_cache(to_file,WRITE_CACHE,0L,0,0); lastbuff=buffpek; @@ -794,6 +817,8 @@ static int merge_many_buff(MARIA_SORT_PARAM *info, uint keys, if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, buffpek+i,buffpek+i+MERGEBUFF-1)) goto cleanup; + if (info->sort_info->param->max_stage != 1) /* If not parallel */ + _ma_report_progress(info->sort_info->param, merges++, max_merges); } if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, buffpek+i,buffpek+ *maxbuffer)) @@ -802,6 +827,8 @@ static int 
merge_many_buff(MARIA_SORT_PARAM *info, uint keys, break; /* purecov: inspected */ temp=from_file; from_file=to_file; to_file=temp; *maxbuffer= (int) (lastbuff-buffpek)-1; + if (info->sort_info->param->max_stage != 1) /* If not parallel */ + _ma_report_progress(info->sort_info->param, merges++, max_merges); } cleanup: close_cached_file(to_file); /* This holds old result */ @@ -1058,6 +1085,8 @@ merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys, if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek, buffpek+maxbuffer)) DBUG_RETURN(1); /* purecov: inspected */ + if (info->sort_info->param->max_stage != 1) /* If not parallel */ + _ma_report_progress(info->sort_info->param, 1, 1); DBUG_RETURN(0); } /* merge_index */ diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index 19f7cfa4ea2..a075459d389 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -38,12 +38,17 @@ my_bool maria_delay_key_write= 0, maria_page_checksums= 1; my_bool maria_inited= FALSE; my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */ my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0; +my_bool maria_assert_if_crashed_table= 0; +my_bool maria_checkpoint_disabled= 0; + mysql_mutex_t THR_LOCK_maria; #ifdef DONT_USE_RW_LOCKS ulong maria_concurrent_insert= 0; #else +/* Do concurrent inserts at file end or in old holes */ ulong maria_concurrent_insert= 2; #endif + my_off_t maria_max_temp_length= MAX_FILE_SIZE; ulong maria_bulk_insert_tree_size=8192*1024; ulong maria_data_pointer_size= 4; @@ -107,6 +112,7 @@ static int always_valid(const char *filename __attribute__((unused))) } int (*maria_test_invalid_symlink)(const char *filename)= always_valid; +my_bool (*ma_killed)(MARIA_HA *)= ma_killed_standalone; #ifdef HAVE_PSI_INTERFACE @@ -138,7 +144,6 @@ PSI_thread_key key_thread_checkpoint, key_thread_find_all_keys, key_thread_soft_sync; PSI_file_key key_file_translog, key_file_kfile, key_file_dfile, - 
key_file_control; + key_file_control, key_file_tmp; #endif /* HAVE_PSI_INTERFACE */ - diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c index e085821b9d0..89a5a30f490 100644 --- a/storage/maria/ma_statrec.c +++ b/storage/maria/ma_statrec.c @@ -294,6 +294,6 @@ int _ma_read_rnd_static_record(MARIA_HA *info, uchar *buf, } /* my_errno should be set if rec_cache.error == -1 */ if (info->rec_cache.error != -1 || my_errno == 0) - my_errno=HA_ERR_WRONG_IN_RECORD; + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(my_errno); /* Something wrong (EOF?) */ } diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c index cb83116a7cd..945654a0bbe 100644 --- a/storage/maria/ma_test1.c +++ b/storage/maria/ma_test1.c @@ -70,12 +70,13 @@ extern int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, int main(int argc,char *argv[]) { + char buff[FN_REFLEN]; #ifdef SAFE_MUTEX safe_mutex_deadlock_detector= 1; #endif MY_INIT(argv[0]); - get_options(argc,argv); maria_data_root= (char *)"."; + get_options(argc,argv); /* Maria requires that we always have a page cache */ if (maria_init() || (init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0, @@ -95,7 +96,7 @@ int main(int argc,char *argv[]) if (opt_versioning) init_thr_lock(); - exit(run_test("test1")); + exit(run_test(fn_format(buff, "test1", maria_data_root, "", MYF(0)))); } @@ -409,6 +410,10 @@ static int run_test(const char *filename) if (!silent) printf("- Reading rows with key\n"); record[1]= 0; /* For nicer printf */ + + if (record_type == NO_RECORD) + maria_extra(file, HA_EXTRA_KEYREAD, 0); + for (i=0 ; i <= 25 ; i++) { create_key(key,i); @@ -422,9 +427,15 @@ static int run_test(const char *filename) (int) key_length,key+offset_to_key,error,my_errno,record+1); } } + if (record_type == NO_RECORD) + { + maria_extra(file, HA_EXTRA_NO_KEYREAD, 0); + goto end; + } if (!silent) printf("- Reading rows with position\n"); + if (maria_scan_init(file)) { fprintf(stderr, 
"maria_scan_init failed\n"); @@ -724,6 +735,8 @@ static struct my_option my_long_options[] = {"debug", '#', "Undocumented", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #endif + {"datadir", 'h', "Path to the database root.", &maria_data_root, + &maria_data_root, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"delete-rows", 'd', "Abort after this many rows has been deleted", (uchar**) &remove_count, (uchar**) &remove_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, @@ -757,6 +770,8 @@ static struct my_option my_long_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"rows-in-block", 'M', "Store rows in block format", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"rows-no-data", 'n', "Don't store any data, only keys", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"row-pointer-size", 'R', "Undocumented", (uchar**) &rec_pointer_size, (uchar**) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"silent", 's', "Undocumented", @@ -816,6 +831,9 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), case 'M': record_type= BLOCK_RECORD; break; + case 'n': + record_type= NO_RECORD; + break; case 'S': if (key_field == FIELD_VARCHAR) { @@ -887,6 +905,10 @@ static void get_options(int argc, char *argv[]) exit(ho_error); if (transactional) record_type= BLOCK_RECORD; + if (record_type == NO_RECORD) + skip_update= skip_delete= 1; + + return; } /* get options */ diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 5fa27d331ba..5d0882f3fcb 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -69,24 +69,25 @@ int main(int argc, char *argv[]) MARIA_KEYDEF keyinfo[10]; MARIA_COLUMNDEF recinfo[10]; MARIA_INFO info; - const char *filename; char *blob_buffer; MARIA_CREATE_INFO create_info; + char filename[FN_REFLEN]; #ifdef SAFE_MUTEX safe_mutex_deadlock_detector= 1; #endif MY_INIT(argv[0]); - filename= "test2"; + maria_data_root= (char *)"."; get_options(argc,argv); + 
fn_format(filename, "test2", maria_data_root, "", MYF(0)); + if (! async_io) my_disable_async_io=1; /* If we sync or not have no affect on this test */ my_disable_sync= 1; - maria_data_root= (char *)"."; /* Maria requires that we always have a page cache */ if (maria_init() || (init_pagecache(maria_pagecache, pagecache_size, 0, 0, @@ -1101,6 +1102,9 @@ static void get_options(int argc, char **argv) case 'H': checkpoint= atoi(++pos); break; + case 'h': + maria_data_root= ++pos; + break; case 'k': if ((keys=(uint) atoi(++pos)) < 1 || keys > (uint) (MARIA_KEYS-first_key)) diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index d9f8306488e..ef7aec86834 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -34,6 +34,7 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record, MARIA_KEYDEF *keyinfo= &info->s->keyinfo[def->key]; uchar *key_buff= info->lastkey_buff2; MARIA_KEY key; + int error= 0; DBUG_ENTER("_ma_check_unique"); DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash)); @@ -44,12 +45,19 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record, /* The above changed info->lastkey_buff2. Inform maria_rnext_same(). 
*/ info->update&= ~HA_STATE_RNEXT_SAME; + /* Setup that unique key is active key */ + info->last_key.keyinfo= keyinfo; + + /* any key pointer in data is destroyed */ + info->lastinx= ~0; + DBUG_ASSERT(key.data_length == MARIA_UNIQUE_HASH_LENGTH); - if (_ma_search(info, &key, SEARCH_FIND, info->s->state.key_root[def->key])) + if (_ma_search(info, &key, SEARCH_FIND | SEARCH_SAVE_BUFF, + info->s->state.key_root[def->key])) { info->page_changed=1; /* Can't optimize read next */ info->cur_row.lastpos= lastpos; - DBUG_RETURN(0); /* No matching rows */ + goto end; } for (;;) @@ -63,7 +71,8 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record, info->page_changed= 1; /* Can't optimize read next */ info->cur_row.lastpos= lastpos; DBUG_PRINT("info",("Found duplicate")); - DBUG_RETURN(1); /* Found identical */ + error= 1; /* Found identical */ + goto end; } DBUG_ASSERT(info->last_key.data_length == MARIA_UNIQUE_HASH_LENGTH); if (_ma_search_next(info, &info->last_key, SEARCH_BIGGER, @@ -72,9 +81,12 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record, { info->page_changed= 1; /* Can't optimize read next */ info->cur_row.lastpos= lastpos; - DBUG_RETURN(0); /* end of tree */ + break; /* end of tree */ } } + +end: + DBUG_RETURN(error); } @@ -134,13 +146,14 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record) keyseg->charset->coll->hash_sort(keyseg->charset, (const uchar*) pos, length, &seed1, &seed2); - crc^= seed1; + crc+= seed1; } else - while (pos != end) - crc=((crc << 8) + - (((uchar) *pos++))) + - (crc >> (8*sizeof(ha_checksum)-8)); + { + my_hash_sort_bin((CHARSET_INFO*) 0, pos, (size_t) (end-pos), + &seed1, &seed2); + crc+= seed1; + } } return crc; } diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 4051da022c0..0a726c1b7f9 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -74,7 +74,8 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, 
uchar *newrec) goto err_end; } } - if (_ma_mark_file_changed(info)) + + if (_ma_mark_file_changed(share)) { save_errno=my_errno; goto err_end; @@ -215,7 +216,10 @@ err: { if ((flag++ && _ma_ft_del(info,i,new_key_buff,newrec,pos)) || _ma_ft_add(info,i,old_key_buff,oldrec,pos)) + { + _ma_set_fatal_error(share, my_errno); break; + } } else { @@ -227,25 +231,23 @@ err: oldrec, pos, info->cur_row.trid); if ((flag++ && _ma_ck_delete(info, &new_key)) || _ma_ck_write(info, &old_key)) + { + _ma_set_fatal_error(share, my_errno); break; + } } } } while (i-- != 0); } else - { - maria_print_error(share, HA_ERR_CRASHED); - maria_mark_crashed(info); - } + _ma_set_fatal_error(share, save_errno); + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | key_changed); err_end: _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); if (save_errno == HA_ERR_KEY_NOT_FOUND) - { - maria_print_error(share, HA_ERR_CRASHED); - save_errno=HA_ERR_CRASHED; - } + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(my_errno=save_errno); } /* maria_update */ diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 9fdbc9e8a98..5e70fde956a 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -22,8 +22,6 @@ #include "ma_key_recover.h" #include "ma_blockrec.h" -#define MAX_POINTER_LENGTH 8 - /* Functions declared in this file */ static int w_search(MARIA_HA *info, uint32 comp_flag, @@ -121,16 +119,27 @@ int maria_write(MARIA_HA *info, uchar *record) my_errno=HA_ERR_INDEX_FILE_FULL; goto err2; } - if (_ma_mark_file_changed(info)) + if (_ma_mark_file_changed(share)) goto err2; /* Calculate and check all unique constraints */ - for (i=0 ; i < share->state.header.uniques ; i++) + + if (share->state.header.uniques) { - if (_ma_check_unique(info,share->uniqueinfo+i,record, - _ma_unique_hash(share->uniqueinfo+i,record), - HA_OFFSET_ERROR)) - goto err2; + for (i=0 ; i < share->state.header.uniques ; i++) + { + MARIA_UNIQUEDEF *def= share->uniqueinfo + i; + 
ha_checksum unique_hash= _ma_unique_hash(share->uniqueinfo+i,record); + if (maria_is_key_active(share->state.key_map, def->key)) + { + if (_ma_check_unique(info, def, record, + unique_hash, HA_OFFSET_ERROR)) + goto err2; + } + else + maria_unique_store(record+ share->keyinfo[def->key].seg->start, + unique_hash); + } } /* Ensure we don't try to restore auto_increment if it doesn't change */ @@ -798,18 +807,18 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key, #endif if (t_length > 0) { - if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + if (t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(-1); } bmove_upp(endpos+t_length, endpos, (uint) (endpos-key_pos)); } else { - if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + if (-t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(-1); } bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length); @@ -1066,7 +1075,6 @@ int _ma_split_page(MARIA_HA *info, MARIA_KEY *key, MARIA_PAGE *split_page, Returns pointer to start of key. key will contain the key. 
- return_key_length will contain the length of key after_key will contain the position to where the next key starts */ @@ -1174,7 +1182,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page, if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page))) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(0); } @@ -1187,7 +1195,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page, memcpy(int_key->data, key_buff, length); /* previous key */ if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page))) { - my_errno=HA_ERR_CRASHED; + _ma_set_fatal_error(share, HA_ERR_CRASHED); DBUG_RETURN(0); } } while (page < end); diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 41e15018e3e..22d1433b008 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -28,20 +28,17 @@ #include <sys/mman.h> #endif -#ifndef USE_RAID -#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) -#define my_raid_delete(A,B,C) my_delete(A,B) -#endif - static uint decode_bits; static char **default_argv; static const char *load_default_groups[]= { "aria_chk", 0 }; static const char *set_collation_name, *opt_tmpdir, *opt_log_dir; +static const char *default_log_dir; static CHARSET_INFO *set_collation; static int stopwords_inited= 0; static MY_TMPDIR maria_chk_tmpdir; -static my_bool opt_transaction_logging, opt_debug, opt_require_control_file; -static my_bool opt_warning_for_wrong_transid; +static my_bool opt_transaction_logging, opt_debug; +static my_bool opt_ignore_control_file, opt_require_control_file; +static my_bool opt_warning_for_wrong_transid, opt_update_state; static const char *type_names[]= { @@ -67,7 +64,7 @@ static const char *field_pack[]= static const char *record_formats[]= { - "Fixed length", "Packed", "Compressed", "Block", "?" + "Fixed length", "Packed", "Compressed", "Block", "No data", "?", "?" 
}; static const char *bitmap_description[]= @@ -104,7 +101,7 @@ int main(int argc, char **argv) int error; MY_INIT(argv[0]); - opt_log_dir= maria_data_root= (char *)"."; + default_log_dir= opt_log_dir= maria_data_root= (char *)"."; maria_chk_init(&check_param); check_param.opt_lock_memory= 1; /* Lock memory if possible */ check_param.using_global_keycache = 0; @@ -114,10 +111,11 @@ int main(int argc, char **argv) maria_init(); maria_block_size= 0; /* Use block size from control file */ - if (ma_control_file_open(FALSE, opt_require_control_file || - !(check_param.testflag & T_SILENT)) && - (opt_require_control_file || - (opt_transaction_logging && (check_param.testflag & T_REP_ANY)))) + if (!opt_ignore_control_file && + (ma_control_file_open(FALSE, opt_require_control_file || + !(check_param.testflag & T_SILENT)) && + (opt_require_control_file || + (opt_transaction_logging && (check_param.testflag & T_REP_ANY))))) { error= 1; goto end; @@ -202,8 +200,9 @@ enum options_mc { OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE, OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD, OPT_TRANSACTION_LOG, - OPT_ZEROFILL_KEEP_LSN, OPT_REQUIRE_CONTROL_FILE, - OPT_LOG_DIR, OPT_DATADIR, OPT_WARNING_FOR_WRONG_TRANSID + OPT_ZEROFILL_KEEP_LSN, + OPT_REQUIRE_CONTROL_FILE, OPT_IGNORE_CONTROL_FILE, + OPT_LOG_DIR, OPT_WARNING_FOR_WRONG_TRANSID }; static struct my_option my_long_options[] = @@ -264,12 +263,16 @@ static struct my_option my_long_options[] = {"information", 'i', "Print statistics information about table that is checked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "ignore-control-file", OPT_IGNORE_CONTROL_FILE, + "Ignore the control file", + (uchar**)&opt_ignore_control_file, 0, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, {"keys-used", 'k', "Tell Aria to update only some specific keys. # is a bit mask of which keys to use. 
This can be used to get faster inserts.", &check_param.keys_in_use, &check_param.keys_in_use, 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, - {"datadir", OPT_DATADIR, + {"datadir", 'h', "Path for control file (and logs if --logdir not used).", &maria_data_root, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -337,10 +340,13 @@ static struct my_option my_long_options[] = &opt_transaction_logging, &opt_transaction_logging, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"update-state", 'U', - "Mark tables as crashed if any errors were found and clean if check didn't " - "find any errors. This allows one to get rid of warnings like 'table not " - "properly closed'", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + "Mark tables as crashed if any errors were found and clean if check " + "didn't find any errors but table was marked as 'not clean' before. This " + "allows one to get rid of warnings like 'table not properly closed'. " + "If table was updated, update also the timestamp for when check was made. " + "This option is on by default!", + &opt_update_state, &opt_update_state, 0, GET_BOOL, NO_ARG, + 1, 0, 0, 0, 0, 0}, {"unpack", 'u', "Unpack file packed with aria_pack.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -415,7 +421,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE, + printf("%s Ver 1.1 for %s at %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE); } @@ -438,6 +444,9 @@ static void usage(void) -?, --help Display this help and exit.\n\ --datadir=path Path for control file (and logs if --logdir not used)\n\ --logdir=path Path for log files\n\ + --ignore-control-file Don't open the control file. Only use this if you\n\ + are sure the tables are not in use by another\n\ + program!\n\ --require-control-file Abort if we can't find/read the maria_log_control\n\ file\n\ -s, --silent Only print errors. 
One can use two -s to make\n\ @@ -472,8 +481,18 @@ static void usage(void) -i, --information Print statistics information about table that is checked.\n\ -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ all errors. Should be good enough for most cases.\n\ - -U, --update-state Mark tables as crashed if you find any errors.\n\ - -T, --read-only Don't mark table as checked.\n"); + -T, --read-only Don't mark table as checked.\n\ + -U, --update-state Mark tables as crashed if any errors were found and\n\ + clean if check didn't find any errors but table was\n\ + marked as 'not clean' before. This allows one to get\n\ + rid of warnings like 'table not properly closed'. If\n\ + table was updated, update also the timestamp for when\n\ + the check was made. This option is on by default!\n\ + Use --skip-update-state to disable.\n\ + --warning-for-wrong-transaction-id\n\ + Give a warning if we find a transaction id in the table that is bigger\n\ + than what exists in the control file. Use --skip-... to disable warning\n\ + "); puts("\ Recover (repair)/ options (When using '--recover' or '--safe-recover'):\n\ @@ -836,6 +855,7 @@ static void get_options(register int *argc,register char ***argv) load_defaults("my", load_default_groups, argc, argv); default_argv= *argv; + check_param.testflag= T_UPDATE_STATE; if (isatty(fileno(stdout))) check_param.testflag|=T_WRITE_LOOP; @@ -884,15 +904,27 @@ static void get_options(register int *argc,register char ***argv) MYF(MY_WME)))) exit(1); + if (maria_data_root != default_log_dir && opt_log_dir == default_log_dir) + { + /* --datadir was used and --log-dir was not. Set log-dir to datadir */ + opt_log_dir= maria_data_root; + } return; } /* get options */ - /* Check table */ +/** + Check/repair table + + @return 0 table is ok + @return 1 Got warning during check + @return 2 Got error during check/repair. 
+*/ static int maria_chk(HA_CHECK *param, char *filename) { int error,lock_type,recreate; + uint warning_printed_by_chk_status; my_bool rep_quick= test(param->testflag & (T_QUICK | T_FORCE_UNIQUENESS)); MARIA_HA *info; File datafile; @@ -905,6 +937,7 @@ static int maria_chk(HA_CHECK *param, char *filename) recreate=0; datafile=0; param->isam_file_name=filename; /* For error messages */ + warning_printed_by_chk_status= 0; if (!(info=maria_open(filename, (param->testflag & (T_DESCRIPT | T_READONLY)) ? O_RDONLY : O_RDWR, @@ -992,8 +1025,8 @@ static int maria_chk(HA_CHECK *param, char *filename) share->state.open_count != 0); if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && - ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR) || + ((share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR) || !(param->testflag & T_CHECK_ONLY_CHANGED)))) need_to_check=1; @@ -1010,8 +1043,8 @@ static int maria_chk(HA_CHECK *param, char *filename) need_to_check=1; } if ((param->testflag & T_CHECK_ONLY_CHANGED) && - (share->state.changed & (STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR))) + (share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR))) need_to_check=1; if (!need_to_check) { @@ -1188,9 +1221,9 @@ static int maria_chk(HA_CHECK *param, char *filename) #ifndef TO_BE_REMOVED if (param->out_flag & O_NEW_DATA) { /* Change temp file to org file */ - my_close(info->dfile.file, MYF(MY_WME)); /* Close new file */ + mysql_file_close(info->dfile.file, MYF(MY_WME)); /* Close new file */ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, - MYF(0)); + 0, MYF(0)); if (_ma_open_datafile(info,info->s, NullS, -1)) error=1; param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */ @@ -1229,8 +1262,8 @@ static int maria_chk(HA_CHECK *param, char *filename) if (!error) { DBUG_PRINT("info", ("Reseting crashed state")); - 
share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR); } else maria_mark_crashed(info); @@ -1246,7 +1279,12 @@ static int maria_chk(HA_CHECK *param, char *filename) maria_chk_init_for_check(param, info); if (opt_warning_for_wrong_transid == 0) param->max_trid= ~ (ulonglong) 0; + error= maria_chk_status(param,info); + /* Forget warning printed by maria_chk_status if no problems found */ + warning_printed_by_chk_status= param->warning_printed; + param->warning_printed= 0; + maria_intersect_keys_active(share->state.key_map, param->keys_in_use); error|= maria_chk_size(param,info); if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) @@ -1283,14 +1321,13 @@ static int maria_chk(HA_CHECK *param, char *filename) if (!error) { if (((share->state.changed & - (STATE_CHANGED | STATE_CRASHED | STATE_CRASHED_ON_REPAIR | - STATE_IN_REPAIR)) || + (STATE_CHANGED | STATE_CRASHED_FLAGS | STATE_IN_REPAIR)) || share->state.open_count != 0) && (param->testflag & T_UPDATE_STATE)) info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; DBUG_PRINT("info", ("Reseting crashed state")); - share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | - STATE_CRASHED_ON_REPAIR | STATE_IN_REPAIR); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS | + STATE_IN_REPAIR); } else if (!maria_is_crashed(info) && (param->testflag & T_UPDATE_STATE)) @@ -1306,33 +1343,40 @@ static int maria_chk(HA_CHECK *param, char *filename) (my_bool) !test(param->testflag & T_AUTO_INC)); if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY)) + { error|=maria_update_state_info(param, info, UPDATE_OPEN_COUNT | - (((param->testflag & T_REP_ANY) ? + (((param->testflag & + (T_REP_ANY | T_UPDATE_STATE)) ? UPDATE_TIME : 0) | (state_updated ? UPDATE_STAT : 0) | ((param->testflag & T_SORT_RECORDS) ? 
UPDATE_SORT : 0))); + if (warning_printed_by_chk_status) + _ma_check_print_info(param, "Aria table '%s' was ok. Status updated", + filename); + else if (!(param->testflag & T_SILENT)) + printf("State updated\n"); + warning_printed_by_chk_status= 0; + } info->update&= ~HA_STATE_CHANGED; _ma_reenable_logging_for_table(info, FALSE); maria_lock_database(info, F_UNLCK); end2: - end_pagecache(maria_pagecache, 1); if (maria_close(info)) { _ma_check_print_error(param, default_close_errmsg, my_errno, filename); DBUG_RETURN(1); } + end_pagecache(maria_pagecache, 1); if (error == 0) { if (param->out_flag & O_NEW_DATA) error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + param->backup_time, ((param->testflag & T_BACKUP_DATA) ? MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); - if (param->out_flag & O_NEW_INDEX) - error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT, - MYF(0)); } if (opt_transaction_logging && share->base.born_transactional && !error && @@ -1352,6 +1396,7 @@ end2: if (param->error_printed) { + error= 2; if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) { fprintf(stderr, "Aria table '%s' is not fixed because of errors\n", @@ -1366,12 +1411,17 @@ end2: fprintf(stderr, "Aria table '%s' is corrupted\nFix it using switch " "\"-r\" or \"-o\"\n", filename); } - else if (param->warning_printed && + else if ((param->warning_printed || warning_printed_by_chk_status) && ! 
(param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) - fprintf(stderr, "Aria table '%s' is usable but should be fixed\n", - filename); - fflush(stderr); + { + if (!error) + error= 1; + (void) fprintf(stderr, "Aria table '%s' is usable but should be fixed\n", + filename); + } + + (void) fflush(stderr); DBUG_RETURN(error); } /* maria_chk */ @@ -1400,7 +1450,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) DBUG_VOID_RETURN; } - printf("Aria file: %s\n",name); + printf("Aria file: %s\n",name); printf("Record format: %s\n", record_formats[share->data_file_type]); printf("Crashsafe: %s\n", share->base.born_transactional ? "yes" : "no"); @@ -1420,7 +1470,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) if (share->state.check_time) { get_date(buff,1,share->state.check_time); - printf("Recover time: %s\n",buff); + printf("Check/recover time: %s\n",buff); } if (share->base.born_transactional) { @@ -1436,7 +1486,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) printf("UUID: %s\n", buff); pos=buff; if (share->state.changed & STATE_CRASHED) - strmov(buff,"crashed"); + strmov(buff, share->state.changed & STATE_CRASHED_ON_REPAIR ? 
+ "crashed on repair" : "crashed"); else { if (share->state.open_count) @@ -1499,8 +1550,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name) if (share->base.max_data_file_length != HA_OFFSET_ERROR || share->base.max_key_file_length != HA_OFFSET_ERROR) printf("Max datafile length: %16s Max keyfile length: %18s\n", - llstr(share->base.max_data_file_length-1,llbuff), - llstr(share->base.max_key_file_length-1,llbuff2)); + ullstr(share->base.max_data_file_length,llbuff), + ullstr(share->base.max_key_file_length,llbuff2)); } } printf("Block_size: %16d\n",(int) share->block_size); @@ -1700,14 +1751,14 @@ static int maria_sort_records(HA_CHECK *param, { _ma_check_print_warning(param, "Can't sort table '%s' on key %d; No such key", - name,sort_key+1); + name,sort_key+1); param->error_printed=0; DBUG_RETURN(0); /* Nothing to do */ } if (keyinfo->flag & HA_FULLTEXT) { _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d", - name,sort_key+1); + name,sort_key+1); param->error_printed=0; DBUG_RETURN(0); /* Nothing to do */ } @@ -1759,12 +1810,12 @@ static int maria_sort_records(HA_CHECK *param, } fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32); - new_file= my_create(fn_format(param->temp_filename, - param->temp_filename,"", - DATA_TMP_EXT, - MY_REPLACE_EXT | MY_UNPACK_FILENAME), - 0, param->tmpfile_createflag, - MYF(0)); + new_file= mysql_file_create(key_file_tmp, + fn_format(param->temp_filename, + param->temp_filename, "", + DATA_TMP_EXT, + MY_REPLACE_EXT | MY_UNPACK_FILENAME), + 0, param->tmpfile_createflag, MYF(0)); if (new_file < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", @@ -1782,10 +1833,10 @@ static int maria_sort_records(HA_CHECK *param, for (key=0 ; key < share->base.keys ; key++) share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME; - if (my_pread(share->kfile.file, temp_buff, - (uint) keyinfo->block_length, - share->state.key_root[sort_key], - MYF(MY_NABP+MY_WME))) + if 
(mysql_file_pread(share->kfile.file, temp_buff, + (uint) keyinfo->block_length, + share->state.key_root[sort_key], + MYF(MY_NABP+MY_WME))) { _ma_check_print_error(param, "Can't read indexpage from filepos: %s", llstr(share->state.key_root[sort_key], llbuff)); @@ -1818,7 +1869,7 @@ static int maria_sort_records(HA_CHECK *param, goto err; } - my_close(info->dfile.file, MYF(MY_WME)); + mysql_file_close(info->dfile.file, MYF(MY_WME)); param->out_flag|=O_NEW_DATA; /* Data in new file */ info->dfile.file= new_file; /* Use new datafile */ _ma_set_data_pagecache_callbacks(&info->dfile, info->s); @@ -1843,8 +1894,8 @@ err: if (got_error && new_file >= 0) { end_io_cache(&info->rec_cache); - (void) my_close(new_file,MYF(MY_WME)); - (void) my_delete(param->temp_filename, MYF(MY_WME)); + (void) mysql_file_close(new_file,MYF(MY_WME)); + (void) mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME)); } if (temp_buff) { @@ -1902,9 +1953,9 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param, if (nod_flag) { next_page= _ma_kpos(nod_flag, keypos); - if (my_pread(share->kfile.file, temp_buff, - (uint) tmp_key.keyinfo->block_length, next_page, - MYF(MY_NABP+MY_WME))) + if (mysql_file_pread(share->kfile.file, temp_buff, + (uint) tmp_key.keyinfo->block_length, next_page, + MYF(MY_NABP+MY_WME))) { _ma_check_print_error(param,"Can't read keys from filepos: %s", llstr(next_page,llbuff)); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index ef65a9eb3af..cd3294e8975 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -123,6 +123,8 @@ typedef struct st_maria_state_info increased. 
*/ LSN skip_redo_lsn; + /* LSN when we wrote file id to the log */ + LSN logrec_file_id; /* the following isn't saved on disk */ uint state_diff_length; /* Should be 0 */ @@ -149,11 +151,13 @@ typedef struct st_maria_state_info #define MARIA_COLUMNDEF_SIZE (2*7+1+1+4) #define MARIA_BASE_INFO_SIZE (MY_UUID_SIZE + 5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16) #define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ +#define MARIA_MAX_POINTER_LENGTH 7 /* Node pointer */ /* Internal management bytes needed to store 2 transid/key on an index page */ #define MARIA_MAX_PACK_TRANSID_SIZE (TRANSID_SIZE+1) #define MARIA_TRANSID_PACK_OFFSET (256- TRANSID_SIZE - 1) #define MARIA_MIN_TRANSID_PACK_OFFSET (MARIA_TRANSID_PACK_OFFSET-TRANSID_SIZE) -#define MARIA_INDEX_OVERHEAD_SIZE (MARIA_MAX_PACK_TRANSID_SIZE * 2) +#define MARIA_INDEX_OVERHEAD_SIZE (MARIA_MAX_PACK_TRANSID_SIZE * 2 + \ + MARIA_MAX_POINTER_LENGTH) #define MARIA_DELETE_KEY_NR 255 /* keynr for deleted blocks */ /* @@ -240,11 +244,14 @@ typedef struct st_maria_file_bitmap { uchar *map; pgcache_page_no_t page; /* Page number for current bitmap */ - uint used_size; /* Size of bitmap head that is not 0 */ + pgcache_page_no_t last_bitmap_page; /* Last possible bitmap page */ my_bool changed; /* 1 if page needs to be written */ my_bool changed_not_flushed; /* 1 if some bitmap is not flushed */ + uint used_size; /* Size of bitmap head that is not 0 */ uint flush_all_requested; /**< If _ma_bitmap_flush_all waiting */ + uint waiting_for_flush_all_requested; /* If someone is waiting for above */ uint non_flushable; /**< 0 if bitmap and log are in sync */ + uint waiting_for_non_flushable; /* If someone is waiting for above */ PAGECACHE_FILE file; /* datafile where bitmap is stored */ mysql_mutex_t bitmap_lock; @@ -252,6 +259,8 @@ typedef struct st_maria_file_bitmap /* Constants, allocated when initiating bitmaps */ uint sizes[8]; /* Size per bit combination */ uint total_size; /* Total usable size of bitmap page */ + uint 
max_total_size; /* Max value for total_size */ + uint last_total_size; /* Size of bitmap on last_bitmap_page */ uint block_size; /* Block size of file */ ulong pages_covered; /* Pages covered by bitmap + 1 */ DYNAMIC_ARRAY pinned_pages; /**< not-yet-flushable bitmap pages */ @@ -264,6 +273,7 @@ typedef struct st_maria_file_bitmap typedef struct st_maria_share { /* Shared between opens */ MARIA_STATE_INFO state; + MARIA_STATE_INFO checkpoint_state; /* Copy of saved state by checkpoint */ MARIA_BASE_INFO base; MARIA_STATE_HISTORY *state_history; MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ @@ -371,6 +381,13 @@ typedef struct st_maria_share my_bool temporary; /* Below flag is needed to make log tables work with concurrent insert */ my_bool is_log_table; + my_bool has_null_fields; + my_bool has_varchar_fields; /* If table has varchar fields */ + /* + Set to 1 if open_count was wrong at open. Set to avoid asserts for + wrong open count on close. + */ + my_bool open_count_not_zero_on_open; my_bool changed, /* If changed since lock */ global_changed, /* If changed since open */ @@ -475,11 +492,12 @@ typedef struct st_maria_block_scan MARIA_RECORD_POS row_base_page; } MARIA_BLOCK_SCAN; +//typedef ICP_RESULT (*index_cond_func_t)(void *param); + struct st_maria_handler { MARIA_SHARE *s; /* Shared between open:s */ struct st_ma_transaction *trn; /* Pointer to active transaction */ - void *external_ptr; /* Pointer to THD in mysql */ MARIA_STATUS_INFO *state, state_save; MARIA_STATUS_INFO *state_start; /* State at start of transaction */ MARIA_ROW cur_row; /* The active row that we just read */ @@ -496,6 +514,7 @@ struct st_maria_handler DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ MEM_ROOT ft_memroot; /* used by the parser */ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + void *external_ref; /* For MariaDB TABLE */ uchar *buff; /* page buffer */ uchar *keyread_buff; /* Buffer for last key read */ uchar 
*lastkey_buff; /* Last used search key */ @@ -537,6 +556,7 @@ struct st_maria_handler ulong row_base_length; /* Length of row header */ uint row_flag; /* Flag to store in row header */ uint opt_flag; /* Optim. for space/speed */ + uint open_flags; /* Flags used in open() */ uint update; /* If file changed since open */ int lastinx; /* Last used index */ uint last_rkey_length; /* Last length in maria_rkey() */ @@ -560,6 +580,7 @@ struct st_maria_handler my_bool was_locked; /* Was locked in panic */ my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; + my_bool in_check_table; /* We are running check tables */ /* Marker if key_del_changed */ /* If info->keyread_buff can't be used for rnext */ my_bool page_changed; @@ -609,6 +630,9 @@ struct st_maria_handler #define STATE_NOT_MOVABLE 256 #define STATE_MOVED 512 /* set if base->uuid != maria_uuid */ #define STATE_IN_REPAIR 1024 /* We are running repair on table */ +#define STATE_CRASHED_PRINTED 2048 + +#define STATE_CRASHED_FLAGS (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_CRASHED_PRINTED) /* options to maria_read_cache */ @@ -691,7 +715,6 @@ struct st_maria_handler #endif #define DBUG_DUMP_KEY(name, key) DBUG_DUMP(name, (key)->data, (key)->data_length + (key)->ref_length) - /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ #define store_key_length(key,length) \ @@ -715,7 +738,7 @@ struct st_maria_handler { length=mi_uint2korr((key)+1)+3; } \ } -#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/2 - MARIA_INDEX_OVERHEAD_SIZE) +#define maria_max_key_length() ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/3 - MARIA_INDEX_OVERHEAD_SIZE) #define get_pack_length(length) ((length) >= 255 ? 
3 : 1) #define _ma_have_versioning(info) ((info)->row_flag & ROW_FLAG_TRANSID) @@ -767,9 +790,9 @@ struct st_maria_handler extern mysql_mutex_t THR_LOCK_maria; #ifdef DONT_USE_RW_LOCKS -#define rw_wrlock(A) {} -#define rw_rdlock(A) {} -#define rw_unlock(A) {} +#define mysql_rwlock_wrlock(A) {} +#define mysql_rwlock_rdlock(A) {} +#define mysql_rwlock_unlock(A) {} #endif /* Some tuning parameters */ @@ -792,9 +815,11 @@ extern uint maria_quick_table_bits; extern char *maria_data_root; extern uchar maria_zero_string[]; extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data; -extern my_bool maria_recovery_verbose; +extern my_bool maria_recovery_verbose, maria_checkpoint_disabled; +extern my_bool maria_assert_if_crashed_table; extern HASH maria_stored_state; extern int (*maria_create_trn_hook)(MARIA_HA *); +extern my_bool (*ma_killed)(MARIA_HA *); #ifdef HAVE_PSI_INTERFACE extern PSI_mutex_key key_SHARE_BITMAP_lock, key_SORT_INFO_mutex, @@ -825,7 +850,7 @@ extern PSI_thread_key key_thread_checkpoint, key_thread_find_all_keys, key_thread_soft_sync; extern PSI_file_key key_file_translog, key_file_kfile, key_file_dfile, - key_file_control; + key_file_control, key_file_tmp; #endif @@ -887,6 +912,18 @@ extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS, const uchar *, const uchar *); extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record); extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record); + +extern my_bool _ma_write_no_record(MARIA_HA *info, const uchar *record); +extern my_bool _ma_update_no_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const uchar *oldrec, const uchar *record); +extern my_bool _ma_delete_no_record(MARIA_HA *info, const uchar *record); +extern int _ma_read_no_record(MARIA_HA *info, uchar *record, + MARIA_RECORD_POS pos); +extern int _ma_read_rnd_no_record(MARIA_HA *info, uchar *buf, + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks); +my_off_t 
_ma_no_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos); + extern my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key); extern my_bool _ma_enlarge_root(MARIA_HA *info, MARIA_KEY *key, MARIA_RECORD_POS *root); @@ -937,11 +974,13 @@ extern my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key, extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer); extern int _ma_writeinfo(MARIA_HA *info, uint options); extern int _ma_test_if_changed(MARIA_HA *info); -extern int _ma_mark_file_changed(MARIA_HA *info); +extern int _ma_mark_file_changed(MARIA_SHARE *info); +extern int _ma_mark_file_changed_now(MARIA_SHARE *info); extern void _ma_mark_file_crashed(MARIA_SHARE *share); -extern my_bool _ma_set_uuid(MARIA_HA *info, my_bool reset_uuid); +void _ma_set_fatal_error(MARIA_SHARE *share, int error); +extern my_bool _ma_set_uuid(MARIA_SHARE *info, my_bool reset_uuid); extern my_bool _ma_check_if_zero(uchar *pos, size_t size); -extern int _ma_decrement_open_count(MARIA_HA *info); +extern int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_table); extern int _ma_check_index(MARIA_HA *info, int inx); extern int _ma_search(MARIA_HA *info, MARIA_KEY *key, uint32 nextflag, my_off_t pos); @@ -1035,7 +1074,7 @@ extern MARIA_KEY *_ma_pack_key(MARIA_HA *info, MARIA_KEY *int_key, HA_KEYSEG ** last_used_keyseg); extern void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from); extern int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS); -extern my_bool _ma_read_cache(IO_CACHE *info, uchar *buff, +extern my_bool _ma_read_cache(MARIA_HA *, IO_CACHE *info, uchar *buff, MARIA_RECORD_POS pos, size_t length, uint re_read_if_possibly); extern ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type); @@ -1117,7 +1156,7 @@ typedef struct st_maria_block_info #define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) #define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && 
_ma_readinfo((INFO),F_RDLCK,1) -extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); +extern uint _ma_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, my_off_t); extern uint _ma_rec_pack(MARIA_HA *info, uchar *to, const uchar *from); extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, MARIA_BLOCK_INFO *info, uchar **rec_buff_p, @@ -1196,6 +1235,7 @@ void _ma_remap_file(MARIA_HA *info, my_off_t size); MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record); my_bool _ma_write_abort_default(MARIA_HA *info); +int maria_delete_table_files(const char *name, myf sync_dir); C_MODE_START #define MARIA_FLUSH_DATA 1 @@ -1208,6 +1248,8 @@ int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, See ma_check_standalone.h . */ int _ma_killed_ptr(HA_CHECK *param); +void _ma_report_progress(HA_CHECK *param, ulonglong progress, + ulonglong max_progress); void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) ATTRIBUTE_FORMAT(printf, 2, 3); void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
@@ -1282,5 +1324,9 @@ extern my_bool maria_flush_log_for_page_none(uchar *page, extern PAGECACHE *maria_log_pagecache; extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func, void *func_arg); -int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record); +ICP_RESULT ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record); + +extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx); +extern my_bool ma_killed_standalone(MARIA_HA *); +extern uint _ma_file_callback_to_id(void *callback_data); diff --git a/storage/maria/maria_dump_log.c b/storage/maria/maria_dump_log.c new file mode 100644 index 00000000000..d5ce3913474 --- /dev/null +++ b/storage/maria/maria_dump_log.c @@ -0,0 +1,192 @@ +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include <my_getopt.h> +extern void translog_example_table_init(); +static const char *load_default_groups[]= { "aria_dump_log",0 }; +static void get_options(int *argc,char * * *argv); +#ifndef DBUG_OFF +#if defined(__WIN__) +const char *default_dbug_option= "d:t:i:O,\\aria_dump_log.trace"; +#else +const char *default_dbug_option= "d:t:i:o,/tmp/aria_dump_log.trace"; +#endif +#endif +static ulonglong opt_offset; +static ulong opt_pages; +static const char *opt_file= NULL; +static File handler= -1; +static my_bool opt_unit= 0; +static struct my_option my_long_options[] = +{ +#ifdef IMPLTMENTED + {"body", 'b', + "Print chunk body dump", + (uchar **) &opt_body, (uchar **) &opt_body, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif +#ifndef DBUG_OFF + {"debug", '#', "Output debug log. 
Often the argument is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"file", 'f', "Path to file which will be read", + (uchar**) &opt_file, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "offset", 'o', "Start reading log from this offset", + (uchar**) &opt_offset, (uchar**) &opt_offset, + 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, + { "pages", 'n', "Number of pages to read", + (uchar**) &opt_pages, (uchar**) &opt_pages, 0, + GET_ULONG, REQUIRED_ARG, (long) ~(ulong) 0, + (long) 1, (long) ~(ulong) 0, (long) 0, + (long) 1, 0}, + {"unit-test", 'U', + "Use unit test record table (for logs created by unittests", + (uchar **) &opt_unit, (uchar **) &opt_unit, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static void print_version(void) +{ + printf("%s Ver 1.0 for %s on %s\n", + my_progname_short, SYSTEM_TYPE, MACHINE_TYPE); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2008 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); + puts("and you are welcome to modify and redistribute it under the GPL license\n"); + + puts("Dump content of aria log pages."); + printf("\nUsage: %s -f file OPTIONS\n", my_progname_short); + my_print_help(my_long_options); + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + + +static my_bool +get_one_option(int optid __attribute__((unused)), + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch (optid) { + case '?': + usage(); + exit(0); + case 'V': + print_version(); + exit(0); +#ifndef DBUG_OFF + case '#': + DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); + break; +#endif + } + return 0; +} + + +static void get_options(int *argc,char ***argv) +{ + int ho_error; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + if (opt_file == NULL) + { + usage(); + exit(1); + } +} + + +/** + @brief maria_dump_log main function. +*/ + +int main(int argc, char **argv) +{ + char **default_argv; + uchar buffer[TRANSLOG_PAGE_SIZE]; + MY_INIT(argv[0]); + + load_defaults("my", load_default_groups, &argc, &argv); + default_argv= argv; + get_options(&argc, &argv); + + if (opt_unit) + translog_example_table_init(); + else + translog_table_init(); + translog_fill_overhead_table(); + + maria_data_root= (char *)"."; + + if ((handler= my_open(opt_file, O_RDONLY, MYF(MY_WME))) < 0) + { + fprintf(stderr, "Can't open file: '%s' errno: %d\n", + opt_file, my_errno); + goto err; + } + if (my_seek(handler, opt_offset, SEEK_SET, MYF(MY_WME)) != + opt_offset) + { + fprintf(stderr, "Can't set position %lld file: '%s' errno: %d\n", + opt_offset, opt_file, my_errno); + goto err; + } + for (; + opt_pages; + opt_offset+= TRANSLOG_PAGE_SIZE, opt_pages--) + { + if (my_pread(handler, buffer, TRANSLOG_PAGE_SIZE, opt_offset, + MYF(MY_NABP))) + { + if (my_errno == HA_ERR_FILE_TOO_SHORT) + goto end; + fprintf(stderr, "Can't read page at position %lld file: '%s' " + "errno: %d\n", opt_offset, opt_file, my_errno); + goto err; + } + printf("Page by offset %llu (0x%llx)\n", opt_offset, opt_offset); + dump_page(buffer, handler); + } + +end: + my_close(handler, MYF(0)); + free_defaults(default_argv); + exit(0); + return 0; /* No compiler warning */ + +err: + my_close(handler, MYF(0)); + fprintf(stderr, "%s: FAILED\n", my_progname_short); + free_defaults(default_argv); + exit(1); +} + +#include "ma_check_standalone.h" + diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 351a2014059..4480dabbcad 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c 
@@ -683,6 +683,8 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) error|=my_close(new_file,MYF(MY_WME)); if (!result_table) { + (void) flush_pagecache_blocks(isam_file->s->pagecache, &isam_file->dfile, + FLUSH_RELEASE); error|=my_close(isam_file->dfile.file, MYF(MY_WME)); isam_file->dfile.file= -1; /* Tell maria_close file is closed */ isam_file->s->bitmap.file.file= -1; @@ -729,7 +731,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) my_delete(new_name,MYF(MY_WME)); } else - error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME)); + error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME)); } if (! error) error=save_state(isam_file,mrg,new_length,glob_crc); @@ -756,13 +758,13 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) DBUG_RETURN(0); err: - end_pagecache(maria_pagecache, 1); free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); if (new_file >= 0) my_close(new_file,MYF(0)); if (join_maria_file >= 0) my_close(join_maria_file,MYF(0)); mrg_close(mrg); + end_pagecache(maria_pagecache, 1); fprintf(stderr, "Aborted: %s is not compressed\n", org_name); DBUG_RETURN(-1); } diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 6f273b11ce8..74aa8bd9d11 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -32,7 +32,7 @@ const char *default_dbug_option= "d:t:o,/tmp/aria_read_log.trace"; static my_bool opt_display_only, opt_apply, opt_apply_undo, opt_silent; static my_bool opt_check; static const char *opt_tmpdir; -static ulong opt_page_buffer_size; +static ulong opt_page_buffer_size, opt_translog_buffer_size; static ulonglong opt_start_from_lsn, opt_end_lsn, opt_start_from_checkpoint; static MY_TMPDIR maria_chk_tmpdir; @@ -44,9 +44,9 @@ int main(int argc, char **argv) uint warnings_count; MY_INIT(argv[0]); + maria_data_root= (char *)"."; load_defaults("my", load_default_groups, &argc, &argv); default_argv= argv; - maria_data_root= (char *)"."; 
get_options(&argc, &argv); maria_in_recovery= TRUE; @@ -80,9 +80,8 @@ int main(int argc, char **argv) But if it finds a log and this log was crashed, it will create a new log, which is useless. TODO: start log handler in read-only mode. */ - if (init_pagecache(maria_log_pagecache, - TRANSLOG_PAGECACHE_SIZE, 0, 0, - TRANSLOG_PAGE_SIZE, MY_WME) == 0 || + if (init_pagecache(maria_log_pagecache, opt_translog_buffer_size, + 0, 0, TRANSLOG_PAGE_SIZE, MY_WME) == 0 || translog_init(maria_data_root, TRANSLOG_FILE_SIZE, 0, 0, maria_log_pagecache, TRANSLOG_DEFAULT_FLAGS, opt_display_only)) @@ -166,7 +165,7 @@ err: #include "ma_check_standalone.h" enum options_mc { - OPT_CHARSETS_DIR=256 + OPT_CHARSETS_DIR=256, OPT_FORCE_CRASH, OPT_TRANSLOG_BUFFER_SIZE }; static struct my_option my_long_options[] = @@ -186,20 +185,27 @@ static struct my_option my_long_options[] = #ifndef DBUG_OFF {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"force-crash", OPT_FORCE_CRASH, "Force crash after # recovery events", + &maria_recovery_force_crash_counter, 0,0, GET_ULONG, REQUIRED_ARG, + 0, 0, ~(long) 0, 0, 0, 0}, #endif {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"display-only", 'd', "display brief info read from records' header", &opt_display_only, &opt_display_only, 0, GET_BOOL, NO_ARG,0, 0, 0, 0, 0, 0}, - {"aria-log-dir-path", 'l', + { "end-lsn", 'e', "Stop applying at this lsn. 
If end-lsn is used, UNDO:s " + "will not be applied", &opt_end_lsn, &opt_end_lsn, + 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, + {"aria-log-dir-path", 'h', "Path to the directory where to store transactional log", (uchar **) &maria_data_root, (uchar **) &maria_data_root, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - { "page-buffer-size", 'P', "", + { "page-buffer-size", 'P', + "The size of the buffer used for index blocks for Aria tables", &opt_page_buffer_size, &opt_page_buffer_size, 0, GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, - (long) USE_BUFFER_INIT, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD, + 1024L*1024L, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, { "start-from-lsn", 'o', "Start reading log from this lsn", &opt_start_from_lsn, &opt_start_from_lsn, @@ -207,15 +213,12 @@ static struct my_option my_long_options[] = {"start-from-checkpoint", 'C', "Start applying from last checkpoint", &opt_start_from_checkpoint, &opt_start_from_checkpoint, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - { "end-lsn", 'e', "Stop applying at this lsn. If end-lsn is used, UNDO:s " - "will not be applied", &opt_end_lsn, &opt_end_lsn, - 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 }, {"silent", 's', "Print less information during apply/undo phase", &opt_silent, &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"verbose", 'v', "Print more information during apply/undo phase", - &maria_recovery_verbose, &maria_recovery_verbose, 0, - GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"tables-to-redo", 'T', + "List of tables sepearated with , that we should apply REDO on. Use this if you only want to recover some tables", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"tmpdir", 't', "Path for temporary files. 
Multiple paths can be specified, " "separated by " #if defined( __WIN__) || defined(__NETWARE__) @@ -224,9 +227,18 @@ static struct my_option my_long_options[] = "colon (:)" #endif , (char**) &opt_tmpdir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + { "translog-buffer-size", OPT_TRANSLOG_BUFFER_SIZE, + "The size of the buffer used for transaction log for Aria tables", + &opt_translog_buffer_size, &opt_translog_buffer_size, 0, + GET_ULONG, REQUIRED_ARG, (long) TRANSLOG_PAGECACHE_SIZE, + 1024L*1024L, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD, + (long) IO_SIZE, 0}, {"undo", 'u', "Apply UNDO records to tables. (disable with --disable-undo)", (uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Print more information during apply/undo phase", + &maria_recovery_verbose, &maria_recovery_verbose, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"version", 'V', "Print version and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} @@ -243,7 +255,7 @@ static void print_version(void) static void usage(void) { print_version(); - puts("Copyright (C) 2007 MySQL AB"); + puts("Copyright (C) 2007 MySQL AB, 2009-2011 Monty Program Ab"); puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); @@ -263,10 +275,18 @@ static void usage(void) } +static uchar* my_hash_get_string(const uchar *record, size_t *length, + my_bool first __attribute__ ((unused))) +{ + *length= (size_t) (strcend((const char*) record,',')- (const char*) record); + return (uchar*) record; +} + + static my_bool get_one_option(int optid __attribute__((unused)), const struct my_option *opt __attribute__((unused)), - char *argument __attribute__((unused))) + char *argument) { switch (optid) { case '?': @@ -275,6 +295,23 @@ get_one_option(int optid __attribute__((unused)), case 'V': print_version(); exit(0); + case 'T': + { + char *pos; + if (!my_hash_inited(&tables_to_redo)) + { + my_hash_init2(&tables_to_redo, 16, &my_charset_bin, + 16, 0, 0, my_hash_get_string, 0, HASH_UNIQUE); + } + do + { + pos= strcend(argument, ','); + if (pos != argument) /* Skip empty strings */ + my_hash_insert(&tables_to_redo, (uchar*) argument); + argument= pos+1; + } while (*(pos++)); + break; + } #ifndef DBUG_OFF case '#': DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); @@ -287,6 +324,7 @@ get_one_option(int optid __attribute__((unused)), static void get_options(int *argc,char ***argv) { int ho_error; + my_bool need_help= 0; if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) exit(ho_error); @@ -294,8 +332,23 @@ static void get_options(int *argc,char ***argv) if (!opt_apply) opt_apply_undo= FALSE; - if (((opt_display_only + opt_apply) != 1) || (*argc > 0)) + if (*argc > 0) + { + need_help= 1; + fprintf(stderr, "Too many arguments given\n"); + } + if ((opt_display_only + opt_apply) != 1) + { + need_help= 1; + fprintf(stderr, + "You must use one and only one of the options 'display-only' or " + "'apply'\n"); + } + + if (need_help) { + fflush(stderr); + need_help =1; usage(); exit(1); } diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c index e4fede54425..6b538381329 100644 --- a/storage/maria/tablockman.c +++ b/storage/maria/tablockman.c @@ -445,7 +445,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, mysql_mutex_unlock(& table->mutex); /* now really wait */ - i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout); + i= mysql_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout); mysql_mutex_unlock(wait_for->mutex); @@ -543,7 +543,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) lock_compatibility_matrix[lock->next->lock_type][lock->lock_type]) { mysql_mutex_lock(lo->waiting_for->mutex); - pthread_cond_broadcast(lo->waiting_for->cond); + mysql_cond_broadcast(lo->waiting_for->cond); mysql_mutex_unlock(lo->waiting_for->mutex); } lo->waiting_for= 0; @@ -589,7 +589,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) in case somebody's waiting for it */ mysql_mutex_lock(lo->mutex); - pthread_cond_broadcast(lo->cond); + mysql_cond_broadcast(lo->cond); mysql_mutex_unlock(lo->mutex); /* and push all freed locks to the lockman's pool */ @@ -605,7 +605,7 @@ void tablockman_init(TABLOCKMAN *lm, 
loid_to_tlo_func *func, uint timeout) lm->loid_to_tlo= func; lm->lock_timeout= timeout; mysql_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST); - my_getsystime(); /* ensure that my_getsystime() is initialized */ + my_interval_timer(); /* ensure that my_interval_timer() is initialized */ } void tablockman_destroy(TABLOCKMAN *lm) diff --git a/storage/maria/unittest/CMakeLists.txt b/storage/maria/unittest/CMakeLists.txt index 1d63bed8e8e..8a83a589706 100644 --- a/storage/maria/unittest/CMakeLists.txt +++ b/storage/maria/unittest/CMakeLists.txt @@ -13,6 +13,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/unittest/mytap) LINK_LIBRARIES(aria myisam mytap mysys ${DBUG_LIBRARY} strings ${ZLIB_LIBRARY}) diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index aad1a6978b2..8533e461361 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -117,6 +117,26 @@ static CONTROL_FILE_ERROR local_ma_control_file_open(void) return error; } +static char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + return test_dirname; +} int main(int argc,char *argv[]) @@ -124,11 +144,12 @@ int main(int argc,char *argv[]) MY_INIT(argv[0]); 
my_init(); - maria_data_root= (char *)"."; default_error_handler_hook= error_handler_hook; plan(12); + maria_data_root= create_tmpdir(argv[0]); + diag("Unit tests for control file"); get_options(argc,argv); @@ -155,6 +176,9 @@ int main(int argc,char *argv[]) ok(0 == test_bad_blocksize(), "test of bad blocksize"); ok(0 == test_bad_size(), "test of too small/big file"); + delete_file(0); + rmdir(maria_data_root); + return exit_status(); } diff --git a/storage/maria/unittest/ma_loghandler_examples.c b/storage/maria/unittest/ma_loghandler_examples.c index 0c11a3b9a8e..cd5d927587a 100644 --- a/storage/maria/unittest/ma_loghandler_examples.c +++ b/storage/maria/unittest/ma_loghandler_examples.c @@ -59,6 +59,9 @@ void translog_example_table_init() i < LOGREC_NUMBER_OF_TYPES; i++) log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED; +#ifndef DBUG_OFF + check_translog_description_table(LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE); +#endif } diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c index f85c75b1a88..5e84acf41af 100644 --- a/storage/maria/unittest/ma_maria_log_cleanup.c +++ b/storage/maria/unittest/ma_maria_log_cleanup.c @@ -16,7 +16,7 @@ #include "../maria_def.h" #include <my_dir.h> -my_bool maria_log_remove() +my_bool maria_log_remove(const char *testdir) { MY_DIR *dirp; uint i; @@ -59,6 +59,28 @@ my_bool maria_log_remove() } } my_dirend(dirp); + if (testdir) + rmdir(testdir); return 0; } +char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, 
as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + return test_dirname; +} diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c index d9b814e92f1..60c196fddbf 100644 --- a/storage/maria/unittest/ma_pagecache_consist.c +++ b/storage/maria/unittest/ma_pagecache_consist.c @@ -30,7 +30,8 @@ static const char* default_dbug_option; #endif -static char *file1_name= (char*)"page_cache_test_file_1"; +static const char *base_file1_name= "page_cache_test_file_1"; +static char file1_name[FN_REFLEN]; static PAGECACHE_FILE file1; static pthread_cond_t COND_thread_count; static pthread_mutex_t LOCK_thread_count; @@ -330,6 +331,27 @@ static void *test_thread_writer(void *arg) return 0; } +static char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + return test_dirname; +} + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) @@ -337,7 +359,6 @@ int main(int argc __attribute__((unused)), pthread_t tid; pthread_attr_t thr_attr; int *param, error, pagen; - MY_INIT(argv[0]); #ifndef DBUG_OFF @@ -357,9 +378,13 @@ int main(int argc __attribute__((unused)), DBUG_ENTER("main"); DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); plan(number_of_writers + number_of_readers); + SKIP_BIG_TESTS(number_of_writers + number_of_readers) { + char *test_dirname= 
create_tmpdir(argv[0]); + fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0)); + if ((file1.file= my_open(file1_name, O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) { @@ -476,6 +501,7 @@ int main(int argc __attribute__((unused)), pthread_mutex_unlock(&LOCK_thread_count); DBUG_PRINT("info", ("thread ended")); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_IGNORE_CHANGED); end_pagecache(&pagecache, 1); DBUG_PRINT("info", ("Page cache ended")); @@ -490,6 +516,7 @@ int main(int argc __attribute__((unused)), DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); DBUG_PRINT("info", ("Program end")); + rmdir(test_dirname); } /* SKIP_BIG_TESTS */ my_end(0); diff --git a/storage/maria/unittest/ma_pagecache_rwconsist.c b/storage/maria/unittest/ma_pagecache_rwconsist.c index 88ecbe864e8..4ade9c536ed 100644 --- a/storage/maria/unittest/ma_pagecache_rwconsist.c +++ b/storage/maria/unittest/ma_pagecache_rwconsist.c @@ -33,7 +33,8 @@ static const char* default_dbug_option; #define SLEEP my_sleep(5) -static char *file1_name= (char*)"page_cache_test_file_1"; +static const char *base_file1_name= "page_cache_test_file_1"; +static char file1_name[FN_REFLEN]; static PAGECACHE_FILE file1; static pthread_cond_t COND_thread_count; static pthread_mutex_t LOCK_thread_count; @@ -200,6 +201,27 @@ static void *test_thread_writer(void *arg) return 0; } +char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + 
return test_dirname; +} + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) @@ -230,6 +252,9 @@ int main(int argc __attribute__((unused)), SKIP_BIG_TESTS(number_of_writers + number_of_readers) { + char *test_dirname= create_tmpdir(argv[0]); + fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0)); + if ((file1.file= my_open(file1_name, O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) { @@ -341,6 +366,7 @@ int main(int argc __attribute__((unused)), pthread_mutex_unlock(&LOCK_thread_count); DBUG_PRINT("info", ("thread ended")); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_IGNORE_CHANGED); end_pagecache(&pagecache, 1); DBUG_PRINT("info", ("Page cache ended")); @@ -354,6 +380,8 @@ int main(int argc __attribute__((unused)), DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); DBUG_PRINT("info", ("Program end")); + + rmdir(test_dirname); } /* SKIP_BIG_TESTS */ my_end(0); diff --git a/storage/maria/unittest/ma_pagecache_rwconsist2.c b/storage/maria/unittest/ma_pagecache_rwconsist2.c index e63d45ceb3a..a5c50bc15da 100644 --- a/storage/maria/unittest/ma_pagecache_rwconsist2.c +++ b/storage/maria/unittest/ma_pagecache_rwconsist2.c @@ -40,7 +40,8 @@ static const char* default_dbug_option; #define SLEEP my_sleep(5) -static char *file1_name= (char*)"page_cache_test_file_1"; +static const char *base_file1_name= "page_cache_test_file_1"; +static char file1_name[FN_REFLEN]; static PAGECACHE_FILE file1; static pthread_cond_t COND_thread_count; static pthread_mutex_t LOCK_thread_count; @@ -196,6 +197,27 @@ static void *test_thread_writer(void *arg) return 0; } +static char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && 
tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + return test_dirname; +} + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) @@ -226,6 +248,9 @@ int main(int argc __attribute__((unused)), SKIP_BIG_TESTS(number_of_writers + number_of_readers) { + char *test_dirname= create_tmpdir(argv[0]); + fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0)); + if ((file1.file= my_open(file1_name, O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) { @@ -350,6 +375,8 @@ int main(int argc __attribute__((unused)), DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); DBUG_PRINT("info", ("Program end")); + + rmdir(test_dirname); } /* SKIP_BIG_TESTS */ my_end(0); diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c index 3291346a8b5..bb39b20ce59 100644 --- a/storage/maria/unittest/ma_pagecache_single.c +++ b/storage/maria/unittest/ma_pagecache_single.c @@ -35,8 +35,9 @@ static const char* default_dbug_option; #define SKIP_BIG_TESTS(X) /* no-op */ #endif -static char *file1_name= (char*)"page_cache_test_file_1"; -static char *file2_name= (char*)"page_cache_test_file_2"; +static const char *base_file1_name= "page_cache_test_file_1"; +static const char *base_file2_name= "page_cache_test_file_2"; +static char file1_name[FN_REFLEN], file2_name[FN_REFLEN]; static PAGECACHE_FILE file1; static pthread_cond_t COND_thread_count; static pthread_mutex_t LOCK_thread_count; @@ -720,6 +721,28 @@ static void *test_thread(void *arg) } +static char *create_tmpdir(const char *progname) +{ + static char test_dirname[FN_REFLEN]; + char tmp_name[FN_REFLEN]; + uint length; + + /* Create a temporary directory of name TMP-'executable', but without the -t extension */ + fn_format(tmp_name, progname, "", "", 
MY_REPLACE_DIR | MY_REPLACE_EXT); + length= strlen(tmp_name); + if (length > 2 && tmp_name[length-2] == '-' && tmp_name[length-1] == 't') + tmp_name[length-2]= 0; + strxmov(test_dirname, "TMP-", tmp_name, NullS); + + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + (void) my_mkdir(test_dirname, 0777, MYF(0)); + return test_dirname; +} + + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) { @@ -748,6 +771,9 @@ int main(int argc __attribute__((unused)), plan(18); SKIP_BIG_TESTS(18) { + char *test_dirname= create_tmpdir(argv[0]); + fn_format(file1_name, base_file1_name, test_dirname, "", MYF(0)); + fn_format(file2_name, base_file2_name, test_dirname, "", MYF(0)); if ((tmp_file= my_open(file2_name, O_CREAT | O_TRUNC | O_RDWR, MYF(MY_WME))) < 0) @@ -841,13 +867,13 @@ int main(int argc __attribute__((unused)), exit(1); my_delete(file1_name, MYF(0)); + rmdir(test_dirname); } /* SKIP_BIG_TESTS */ DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); DBUG_PRINT("info", ("Program end")); my_end(0); - } return exit_status(); } diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t index 0b11daf7f98..e66d269ab93 100755 --- a/storage/maria/unittest/ma_test_all-t +++ b/storage/maria/unittest/ma_test_all-t @@ -8,7 +8,7 @@ use File::Basename; $|= 1; $^W = 1; # warnings, because env cannot parse 'perl -w' -$VER= "1.4"; +$VER= "1.5"; $opt_version= 0; $opt_help= 0; @@ -28,7 +28,10 @@ my $NEW_TEST= 0; # Test group separator in an array of tests my $test_begin= 0; my $test_end= 0; my $test_counter= 0; - +my $using_internal_tmpdir= 0; +my $full_tmpdir; +my $tmpdir="tmp"; +my $exec_dir="TMP-ma_test_all"; # Run test in this directory run_tests(); #### @@ -46,6 +49,7 @@ sub run_tests "abort-on-error" => \$opt_abort_on_error, "valgrind=s" => \$opt_valgrind, "silent=s" => \$opt_silent, + "tmpdir=s" => \$full_tmpdir, "number-of-tests" => \$opt_number_of_tests, 
"run-tests=s" => \$opt_run_tests, "start-from=s" => \$opt_run_tests)) @@ -57,7 +61,14 @@ sub run_tests print "$my_progname version $VER\n"; exit(0); } - $maria_path= dirname($0) . "/.."; + + if (! -d $exec_dir) + { + die if (!mkdir("$exec_dir")); + } + chdir($exec_dir); + + $maria_path= "../" . dirname($0) . "/.."; my $suffix= ( $^O =~ /win/i && $^O !~ /darwin/i ) ? ".exe" : ""; $maria_exe_path= "$maria_path/release"; @@ -73,14 +84,28 @@ sub run_tests $maria_exe_path= $maria_path; if ( ! -f "$maria_exe_path/ma_test1$suffix" ) { - die("Cannot find ma_test1 executable\n"); + die("Cannot find ma_test1 executable in $maria_path\n"); } } } - } + } usage() if ($opt_help || $flag_exit); + if (defined($full_tmpdir)) + { + $tmpdir= $full_tmpdir; + } + else + { + $full_tmpdir= $tmpdir; + $using_internal_tmpdir= 1; + if (! -d "$full_tmpdir") + { + die if (!mkdir("$full_tmpdir")); + } + } + # # IMPORTANT: If you modify this file, please read this: # @@ -146,7 +171,7 @@ sub run_tests # clean-up # - unlink <*.TMD aria_log*>; # Delete temporary files + unlink_all_possible_tmp_files(); # # Run tests @@ -210,6 +235,14 @@ sub run_tests run_ma_test_recovery($opt_verbose, 0); run_tests_on_clrs($suffix, $opt_verbose, 0); + unlink_all_possible_tmp_files(); + if ($using_internal_tmpdir) + { + rmdir($tmpdir); + } + rmdir($exec_dir); + chdir(".."); + rmdir($exec_dir); exit($runtime_error); } @@ -250,6 +283,7 @@ sub run_check_tests ["-p -B --key_length=480","-sm"], ["--checksum --unique","-se"], ["--unique","-se"], + ["--rows-no-data", "-s"], ["--key_multiple -N -S","-sm"], ["--key_multiple -a -p --key_length=480","-sm"], ["--key_multiple -a -B --key_length=480","-sm"], @@ -280,38 +314,38 @@ sub run_check_tests for ($i= 0; defined($ma_test1_opt[$i]); $i++) { - unlink <aria_log_control aria_log.*>; - ok("$maria_exe_path/ma_test1$suffix $silent $ma_test1_opt[$i][0] $row_type", + unlink_log_files(); + ok("$maria_exe_path/ma_test1$suffix $silent -h$tmpdir $ma_test1_opt[$i][0] $row_type", 
$verbose, $i + 1); - ok("$maria_exe_path/aria_chk$suffix $ma_test1_opt[$i][1] test1", + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_test1_opt[$i][1] $tmpdir/test1", $verbose, $i + 1); } # # These tests are outside the loops. Make sure to include them in # nr_tests manually # - ok("$maria_exe_path/aria_pack$suffix --force -s test1", $verbose, 0); - ok("$maria_exe_path/aria_chk$suffix -ess test1", $verbose, 0); + ok("$maria_exe_path/aria_pack$suffix --force -s $tmpdir/test1", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -ess $tmpdir/test1", $verbose, 0); for ($i= 0; defined($ma_test2_opt[$i]); $i++) { - unlink <aria_log_control aria_log.*>; - ok("$maria_exe_path/ma_test2$suffix $silent $ma_test2_opt[$i][0] $row_type", + unlink_log_files(); + ok("$maria_exe_path/ma_test2$suffix $silent -h$tmpdir $ma_test2_opt[$i][0] $row_type", $verbose, $i + 1); - ok("$maria_exe_path/aria_chk$suffix $ma_test2_opt[$i][1] test2", + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_test2_opt[$i][1] $tmpdir/test2", $verbose, $i + 1); } for ($i= 0; defined($ma_rt_test_opt[$i]); $i++) { - unlink <aria_log_control aria_log.*>; - ok("$maria_exe_path/ma_rt_test$suffix $silent $ma_rt_test_opt[$i][0] $row_type", + unlink_log_files(); + ok("$maria_exe_path/ma_rt_test$suffix $silent -h$tmpdir $ma_rt_test_opt[$i][0] $row_type", $verbose, $i + 1); - ok("$maria_exe_path/aria_chk$suffix $ma_rt_test_opt[$i][1] rt_test", + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir $ma_rt_test_opt[$i][1] $tmpdir/rt_test", $verbose, $i + 1); } - unlink <aria_log_control aria_log.*>; + unlink_log_files(); return 0; } @@ -412,13 +446,16 @@ sub run_pack_tests() "cp test1.MAD test2.MAD", "cp test1.MAI test2.MAI", "$maria_exe_path/aria_pack$suffix --force -s --join=test3 test1 test2", - "$maria_exe_path/aria_chk -s test3", - "$maria_exe_path/aria_chk -s --safe-recover test3", - "$maria_exe_path/aria_chk -s test3" ); - return &count_tests(\@t) if ($count); + return (&count_tests(\@t) + 3) if ($count); 
&run_test_bunch(\@t, $verbose, 0); + + ok("$maria_exe_path/aria_chk -s test3", $verbose, 0, 1); + @t= ("$maria_exe_path/aria_chk -s --safe-recover test3", + "$maria_exe_path/aria_chk -s test3"); + &run_test_bunch(\@t, $verbose, 0); + return 0; } @@ -433,25 +470,25 @@ sub run_tests_on_warnings_and_errors return 9 if ($count); # Number of tests in this function, e.g. calls to ok() - ok("$maria_exe_path/ma_test2$suffix $silent -L -K -W -P -S -R1 -m500", + ok("$maria_exe_path/ma_test2$suffix -h$tmpdir $silent -L -K -W -P -S -R1 -m500", $verbose, 0); - ok("$maria_exe_path/aria_chk$suffix -sm test2", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2", $verbose, 0); # ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135\n # In the following a failure is a success and success is a failure - $com= "$maria_exe_path/ma_test2$suffix $silent -L -K -R1 -m2000 "; + $com= "$maria_exe_path/ma_test2$suffix -h$tmpdir $silent -L -K -R1 -m2000 "; $com.= ">ma_test2_message.txt 2>&1"; ok($com, $verbose, 0, 1); ok("cat ma_test2_message.txt", $verbose, 0); ok("grep \"Error: 135\" ma_test2_message.txt > /dev/null", $verbose, 0); - # maria_exe_path/aria_chk$suffix -sm test2 will warn that + # maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2 will warn that # Datafile is almost full - ok("$maria_exe_path/aria_chk$suffix -sm test2 >ma_test2_message.txt 2>&1", - $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -sm $tmpdir/test2 >ma_test2_message.txt 2>&1", + $verbose, 0, 1); ok("cat ma_test2_message.txt", $verbose, 0); ok("grep \"warning: Datafile is almost full\" ma_test2_message.txt>/dev/null", $verbose, 0); unlink <ma_test2_message.txt>; - ok("$maria_exe_path/aria_chk$suffix -ssm test2", $verbose, 0); + ok("$maria_exe_path/aria_chk$suffix -h$tmpdir -ssm $tmpdir/test2", $verbose, 0); return 0; } @@ -479,33 +516,33 @@ sub run_tests_on_clrs my ($i); my @t= ($NEW_TEST, - "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b 
-t2 -A1", - "cp aria_log_control tmp", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -s -e test2", - "cp tmp/aria_log_control .", - "rm test2.MA?", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -s -e test2", - "rm test2.MA?", + "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b -t2 -A1", + "cp $tmpdir/aria_log_control $tmpdir/aria_log_control.backup", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2", + "mv $tmpdir/aria_log_control.backup $tmpdir/aria_log_control", + "rm $tmpdir/test2.MA?", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2", + "rm $tmpdir/test2.MA?", $NEW_TEST, - "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b -t2 -A1", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -s -e test2", - "rm test2.MA?", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -e -s test2", - "rm test2.MA?", + "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b -t2 -A1", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir ", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -s -e $tmpdir/test2", + "rm $tmpdir/test2.MA?", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -e -s $tmpdir/test2", + "rm $tmpdir/test2.MA?", $NEW_TEST, - "$maria_exe_path/ma_test2$suffix -s -L -K -W -P -M -T -c -b32768 -t4 -A1", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -es test2", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -es test2", - "rm test2.MA?", - "$maria_exe_path/aria_read_log$suffix -a -s", - "$maria_exe_path/aria_chk$suffix -es test2", - "rm test2.MA?" 
+ "$maria_exe_path/ma_test2$suffix -h$tmpdir -s -L -K -W -P -M -T -c -b32768 -t4 -A1", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir ", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2", + "rm $tmpdir/test2.MA?", + "$maria_exe_path/aria_read_log$suffix -a -s -h$tmpdir", + "$maria_exe_path/aria_chk$suffix -h$tmpdir -es $tmpdir/test2", + "rm $tmpdir/test2.MA?" ); return &count_tests(\@t) if ($count); @@ -533,7 +570,7 @@ sub run_tests_on_clrs sub ok { my ($com, $verbose, $iteration, $expected_error)= @_; - my ($msg, $output, $err, $len); + my ($msg, $output, $err, $errcode, $len); $test_counter++; if ($test_begin > $test_counter) @@ -550,17 +587,22 @@ sub ok if ($verbose) { - print "$com "; + # Print command with out the long unittest/../ prefix + my $tmp; + $tmp= $com; + $tmp =~ s|^unittest/../||; + print "$tmp "; + $len= length($tmp); } $output= `$com 2>&1`; - $len= length($com); if ($verbose) { print " " x (62 - $len); } $err= $?; + $errcode= ($? >> 8); if ((!$err && !$expected_error) || - (($err >> 8) == $expected_error && $expected_error)) + ($errcode == $expected_error && $expected_error)) { print "[ " if ($verbose); print "ok"; @@ -597,7 +639,7 @@ sub ok } $msg.= "at line "; $msg.= (caller)[2]; - $msg.= "\n(errcode: $err, test: $test_counter)\n"; + $msg.= "\n(errcode: $errcode, test: $test_counter)\n"; if ($expected_error) { $msg.= "Was expecting errcode: $expected_error\n"; @@ -650,6 +692,19 @@ sub count_tests return $nr_tests; } +sub unlink_log_files +{ + unlink "$full_tmpdir/aria_log_control", "$full_tmpdir/aria_log.00000001", "$full_tmpdir/aria_log.00000002"; +} + +sub unlink_all_possible_tmp_files() +{ + unlink_log_files(); + + # Unlink tmp files that may have been created when testing the test programs + unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? 
$full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA?>; +} + #### #### Run a bunch of tests #### Arguments: $t: an array of the tests @@ -666,7 +721,7 @@ sub run_test_bunch { if ($clear && @$t[$i] eq $NEW_TEST) { - unlink <aria_log.* aria_log_control>; + unlink_log_files(); } if (@$t[$i] ne $NEW_TEST) { @@ -699,6 +754,7 @@ Options might depend on previous ones. --start-from=... Alias for --run-tests --silent=... Silent option passed to ma_test* tests ('$opt_silent') +--tmpdir=... Store tests data in this directory (works for most tests) --valgrind=... Options for valgrind. ('$opt_valgrind') --verbose Be more verbose. Will print each unittest on a line diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index 2de06914412..ccaa6f7dc8e 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -19,7 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); extern void example_loghandler_init(); #ifndef DBUG_OFF @@ -161,7 +162,6 @@ int main(int argc __attribute__((unused)), char *argv[]) LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 3]; struct st_translog_scanner_data scanner; int rc; - MY_INIT(argv[0]); if (my_set_max_open_files(100) < 100) @@ -170,10 +170,14 @@ int main(int argc __attribute__((unused)), char *argv[]) exit(1); } bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; - if (maria_log_remove()) + + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); + /* We don't need to do physical syncs in this test */ + my_disable_sync= 1; + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2) { int2store(long_buffer + i, (i >> 1)); @@ -205,7 +209,7 @@ int main(int argc __attribute__((unused)), 
char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -654,7 +658,7 @@ err: end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); return(test(exit_status())); diff --git a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c index e941d860adb..9ebd56c754c 100644 --- a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c +++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c @@ -19,7 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); extern void translog_example_table_init(); #ifndef DBUG_OFF @@ -31,7 +32,6 @@ static const char *default_dbug_option; #define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) #define LOG_FLAGS 0 -static char *first_translog_file= (char*)"maria_log.00000001"; int main(int argc __attribute__((unused)), char *argv[]) { @@ -40,18 +40,18 @@ int main(int argc __attribute__((unused)), char *argv[]) PAGECACHE pagecache; LSN lsn, first_lsn, theor_lsn; LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1]; - MY_INIT(argv[0]); plan(2); bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; - if (maria_log_remove()) + /* + Don't give an error if we can't create dir, as it may already exist from a previously aborted + run + */ + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); - /* be sure that we have no logs in the directory*/ - my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); - my_delete(first_translog_file, MYF(0)); bzero(long_tr_id, 6); #ifndef DBUG_OFF @@ -78,9 +78,8 @@ int main(int 
argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, - LOG_FLAGS, 0, &translog_example_table_init, - 0)) + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, + LOG_FLAGS, 0, &translog_example_table_init, 0)) { fprintf(stderr, "Can't init loghandler (%d)\n", errno); exit(1); @@ -154,7 +153,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); exit(0); } diff --git a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c index 924daac5f3c..4ae9def8598 100644 --- a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c +++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c @@ -19,7 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); extern void translog_example_table_init(); #ifndef DBUG_OFF @@ -40,14 +41,14 @@ int main(int argc __attribute__((unused)), char *argv[]) PAGECACHE pagecache; LSN lsn, max_lsn, last_lsn= LSN_IMPOSSIBLE; LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1]; - MY_INIT(argv[0]); plan(2); bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; - if (maria_log_remove()) + + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); bzero(long_tr_id, 6); @@ -75,7 +76,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, 
&translog_example_table_init, 0)) { @@ -150,7 +151,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); exit(0); } diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 44c174ee1b0..56d0e55607e 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -21,7 +21,8 @@ #include "sequence_storage.h" #include <my_getopt.h> -extern my_bool maria_log_remove(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); extern void translog_example_table_init(); #ifndef DBUG_OFF @@ -238,19 +239,23 @@ int main(int argc __attribute__((unused)), char *argv[]) TRANSLOG_HEADER_BUFFER rec; LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 2]; struct st_translog_scanner_data scanner; + const char *progname=argv[0]; int rc; - MY_INIT(argv[0]); - bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; + load_defaults("my", load_default_groups, &argc, &argv); - default_argv= argv; get_options(&argc, &argv); + default_argv= argv; - if (maria_log_remove()) + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= create_tmpdir(progname); + if (maria_log_remove(0)) exit(1); + /* We don't need to do physical syncs in this test */ + my_disable_sync= 1; + { uchar buff[4]; for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++) @@ -274,7 +279,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, 0, 0, &translog_example_table_init, 0)) { fprintf(stderr, "Can't init 
loghandler (%d)\n", errno); @@ -437,7 +442,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, 0, READONLY, &translog_example_table_init, 0)) { fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); @@ -739,7 +744,7 @@ err: ma_control_file_end(); free_defaults(default_argv); seq_storage_destroy(&seq); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); return (test(exit_status())); diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index 1e9120e655f..86543ca60fb 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -19,8 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); -extern void translog_example_table_init(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); #ifndef DBUG_OFF static const char *default_dbug_option; @@ -268,17 +268,18 @@ int main(int argc __attribute__((unused)), pthread_attr_t thr_attr; int *param, error; int rc; - - /* Disabled until Sanja tests */ - plan(1); - ok(1, "disabled"); - exit(0); + MY_INIT(argv[0]); plan(WRITERS + FLUSHERS + ITERATIONS * WRITERS * 3 + FLUSH_ITERATIONS * FLUSHERS ); + /* We don't need to do physical syncs in this test */ + my_disable_sync= 1; bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) + exit(1); + long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); if (long_buffer == 0) { @@ -288,11 +289,6 @@ int main(int argc __attribute__((unused)), for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) 
long_buffer[i]= (i & 0xFF); - MY_INIT(argv[0]); - if (maria_log_remove()) - exit(1); - - #ifndef DBUG_OFF #if defined(__WIN__) default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; @@ -350,7 +346,7 @@ int main(int argc __attribute__((unused)), fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -549,7 +545,7 @@ err: translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); return(exit_status()); diff --git a/storage/maria/unittest/ma_test_loghandler_noflush-t.c b/storage/maria/unittest/ma_test_loghandler_noflush-t.c index 2994ead8c3a..c8c0f7d1873 100644 --- a/storage/maria/unittest/ma_test_loghandler_noflush-t.c +++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c @@ -19,7 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); extern void translog_example_table_init(); #ifndef DBUG_OFF @@ -31,8 +32,6 @@ static const char *default_dbug_option; #define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) #define LOG_FLAGS 0 -static char *first_translog_file= (char*)"maria_log.00000001"; - int main(int argc __attribute__((unused)), char *argv[]) { uint pagen; @@ -49,12 +48,9 @@ int main(int argc __attribute__((unused)), char *argv[]) plan(1); bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; - if (maria_log_remove()) + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); - /* be sure that we have no logs in the directory*/ - my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); - my_delete(first_translog_file, MYF(0)); bzero(long_tr_id, 6); #ifndef DBUG_OFF @@ -81,7 
+77,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -139,7 +135,7 @@ err: translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); exit(rc); diff --git a/storage/maria/unittest/ma_test_loghandler_nologs-t.c b/storage/maria/unittest/ma_test_loghandler_nologs-t.c index 32ada1e58bd..24c93e428e1 100644 --- a/storage/maria/unittest/ma_test_loghandler_nologs-t.c +++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c @@ -19,8 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); -extern void example_loghandler_init(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); #ifndef DBUG_OFF static const char *default_dbug_option; @@ -49,8 +49,8 @@ int main(int argc __attribute__((unused)), char *argv[]) bzero(&pagecache, sizeof(pagecache)); bzero(long_buffer, LONG_BUFFER_SIZE); - maria_data_root= (char *)"."; - if (maria_log_remove()) + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); bzero(long_tr_id, 6); @@ -78,7 +78,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -151,7 +151,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 
0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 1)) { @@ -189,7 +189,7 @@ int main(int argc __attribute__((unused)), char *argv[]) ok(1, "New log is OK"); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); exit(0); } diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index 5b115b426b7..0cc94befb39 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -19,8 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); -extern void translog_example_table_init(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); #ifndef DBUG_OFF static const char *default_dbug_option; @@ -31,8 +31,10 @@ static const char *default_dbug_option; #define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) #define LOG_FLAGS 0 -static char *first_translog_file= (char*)"aria_log.00000001"; -static char *file1_name= (char*)"page_cache_test_file_1"; +static const char *base_first_translog_file= "aria_log.00000001"; +static const char *base_file1_name= "page_cache_test_file_1"; +static char file1_name[FN_REFLEN], first_translog_file[FN_REFLEN]; + static PAGECACHE_FILE file1; @@ -68,18 +70,15 @@ int main(int argc __attribute__((unused)), char *argv[]) LSN lsn; my_off_t file_size; LEX_CUSTRING parts[TRANSLOG_INTERNAL_PARTS + 1]; - MY_INIT(argv[0]); plan(1); bzero(&pagecache, sizeof(pagecache)); - maria_data_root= (char *)"."; - if (maria_log_remove()) + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); - /* be sure that we have no logs in the directory*/ - my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); - my_delete(first_translog_file, MYF(0)); + fn_format(first_translog_file, base_first_translog_file, maria_data_root, "", 
MYF(0)); bzero(long_tr_id, 6); #ifndef DBUG_OFF @@ -106,7 +105,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -145,6 +144,7 @@ int main(int argc __attribute__((unused)), char *argv[]) exit(1); } + fn_format(file1_name, base_file1_name, maria_data_root, "", MYF(0)); if ((file1.file= my_open(file1_name, O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) { @@ -168,7 +168,7 @@ int main(int argc __attribute__((unused)), char *argv[]) PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); } my_close(file1.file, MYF(MY_WME)); if ((file1.file= my_open(first_translog_file, O_RDONLY, MYF(MY_WME))) < 0) @@ -192,10 +192,10 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); - my_delete(first_translog_file, MYF(0)); - my_delete(file1_name, MYF(0)); + my_delete(file1_name, MYF(MY_WME)); + if (maria_log_remove(maria_data_root)) + exit(1); exit(0); } diff --git a/storage/maria/unittest/ma_test_loghandler_purge-t.c b/storage/maria/unittest/ma_test_loghandler_purge-t.c index e7b604eb172..6ae0e7830ae 100644 --- a/storage/maria/unittest/ma_test_loghandler_purge-t.c +++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c @@ -19,8 +19,8 @@ #include <tap.h> #include "../trnman.h" -extern my_bool maria_log_remove(); -extern void translog_example_table_init(); +extern my_bool maria_log_remove(const char *testdir); +extern char *create_tmpdir(const char *progname); #ifndef DBUG_OFF static const char *default_dbug_option; @@ -49,8 
+49,8 @@ int main(int argc __attribute__((unused)), char *argv[]) bzero(&pagecache, sizeof(pagecache)); bzero(long_buffer, LONG_BUFFER_SIZE); - maria_data_root= (char *)"."; - if (maria_log_remove()) + maria_data_root= create_tmpdir(argv[0]); + if (maria_log_remove(0)) exit(1); bzero(long_tr_id, 6); @@ -78,7 +78,7 @@ int main(int argc __attribute__((unused)), char *argv[]) fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); exit(1); } - if (translog_init_with_table(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init_with_table(maria_data_root, LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS, 0, &translog_example_table_init, 0)) { @@ -186,7 +186,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - if (maria_log_remove()) + if (maria_log_remove(maria_data_root)) exit(1); exit(0); } diff --git a/storage/maria/unittest/ma_test_recovery.pl b/storage/maria/unittest/ma_test_recovery.pl index d9be82f4e58..f3a5bffbc36 100755 --- a/storage/maria/unittest/ma_test_recovery.pl +++ b/storage/maria/unittest/ma_test_recovery.pl @@ -114,7 +114,7 @@ sub main die("can't guess table name"); } $com= "$maria_exe_path/aria_chk$suffix -dvv $table "; - $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\""; + $com.= "| grep -v \"Creation time:\" | grep -v \"recover time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\""; $com.= "> $tmp/aria_chk_message.good.txt 2>&1"; my_exec($com); my $checksum= my_exec("$maria_exe_path/aria_chk$suffix -dss $table"); @@ -197,7 +197,7 @@ sub main die("can't guess table name"); } $com= "$maria_exe_path/aria_chk$suffix -dvv $table "; - $com.= "| grep -v \"Creation time:\" | grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" "; + $com.= "| grep -v \"Creation time:\" | grep -v \"recover time:\" | grep -v \"recover time:\" |grep -v \"file length\" | grep -v \"LSNs:\" | grep 
-v \"UUID:\" "; $com.= "> $tmp/aria_chk_message.good.txt 2>&1"; $res= my_exec($com); print MY_LOG $res; @@ -296,7 +296,7 @@ sub check_table_is_same print "checking if table $table has changed\n"; } - $com= "$maria_exe_path/aria_chk$suffix -dvv $table | grep -v \"Creation time:\" "; + $com= "$maria_exe_path/aria_chk$suffix -dvv $table | grep -v \"Creation time:\" | grep -v \"recover time:\""; $com.= "| grep -v \"file length\" | grep -v \"LSNs:\" | grep -v \"UUID:\" > $tmp/aria_chk_message.txt 2>&1"; $res= `$com`; print MY_LOG $res; @@ -415,7 +415,7 @@ sub physical_cmp # save original tables to restore them later copy("$table.MAD", "$tmp/before_zerofill$table_no.MAD") || die(); copy("$table.MAI", "$tmp/before_zerofill$table_no.MAI") || die(); - $com= "$maria_exe_path/aria_chk$suffix -ss --zerofill-keep-lsn $table"; + $com= "$maria_exe_path/aria_chk$suffix -ss --zerofill-keep-lsn --skip-update-state $table"; $res= `$com`; print MY_LOG $res; $table_no= $table_no + 1; diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 5d27fe39d14..c2bc993e2ff 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -75,7 +75,7 @@ pthread_handler_t test_trnman(void *arg) void run_test(const char *test, pthread_handler handler, int n, int m) { pthread_t *threads; - ulonglong now= my_getsystime(); + ulonglong now= microsecond_interval_timer(); int i; litmus= 0; @@ -97,8 +97,8 @@ void run_test(const char *test, pthread_handler handler, int n, int m) } for (i= 0 ; i < n ; i++) pthread_join(threads[i], 0); - now= my_getsystime()-now; - ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + now= microsecond_interval_timer() - now; + ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e6, litmus); my_free(threads); } @@ -162,10 +162,10 @@ int main(int argc __attribute__((unused)), char **argv) diag("mallocs: %d", trnman_allocated_transactions); { - ulonglong now= my_getsystime(); + 
ulonglong now= microsecond_interval_timer(); trnman_destroy(); - now= my_getsystime()-now; - diag("trnman_destroy: %g", ((double)now)/1e7); + now= microsecond_interval_timer() - now; + diag("trnman_destroy: %g", ((double)now)/1e6); } pthread_mutex_destroy(&rt_mutex); |