diff options
Diffstat (limited to 'innobase')
-rw-r--r-- | innobase/buf/buf0buf.c | 23 | ||||
-rw-r--r-- | innobase/fil/fil0fil.c | 28 | ||||
-rw-r--r-- | innobase/fsp/fsp0fsp.c | 46 | ||||
-rw-r--r-- | innobase/include/fil0fil.h | 8 | ||||
-rw-r--r-- | innobase/include/log0log.h | 14 | ||||
-rw-r--r-- | innobase/include/log0recv.h | 2 | ||||
-rw-r--r-- | innobase/log/log0log.c | 75 | ||||
-rw-r--r-- | innobase/log/log0recv.c | 49 | ||||
-rw-r--r-- | innobase/os/os0file.c | 1 | ||||
-rw-r--r-- | innobase/srv/srv0srv.c | 15 | ||||
-rw-r--r-- | innobase/trx/trx0trx.c | 7 |
11 files changed, 253 insertions, 15 deletions
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index 246a60a61cd..3f2476c715b 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "dict0dict.h" #include "log0recv.h" +#include "log0log.h" #include "trx0undo.h" #include "srv0srv.h" @@ -273,6 +274,7 @@ buf_page_is_corrupted( ulint old_checksum; ulint checksum_field; ulint old_checksum_field; + dulint current_lsn; if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) != mach_read_from_4(read_buf + UNIV_PAGE_SIZE @@ -284,6 +286,27 @@ buf_page_is_corrupted( return(TRUE); } +#ifndef UNIV_HOTBACKUP + if (recv_lsn_checks_on && log_peek_lsn(¤t_lsn)) { + if (ut_dulint_cmp(current_lsn, + mach_read_from_8(read_buf + FIL_PAGE_LSN)) + < 0) { + ut_print_timestamp(stderr); + + fprintf(stderr, +" InnoDB: Error: page %lu log sequence number %lu %lu\n" +"InnoDB: is in the future! Current system log sequence number %lu %lu.\n" +"InnoDB: Your database may be corrupt.\n", + mach_read_from_4(read_buf + FIL_PAGE_OFFSET), + ut_dulint_get_high( + mach_read_from_8(read_buf + FIL_PAGE_LSN)), + ut_dulint_get_low( + mach_read_from_8(read_buf + FIL_PAGE_LSN)), + ut_dulint_get_high(current_lsn), + ut_dulint_get_low(current_lsn)); + } + } +#endif old_checksum = buf_calc_page_old_checksum(read_buf); old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index a8dc357749c..f55df90846c 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -831,6 +831,34 @@ fil_space_release_free_extents( mutex_exit(&(system->mutex)); } +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id) /* in: space id */ +{ + fil_space_t* space; + fil_system_t* system = fil_system; + ulint n; + + ut_ad(system); + + mutex_enter(&(system->mutex)); + + HASH_SEARCH(hash, system->spaces, id, space, space->id == id); + + ut_a(space); + + n = space->n_reserved_extents; + + mutex_exit(&(system->mutex)); + + return(n); +} + /************************************************************************ Prepares a file node for i/o. Opens the file if it is closed. Updates the pending i/o's field in the node and the system appropriately. Takes the node diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index b6941d80e90..20bf4972f64 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -2709,10 +2709,52 @@ fseg_free_page_low( return; } +/* + fprintf(stderr, +"InnoDB: InnoDB is freeing space %lu page %lu,\n" +"InnoDB: which belongs to descr seg %lu %lu\n" +"InnoDB: segment %lu %lu.\n", + space, page, + ut_dulint_get_high( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_high( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); +*/ /* If we get here, the page is in some extent of the segment */ - ut_a(0 == ut_dulint_cmp( + if (0 != ut_dulint_cmp( mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr), - mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))) { + + ut_sprintf_buf(errbuf, descr, 40); + fprintf(stderr, +"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf); + ut_sprintf_buf(errbuf, seg_inode, 40); + fprintf(stderr, +"InnoDB: Dump of the segment inode: %s\n", errbuf); + + fprintf(stderr, +"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n" +"InnoDB: which does not belong to segment %lu %lu but belongs\n" +"InnoDB: to segment %lu %lu.\n", + space, page, + ut_dulint_get_high( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_high( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); + + fprintf(stderr, +"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n" +"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n"); + ut_a(0); + } not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index 4f78fdb2fd7..ad3149f0b36 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -386,6 +386,14 @@ fil_space_release_free_extents( /*===========================*/ ulint id, /* in: space id */ ulint n_reserved); /* in: how many one reserved */ +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id); /* in: space id */ typedef struct fil_space_struct fil_space_t; diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index e79e914e4f6..24ec28a56e6 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -513,6 +513,15 @@ log_print( /*======*/ char* buf, /* in/out: buffer where to print */ char* buf_end);/* in: buffer end */ +/********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn); /* out: if returns TRUE, current lsn is here */ /************************************************************************** Refreshes the statistics used to print per-second averages. */ @@ -785,6 +794,11 @@ struct log_struct{ called */ /* Fields involved in checkpoints */ + ulint log_group_capacity; /* capacity of the log group; if + the checkpoint age exceeds this, it is + a serious error because it is possible + we will then overwrite log and spoil + crash recovery */ ulint max_modified_age_async; /* when this recommended value for lsn - buf_pool_get_oldest_modification() diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h index 7418e4abf1b..e5a5bc05563 100644 --- a/innobase/include/log0recv.h +++ b/innobase/include/log0recv.h @@ -333,6 +333,8 @@ extern ibool recv_recovery_on; extern ibool recv_no_ibuf_operations; extern ibool recv_needed_recovery; +extern ibool recv_lsn_checks_on; + extern ibool recv_is_making_a_backup; extern ulint recv_max_parsed_page_no; diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index 53ca97baaa0..99827da5bf5 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -33,6 +33,11 @@ log_t* log_sys = NULL; ibool log_do_write = TRUE; ibool log_debug_writes = FALSE; +/* These control how often we print warnings if the last checkpoint is too +old */ +ibool log_has_printed_chkp_warning = FALSE; +time_t log_last_warning_time; + /* Pointer to this variable is used as the i/o-message when we do i/o to an archive */ byte log_archive_io; @@ -299,6 +304,7 @@ log_close(void) dulint oldest_lsn; dulint lsn; log_t* log = log_sys; + ulint checkpoint_age; ut_ad(mutex_own(&(log->mutex))); @@ -322,8 +328,34 @@ log_close(void) log->check_flush_or_checkpoint = TRUE; } - if (ut_dulint_minus(lsn, log->last_checkpoint_lsn) - <= log->max_modified_age_async) { + checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn); + + if (checkpoint_age >= log->log_group_capacity) { + /* TODO: split btr_store_big_rec_extern_fields() into small + steps so that we can release all latches in the middle, and + call log_free_check() to ensure we never write over log written + after the latest checkpoint. In principle, we should split all + big_rec operations, but other operations are smaller. */ + + if (!log_has_printed_chkp_warning + || difftime(time(NULL), log_last_warning_time) > 15) { + + log_has_printed_chkp_warning = TRUE; + log_last_warning_time = time(NULL); + + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: the age of the last checkpoint is %lu,\n" +"InnoDB: which exceeds the log group capacity %lu.\n" +"InnoDB: If you are using big BLOB or TEXT rows, you must set the\n" +"InnoDB: combined size of log files at least 10 times bigger than the\n" +"InnoDB: largest such row.\n", + checkpoint_age, log->log_group_capacity); + } + } + + if (checkpoint_age <= log->max_modified_age_async) { + goto function_exit; } @@ -332,8 +364,7 @@ log_close(void) if (ut_dulint_is_zero(oldest_lsn) || (ut_dulint_minus(lsn, oldest_lsn) > log->max_modified_age_async) - || (ut_dulint_minus(lsn, log->last_checkpoint_lsn) - > log->max_checkpoint_age_async)) { + || checkpoint_age > log->max_checkpoint_age_async) { log->check_flush_or_checkpoint = TRUE; } @@ -551,7 +582,6 @@ log_calc_max_ages(void) the database server */ { log_group_t* group; - ulint n_threads; ulint margin; ulint free; ibool success = TRUE; @@ -561,8 +591,6 @@ log_calc_max_ages(void) ut_ad(!mutex_own(&(log_sys->mutex))); - n_threads = srv_get_n_threads(); - mutex_enter(&(log_sys->mutex)); group = UT_LIST_GET_FIRST(log_sys->log_groups); @@ -590,12 +618,15 @@ log_calc_max_ages(void) group = UT_LIST_GET_NEXT(log_groups, group); } + /* Add extra safety */ + smallest_capacity = smallest_capacity - smallest_capacity / 10; + /* For each OS thread we must reserve so much free space in the smallest log group that it can accommodate the log entries produced by single query steps: running out of free log space is a serious system error which requires rebooting the database. */ - free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads + free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency) + LOG_CHECKPOINT_EXTRA_FREE; if (free >= smallest_capacity / 2) { success = FALSE; @@ -607,6 +638,10 @@ log_calc_max_ages(void) margin = ut_min(margin, log_sys->adm_checkpoint_interval); + margin = margin - margin / 10; /* Add still some extra safety */ + + log_sys->log_group_capacity = smallest_capacity; + log_sys->max_modified_age_async = margin - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; log_sys->max_modified_age_sync = margin @@ -626,7 +661,7 @@ failure: if (!success) { fprintf(stderr, - "Error: log file group too small for the number of threads\n"); +"InnoDB: Error: log file group too small for innodb_thread_concurrency\n"); } return(success); @@ -3112,6 +3147,28 @@ log_check_log_recs( } /********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn) /* out: if returns TRUE, current lsn is here */ +{ + if (0 == mutex_enter_nowait(&(log_sys->mutex), (char*)__FILE__, + __LINE__)) { + *lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(TRUE); + } + + return(FALSE); +} + +/********************************************************** Prints info of the log. */ void diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index 8e5fe819afb..9c9ad568d98 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -46,6 +46,8 @@ ibool recv_recovery_from_backup_on = FALSE; ibool recv_needed_recovery = FALSE; +ibool recv_lsn_checks_on = FALSE; + /* If the following is TRUE, the buffer pool file pages must be invalidated after recovery and no ibuf operations are allowed; this becomes TRUE if the log record hash table becomes too full, and log records must be merged @@ -71,6 +73,12 @@ ulint recv_previous_parsed_rec_is_multi = 0; ulint recv_max_parsed_page_no = 0; +/* The maximum lsn we see for a page during the recovery process. If this +is bigger than the lsn we are able to scan up to, that is an indication that +the recovery failed and the database may be corrupt. */ + +dulint recv_max_page_lsn; + /************************************************************ Creates the recovery system. */ @@ -132,6 +140,8 @@ recv_sys_init( OS_FILE_LOG_BLOCK_SIZE); recv_sys->found_corrupt_log = FALSE; + recv_max_page_lsn = ut_dulint_zero; + mutex_exit(&(recv_sys->mutex)); } @@ -1124,6 +1134,10 @@ recv_recover_page( mutex_enter(&(recv_sys->mutex)); + if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) { + recv_max_page_lsn = page_lsn; + } + recv_addr->state = RECV_PROCESSED; ut_a(recv_sys->n_addrs); @@ -2192,9 +2206,12 @@ recv_scan_log_recs( while (log_block < buf + len && !finished) { no = log_block_get_hdr_no(log_block); +/* + fprintf(stderr, "Log block header no %lu\n", no); - /* fprintf(stderr, "Log block header no %lu\n", no); */ - + fprintf(stderr, "Scanned lsn no %lu\n", + log_block_convert_lsn_to_no(scanned_lsn)); +*/ if (no != log_block_convert_lsn_to_no(scanned_lsn) || !log_block_checksum_is_ok_or_old_format(log_block)) { @@ -2586,7 +2603,6 @@ recv_recovery_from_checkpoint_start( recv_group_scan_log_recs(group, &contiguous_lsn, &group_scanned_lsn); - group->scanned_lsn = group_scanned_lsn; if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) { @@ -2603,6 +2619,31 @@ recv_recovery_from_checkpoint_start( group = UT_LIST_GET_NEXT(log_groups, group); } + /* We currently have only one log group */ + if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: We were only able to scan the log up to\n" +"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n" +"InnoDB: It is possible that the database is now corrupt!\n", + ut_dulint_get_high(group_scanned_lsn), + ut_dulint_get_low(group_scanned_lsn), + ut_dulint_get_high(checkpoint_lsn), + ut_dulint_get_low(checkpoint_lsn)); + } + + if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: We were only able to scan the log up to %lu %lu\n" +"InnoDB: but a database page a had an lsn %lu %lu. It is possible that the\n" +"InnoDB: database is now corrupt!\n", + ut_dulint_get_high(group_scanned_lsn), + ut_dulint_get_low(group_scanned_lsn), + ut_dulint_get_high(recv_max_page_lsn), + ut_dulint_get_low(recv_max_page_lsn)); + } + if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) { mutex_exit(&(log_sys->mutex)); @@ -2656,6 +2697,8 @@ recv_recovery_from_checkpoint_start( sync_order_checks_on = FALSE; + recv_lsn_checks_on = TRUE; + /* The database is now ready to start almost normal processing of user transactions: transaction rollbacks and the application of the log records in the hash table can be run in background. */ diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index a9a64ae7864..7936b06c24d 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -1030,6 +1030,7 @@ os_file_flush( #ifdef HAVE_FDATASYNC ret = fdatasync(file); #else +/* printf("Flushing to file %lu\n", (ulint)file); */ ret = fsync(file); #endif os_n_fsyncs++; diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index f81b2d0a60e..f31d275eff0 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -1798,8 +1798,12 @@ retry: os_fast_mutex_unlock(&srv_conc_mutex); + trx->op_info = (char*)"sleeping before joining InnoDB queue"; + os_thread_sleep(50000); + trx->op_info = (char*)""; + os_fast_mutex_lock(&srv_conc_mutex); srv_conc_n_waiting_threads--; @@ -2299,6 +2303,7 @@ srv_sprintf_innodb_monitor( char* buf_end = buf + len - 2000; double time_elapsed; time_t current_time; + ulint n_reserved; mutex_enter(&srv_innodb_monitor_mutex); @@ -2414,8 +2419,16 @@ srv_sprintf_innodb_monitor( "ROW OPERATIONS\n" "--------------\n"); buf += sprintf(buf, - "%ld queries inside InnoDB, %ld queries in queue\n", + "%ld queries inside InnoDB, %lu queries in queue\n", srv_conc_n_threads, srv_conc_n_waiting_threads); + + n_reserved = fil_space_get_n_reserved_extents(0); + if (n_reserved > 0) { + buf += sprintf(buf, + "%lu tablespace extents now reserved for B-tree split operations\n", + n_reserved); + } + #ifdef UNIV_LINUX buf += sprintf(buf, "Main thread process no. %lu, id %lu, state: %s\n", diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index e8d6c093680..47c39290b15 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -1624,6 +1624,13 @@ trx_print( } buf += sprintf(buf, "\n"); + + if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { + + buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n", + trx->n_mysql_tables_in_use, + trx->mysql_n_tables_locked); + } start_of_line = buf; |