From df5b08891945a6b19354556a0f8fe12e336696e5 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 25 Jul 2003 22:26:39 +0300 Subject: srv0srv.c, os0file.c, log0recv.h, log0log.h, fil0fil.h, fsp0fsp.c, fil0fil.c: Merge trx0trx.c: Print more info about a trx in SHOW INNODB status; try to find the bug reported by Plaxo buf0buf.c: Check that page log sequence numbers are not in the future log0recv.c, log0log.c: Fixed a bug: if you used big BLOBs, and your log files were relatively small, InnoDB could in a big BLOB operation temporarily write over the log produced AFTER the latest checkpoint. If InnoDB would crash at that moment, then the crash recovery would fail, because InnoDB would not be able to scan the log even up to the latest checkpoint. Starting from this version, InnoDB tries to ensure the latest checkpoint is young enough. If that is not possible, InnoDB prints a warning to the .err log innobase/log/log0log.c: Fixed a bug: if you used big BLOBs, and your log files were relatively small, InnoDB could in a big BLOB operation temporarily write over the log produced AFTER the latest checkpoint. If InnoDB would crash at that moment, then the crash recovery would fail, because InnoDB would not be able to scan the log even up to the latest checkpoint. Starting from this version, InnoDB tries to ensure the latest checkpoint is young enough. If that is not possible, InnoDB prints a warning to the .err log innobase/log/log0recv.c: Fixed a bug: if you used big BLOBs, and your log files were relatively small, InnoDB could in a big BLOB operation temporarily write over the log produced AFTER the latest checkpoint. If InnoDB would crash at that moment, then the crash recovery would fail, because InnoDB would not be able to scan the log even up to the latest checkpoint. Starting from this version, InnoDB tries to ensure the latest checkpoint is young enough. If that is not possible, InnoDB prints a warning to the .err log innobase/buf/buf0buf.c: Check that page log sequence numbers are not in the future innobase/trx/trx0trx.c: Print more info about a trx in SHOW INNODB status; try to find the bug reported by Plaxo innobase/fil/fil0fil.c: Merge innobase/fsp/fsp0fsp.c: Merge innobase/include/fil0fil.h: Merge innobase/include/log0log.h: Merge innobase/include/log0recv.h: Merge innobase/os/os0file.c: Merge innobase/srv/srv0srv.c: Merge --- innobase/buf/buf0buf.c | 23 ++++++++++++++ innobase/fil/fil0fil.c | 28 +++++++++++++++++ innobase/fsp/fsp0fsp.c | 46 +++++++++++++++++++++++++-- innobase/include/fil0fil.h | 8 +++++ innobase/include/log0log.h | 14 +++++++++ innobase/include/log0recv.h | 2 ++ innobase/log/log0log.c | 75 +++++++++++++++++++++++++++++++++++++++------ innobase/log/log0recv.c | 49 +++++++++++++++++++++++++++-- innobase/os/os0file.c | 1 + innobase/srv/srv0srv.c | 15 ++++++++- innobase/trx/trx0trx.c | 7 +++++ 11 files changed, 253 insertions(+), 15 deletions(-) diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index 246a60a61cd..3f2476c715b 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "dict0dict.h" #include "log0recv.h" +#include "log0log.h" #include "trx0undo.h" #include "srv0srv.h" @@ -273,6 +274,7 @@ buf_page_is_corrupted( ulint old_checksum; ulint checksum_field; ulint old_checksum_field; + dulint current_lsn; if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) != mach_read_from_4(read_buf + UNIV_PAGE_SIZE @@ -284,6 +286,27 @@ buf_page_is_corrupted( return(TRUE); } +#ifndef UNIV_HOTBACKUP + if (recv_lsn_checks_on && log_peek_lsn(¤t_lsn)) { + if (ut_dulint_cmp(current_lsn, + mach_read_from_8(read_buf + FIL_PAGE_LSN)) + < 0) { + ut_print_timestamp(stderr); + + fprintf(stderr, +" InnoDB: Error: page %lu log sequence number %lu %lu\n" +"InnoDB: is in the future! Current system log sequence number %lu %lu.\n" +"InnoDB: Your database may be corrupt.\n", + mach_read_from_4(read_buf + FIL_PAGE_OFFSET), + ut_dulint_get_high( + mach_read_from_8(read_buf + FIL_PAGE_LSN)), + ut_dulint_get_low( + mach_read_from_8(read_buf + FIL_PAGE_LSN)), + ut_dulint_get_high(current_lsn), + ut_dulint_get_low(current_lsn)); + } + } +#endif old_checksum = buf_calc_page_old_checksum(read_buf); old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index a8dc357749c..f55df90846c 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -831,6 +831,34 @@ fil_space_release_free_extents( mutex_exit(&(system->mutex)); } +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id) /* in: space id */ +{ + fil_space_t* space; + fil_system_t* system = fil_system; + ulint n; + + ut_ad(system); + + mutex_enter(&(system->mutex)); + + HASH_SEARCH(hash, system->spaces, id, space, space->id == id); + + ut_a(space); + + n = space->n_reserved_extents; + + mutex_exit(&(system->mutex)); + + return(n); +} + /************************************************************************ Prepares a file node for i/o. Opens the file if it is closed. Updates the pending i/o's field in the node and the system appropriately. Takes the node diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index b6941d80e90..20bf4972f64 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -2709,10 +2709,52 @@ fseg_free_page_low( return; } +/* + fprintf(stderr, +"InnoDB: InnoDB is freeing space %lu page %lu,\n" +"InnoDB: which belongs to descr seg %lu %lu\n" +"InnoDB: segment %lu %lu.\n", + space, page, + ut_dulint_get_high( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_high( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); +*/ /* If we get here, the page is in some extent of the segment */ - ut_a(0 == ut_dulint_cmp( + if (0 != ut_dulint_cmp( mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr), - mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))) { + + ut_sprintf_buf(errbuf, descr, 40); + fprintf(stderr, +"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf); + ut_sprintf_buf(errbuf, seg_inode, 40); + fprintf(stderr, +"InnoDB: Dump of the segment inode: %s\n", errbuf); + + fprintf(stderr, +"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n" +"InnoDB: which does not belong to segment %lu %lu but belongs\n" +"InnoDB: to segment %lu %lu.\n", + space, page, + ut_dulint_get_high( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_high( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)), + ut_dulint_get_low( + mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))); + + fprintf(stderr, +"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n" +"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n"); + ut_a(0); + } not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index 4f78fdb2fd7..ad3149f0b36 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -386,6 +386,14 @@ fil_space_release_free_extents( /*===========================*/ ulint id, /* in: space id */ ulint n_reserved); /* in: how many one reserved */ +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id); /* in: space id */ typedef struct fil_space_struct fil_space_t; diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index e79e914e4f6..24ec28a56e6 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -513,6 +513,15 @@ log_print( /*======*/ char* buf, /* in/out: buffer where to print */ char* buf_end);/* in: buffer end */ +/********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn); /* out: if returns TRUE, current lsn is here */ /************************************************************************** Refreshes the statistics used to print per-second averages. */ @@ -785,6 +794,11 @@ struct log_struct{ called */ /* Fields involved in checkpoints */ + ulint log_group_capacity; /* capacity of the log group; if + the checkpoint age exceeds this, it is + a serious error because it is possible + we will then overwrite log and spoil + crash recovery */ ulint max_modified_age_async; /* when this recommended value for lsn - buf_pool_get_oldest_modification() diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h index 7418e4abf1b..e5a5bc05563 100644 --- a/innobase/include/log0recv.h +++ b/innobase/include/log0recv.h @@ -333,6 +333,8 @@ extern ibool recv_recovery_on; extern ibool recv_no_ibuf_operations; extern ibool recv_needed_recovery; +extern ibool recv_lsn_checks_on; + extern ibool recv_is_making_a_backup; extern ulint recv_max_parsed_page_no; diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index 53ca97baaa0..99827da5bf5 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -33,6 +33,11 @@ log_t* log_sys = NULL; ibool log_do_write = TRUE; ibool log_debug_writes = FALSE; +/* These control how often we print warnings if the last checkpoint is too +old */ +ibool log_has_printed_chkp_warning = FALSE; +time_t log_last_warning_time; + /* Pointer to this variable is used as the i/o-message when we do i/o to an archive */ byte log_archive_io; @@ -299,6 +304,7 @@ log_close(void) dulint oldest_lsn; dulint lsn; log_t* log = log_sys; + ulint checkpoint_age; ut_ad(mutex_own(&(log->mutex))); @@ -322,8 +328,34 @@ log_close(void) log->check_flush_or_checkpoint = TRUE; } - if (ut_dulint_minus(lsn, log->last_checkpoint_lsn) - <= log->max_modified_age_async) { + checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn); + + if (checkpoint_age >= log->log_group_capacity) { + /* TODO: split btr_store_big_rec_extern_fields() into small + steps so that we can release all latches in the middle, and + call log_free_check() to ensure we never write over log written + after the latest checkpoint. In principle, we should split all + big_rec operations, but other operations are smaller. */ + + if (!log_has_printed_chkp_warning + || difftime(time(NULL), log_last_warning_time) > 15) { + + log_has_printed_chkp_warning = TRUE; + log_last_warning_time = time(NULL); + + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: the age of the last checkpoint is %lu,\n" +"InnoDB: which exceeds the log group capacity %lu.\n" +"InnoDB: If you are using big BLOB or TEXT rows, you must set the\n" +"InnoDB: combined size of log files at least 10 times bigger than the\n" +"InnoDB: largest such row.\n", + checkpoint_age, log->log_group_capacity); + } + } + + if (checkpoint_age <= log->max_modified_age_async) { + goto function_exit; } @@ -332,8 +364,7 @@ log_close(void) if (ut_dulint_is_zero(oldest_lsn) || (ut_dulint_minus(lsn, oldest_lsn) > log->max_modified_age_async) - || (ut_dulint_minus(lsn, log->last_checkpoint_lsn) - > log->max_checkpoint_age_async)) { + || checkpoint_age > log->max_checkpoint_age_async) { log->check_flush_or_checkpoint = TRUE; } @@ -551,7 +582,6 @@ log_calc_max_ages(void) the database server */ { log_group_t* group; - ulint n_threads; ulint margin; ulint free; ibool success = TRUE; @@ -561,8 +591,6 @@ log_calc_max_ages(void) ut_ad(!mutex_own(&(log_sys->mutex))); - n_threads = srv_get_n_threads(); - mutex_enter(&(log_sys->mutex)); group = UT_LIST_GET_FIRST(log_sys->log_groups); @@ -590,12 +618,15 @@ log_calc_max_ages(void) group = UT_LIST_GET_NEXT(log_groups, group); } + /* Add extra safety */ + smallest_capacity = smallest_capacity - smallest_capacity / 10; + /* For each OS thread we must reserve so much free space in the smallest log group that it can accommodate the log entries produced by single query steps: running out of free log space is a serious system error which requires rebooting the database. */ - free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads + free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency) + LOG_CHECKPOINT_EXTRA_FREE; if (free >= smallest_capacity / 2) { success = FALSE; @@ -607,6 +638,10 @@ log_calc_max_ages(void) margin = ut_min(margin, log_sys->adm_checkpoint_interval); + margin = margin - margin / 10; /* Add still some extra safety */ + + log_sys->log_group_capacity = smallest_capacity; + log_sys->max_modified_age_async = margin - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; log_sys->max_modified_age_sync = margin @@ -626,7 +661,7 @@ failure: if (!success) { fprintf(stderr, - "Error: log file group too small for the number of threads\n"); +"InnoDB: Error: log file group too small for innodb_thread_concurrency\n"); } return(success); @@ -3111,6 +3146,28 @@ log_check_log_recs( return(TRUE); } +/********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn) /* out: if returns TRUE, current lsn is here */ +{ + if (0 == mutex_enter_nowait(&(log_sys->mutex), (char*)__FILE__, + __LINE__)) { + *lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(TRUE); + } + + return(FALSE); +} + /********************************************************** Prints info of the log. */ diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index 8e5fe819afb..9c9ad568d98 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -46,6 +46,8 @@ ibool recv_recovery_from_backup_on = FALSE; ibool recv_needed_recovery = FALSE; +ibool recv_lsn_checks_on = FALSE; + /* If the following is TRUE, the buffer pool file pages must be invalidated after recovery and no ibuf operations are allowed; this becomes TRUE if the log record hash table becomes too full, and log records must be merged @@ -71,6 +73,12 @@ ulint recv_previous_parsed_rec_is_multi = 0; ulint recv_max_parsed_page_no = 0; +/* The maximum lsn we see for a page during the recovery process. If this +is bigger than the lsn we are able to scan up to, that is an indication that +the recovery failed and the database may be corrupt. */ + +dulint recv_max_page_lsn; + /************************************************************ Creates the recovery system. */ @@ -132,6 +140,8 @@ recv_sys_init( OS_FILE_LOG_BLOCK_SIZE); recv_sys->found_corrupt_log = FALSE; + recv_max_page_lsn = ut_dulint_zero; + mutex_exit(&(recv_sys->mutex)); } @@ -1124,6 +1134,10 @@ recv_recover_page( mutex_enter(&(recv_sys->mutex)); + if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) { + recv_max_page_lsn = page_lsn; + } + recv_addr->state = RECV_PROCESSED; ut_a(recv_sys->n_addrs); @@ -2192,9 +2206,12 @@ recv_scan_log_recs( while (log_block < buf + len && !finished) { no = log_block_get_hdr_no(log_block); +/* + fprintf(stderr, "Log block header no %lu\n", no); - /* fprintf(stderr, "Log block header no %lu\n", no); */ - + fprintf(stderr, "Scanned lsn no %lu\n", + log_block_convert_lsn_to_no(scanned_lsn)); +*/ if (no != log_block_convert_lsn_to_no(scanned_lsn) || !log_block_checksum_is_ok_or_old_format(log_block)) { @@ -2586,7 +2603,6 @@ recv_recovery_from_checkpoint_start( recv_group_scan_log_recs(group, &contiguous_lsn, &group_scanned_lsn); - group->scanned_lsn = group_scanned_lsn; if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) { @@ -2603,6 +2619,31 @@ recv_recovery_from_checkpoint_start( group = UT_LIST_GET_NEXT(log_groups, group); } + /* We currently have only one log group */ + if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: We were only able to scan the log up to\n" +"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n" +"InnoDB: It is possible that the database is now corrupt!\n", + ut_dulint_get_high(group_scanned_lsn), + ut_dulint_get_low(group_scanned_lsn), + ut_dulint_get_high(checkpoint_lsn), + ut_dulint_get_low(checkpoint_lsn)); + } + + if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: ERROR: We were only able to scan the log up to %lu %lu\n" +"InnoDB: but a database page a had an lsn %lu %lu. It is possible that the\n" +"InnoDB: database is now corrupt!\n", + ut_dulint_get_high(group_scanned_lsn), + ut_dulint_get_low(group_scanned_lsn), + ut_dulint_get_high(recv_max_page_lsn), + ut_dulint_get_low(recv_max_page_lsn)); + } + if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) { mutex_exit(&(log_sys->mutex)); @@ -2656,6 +2697,8 @@ recv_recovery_from_checkpoint_start( sync_order_checks_on = FALSE; + recv_lsn_checks_on = TRUE; + /* The database is now ready to start almost normal processing of user transactions: transaction rollbacks and the application of the log records in the hash table can be run in background. */ diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index a9a64ae7864..7936b06c24d 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -1030,6 +1030,7 @@ os_file_flush( #ifdef HAVE_FDATASYNC ret = fdatasync(file); #else +/* printf("Flushing to file %lu\n", (ulint)file); */ ret = fsync(file); #endif os_n_fsyncs++; diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index f81b2d0a60e..f31d275eff0 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -1798,8 +1798,12 @@ retry: os_fast_mutex_unlock(&srv_conc_mutex); + trx->op_info = (char*)"sleeping before joining InnoDB queue"; + os_thread_sleep(50000); + trx->op_info = (char*)""; + os_fast_mutex_lock(&srv_conc_mutex); srv_conc_n_waiting_threads--; @@ -2299,6 +2303,7 @@ srv_sprintf_innodb_monitor( char* buf_end = buf + len - 2000; double time_elapsed; time_t current_time; + ulint n_reserved; mutex_enter(&srv_innodb_monitor_mutex); @@ -2414,8 +2419,16 @@ srv_sprintf_innodb_monitor( "ROW OPERATIONS\n" "--------------\n"); buf += sprintf(buf, - "%ld queries inside InnoDB, %ld queries in queue\n", + "%ld queries inside InnoDB, %lu queries in queue\n", srv_conc_n_threads, srv_conc_n_waiting_threads); + + n_reserved = fil_space_get_n_reserved_extents(0); + if (n_reserved > 0) { + buf += sprintf(buf, + "%lu tablespace extents now reserved for B-tree split operations\n", + n_reserved); + } + #ifdef UNIV_LINUX buf += sprintf(buf, "Main thread process no. %lu, id %lu, state: %s\n", diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index e8d6c093680..47c39290b15 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -1624,6 +1624,13 @@ trx_print( } buf += sprintf(buf, "\n"); + + if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { + + buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n", + trx->n_mysql_tables_in_use, + trx->mysql_n_tables_locked); + } start_of_line = buf; -- cgit v1.2.1