summaryrefslogtreecommitdiff
path: root/innobase
diff options
context:
space:
mode:
Diffstat (limited to 'innobase')
-rw-r--r--innobase/buf/buf0buf.c23
-rw-r--r--innobase/fil/fil0fil.c28
-rw-r--r--innobase/fsp/fsp0fsp.c46
-rw-r--r--innobase/include/fil0fil.h8
-rw-r--r--innobase/include/log0log.h14
-rw-r--r--innobase/include/log0recv.h2
-rw-r--r--innobase/log/log0log.c75
-rw-r--r--innobase/log/log0recv.c49
-rw-r--r--innobase/os/os0file.c1
-rw-r--r--innobase/srv/srv0srv.c15
-rw-r--r--innobase/trx/trx0trx.c7
11 files changed, 253 insertions, 15 deletions
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index 246a60a61cd..3f2476c715b 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "dict0dict.h"
#include "log0recv.h"
+#include "log0log.h"
#include "trx0undo.h"
#include "srv0srv.h"
@@ -273,6 +274,7 @@ buf_page_is_corrupted(
ulint old_checksum;
ulint checksum_field;
ulint old_checksum_field;
+ dulint current_lsn;
if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
@@ -284,6 +286,27 @@ buf_page_is_corrupted(
return(TRUE);
}
+#ifndef UNIV_HOTBACKUP
+ if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
+ if (ut_dulint_cmp(current_lsn,
+ mach_read_from_8(read_buf + FIL_PAGE_LSN))
+ < 0) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: Error: page %lu log sequence number %lu %lu\n"
+"InnoDB: is in the future! Current system log sequence number %lu %lu.\n"
+"InnoDB: Your database may be corrupt.\n",
+ mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
+ ut_dulint_get_high(
+ mach_read_from_8(read_buf + FIL_PAGE_LSN)),
+ ut_dulint_get_low(
+ mach_read_from_8(read_buf + FIL_PAGE_LSN)),
+ ut_dulint_get_high(current_lsn),
+ ut_dulint_get_low(current_lsn));
+ }
+ }
+#endif
old_checksum = buf_calc_page_old_checksum(read_buf);
old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index a8dc357749c..f55df90846c 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -831,6 +831,34 @@ fil_space_release_free_extents(
mutex_exit(&(system->mutex));
}
+/***********************************************************************
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+ ulint id) /* in: space id */
+{
+ fil_space_t* space;
+ fil_system_t* system = fil_system;
+ ulint n;
+
+ ut_ad(system);
+
+ mutex_enter(&(system->mutex));
+
+ HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+ ut_a(space);
+
+ n = space->n_reserved_extents;
+
+ mutex_exit(&(system->mutex));
+
+ return(n);
+}
+
/************************************************************************
Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
index b6941d80e90..20bf4972f64 100644
--- a/innobase/fsp/fsp0fsp.c
+++ b/innobase/fsp/fsp0fsp.c
@@ -2709,10 +2709,52 @@ fseg_free_page_low(
return;
}
+/*
+ fprintf(stderr,
+"InnoDB: InnoDB is freeing space %lu page %lu,\n"
+"InnoDB: which belongs to descr seg %lu %lu\n"
+"InnoDB: segment %lu %lu.\n",
+ space, page,
+ ut_dulint_get_high(
+ mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_low(
+ mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_high(
+ mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_low(
+ mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
+*/
/* If we get here, the page is in some extent of the segment */
- ut_a(0 == ut_dulint_cmp(
+ if (0 != ut_dulint_cmp(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
- mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
+ mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))) {
+
+ ut_sprintf_buf(errbuf, descr, 40);
+ fprintf(stderr,
+"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf);
+ ut_sprintf_buf(errbuf, seg_inode, 40);
+ fprintf(stderr,
+"InnoDB: Dump of the segment inode: %s\n", errbuf);
+
+ fprintf(stderr,
+"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n"
+"InnoDB: which does not belong to segment %lu %lu but belongs\n"
+"InnoDB: to segment %lu %lu.\n",
+ space, page,
+ ut_dulint_get_high(
+ mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_low(
+ mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_high(
+ mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)),
+ ut_dulint_get_low(
+ mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
+
+ fprintf(stderr,
+"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
+"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n");
+ ut_a(0);
+ }
not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
MLOG_4BYTES, mtr);
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index 4f78fdb2fd7..ad3149f0b36 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -386,6 +386,14 @@ fil_space_release_free_extents(
/*===========================*/
ulint id, /* in: space id */
ulint n_reserved); /* in: how many one reserved */
+/***********************************************************************
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+ ulint id); /* in: space id */
typedef struct fil_space_struct fil_space_t;
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index e79e914e4f6..24ec28a56e6 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -513,6 +513,15 @@ log_print(
/*======*/
char* buf, /* in/out: buffer where to print */
char* buf_end);/* in: buffer end */
+/**********************************************************
+Peeks the current lsn. */
+
+ibool
+log_peek_lsn(
+/*=========*/
+ /* out: TRUE if success, FALSE if could not get the
+ log system mutex */
+ dulint* lsn); /* out: if returns TRUE, current lsn is here */
/**************************************************************************
Refreshes the statistics used to print per-second averages. */
@@ -785,6 +794,11 @@ struct log_struct{
called */
/* Fields involved in checkpoints */
+ ulint log_group_capacity; /* capacity of the log group; if
+ the checkpoint age exceeds this, it is
+ a serious error because it is possible
+ we will then overwrite log and spoil
+ crash recovery */
ulint max_modified_age_async;
/* when this recommended value for lsn
- buf_pool_get_oldest_modification()
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
index 7418e4abf1b..e5a5bc05563 100644
--- a/innobase/include/log0recv.h
+++ b/innobase/include/log0recv.h
@@ -333,6 +333,8 @@ extern ibool recv_recovery_on;
extern ibool recv_no_ibuf_operations;
extern ibool recv_needed_recovery;
+extern ibool recv_lsn_checks_on;
+
extern ibool recv_is_making_a_backup;
extern ulint recv_max_parsed_page_no;
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index 53ca97baaa0..99827da5bf5 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -33,6 +33,11 @@ log_t* log_sys = NULL;
ibool log_do_write = TRUE;
ibool log_debug_writes = FALSE;
+/* These control how often we print warnings if the last checkpoint is too
+old */
+ibool log_has_printed_chkp_warning = FALSE;
+time_t log_last_warning_time;
+
/* Pointer to this variable is used as the i/o-message when we do i/o to an
archive */
byte log_archive_io;
@@ -299,6 +304,7 @@ log_close(void)
dulint oldest_lsn;
dulint lsn;
log_t* log = log_sys;
+ ulint checkpoint_age;
ut_ad(mutex_own(&(log->mutex)));
@@ -322,8 +328,34 @@ log_close(void)
log->check_flush_or_checkpoint = TRUE;
}
- if (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
- <= log->max_modified_age_async) {
+ checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
+
+ if (checkpoint_age >= log->log_group_capacity) {
+ /* TODO: split btr_store_big_rec_extern_fields() into small
+ steps so that we can release all latches in the middle, and
+ call log_free_check() to ensure we never write over log written
+ after the latest checkpoint. In principle, we should split all
+ big_rec operations, but other operations are smaller. */
+
+ if (!log_has_printed_chkp_warning
+ || difftime(time(NULL), log_last_warning_time) > 15) {
+
+ log_has_printed_chkp_warning = TRUE;
+ log_last_warning_time = time(NULL);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: ERROR: the age of the last checkpoint is %lu,\n"
+"InnoDB: which exceeds the log group capacity %lu.\n"
+"InnoDB: If you are using big BLOB or TEXT rows, you must set the\n"
+"InnoDB: combined size of log files at least 10 times bigger than the\n"
+"InnoDB: largest such row.\n",
+ checkpoint_age, log->log_group_capacity);
+ }
+ }
+
+ if (checkpoint_age <= log->max_modified_age_async) {
+
goto function_exit;
}
@@ -332,8 +364,7 @@ log_close(void)
if (ut_dulint_is_zero(oldest_lsn)
|| (ut_dulint_minus(lsn, oldest_lsn)
> log->max_modified_age_async)
- || (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
- > log->max_checkpoint_age_async)) {
+ || checkpoint_age > log->max_checkpoint_age_async) {
log->check_flush_or_checkpoint = TRUE;
}
@@ -551,7 +582,6 @@ log_calc_max_ages(void)
the database server */
{
log_group_t* group;
- ulint n_threads;
ulint margin;
ulint free;
ibool success = TRUE;
@@ -561,8 +591,6 @@ log_calc_max_ages(void)
ut_ad(!mutex_own(&(log_sys->mutex)));
- n_threads = srv_get_n_threads();
-
mutex_enter(&(log_sys->mutex));
group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -590,12 +618,15 @@ log_calc_max_ages(void)
group = UT_LIST_GET_NEXT(log_groups, group);
}
+ /* Add extra safety */
+ smallest_capacity = smallest_capacity - smallest_capacity / 10;
+
/* For each OS thread we must reserve so much free space in the
smallest log group that it can accommodate the log entries produced
by single query steps: running out of free log space is a serious
system error which requires rebooting the database. */
- free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads
+ free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
+ LOG_CHECKPOINT_EXTRA_FREE;
if (free >= smallest_capacity / 2) {
success = FALSE;
@@ -607,6 +638,10 @@ log_calc_max_ages(void)
margin = ut_min(margin, log_sys->adm_checkpoint_interval);
+ margin = margin - margin / 10; /* Add still some extra safety */
+
+ log_sys->log_group_capacity = smallest_capacity;
+
log_sys->max_modified_age_async = margin
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
log_sys->max_modified_age_sync = margin
@@ -626,7 +661,7 @@ failure:
if (!success) {
fprintf(stderr,
- "Error: log file group too small for the number of threads\n");
+"InnoDB: Error: log file group too small for innodb_thread_concurrency\n");
}
return(success);
@@ -3112,6 +3147,28 @@ log_check_log_recs(
}
/**********************************************************
+Peeks the current lsn. */
+
+ibool
+log_peek_lsn(
+/*=========*/
+ /* out: TRUE if success, FALSE if could not get the
+ log system mutex */
+ dulint* lsn) /* out: if returns TRUE, current lsn is here */
+{
+ if (0 == mutex_enter_nowait(&(log_sys->mutex), (char*)__FILE__,
+ __LINE__)) {
+ *lsn = log_sys->lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************
Prints info of the log. */
void
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index 8e5fe819afb..9c9ad568d98 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -46,6 +46,8 @@ ibool recv_recovery_from_backup_on = FALSE;
ibool recv_needed_recovery = FALSE;
+ibool recv_lsn_checks_on = FALSE;
+
/* If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
@@ -71,6 +73,12 @@ ulint recv_previous_parsed_rec_is_multi = 0;
ulint recv_max_parsed_page_no = 0;
+/* The maximum lsn we see for a page during the recovery process. If this
+is bigger than the lsn we are able to scan up to, that is an indication that
+the recovery failed and the database may be corrupt. */
+
+dulint recv_max_page_lsn;
+
/************************************************************
Creates the recovery system. */
@@ -132,6 +140,8 @@ recv_sys_init(
OS_FILE_LOG_BLOCK_SIZE);
recv_sys->found_corrupt_log = FALSE;
+ recv_max_page_lsn = ut_dulint_zero;
+
mutex_exit(&(recv_sys->mutex));
}
@@ -1124,6 +1134,10 @@ recv_recover_page(
mutex_enter(&(recv_sys->mutex));
+ if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
+ recv_max_page_lsn = page_lsn;
+ }
+
recv_addr->state = RECV_PROCESSED;
ut_a(recv_sys->n_addrs);
@@ -2192,9 +2206,12 @@ recv_scan_log_recs(
while (log_block < buf + len && !finished) {
no = log_block_get_hdr_no(log_block);
+/*
+ fprintf(stderr, "Log block header no %lu\n", no);
- /* fprintf(stderr, "Log block header no %lu\n", no); */
-
+ fprintf(stderr, "Scanned lsn no %lu\n",
+ log_block_convert_lsn_to_no(scanned_lsn));
+*/
if (no != log_block_convert_lsn_to_no(scanned_lsn)
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
@@ -2586,7 +2603,6 @@ recv_recovery_from_checkpoint_start(
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
-
group->scanned_lsn = group_scanned_lsn;
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
@@ -2603,6 +2619,31 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group);
}
+ /* We currently have only one log group */
+ if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: ERROR: We were only able to scan the log up to\n"
+"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
+"InnoDB: It is possible that the database is now corrupt!\n",
+ ut_dulint_get_high(group_scanned_lsn),
+ ut_dulint_get_low(group_scanned_lsn),
+ ut_dulint_get_high(checkpoint_lsn),
+ ut_dulint_get_low(checkpoint_lsn));
+ }
+
+ if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: ERROR: We were only able to scan the log up to %lu %lu\n"
+"InnoDB: but a database page a had an lsn %lu %lu. It is possible that the\n"
+"InnoDB: database is now corrupt!\n",
+ ut_dulint_get_high(group_scanned_lsn),
+ ut_dulint_get_low(group_scanned_lsn),
+ ut_dulint_get_high(recv_max_page_lsn),
+ ut_dulint_get_low(recv_max_page_lsn));
+ }
+
if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
mutex_exit(&(log_sys->mutex));
@@ -2656,6 +2697,8 @@ recv_recovery_from_checkpoint_start(
sync_order_checks_on = FALSE;
+ recv_lsn_checks_on = TRUE;
+
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index a9a64ae7864..7936b06c24d 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -1030,6 +1030,7 @@ os_file_flush(
#ifdef HAVE_FDATASYNC
ret = fdatasync(file);
#else
+/* printf("Flushing to file %lu\n", (ulint)file); */
ret = fsync(file);
#endif
os_n_fsyncs++;
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index f81b2d0a60e..f31d275eff0 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -1798,8 +1798,12 @@ retry:
os_fast_mutex_unlock(&srv_conc_mutex);
+ trx->op_info = (char*)"sleeping before joining InnoDB queue";
+
os_thread_sleep(50000);
+ trx->op_info = (char*)"";
+
os_fast_mutex_lock(&srv_conc_mutex);
srv_conc_n_waiting_threads--;
@@ -2299,6 +2303,7 @@ srv_sprintf_innodb_monitor(
char* buf_end = buf + len - 2000;
double time_elapsed;
time_t current_time;
+ ulint n_reserved;
mutex_enter(&srv_innodb_monitor_mutex);
@@ -2414,8 +2419,16 @@ srv_sprintf_innodb_monitor(
"ROW OPERATIONS\n"
"--------------\n");
buf += sprintf(buf,
- "%ld queries inside InnoDB, %ld queries in queue\n",
+ "%ld queries inside InnoDB, %lu queries in queue\n",
srv_conc_n_threads, srv_conc_n_waiting_threads);
+
+ n_reserved = fil_space_get_n_reserved_extents(0);
+ if (n_reserved > 0) {
+ buf += sprintf(buf,
+ "%lu tablespace extents now reserved for B-tree split operations\n",
+ n_reserved);
+ }
+
#ifdef UNIV_LINUX
buf += sprintf(buf,
"Main thread process no. %lu, id %lu, state: %s\n",
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index e8d6c093680..47c39290b15 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -1624,6 +1624,13 @@ trx_print(
}
buf += sprintf(buf, "\n");
+
+ if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
+
+ buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n",
+ trx->n_mysql_tables_in_use,
+ trx->mysql_n_tables_locked);
+ }
start_of_line = buf;