diff options
25 files changed, 314 insertions, 229 deletions
diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am index 0b23ae74f9e..30e056d68fb 100644 --- a/storage/innobase/Makefile.am +++ b/storage/innobase/Makefile.am @@ -55,7 +55,9 @@ noinst_HEADERS = include/btr0btr.h include/btr0btr.ic \ include/ha0ha.ic include/hash0hash.h \ include/hash0hash.ic include/ibuf0ibuf.h \ include/ibuf0ibuf.ic include/ibuf0types.h \ + include/lock0iter.h \ include/lock0lock.h include/lock0lock.ic \ + include/lock0priv.h include/lock0priv.ic \ include/lock0types.h include/log0log.h \ include/log0log.ic include/log0recv.h \ include/log0recv.ic include/mach0data.h \ @@ -129,7 +131,8 @@ libinnobase_a_SOURCES = btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \ eval/eval0eval.c eval/eval0proc.c \ fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \ fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \ - ibuf/ibuf0ibuf.c lock/lock0lock.c \ + ibuf/ibuf0ibuf.c lock/lock0iter.c \ + lock/lock0lock.c \ log/log0log.c log/log0recv.c mach/mach0data.c \ mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \ mtr/mtr0mtr.c os/os0file.c os/os0proc.c \ diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index c847b8db9e2..469d3ac05d7 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -903,8 +903,7 @@ buf_block_make_young( /* Note that we read freed_page_clock's without holding any mutex: this is allowed since the result is used only in heuristics */ - if (buf_pool->freed_page_clock >= block->freed_page_clock - + 1 + (buf_pool->curr_size / 4)) { + if (buf_block_peek_if_too_old(block)) { mutex_enter(&buf_pool->mutex); /* There has been freeing activity in the LRU list: @@ -1648,6 +1647,15 @@ buf_page_init( block->lock_hash_val = lock_rec_hash(space, offset); +#ifdef UNIV_DEBUG_VALGRIND + if (!space) { + /* Silence valid Valgrind warnings about uninitialized + data being written to data files. There are some unused + bytes on some pages that InnoDB does not initialize. */ + UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); + } +#endif /* UNIV_DEBUG_VALGRIND */ + /* Insert into the hash table of file pages */ if (buf_page_hash_get(space, offset)) { diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c index 1e27144bdbf..7b49a7641af 100644 --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -244,7 +244,15 @@ buf_LRU_search_and_free_block( frame at all */ if (block->frame) { + /* The page was declared uninitialized + by buf_LRU_block_remove_hashed_page(). + We need to flag the contents of the + page valid (which it still is) in + order to avoid bogus Valgrind + warnings. */ + UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); btr_search_drop_page_hash_index(block->frame); + UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); } ut_a(block->buf_fix_count == 0); @@ -449,6 +457,7 @@ loop: mutex_enter(&block->mutex); block->state = BUF_BLOCK_READY_FOR_USE; + UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); mutex_exit(&block->mutex); @@ -864,6 +873,7 @@ buf_LRU_block_free_non_file_page( block->state = BUF_BLOCK_NOT_USED; + UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); #ifdef UNIV_DEBUG /* Wipe contents of page to reveal possible stale pointers to it */ memset(block->frame, '\0', UNIV_PAGE_SIZE); @@ -871,6 +881,8 @@ buf_LRU_block_free_non_file_page( UT_LIST_ADD_FIRST(free, buf_pool->free, block); block->in_free_list = TRUE; + UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE); + if (srv_use_awe && block->frame) { /* Add to the list of mapped pages */ @@ -939,6 +951,7 @@ buf_LRU_block_remove_hashed_page( buf_page_address_fold(block->space, block->offset), block); + UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); block->state = BUF_BLOCK_REMOVE_HASH; } diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c index 78fb55e4ef3..e1074933fe8 100644 --- a/storage/innobase/fsp/fsp0fsp.c +++ b/storage/innobase/fsp/fsp0fsp.c @@ -2829,7 +2829,7 @@ will be able to insert new data to the database without running out the tablespace. Only free extents are taken into account and we also subtract the safety margin required by the above function fsp_reserve_free_extents. */ -ulint +ullint fsp_get_available_space_in_free_extents( /*====================================*/ /* out: available space in kB */ @@ -2895,7 +2895,8 @@ fsp_get_available_space_in_free_extents( return(0); } - return(((n_free - reserve) * FSP_EXTENT_SIZE) + return((ullint)(n_free - reserve) + * FSP_EXTENT_SIZE * (UNIV_PAGE_SIZE / 1024)); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index b7d0c4417c3..c2e230d2087 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4533,17 +4533,16 @@ ha_innobase::position( /********************************************************************* If it's a DB_TOO_BIG_RECORD error then set a suitable message to return to the client.*/ -static +inline void innodb_check_for_record_too_big_error( /*==================================*/ - dict_table_t* table, /* in: table to check */ - int error) /* in: error code to check */ + ulint comp, /* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */ + int error) /* in: error code to check */ { if (error == (int)DB_TOO_BIG_RECORD) { - ulint max_row_size; - - max_row_size = page_get_free_space_of_empty_noninline(table); + ulint max_row_size + = page_get_free_space_of_empty_noninline(comp) / 2; my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size); } @@ -4657,9 +4656,7 @@ create_table_def( error = row_create_table_for_mysql(table, trx); - /* We need access to the table and so we do the error checking - and set the error message here, before the error translation.*/ - innodb_check_for_record_too_big_error(table, error); + innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error); error = convert_error_code_to_mysql(error, NULL); @@ -4783,9 +4780,8 @@ create_index( sure we don't create too long indexes. */ error = row_create_index_for_mysql(index, trx, field_lengths); - /* We need access to the table and so we do the error checking - and set the error message here, before the error translation.*/ - innodb_check_for_record_too_big_error(index->table, error); + innodb_check_for_record_too_big_error(form->s->row_type + != ROW_TYPE_REDUNDANT, error); error = convert_error_code_to_mysql(error, NULL); @@ -4802,6 +4798,8 @@ int create_clustered_index_when_no_primary( /*===================================*/ trx_t* trx, /* in: InnoDB transaction handle */ + ulint comp, /* in: ROW_FORMAT: + nonzero=COMPACT, 0=REDUNDANT */ const char* table_name) /* in: table name */ { dict_index_t* index; @@ -4810,13 +4808,11 @@ create_clustered_index_when_no_primary( /* We pass 0 as the space id, and determine at a lower level the space id where to store the table */ - index = dict_mem_index_create((char*) table_name, - (char*) "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0); + index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX", + 0, DICT_CLUSTERED, 0); error = row_create_index_for_mysql(index, trx, NULL); - /* We need access to the table and so we do the error checking - and set the error message here, before the error translation.*/ - innodb_check_for_record_too_big_error(index->table, error); + innodb_check_for_record_too_big_error(comp, error); error = convert_error_code_to_mysql(error, NULL); @@ -4947,8 +4943,9 @@ ha_innobase::create( order the rows by their row id which is internally generated by InnoDB */ - error = create_clustered_index_when_no_primary(trx, - norm_name); + error = create_clustered_index_when_no_primary( + trx, form->s->row_type != ROW_TYPE_REDUNDANT, + norm_name); if (error) { goto cleanup; } @@ -5873,9 +5870,9 @@ ha_innobase::update_table_comment( mutex_enter_noninline(&srv_dict_tmpfile_mutex); rewind(srv_dict_tmpfile); - fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB", - (ulong) fsp_get_available_space_in_free_extents( - prebuilt->table->space)); + fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB", + fsp_get_available_space_in_free_extents( + prebuilt->table->space)); dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, prebuilt->trx, prebuilt->table); diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 031bf6c51b4..b077ff0c181 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -28,7 +28,7 @@ buf_block_peek_if_too_old( buf_block_t* block) /* in: block to make younger */ { return(buf_pool->freed_page_clock >= block->freed_page_clock - + 1 + (buf_pool->curr_size / 1024)); + + 1 + (buf_pool->curr_size / 4)); } /************************************************************************* diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index d04269fc157..82e95a2e920 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -245,7 +245,7 @@ will be able to insert new data to the database without running out the tablespace. Only free extents are taken into account and we also subtract the safety margin required by the above function fsp_reserve_free_extents. */ -ulint +ullint fsp_get_available_space_in_free_extents( /*====================================*/ /* out: available space in kB */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 6b863e32183..059c459c374 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -519,6 +519,18 @@ lock_is_table_exclusive( dict_table_t* table, /* in: table */ trx_t* trx); /* in: transaction */ /************************************************************************* +Checks if a lock request lock1 has to wait for request lock2. */ + +ibool +lock_has_to_wait( +/*=============*/ + /* out: TRUE if lock1 has to wait for lock2 to be + removed */ + lock_t* lock1, /* in: waiting lock */ + lock_t* lock2); /* in: another lock; NOTE that it is assumed that this + has a lock bit set on the same record as in lock1 if + the locks are record locks */ +/************************************************************************* Checks that a transaction id is sensible, i.e., not in the future. */ ibool @@ -597,7 +609,7 @@ lock_validate(void); /* out: TRUE if ok */ /************************************************************************* Return approximate number or record locks (bits set in the bitmap) for -this transaction. Since delete-marked records ma ybe removed, the +this transaction. Since delete-marked records maybe removed, the record count will not be precise. */ ulint diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index cb8fbe92cf0..e59443da73d 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -167,6 +167,8 @@ mem_heap_alloc( mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); #ifdef UNIV_MEM_DEBUG + UNIV_MEM_ALLOC(buf, + n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE); /* In the debug version write debugging info to the field */ mem_field_init((byte*)buf, n); @@ -177,8 +179,10 @@ mem_heap_alloc( #endif #ifdef UNIV_SET_MEM_TO_ZERO + UNIV_MEM_ALLOC(buf, n); memset(buf, '\0', n); #endif + UNIV_MEM_ALLOC(buf, n); return(buf); } @@ -369,6 +373,8 @@ mem_heap_free_top( if ((heap != block) && (mem_block_get_free(block) == mem_block_get_start(block))) { mem_heap_block_free(heap, block); + } else { + UNIV_MEM_FREE((byte*) block + mem_block_get_free(block), n); } } diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 833d268c9de..273007c2778 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -531,6 +531,15 @@ page_get_free_space_of_empty( /* out: free space */ ulint comp) /* in: nonzero=compact page format */ __attribute__((const)); +/***************************************************************** +Calculates free space if a page is emptied. */ + +ulint +page_get_free_space_of_empty_noninline( +/*===================================*/ + /* out: free space */ + ulint comp) /* in: nonzero=compact page format */ + __attribute__((const)); /**************************************************************** Returns the sum of the sizes of the records in the record list excluding the infimum and supremum records. */ diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index bda3494073f..1448efe94fe 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -460,19 +460,6 @@ row_check_table_for_mysql( /* out: DB_ERROR or DB_SUCCESS */ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ -/************************************************************************* -Get the min of the maximum possible row sizes. */ - -ulint -page_get_free_space_of_empty_noninline( -/*===================================*/ - /* out: The (approx) maximum size - of a row, this is a conservative - estimate, since the size can be - slightly larger depending upon - the ROW_FORMAT setting.*/ - dict_table_t* table); /* in: table for which max record - size required.*/ /* A struct describing a place for an individual column in the MySQL row format which is presented to the table handler in ha_innobase. diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 957baa0391f..ba8e6e56219 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -83,6 +83,8 @@ memory is read outside the allocated blocks. */ /* Make a non-inline debug version */ #if 0 +#define UNIV_DEBUG_VALGRIND /* Enable extra + Valgrind instrumentation */ #define UNIV_DEBUG /* Enable ut_ad() assertions */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ #define UNIV_MEM_DEBUG /* detect memory leaks etc */ @@ -214,6 +216,8 @@ typedef __int64 ib_longlong; typedef longlong ib_longlong; #endif +typedef unsigned long long int ullint; + #ifndef __WIN__ #if SIZEOF_LONG != SIZEOF_VOIDP #error "Error: InnoDB's ulint must be of the same size as void*" @@ -298,5 +302,17 @@ typedef void* os_thread_ret_t; #include "ut0dbg.h" #include "ut0ut.h" #include "db0err.h" +#ifdef UNIV_DEBUG_VALGRIND +# include <valgrind/memcheck.h> +# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size) +# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) +# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size) +# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) +#else +# define UNIV_MEM_VALID(addr, size) do {} while(0) +# define UNIV_MEM_INVALID(addr, size) do {} while(0) +# define UNIV_MEM_FREE(addr, size) do {} while(0) +# define UNIV_MEM_ALLOC(addr, size) do {} while(0) +#endif #endif diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c index 6f5390145b5..f43752fb5fc 100644 --- a/storage/innobase/lock/lock0lock.c +++ b/storage/innobase/lock/lock0lock.c @@ -6,10 +6,14 @@ The transaction lock system Created 5/7/1996 Heikki Tuuri *******************************************************/ +#define LOCK_MODULE_IMPLEMENTATION + #include "lock0lock.h" +#include "lock0priv.h" #ifdef UNIV_NONINL #include "lock0lock.ic" +#include "lock0priv.ic" #endif #include "usr0sess.h" @@ -319,42 +323,6 @@ ibool lock_print_waits = FALSE; /* The lock system */ lock_sys_t* lock_sys = NULL; -/* A table lock */ -typedef struct lock_table_struct lock_table_t; -struct lock_table_struct{ - dict_table_t* table; /* database table in dictionary cache */ - UT_LIST_NODE_T(lock_t) - locks; /* list of locks on the same table */ -}; - -/* Record lock for a page */ -typedef struct lock_rec_struct lock_rec_t; -struct lock_rec_struct{ - ulint space; /* space id */ - ulint page_no; /* page number */ - ulint n_bits; /* number of bits in the lock bitmap */ - /* NOTE: the lock bitmap is placed immediately - after the lock struct */ -}; - -/* Lock struct */ -struct lock_struct{ - trx_t* trx; /* transaction owning the lock */ - UT_LIST_NODE_T(lock_t) - trx_locks; /* list of the locks of the - transaction */ - ulint type_mode; /* lock type, mode, LOCK_GAP or - LOCK_REC_NOT_GAP, - LOCK_INSERT_INTENTION, - wait flag, ORed */ - hash_node_t hash; /* hash chain node for a record lock */ - dict_index_t* index; /* index for a record lock */ - union { - lock_table_t tab_lock;/* table lock */ - lock_rec_t rec_lock;/* record lock */ - } un_member; -}; - /* We store info on the latest deadlock error to this buffer. InnoDB Monitor will then fetch it and print */ ibool lock_deadlock_found = FALSE; @@ -401,20 +369,6 @@ lock_deadlock_recursive( return LOCK_VICTIM_IS_START */ /************************************************************************* -Gets the type of a lock. */ -UNIV_INLINE -ulint -lock_get_type( -/*==========*/ - /* out: LOCK_TABLE or LOCK_REC */ - lock_t* lock) /* in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_TYPE_MASK); -} - -/************************************************************************* Gets the nth bit of a record lock. */ UNIV_INLINE ibool @@ -611,8 +565,8 @@ UNIV_INLINE ulint lock_get_mode( /*==========*/ - /* out: mode */ - lock_t* lock) /* in: lock */ + /* out: mode */ + const lock_t* lock) /* in: lock */ { ut_ad(lock); @@ -1017,7 +971,7 @@ lock_rec_has_to_wait( /************************************************************************* Checks if a lock request lock1 has to wait for request lock2. */ -static + ibool lock_has_to_wait( /*=============*/ @@ -1098,7 +1052,7 @@ lock_rec_set_nth_bit( /************************************************************************** Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, if none found. */ -static + ulint lock_rec_find_set_bit( /*==================*/ @@ -1390,7 +1344,7 @@ lock_rec_copy( /************************************************************************* Gets the previous record lock set on a record. */ -static + lock_t* lock_rec_get_prev( /*==============*/ diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c index ab5f42e3a13..ce2fc3ed535 100644 --- a/storage/innobase/log/log0recv.c +++ b/storage/innobase/log/log0recv.c @@ -57,6 +57,16 @@ ibool recv_needed_recovery = FALSE; ibool recv_lsn_checks_on = FALSE; +/* There are two conditions under which we scan the logs, the first +is normal startup and the second is when we do a recovery from an +archive. +This flag is set if we are doing a scan from the last checkpoint during +startup. If we find log entries that were written after the last checkpoint +we know that the server was not cleanly shutdown. We must then initialize +the crash recovery environment before attempting to store these entries in +the log hash table. */ +ibool recv_log_scan_is_startup_type = FALSE; + /* If the following is TRUE, the buffer pool file pages must be invalidated after recovery and no ibuf operations are allowed; this becomes TRUE if the log record hash table becomes too full, and log records must be merged @@ -99,6 +109,16 @@ the recovery failed and the database may be corrupt. */ dulint recv_max_page_lsn; +/* prototypes */ + +/*********************************************************** +Initialize crash recovery environment. Can be called iff +recv_needed_recovery == FALSE. */ +static +void +recv_init_crash_recovery(void); +/*===========================*/ + /************************************************************ Creates the recovery system. */ @@ -2284,6 +2304,23 @@ recv_scan_log_recs( if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) { + /* We have found more entries. If this scan is + of startup type, we must initiate crash recovery + environment before parsing these log records. */ + + if (recv_log_scan_is_startup_type + && !recv_needed_recovery) { + + fprintf(stderr, + "InnoDB: Log scan progressed" + " past the checkpoint lsn %lu %lu\n", + (ulong) ut_dulint_get_high( + recv_sys->scanned_lsn), + (ulong) ut_dulint_get_low( + recv_sys->scanned_lsn)); + recv_init_crash_recovery(); + } + /* We were able to find more log data: add it to the parsing buffer if parse_start_lsn is already non-zero */ @@ -2405,6 +2442,48 @@ recv_group_scan_log_recs( #endif /* UNIV_DEBUG */ } +/*********************************************************** +Initialize crash recovery environment. Can be called iff +recv_needed_recovery == FALSE. */ +static +void +recv_init_crash_recovery(void) +/*==========================*/ +{ + ut_a(!recv_needed_recovery); + + recv_needed_recovery = TRUE; + + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: Database was not" + " shut down normally!\n" + "InnoDB: Starting crash recovery.\n"); + + fprintf(stderr, + "InnoDB: Reading tablespace information" + " from the .ibd files...\n"); + + fil_load_single_table_tablespaces(); + + /* If we are using the doublewrite method, we will + check if there are half-written pages in data files, + and restore them from the doublewrite buffer if + possible */ + + if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { + + fprintf(stderr, + "InnoDB: Restoring possible" + " half-written data pages from" + " the doublewrite\n" + "InnoDB: buffer...\n"); + trx_sys_doublewrite_init_or_restore_pages(TRUE); + } + +} + /************************************************************ Recovers from a checkpoint. When this function returns, the database is able to start processing of new user transactions, but the function @@ -2532,92 +2611,6 @@ recv_recovery_from_checkpoint_start( recv_sys->recovered_lsn = checkpoint_lsn; srv_start_lsn = checkpoint_lsn; - - /* NOTE: we always do a 'recovery' at startup, but only if - there is something wrong we will print a message to the - user about recovery: */ - - if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0 - || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) { - - if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) - < 0) { - fprintf(stderr, - "InnoDB: #########################" - "#################################\n" - "InnoDB: " - "WARNING!\n" - "InnoDB: The log sequence number" - " in ibdata files is higher\n" - "InnoDB: than the log sequence number" - " in the ib_logfiles! Are you sure\n" - "InnoDB: you are using the right" - " ib_logfiles to start up" - " the database?\n" - "InnoDB: Log sequence number in" - " ib_logfiles is %lu %lu, log\n" - "InnoDB: sequence numbers stamped" - " to ibdata file headers are between\n" - "InnoDB: %lu %lu and %lu %lu.\n" - "InnoDB: #########################" - "#################################\n", - (ulong) ut_dulint_get_high( - checkpoint_lsn), - (ulong) ut_dulint_get_low( - checkpoint_lsn), - (ulong) ut_dulint_get_high( - min_flushed_lsn), - (ulong) ut_dulint_get_low( - min_flushed_lsn), - (ulong) ut_dulint_get_high( - max_flushed_lsn), - (ulong) ut_dulint_get_low( - max_flushed_lsn)); - } - - recv_needed_recovery = TRUE; - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Database was not" - " shut down normally!\n" - "InnoDB: Starting crash recovery.\n"); - - fprintf(stderr, - "InnoDB: Reading tablespace information" - " from the .ibd files...\n"); - - fil_load_single_table_tablespaces(); - - /* If we are using the doublewrite method, we will - check if there are half-written pages in data files, - and restore them from the doublewrite buffer if - possible */ - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - - fprintf(stderr, - "InnoDB: Restoring possible" - " half-written data pages from" - " the doublewrite\n" - "InnoDB: buffer...\n"); - trx_sys_doublewrite_init_or_restore_pages( - TRUE); - } - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Starting log scan" - " based on checkpoint at\n" - "InnoDB: log sequence number %lu %lu.\n", - (ulong) ut_dulint_get_high(checkpoint_lsn), - (ulong) ut_dulint_get_low(checkpoint_lsn)); - } else { - /* Init the doublewrite buffer memory structure */ - trx_sys_doublewrite_init_or_restore_pages(FALSE); - } } contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn, @@ -2670,6 +2663,8 @@ recv_recovery_from_checkpoint_start( group = UT_LIST_GET_NEXT(log_groups, group); } + /* Set the flag to publish that we are doing startup scan. */ + recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT); while (group) { old_scanned_lsn = recv_sys->scanned_lsn; @@ -2691,6 +2686,69 @@ recv_recovery_from_checkpoint_start( group = UT_LIST_GET_NEXT(log_groups, group); } + /* Done with startup scan. Clear the flag. */ + recv_log_scan_is_startup_type = FALSE; + if (type == LOG_CHECKPOINT) { + /* NOTE: we always do a 'recovery' at startup, but only if + there is something wrong we will print a message to the + user about recovery: */ + + if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0 + || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) { + + if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) + < 0) { + fprintf(stderr, + "InnoDB: #########################" + "#################################\n" + "InnoDB: " + "WARNING!\n" + "InnoDB: The log sequence number" + " in ibdata files is higher\n" + "InnoDB: than the log sequence number" + " in the ib_logfiles! Are you sure\n" + "InnoDB: you are using the right" + " ib_logfiles to start up" + " the database?\n" + "InnoDB: Log sequence number in" + " ib_logfiles is %lu %lu, log\n" + "InnoDB: sequence numbers stamped" + " to ibdata file headers are between\n" + "InnoDB: %lu %lu and %lu %lu.\n" + "InnoDB: #########################" + "#################################\n", + (ulong) ut_dulint_get_high( + checkpoint_lsn), + (ulong) ut_dulint_get_low( + checkpoint_lsn), + (ulong) ut_dulint_get_high( + min_flushed_lsn), + (ulong) ut_dulint_get_low( + min_flushed_lsn), + (ulong) ut_dulint_get_high( + max_flushed_lsn), + (ulong) ut_dulint_get_low( + max_flushed_lsn)); + + + } + + if (!recv_needed_recovery) { + fprintf(stderr, + "InnoDB: The log sequence number" + " in ibdata files does not match\n" + "InnoDB: the log sequence number" + " in the ib_logfiles!\n"); + recv_init_crash_recovery(); + } + + } + if (!recv_needed_recovery) { + /* Init the doublewrite buffer memory structure */ + trx_sys_doublewrite_init_or_restore_pages(FALSE); + } + } + /* We currently have only one log group */ if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { ut_print_timestamp(stderr); @@ -2747,20 +2805,9 @@ recv_recovery_from_checkpoint_start( recv_synchronize_groups(up_to_date_group); if (!recv_needed_recovery) { - if (ut_dulint_cmp(checkpoint_lsn, recv_sys->recovered_lsn) - != 0) { - fprintf(stderr, - "InnoDB: Warning: we did not need to do" - " crash recovery, but log scan\n" - "InnoDB: progressed past the checkpoint" - " lsn %lu %lu up to lsn %lu %lu\n", - (ulong) ut_dulint_get_high(checkpoint_lsn), - (ulong) ut_dulint_get_low(checkpoint_lsn), - (ulong) ut_dulint_get_high( - recv_sys->recovered_lsn), - (ulong) ut_dulint_get_low( - recv_sys->recovered_lsn)); - } + ut_a(ut_dulint_cmp(checkpoint_lsn, + recv_sys->recovered_lsn) == 0); + } else { srv_start_lsn = recv_sys->recovered_lsn; } diff --git a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c index 10b359e8e67..d89a3a55d88 100644 --- a/storage/innobase/mem/mem0mem.c +++ b/storage/innobase/mem/mem0mem.c @@ -514,6 +514,7 @@ mem_heap_block_free( mem_erase_buf((byte*)block, len); #endif + UNIV_MEM_FREE(block, len); if (init_block) { /* Do not have to free: do nothing */ diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c index c010ae61160..27da86a0309 100644 --- a/storage/innobase/mem/mem0pool.c +++ b/storage/innobase/mem/mem0pool.c @@ -229,6 +229,8 @@ mem_pool_create( mem_area_set_size(area, ut_2_exp(i)); mem_area_set_free(area, TRUE); + UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area, + ut_2_exp(i) - MEM_AREA_EXTRA_SIZE); UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); @@ -300,6 +302,7 @@ mem_pool_fill_free_list( UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area); area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i)); + UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE); mem_area_set_size(area2, ut_2_exp(i)); mem_area_set_free(area2, TRUE); @@ -400,6 +403,8 @@ mem_area_alloc( mutex_exit(&(pool->mutex)); ut_ad(mem_pool_validate(pool)); + UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, + ut_2_exp(n) - MEM_AREA_EXTRA_SIZE); return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area))); } @@ -482,6 +487,7 @@ mem_area_free( } size = mem_area_get_size(area); + UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE); if (size == 0) { fprintf(stderr, diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index f496e1127ce..78140cc5ecf 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -456,10 +456,9 @@ os_file_handle_error_no_exit( #undef USE_FILE_LOCK #define USE_FILE_LOCK -#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__) +#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__) /* InnoDB Hot Backup does not lock the data files. * On Windows, mandatory locking is used. - * On FreeBSD with LinuxThreads, advisory locking does not work properly. */ # undef USE_FILE_LOCK #endif diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 4212df7a631..543cf9e34eb 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -209,6 +209,18 @@ page_set_max_trx_id( } } +/***************************************************************** +Calculates free space if a page is emptied. */ + +ulint +page_get_free_space_of_empty_noninline( +/*===================================*/ + /* out: free space */ + ulint comp) /* in: nonzero=compact page format */ +{ + return(page_get_free_space_of_empty(comp)); +} + /**************************************************************** Allocates a block of memory from an index page. */ diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 3bc73eca9ea..64f8e2d319c 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -753,7 +753,11 @@ rec_convert_dtuple_to_rec_old( /* Calculate the offset of the origin in the physical record */ rec = buf + rec_get_converted_extra_size(data_size, n_fields); - +#ifdef UNIV_DEBUG + /* Suppress Valgrind warnings of ut_ad() + in mach_write_to_1(), mach_write_to_2() et al. */ + memset(buf, 0xff, rec - buf + data_size); +#endif /* UNIV_DEBUG */ /* Store the number of fields */ rec_set_n_fields_old(rec, n_fields); diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index 9f88fd8040b..d51b7e1e0b5 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -4059,25 +4059,3 @@ row_check_table_for_mysql( return(ret); } - -/************************************************************************* -Get the maximum row size. */ - -ulint -page_get_free_space_of_empty_noninline( -/*===================================*/ - /* out: The (approx) maximum size - of a row, this is a conservative - estimate, since the size can be - slightly larger depending upon - the ROW_FORMAT setting.*/ - dict_table_t* table) /* in: table for which max record - size is required.*/ -{ - ibool compact; - - compact = dict_table_is_comp(table); - - return(page_get_free_space_of_empty(compact) / 2); -} - diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c index 34b45e2c1c3..4db780c8b3f 100644 --- a/storage/innobase/sync/sync0rw.c +++ b/storage/innobase/sync/sync0rw.c @@ -15,16 +15,34 @@ Created 9/11/1995 Heikki Tuuri #include "mem0mem.h" #include "srv0srv.h" +/* number of system calls made during shared latching */ ulint rw_s_system_call_count = 0; + +/* number of spin waits on rw-latches, +resulted during shared (read) locks */ ulint rw_s_spin_wait_count = 0; + +/* number of OS waits on rw-latches, +resulted during shared (read) locks */ ulint rw_s_os_wait_count = 0; +/* number of unlocks (that unlock shared locks), +set only when UNIV_SYNC_PERF_STAT is defined */ ulint rw_s_exit_count = 0; +/* number of system calls made during exclusive latching */ ulint rw_x_system_call_count = 0; + +/* number of spin waits on rw-latches, +resulted during exclusive (write) locks */ ulint rw_x_spin_wait_count = 0; + +/* number of OS waits on rw-latches, +resulted during exclusive (write) locks */ ulint rw_x_os_wait_count = 0; +/* number of unlocks (that unlock exclusive locks), +set only when UNIV_SYNC_PERF_STAT is defined */ ulint rw_x_exit_count = 0; /* The global list of rw-locks */ diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c index 672e1f93aad..bf3f4d1ff20 100644 --- a/storage/innobase/sync/sync0sync.c +++ b/storage/innobase/sync/sync0sync.c @@ -115,6 +115,7 @@ ulint mutex_system_call_count = 0; /* Number of spin waits on mutexes: for performance monitoring */ +/* round=one iteration of a spin loop */ ulint mutex_spin_round_count = 0; ulint mutex_spin_wait_count = 0; ulint mutex_os_wait_count = 0; diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c index 307a03bfbc3..144721150b6 100644 --- a/storage/innobase/trx/trx0sys.c +++ b/storage/innobase/trx/trx0sys.c @@ -868,7 +868,16 @@ trx_sysf_create( trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); } - /* The remaining area (up to the page trailer) is uninitialized. */ + /* The remaining area (up to the page trailer) is uninitialized. + Silence Valgrind warnings about it. */ + UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE), + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + - (TRX_SYS_RSEGS + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE)) + + page - sys_header); /* Create the first rollback segment in the SYSTEM tablespace */ page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 2d5ce0e1c61..b312e008cd2 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -1570,19 +1570,21 @@ trx_commit_for_mysql( the transaction object does not have an InnoDB session object, and we set the dummy session that we use for all MySQL transactions. */ - mutex_enter(&kernel_mutex); - if (trx->sess == NULL) { /* Open a dummy session */ if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); + mutex_enter(&kernel_mutex); + + if (!trx_dummy_sess) { + trx_dummy_sess = sess_open(); + } + + mutex_exit(&kernel_mutex); } trx->sess = trx_dummy_sess; } - - mutex_exit(&kernel_mutex); trx_start_if_not_started(trx); diff --git a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c index 4fd515c35e6..b466a5f6872 100644 --- a/storage/innobase/ut/ut0mem.c +++ b/storage/innobase/ut/ut0mem.c @@ -162,6 +162,8 @@ retry: #endif } + UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t)); + ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N; |