diff options
author | Michael Widenius <monty@askmonty.org> | 2011-02-22 15:15:17 +0200 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2011-02-22 15:15:17 +0200 |
commit | b35743f9877fc826372eeb6dec21665aa3ff32f4 (patch) | |
tree | 4361a08c36980010723e75e1bc79c974139e0d6c /storage | |
parent | 58bb0769bdf13a9747e900aa2f0955137738ce9d (diff) | |
parent | e69b70140b936f59826de9dc79e4bf7f4821d52e (diff) | |
download | mariadb-git-b35743f9877fc826372eeb6dec21665aa3ff32f4.tar.gz |
Merge with xtradb code changes
(4 tests are still failing, so this push is not yet stable)
Diffstat (limited to 'storage')
48 files changed, 1762 insertions, 427 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index 0cd8ac8a7e6..5ca60eb73d5 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,13 +1,56 @@ +2010-11-11 The InnoDB Team + * thr/thr0loc.c, trx/trx0i_s.c: + Fix Bug#57802 Empty ASSERTION parameter passed to the HASH_SEARCH macro + +2010-11-10 The InnoDB Team + + * dict/dict0dict.c, handler/handler0alter.cc, include/dict0dict.h + row/row0merge.c: + Fix Bug#55084 InnoDB crash and corruption after ALTER TABLE + +2010-11-10 The InnoDB Team + + * srv/srv0start.c: + Fix Bug#48026 Log start and end of InnoDB buffer pool + initialization to the error log + +2010-11-03 The InnoDB Team + + * include/btr0btr.h, include/btr0btr.ic, dict/dict0crea.c: + Fix Bug#57947 InnoDB diagnostics shows btr_block_get calls + instead of real callers + +2010-11-03 The InnoDB Team + + * fil/fil0fil.c, fsp/fsp0fsp.c, handler/ha_innodb.cc, + include/fil0fil.h, include/univ.i: + Fix Bug #54538 - use of exclusive innodb dictionary lock limits + performance. + +2010-11-02 The InnoDB Team + + * btr/btr0cur.c, dict/dict0dict.c, dict/dict0load.c, + handler/ha_innodb.cc, include/dict0dict.h, row/row0mysql.c, + innodb_bug53046.result, innodb_bug53046.test: + Fix Bug#53046 dict_update_statistics_low can still be run + concurrently on same table + +2010-11-02 The InnoDB Team + + * row/row0sel.c: + Fix Bug#57799 READ UNCOMMITTED access failure of off-page + DYNAMIC or COMPRESSED columns again + 2010-10-24 The InnoDB Team * row/row0mysql.c - Fix Bug #57700 Latching order violation in + Fix Bug#57700 Latching order violation in row_truncate_table_for_mysql() 2010-10-20 The InnoDB Team * dict/dict0load.c - Fix Bug #57616 Sig 11 in dict_load_table() when failed to load + Fix Bug#57616 Sig 11 in dict_load_table() when failed to load index or foreign key 2010-10-19 The InnoDB Team @@ -17,7 +60,7 @@ include/ibuf0ibuf.h, include/row0mysql.h, row/row0mysql.c, row/row0sel.c, innodb_bug56680.test, innodb_bug56680.result: - Fix Bug #56680 InnoDB may return wrong results from a + Fix Bug#56680 InnoDB may return wrong results from a case-insensitive covering index 2010-10-18 The InnoDB Team @@ -72,7 +115,7 @@ 2010-08-03 The InnoDB Team * include/dict0dict.h, include/dict0dict.ic, row/row0mysql.c: - Fix bug #54678, InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock + Fix Bug#54678 InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock 2010-08-03 The InnoDB Team @@ -146,7 +189,7 @@ * dict/dict0load.c, fil/fil0fil.c: Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, - old maximum was 0 (introduced in Bug #53578 fix) + old maximum was 0 (introduced in Bug#53578 fix) 2010-06-16 The InnoDB Team diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index daa94a549a0..25bdd176880 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -1124,7 +1124,7 @@ btr_cur_ins_lock_and_undo( not zero, the parameters index and thr should be specified */ btr_cur_t* cursor, /*!< in: cursor on page after which to insert */ - const dtuple_t* entry, /*!< in: entry to insert */ + dtuple_t* entry, /*!< in/out: entry to insert */ que_thr_t* thr, /*!< in: query thread or NULL */ mtr_t* mtr, /*!< in/out: mini-transaction */ ibool* inherit)/*!< out: TRUE if the inserted new record maybe @@ -3582,11 +3582,9 @@ btr_estimate_number_of_different_key_vals( effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); if (!effective_pages) { - dict_index_stat_mutex_enter(index); for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; } - dict_index_stat_mutex_exit(index); return; } else if (effective_pages > index->stat_n_leaf_pages) { effective_pages = index->stat_n_leaf_pages; @@ -3728,8 +3726,6 @@ btr_estimate_number_of_different_key_vals( also the pages used for external storage of fields (those pages are included in index->stat_n_leaf_pages) */ - dict_index_stat_mutex_enter(index); - for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = ((n_diff[j] @@ -3768,8 +3764,6 @@ btr_estimate_number_of_different_key_vals( } } - dict_index_stat_mutex_exit(index); - mem_free(n_diff); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); @@ -4182,7 +4176,7 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or error */ +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint btr_store_big_rec_extern_fields( diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index f09354e370f..cf48d95d505 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "trx0trx.h" #include "srv0start.h" +#include "que0que.h" +#include "read0read.h" +#include "row0row.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -310,6 +314,30 @@ read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ +/* Buffer pool shared memory segment information */ +typedef struct buf_shm_info_struct buf_shm_info_t; + +struct buf_shm_info_struct { + char head_str[8]; + ulint binary_id; + ibool is_new; /* during initializing */ + ibool clean; /* clean shutdowned and free */ + ibool reusable; /* reusable */ + ulint buf_pool_size; /* backup value */ + ulint page_size; /* backup value */ + ulint frame_offset; /* offset of the first frame based on chunk->mem */ + ulint zip_hash_offset; + ulint zip_hash_n; + + ulint checksum; + + buf_pool_t buf_pool_backup; + buf_chunk_t chunk_backup; + + ib_uint64_t dummy; +}; + +#define BUF_SHM_INFO_HEAD "XTRA_SHM" #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** @@ -756,6 +784,45 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } +static +void +buf_block_reuse( +/*============*/ + buf_block_t* block, + ptrdiff_t frame_offset) +{ + /* block_init */ + block->frame += frame_offset; + + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); + + block->index = NULL; + +#ifdef UNIV_DEBUG + /* recreate later */ + block->page.in_page_hash = FALSE; + block->page.in_zip_hash = FALSE; +#endif /* UNIV_DEBUG */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers = 0; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + + if (block->page.zip.data) + block->page.zip.data += frame_offset; + + block->is_hashed = FALSE; + + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); + ut_ad(rw_lock_validate(&(block->lock))); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); +#endif /* UNIV_SYNC_DEBUG */ +} + /********************************************************************//** Allocates a chunk of buffer frames. @return chunk, or NULL on failure */ @@ -768,26 +835,188 @@ buf_chunk_init( { buf_block_t* block; byte* frame; + ulint zip_hash_n = 0; + ulint zip_hash_mem_size = 0; + hash_table_t* zip_hash_tmp = NULL; ulint i; + buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + + srv_buffer_pool_shm_is_reused = FALSE; + + if (srv_buffer_pool_shm_key) { + /* zip_hash size */ + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + } + /* Reserve space for the block descriptors. */ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + mem_size += zip_hash_mem_size; + } chunk->mem_size = mem_size; + + if (srv_buffer_pool_shm_key) { + ulint binary_id; + ibool is_new; + + ut_a(buf_pool->n_chunks == 1); + + fprintf(stderr, + "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n" + "InnoDB: Do not change the following between restarts of the server while this option is being used:\n" + "InnoDB: * the mysqld executable between restarts of the server.\n" + "InnoDB: * the value of innodb_buffer_pool_size.\n" + "InnoDB: * the value of innodb_page_size.\n" + "InnoDB: * datafiles created by InnoDB during this session.\n" + "InnoDB: Otherwise, data corruption in datafiles may result.\n"); + + /* FIXME: This is vague id still */ + binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get) + + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum) + + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal) + + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str) + + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version) + + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low) + + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func) + + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread) + + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside) + + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity) + + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup); + + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); + + if (UNIV_UNLIKELY(chunk->mem == NULL)) { + return(NULL); + } +init_again: +#ifdef UNIV_SET_MEM_TO_ZERO + if (is_new) { + memset(chunk->mem, '\0', chunk->mem_size); + } +#endif + /* for ut_fold_binary_32(), these values should be 32-bit aligned */ + ut_a(sizeof(buf_shm_info_t) % 4 == 0); + ut_a((ulint)chunk->mem % 4 == 0); + ut_a(chunk->mem_size % 4 == 0); + + shm_info = chunk->mem; + + zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size); + + if (is_new) { + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); + shm_info->binary_id = binary_id; + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */ + shm_info->buf_pool_size = srv_buf_pool_size; + shm_info->page_size = srv_page_size; + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; + shm_info->zip_hash_n = zip_hash_n; + } else { + ulint checksum; + + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); + return(NULL); + } + if (shm_info->binary_id != binary_id) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); + return(NULL); + } + if (shm_info->is_new) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not initialized yet.\n"); + return(NULL); + } + if (shm_info->buf_pool_size != srv_buf_pool_size) { + fprintf(stderr, + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", + shm_info->buf_pool_size, srv_buf_pool_size); + return(NULL); + } + if (shm_info->page_size != srv_page_size) { + fprintf(stderr, + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", + shm_info->page_size, srv_page_size); + return(NULL); + } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Warning: The shared memory has unrecoverable contents.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Warning: The shared memory was not shut down cleanly.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } + + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); + ut_a(shm_info->zip_hash_n == zip_hash_n); + + /* check checksum */ + if (srv_buffer_pool_shm_checksum) { + checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + checksum = BUF_NO_CHECKSUM_MAGIC; + } + + if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC + && shm_info->checksum != checksum) { + fprintf(stderr, + "InnoDB: Error: checksum of the shared memory is not match. " + "(stored=%lu calculated=%lu)\n", + shm_info->checksum, checksum); + return(NULL); + } + + /* flag to use the segment. */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + } + + /* init zip_hash contents */ + if (is_new) { + hash_create_init(zip_hash_tmp, zip_hash_n); + } else { + /* adjust offset is done later */ + hash_create_reuse(zip_hash_tmp); + + srv_buffer_pool_shm_is_reused = TRUE; + } + } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } + } /* Allocate the block descriptors from the start of the memory block. */ + if (srv_buffer_pool_shm_key) { + chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t)); + } else { chunk->blocks = chunk->mem; + } /* Align a pointer to the first frame. Note that when os_large_page_size is smaller than UNIV_PAGE_SIZE, @@ -795,8 +1024,13 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + /* reserve zip_hash space and always -1 for reproductibity */ + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; + } else { chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - (frame != chunk->mem); + } /* Subtract the space needed for block descriptors. */ { @@ -810,6 +1044,98 @@ buf_chunk_init( chunk->size = size; } + if (shm_info && !(shm_info->is_new)) { + /* convert the shared memory segment for reuse */ + ptrdiff_t phys_offset; + ptrdiff_t logi_offset; + ptrdiff_t blocks_offset; + void* previous_frame_address; + + if (chunk->size < shm_info->chunk_backup.size) { + fprintf(stderr, + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" + "InnoDB: Retrying may avoid this situation.\n"); + shm_info->clean = TRUE; /* release the flag for retrying */ + return(NULL); + } + + chunk->size = shm_info->chunk_backup.size; + phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset); + logi_offset = frame - chunk->blocks[0].frame; + previous_frame_address = chunk->blocks[0].frame; + blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks; + + if (phys_offset || logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" + "InnoDB: Previous frames in address : %p\n" + "InnoDB: Previous frames were located : %p\n" + "InnoDB: Current frames should be located: %p\n" + "InnoDB: Pysical offset : %ld (%#lx)\n" + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", + (byte*)chunk->mem + shm_info->frame_offset, + chunk->blocks[0].frame, frame, + phys_offset, phys_offset, logi_offset, logi_offset, + blocks_offset, blocks_offset); + } else { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); + } + + if (phys_offset) { + fprintf(stderr, + "InnoDB: Aligning physical offset..."); + + memmove(frame, (byte*)chunk->mem + shm_info->frame_offset, + chunk->size * UNIV_PAGE_SIZE); + + fprintf(stderr, + " Done.\n"); + } + + /* buf_block_t */ + block = chunk->blocks; + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + + if (logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Aligning logical offset..."); + + + /* buf_pool_t buf_pool_backup */ + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, + previous_frame_address, logi_offset, blocks_offset); + if (shm_info->buf_pool_backup.LRU_old) + shm_info->buf_pool_backup.LRU_old = + (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old) + + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + ? logi_offset : blocks_offset)); + + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, + previous_frame_address, logi_offset, blocks_offset); + + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, + previous_frame_address, logi_offset, blocks_offset); + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], + previous_frame_address, logi_offset, blocks_offset); + } + + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, + previous_frame_address, logi_offset, blocks_offset); + + fprintf(stderr, + " Done.\n"); + } + } else { /* Init block structs and assign frames for them. Then we assign the frames to the first blocks (we already mapped the memory above). */ @@ -833,6 +1159,11 @@ buf_chunk_init( block++; frame += UNIV_PAGE_SIZE; } + } + + if (shm_info) { + shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem; + } return(chunk); } @@ -1014,6 +1345,8 @@ buf_chunk_free( UNIV_MEM_UNDESC(block); } + ut_a(!srv_buffer_pool_shm_key); + os_mem_free_large(chunk->mem, chunk->mem_size); } @@ -1063,7 +1396,10 @@ buf_pool_init(void) srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE; buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ + if (!srv_buffer_pool_shm_key) { buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); + } buf_pool->last_printout_time = time(NULL); @@ -1078,6 +1414,86 @@ buf_pool_init(void) --------------------------- */ /* All fields are initialized by mem_zalloc(). */ + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); + shm_info = chunk->mem; + + buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset); + + if(shm_info->is_new) { + shm_info->is_new = FALSE; /* initialization was finished */ + } else { + buf_block_t* block = chunk->blocks; + buf_page_t* b; + + /* shm_info->buf_pool_backup should be converted */ + /* at buf_chunk_init(). So copy simply. */ + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; + buf_pool->free = shm_info->buf_pool_backup.free; + buf_pool->LRU = shm_info->buf_pool_backup.LRU; + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; + } + + for (i = 0; i < chunk->size; i++, block++) { + if (buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE) { + ut_d(block->page.in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold( + block->page.space, + block->page.offset), + &block->page); + } + } + + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; + b = UT_LIST_GET_NEXT(zip_list, b)) { + ut_ad(!b->in_flush_list); + ut_ad(b->in_LRU_list); + + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, b->offset), b); + } + + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; + b = UT_LIST_GET_NEXT(flush_list, b)) { + ut_ad(b->in_flush_list); + ut_ad(b->in_LRU_list); + + switch (buf_page_get_state(b)) { + case BUF_BLOCK_ZIP_DIRTY: + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, + b->offset), b); + break; + case BUF_BLOCK_FILE_PAGE: + /* uncompressed page */ + break; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + ut_error; + break; + } + } + + + } + } + mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); buf_pool_mutex_exit(); @@ -1102,6 +1518,34 @@ buf_pool_free(void) buf_chunk_t* chunk; buf_chunk_t* chunks; + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(buf_pool->n_chunks == 1); + + chunk = buf_pool->chunks; + shm_info = chunk->mem; + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); + + /* validation the shared memory segment doesn't have unrecoverable contents. */ + /* Currently, validation became not needed */ + shm_info->reusable = TRUE; + + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); + + if (srv_fast_shutdown < 2) { + if (srv_buffer_pool_shm_checksum) { + shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + shm_info->checksum = BUF_NO_CHECKSUM_MAGIC; + } + shm_info->clean = TRUE; + } + + os_shm_free(chunk->mem, chunk->mem_size); + } else { chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; @@ -1110,10 +1554,13 @@ buf_pool_free(void) would fail at shutdown. */ os_mem_free_large(chunk->mem, chunk->mem_size); } + } mem_free(buf_pool->chunks); hash_table_free(buf_pool->page_hash); + if (!srv_buffer_pool_shm_key) { hash_table_free(buf_pool->zip_hash); + } mem_free(buf_pool); buf_pool = NULL; } @@ -1308,6 +1755,11 @@ try_again: //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); + if (srv_buffer_pool_shm_key) { + /* Cannot support shrink */ + goto func_done; + } + shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1551,6 +2003,11 @@ void buf_pool_resize(void) /*=================*/ { + if (srv_buffer_pool_shm_key) { + /* Cannot support resize */ + return; + } + //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 0ef91137aef..5db6f816aab 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -1811,9 +1811,9 @@ buf_flush_validate_low(void) ut_a(om > 0); if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + buf_page_t* rpage; ut_a(rnode); - buf_page_t* rpage = *rbt_value(buf_page_t*, - rnode); + rpage = *rbt_value(buf_page_t*, rnode); ut_a(rpage); ut_a(rpage == bpage); rnode = rbt_next(buf_pool->flush_rbt, rnode); diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 94828940fd4..f90d92eb732 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -2265,7 +2265,7 @@ buf_LRU_file_restore(void) ulint req = 0; ibool terminated = FALSE; ibool ret = FALSE; - dump_record_t* records= 0; + dump_record_t* records = NULL; ulint size; ulint size_high; ulint length; diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index 992a7000eda..c63ff57be97 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -894,7 +894,7 @@ dict_truncate_index_tree( appropriate field in the SYS_INDEXES record: this mini-transaction marks the B-tree totally truncated */ - btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, mtr); + btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr); btr_free_root(space, zip_size, root_page_no, mtr); create: diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 40380067bd2..4d99907f093 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -81,9 +81,18 @@ UNIV_INTERN rw_lock_t dict_operation_lock; /** Identifies generated InnoDB foreign key names */ static char dict_ibfk[] = "_ibfk_"; -/** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ -#define DICT_INDEX_STAT_MUTEX_SIZE 32 -static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; +/** array of rw locks protecting +dict_table_t::stat_initialized +dict_table_t::stat_n_rows (*) +dict_table_t::stat_clustered_index_size +dict_table_t::stat_sum_of_other_index_sizes +dict_table_t::stat_modified_counter (*) +dict_table_t::indexes*::stat_n_diff_key_vals[] +dict_table_t::indexes*::stat_index_size +dict_table_t::indexes*::stat_n_leaf_pages +(*) those are not always protected for performance reasons */ +#define DICT_TABLE_STATS_LATCHES_SIZE 64 +static rw_lock_t dict_table_stats_latches[DICT_TABLE_STATS_LATCHES_SIZE]; /*******************************************************************//** Tries to find column names for the index and sets the col field of the @@ -244,43 +253,65 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } -/** Get the mutex that protects index->stat_n_diff_key_vals[] */ -#define GET_INDEX_STAT_MUTEX(index) \ - (&dict_index_stat_mutex[ut_fold_dulint(index->id) \ - % DICT_INDEX_STAT_MUTEX_SIZE]) +/** Get the latch that protects the stats of a given table */ +#define GET_TABLE_STATS_LATCH(table) \ + (&dict_table_stats_latches[ut_fold_dulint(table->id) \ + % DICT_TABLE_STATS_LATCHES_SIZE]) /**********************************************************************//** -Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. -index->id is used to pick the right mutex and it should not change -before dict_index_stat_mutex_exit() is called on this index. */ +Lock the appropriate latch to protect a given table's statistics. +table->id is used to pick the corresponding latch from a global array of +latches. */ UNIV_INTERN void -dict_index_stat_mutex_enter( -/*========================*/ - const dict_index_t* index) /*!< in: index */ +dict_table_stats_lock( +/*==================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or + RW_X_LATCH */ { - ut_ad(index != NULL); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - ut_ad(!index->to_be_dropped); + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - mutex_enter(GET_INDEX_STAT_MUTEX(index)); + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_lock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_X_LATCH: + rw_lock_x_lock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } } /**********************************************************************//** -Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */ +Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void -dict_index_stat_mutex_exit( -/*=======================*/ - const dict_index_t* index) /*!< in: index */ +dict_table_stats_unlock( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or + RW_X_LATCH */ { - ut_ad(index != NULL); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - ut_ad(!index->to_be_dropped); + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - mutex_exit(GET_INDEX_STAT_MUTEX(index)); + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_unlock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_X_LATCH: + rw_lock_x_unlock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } } /********************************************************************//** @@ -671,8 +702,8 @@ dict_init(void) mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH); - for (i = 0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { - mutex_create(&dict_index_stat_mutex[i], SYNC_INDEX_TREE); + for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { + rw_lock_create(&dict_table_stats_latches[i], SYNC_INDEX_TREE); } } @@ -704,13 +735,12 @@ dict_table_get( mutex_exit(&(dict_sys->mutex)); - if (table != NULL) { - if (!table->stat_initialized && !table->is_corrupt) { - /* If table->ibd_file_missing == TRUE, this will - print an error message and return without doing - anything. */ - dict_update_statistics(table, FALSE); - } + if (table != NULL && !table->is_corrupt) { + /* If table->ibd_file_missing == TRUE, this will + print an error message and return without doing + anything. */ + dict_update_statistics(table, TRUE /* only update stats + if they have not been initialized */, FALSE); } return(table); @@ -4366,11 +4396,9 @@ next_rec: btr_pcur_close(&pcur); mtr_commit(&mtr); - dict_index_stat_mutex_enter(index); for (i = 0; i <= n_cols; i++) { index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; } - dict_index_stat_mutex_exit(index); } /*===========================================*/ @@ -4424,11 +4452,9 @@ dict_store_statistics( n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); - dict_index_stat_mutex_enter(index); for (i = 0; i <= n_cols; i++) { stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; } - dict_index_stat_mutex_exit(index); sys_stats = dict_sys->sys_stats; sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); @@ -4504,12 +4530,13 @@ Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN void -dict_update_statistics_low( -/*=======================*/ +dict_update_statistics( +/*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex __attribute__((unused)), - /*!< in: TRUE if the caller has the - dictionary mutex */ + ibool only_calc_if_missing_stats, /*!< in: only + update/recalc the stats if they have + not been initialized yet, otherwise + do nothing */ ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; @@ -4528,6 +4555,8 @@ dict_update_statistics_low( } if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) { + dict_table_stats_lock(table, RW_X_LATCH); + /* reload statistics from SYS_STATS table */ if (dict_reload_statistics(table, &sum_of_index_sizes)) { /* success */ @@ -4537,6 +4566,8 @@ dict_update_statistics_low( #endif goto end; } + + dict_table_stats_unlock(table, RW_X_LATCH); } #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", @@ -4555,6 +4586,13 @@ dict_update_statistics_low( return; } + dict_table_stats_lock(table, RW_X_LATCH); + + if (only_calc_if_missing_stats && table->stat_initialized) { + dict_table_stats_unlock(table, RW_X_LATCH); + return; + } + do { if (table->is_corrupt) { ut_a(srv_pass_corrupt_table); @@ -4609,13 +4647,9 @@ dict_update_statistics_low( end: index = dict_table_get_first_index(table); - dict_index_stat_mutex_enter(index); - table->stat_n_rows = index->stat_n_diff_key_vals[ dict_index_get_n_unique(index)]; - dict_index_stat_mutex_exit(index); - table->stat_clustered_index_size = index->stat_index_size; table->stat_sum_of_other_index_sizes = sum_of_index_sizes @@ -4624,19 +4658,8 @@ end: table->stat_initialized = TRUE; table->stat_modified_counter = 0; -} -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ -UNIV_INTERN -void -dict_update_statistics( -/*===================*/ - dict_table_t* table, /*!< in/out: table */ - ibool sync) -{ - dict_update_statistics_low(table, FALSE, sync); + dict_table_stats_unlock(table, RW_X_LATCH); } /**********************************************************************//** @@ -4716,8 +4739,9 @@ dict_table_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); - if (srv_stats_auto_update) - dict_update_statistics_low(table, TRUE, FALSE); + dict_update_statistics(table, FALSE /* update even if initialized */, FALSE); + + dict_table_stats_lock(table, RW_S_LATCH); fprintf(stderr, "--------------------------------------\n" @@ -4746,6 +4770,8 @@ dict_table_print_low( index = UT_LIST_GET_NEXT(indexes, index); } + dict_table_stats_unlock(table, RW_S_LATCH); + foreign = UT_LIST_GET_FIRST(table->foreign_list); while (foreign != NULL) { @@ -4794,8 +4820,6 @@ dict_index_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); - dict_index_stat_mutex_enter(index); - if (index->n_user_defined_cols > 0) { n_vals = index->stat_n_diff_key_vals[ index->n_user_defined_cols]; @@ -4803,8 +4827,6 @@ dict_index_print_low( n_vals = index->stat_n_diff_key_vals[1]; } - dict_index_stat_mutex_exit(index); - fprintf(stderr, " INDEX: name %s, id %lu %lu, fields %lu/%lu," " uniq %lu, type %lu\n" @@ -5148,7 +5170,8 @@ void dict_table_replace_index_in_foreign_list( /*=====================================*/ dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in: index to be replaced */ + dict_index_t* index, /*!< in: index to be replaced */ + const trx_t* trx) /*!< in: transaction handle */ { dict_foreign_t* foreign; @@ -5159,7 +5182,13 @@ dict_table_replace_index_in_foreign_list( if (foreign->foreign_index == index) { dict_index_t* new_index = dict_foreign_find_equiv_index(foreign); - ut_a(new_index); + + /* There must exist an alternative index if + check_foreigns (FOREIGN_KEY_CHECKS) is on, + since ha_innobase::prepare_drop_index had done + the check before we reach here. */ + + ut_a(new_index || !trx->check_foreigns); foreign->foreign_index = new_index; } @@ -5293,8 +5322,8 @@ dict_close(void) mem_free(dict_sys); dict_sys = NULL; - for (i = 0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { - mutex_free(&dict_index_stat_mutex[i]); + for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { + rw_lock_free(&dict_table_stats_latches[i]); } } diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 19de5bf8264..edd77e2530f 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -222,8 +222,9 @@ loop: /* The table definition was corrupt if there is no index */ - if (srv_stats_auto_update && dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE, FALSE); + if (dict_table_get_first_index(table)) { + dict_update_statistics(table, FALSE /* update + even if initialized */, FALSE); } dict_table_print_low(table); diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 8e043f1f7e2..2ab0dd6267a 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -336,14 +336,15 @@ fil_get_space_id_for_table( /*******************************************************************//** Frees a space object from the tablespace memory cache. Closes the files in the chain but does not delete them. There must not be any pending i/o's or -flushes on the files. */ +flushes on the files. +@return TRUE on success */ static ibool fil_space_free( /*===========*/ - /* out: TRUE if success */ - ulint id, /* in: space id */ - ibool own_mutex);/* in: TRUE if own system->mutex */ + ulint id, /* in: space id */ + ibool x_latched); /* in: TRUE if caller has space->latch + in X mode */ /********************************************************************//** Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when @@ -617,7 +618,7 @@ fil_node_create( UT_LIST_ADD_LAST(chain, space->chain, node); - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { + if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { fil_system->max_assigned_id = id; } @@ -1130,6 +1131,7 @@ try_again: space = fil_space_get_by_name(name); if (UNIV_LIKELY_NULL(space)) { + ibool success; ulint namesake_id; ut_print_timestamp(stderr); @@ -1168,9 +1170,10 @@ try_again: namesake_id = space->id; - mutex_exit(&fil_system->mutex); + success = fil_space_free(namesake_id, FALSE); + ut_a(success); - fil_space_free(namesake_id, FALSE); + mutex_exit(&fil_system->mutex); goto try_again; } @@ -1205,6 +1208,7 @@ try_again: space->mark = FALSE; if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID) && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { if (!fil_system->space_id_reuse_warned) { fil_system->space_id_reuse_warned = TRUE; @@ -1290,7 +1294,7 @@ fil_assign_new_space_id( (ulong) SRV_LOG_SPACE_FIRST_ID); } - success = (id < SRV_LOG_SPACE_FIRST_ID); + success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID); if (success) { *space_id = fil_system->max_assigned_id = id; @@ -1323,15 +1327,14 @@ fil_space_free( /*===========*/ /* out: TRUE if success */ ulint id, /* in: space id */ - ibool own_mutex) /* in: TRUE if own system->mutex */ + ibool x_latched) /* in: TRUE if caller has space->latch + in X mode */ { fil_space_t* space; fil_space_t* namespace; fil_node_t* fil_node; - if (!own_mutex) { - mutex_enter(&fil_system->mutex); - } + ut_ad(mutex_own(&fil_system->mutex)); space = fil_space_get_by_id(id); @@ -1342,8 +1345,6 @@ fil_space_free( " from the cache but\n" "InnoDB: it is not there.\n", (ulong) id); - mutex_exit(&fil_system->mutex); - return(FALSE); } @@ -1378,8 +1379,8 @@ fil_space_free( ut_a(0 == UT_LIST_GET_LEN(space->chain)); - if (!own_mutex) { - mutex_exit(&fil_system->mutex); + if (x_latched) { + rw_lock_x_unlock(&space->latch); } rw_lock_free(&(space->latch)); @@ -1626,25 +1627,27 @@ fil_close_all_files(void) /*=====================*/ { fil_space_t* space; - fil_node_t* node; mutex_enter(&fil_system->mutex); space = UT_LIST_GET_FIRST(fil_system->space_list); while (space != NULL) { + fil_node_t* node; fil_space_t* prev_space = space; - node = UT_LIST_GET_FIRST(space->chain); + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { - while (node != NULL) { if (node->open) { fil_node_close_file(node, fil_system); } - node = UT_LIST_GET_NEXT(chain, node); } + space = UT_LIST_GET_NEXT(space_list, space); - fil_space_free(prev_space->id, TRUE); + + fil_space_free(prev_space->id, FALSE); } mutex_exit(&fil_system->mutex); @@ -1666,6 +1669,10 @@ fil_set_max_space_id_if_bigger( ut_error; } + if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) { + return; + } + mutex_enter(&fil_system->mutex); if (fil_system->max_assigned_id < max_id) { @@ -1684,6 +1691,7 @@ static ulint fil_write_lsn_and_arch_no_to_file( /*==============================*/ + ulint space_id, ulint sum_of_sizes, /*!< in: combined size of previous files in space, in database pages */ ib_uint64_t lsn, /*!< in: lsn to write */ @@ -1693,14 +1701,16 @@ fil_write_lsn_and_arch_no_to_file( byte* buf1; byte* buf; + ut_a(trx_sys_sys_space(space_id)); + buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); buf = ut_align(buf1, UNIV_PAGE_SIZE); - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mem_free(buf1); @@ -1736,7 +1746,7 @@ fil_write_flushed_lsn_to_data_files( always open. */ if (space->purpose == FIL_TABLESPACE - && space->id == 0) { + && trx_sys_sys_space(space->id)) { sum_of_sizes = 0; node = UT_LIST_GET_FIRST(space->chain); @@ -1744,7 +1754,7 @@ fil_write_flushed_lsn_to_data_files( mutex_exit(&fil_system->mutex); err = fil_write_lsn_and_arch_no_to_file( - sum_of_sizes, lsn, arch_log_no); + space->id, sum_of_sizes, lsn, arch_log_no); if (err != DB_SUCCESS) { return(err); @@ -2264,6 +2274,19 @@ try_again: path = mem_strdup(space->name); mutex_exit(&fil_system->mutex); + + /* Important: We rely on the data dictionary mutex to ensure + that a race is not possible here. It should serialize the tablespace + drop/free. We acquire an X latch only to avoid a race condition + when accessing the tablespace instance via: + + fsp_get_available_space_in_free_extents(). + + There our main motivation is to reduce the contention on the + dictionary mutex. */ + + rw_lock_x_lock(&space->latch); + #ifndef UNIV_HOTBACKUP /* Invalidate in the buffer pool all pages belonging to the tablespace. Since we have set space->is_being_deleted = TRUE, readahead @@ -2276,7 +2299,11 @@ try_again: #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ - success = fil_space_free(id, FALSE); + mutex_enter(&fil_system->mutex); + + success = fil_space_free(id, TRUE); + + mutex_exit(&fil_system->mutex); if (success) { success = os_file_delete(path); @@ -2284,6 +2311,8 @@ try_again: if (!success) { success = os_file_delete_if_exists(path); } + } else { + rw_lock_x_unlock(&space->latch); } if (success) { @@ -2311,6 +2340,31 @@ try_again: return(FALSE); } +/*******************************************************************//** +Returns TRUE if a single-table tablespace is being deleted. +@return TRUE if being deleted */ +UNIV_INTERN +ibool +fil_tablespace_is_being_deleted( +/*============================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ibool is_being_deleted; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + + is_being_deleted = space->is_being_deleted; + + mutex_exit(&fil_system->mutex); + + return(is_being_deleted); +} + #ifndef UNIV_HOTBACKUP /*******************************************************************//** Discards a single-table tablespace. The tablespace must be cached in the @@ -5342,7 +5396,7 @@ fil_page_get_type( return(mach_read_from_2(page + FIL_PAGE_TYPE)); } -/******************************************************************** +/****************************************************************//** Initializes the tablespace memory cache. */ UNIV_INTERN void diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c index cd28186109f..0561bcc77fc 100644 --- a/storage/xtradb/fsp/fsp0fsp.c +++ b/storage/xtradb/fsp/fsp0fsp.c @@ -3126,13 +3126,63 @@ fsp_get_available_space_in_free_extents( ut_ad(!mutex_own(&kernel_mutex)); + /* The convoluted mutex acquire is to overcome latching order + issues: The problem is that the fil_mutex is at a lower level + than the tablespace latch and the buffer pool mutex. We have to + first prevent any operations on the file system by acquiring the + dictionary mutex. Then acquire the tablespace latch to obey the + latching order and then release the dictionary mutex. That way we + ensure that the tablespace instance can't be freed while we are + examining its contents (see fil_space_free()). + + However, there is one further complication, we release the fil_mutex + when we need to invalidate the the pages in the buffer pool and we + reacquire the fil_mutex when deleting and freeing the tablespace + instance in fil0fil.c. Here we need to account for that situation + too. */ + + mutex_enter(&dict_sys->mutex); + + /* At this stage there is no guarantee that the tablespace even + exists in the cache. */ + + if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) { + + mutex_exit(&dict_sys->mutex); + + return(ULLINT_UNDEFINED); + } + mtr_start(&mtr); latch = fil_space_get_latch(space, &flags); + + /* This should ensure that the tablespace instance can't be freed + by another thread. However, the tablespace pages can still be freed + from the buffer pool. We need to check for that again. */ + zip_size = dict_table_flags_to_zip_size(flags); mtr_x_lock(latch, &mtr); + mutex_exit(&dict_sys->mutex); + + /* At this point it is possible for the tablespace to be deleted and + its pages removed from the buffer pool. We need to check for that + situation. However, the tablespace instance can't be deleted because + our latching above should ensure that. */ + + if (fil_tablespace_is_being_deleted(space)) { + + mtr_commit(&mtr); + + return(ULLINT_UNDEFINED); + } + + /* From here on even if the user has dropped the tablespace, the + pages _must_ still exist in the buffer pool and the tablespace + instance _must_ be in the file system hash table. */ + space_header = fsp_get_space_header(space, zip_size, &mtr); size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr); diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 30c304dafcd..0f4fc55d895 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -128,6 +128,70 @@ hash_create( } /*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + return(offset + sizeof(hash_cell_t) * prime); +} + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((byte*)table) + offset); + table->n_cells = prime; +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + table->adaptive = FALSE; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + table->n_mutexes = 0; + table->mutexes = NULL; + table->heaps = NULL; + table->heap = NULL; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); + + /* Initialize the cell array */ + hash_table_clear(table); +} + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table) +{ + ulint offset; + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((byte*)table) + offset); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); +} + +/*************************************************************//** Frees a hash table. */ UNIV_INTERN void diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 17495af2370..3e742fbac87 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -149,6 +149,7 @@ static ulong innobase_read_io_threads; static ulong innobase_write_io_threads; static ulong innobase_page_size; +static ulong innobase_log_block_size; static my_bool innobase_thread_concurrency_timer_based; static long long innobase_buffer_pool_size, innobase_log_file_size; @@ -194,6 +195,7 @@ static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_use_sys_stats_table = FALSE; +static my_bool innobase_buffer_pool_shm_checksum = TRUE; static char* internal_innobase_data_file_path = NULL; @@ -2114,6 +2116,32 @@ innobase_init( goto error; } + srv_log_block_size = 0; + if (innobase_log_block_size != (1 << 9)) { /*!=512*/ + uint n_shift; + + fprintf(stderr, + "InnoDB: Warning: innodb_log_block_size has been changed from default value 512. (###EXPERIMENTAL### operation)\n"); + for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) { + if (innobase_log_block_size == ((ulong)1 << n_shift)) { + srv_log_block_size = (1 << n_shift); + fprintf(stderr, + "InnoDB: The log block size is set to %lu.\n", + srv_log_block_size); + break; + } + } + } else { + srv_log_block_size = 512; + } + + if (!srv_log_block_size) { + fprintf(stderr, + "InnoDB: Error: %lu is not valid value for innodb_log_block_size.\n", + innobase_log_block_size); + goto error; + } + #ifndef MYSQL_SERVER innodb_overwrite_relay_log_info = FALSE; #endif @@ -2417,7 +2445,7 @@ innobase_change_buffering_inited_ok: srv_n_write_io_threads = (ulint) innobase_write_io_threads; srv_read_ahead &= 3; - srv_adaptive_checkpoint %= 3; + srv_adaptive_checkpoint %= 4; srv_force_recovery = (ulint) innobase_force_recovery; @@ -2426,6 +2454,7 @@ innobase_change_buffering_inited_ok: srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; srv_use_checksums = (ibool) innobase_use_checksums; srv_fast_checksum = (ibool) innobase_fast_checksum; + srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum; #ifdef HAVE_LARGE_PAGES if ((os_use_large_pages = (ibool) my_use_large_pages)) @@ -3871,6 +3900,7 @@ retry: of length ref_length! */ if (!row_table_got_default_clust_index(ib_table)) { + prebuilt->clust_index_was_generated = FALSE; if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { @@ -6672,6 +6702,33 @@ create_clustered_index_when_no_primary( } /*****************************************************************//** +Return a display name for the row format +@return row format name */ + +const char *get_row_format_name( +/*============================*/ +enum row_type row_format) /*!< in: Row Format */ +{ + switch (row_format) { + case ROW_TYPE_COMPACT: + return("COMPACT"); + case ROW_TYPE_COMPRESSED: + return("COMPRESSED"); + case ROW_TYPE_DYNAMIC: + return("DYNAMIC"); + case ROW_TYPE_REDUNDANT: + return("REDUNDANT"); + case ROW_TYPE_DEFAULT: + return("DEFAULT"); + case ROW_TYPE_FIXED: + return("FIXED"); + default: + break; + } + return("NOT USED"); +} + +/*****************************************************************//** Validates the create options. We may build on this function in future. For now, it checks two specifiers: KEY_BLOCK_SIZE and ROW_FORMAT @@ -6686,9 +6743,9 @@ create_options_are_valid( columns and indexes */ HA_CREATE_INFO* create_info) /*!< in: create info. */ { - ibool kbs_specified = FALSE; + ibool kbs_specified = FALSE; ibool ret = TRUE; - + enum row_type row_type = form->s->row_type; ut_ad(thd != NULL); @@ -6697,13 +6754,28 @@ create_options_are_valid( return(TRUE); } + /* Check for a valid Innodb ROW_FORMAT specifier. For example, + ROW_TYPE_FIXED can be sent to Innodb */ + switch (row_type) { + case ROW_TYPE_COMPACT: + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + case ROW_TYPE_REDUNDANT: + case ROW_TYPE_DEFAULT: + break; + default: + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: invalid ROW_FORMAT specifier."); + ret = FALSE; + } + ut_ad(form != NULL); ut_ad(create_info != NULL); - /* First check if KEY_BLOCK_SIZE was specified. */ - if (create_info->key_block_size - || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) { - + /* First check if a non-zero KEY_BLOCK_SIZE was specified. */ + if (create_info->key_block_size) { kbs_specified = TRUE; switch (create_info->key_block_size) { case 1: @@ -6714,13 +6786,12 @@ create_options_are_valid( /* Valid value. */ break; default: - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid" - " KEY_BLOCK_SIZE = %lu." - " Valid values are" - " [1, 2, 4, 8, 16]", - create_info->key_block_size); + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: invalid KEY_BLOCK_SIZE = %lu." + " Valid values are [1, 2, 4, 8, 16]", + create_info->key_block_size); ret = FALSE; } } @@ -6728,109 +6799,67 @@ create_options_are_valid( /* If KEY_BLOCK_SIZE was specified, check for its dependencies. */ if (kbs_specified && !srv_file_per_table) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE" + " requires innodb_file_per_table."); ret = FALSE; } if (kbs_specified && srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_format >" - " Antelope."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_format > Antelope."); ret = FALSE; } - /* Now check for ROW_FORMAT specifier. */ - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - /* These two ROW_FORMATs require srv_file_per_table - and srv_file_format > Antelope */ - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - ret = FALSE; - } - - if (srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - ret = FALSE; - } - - /* Cannot specify KEY_BLOCK_SIZE with - ROW_FORMAT = DYNAMIC. - However, we do allow COMPRESSED to be - specified with KEY_BLOCK_SIZE. */ - if (kbs_specified - && form->s->row_type == ROW_TYPE_DYNAMIC) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: cannot specify" - " ROW_FORMAT = DYNAMIC with" - " KEY_BLOCK_SIZE."); - ret = FALSE; - } - - break; - - case ROW_TYPE_REDUNDANT: - case ROW_TYPE_COMPACT: - case ROW_TYPE_DEFAULT: - /* Default is COMPACT. */ - row_format_name - = form->s->row_type == ROW_TYPE_REDUNDANT - ? "REDUNDANT" - : "COMPACT"; - - /* Cannot specify KEY_BLOCK_SIZE with these - format specifiers. */ - if (kbs_specified) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: cannot specify" - " ROW_FORMAT = %s with" - " KEY_BLOCK_SIZE.", - row_format_name); - ret = FALSE; - } - - break; + switch (row_type) { + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + /* These two ROW_FORMATs require srv_file_per_table + and srv_file_format > Antelope */ + if (!srv_file_per_table) { + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + get_row_format_name(row_type)); + ret = FALSE; + } - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid ROW_FORMAT specifier."); - ret = FALSE; + if (srv_file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_format > Antelope.", + get_row_format_name(row_type)); + ret = FALSE; + } + default: + break; + } + switch (row_type) { + case ROW_TYPE_REDUNDANT: + case ROW_TYPE_COMPACT: + case ROW_TYPE_DYNAMIC: + /* KEY_BLOCK_SIZE is only allowed with Compressed or Default */ + if (kbs_specified) { + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: cannot specify ROW_FORMAT = %s" + " with KEY_BLOCK_SIZE.", + get_row_format_name(row_type)); + ret = FALSE; } + default: + break; } return(ret); @@ -6956,8 +6985,7 @@ ha_innobase::create( goto cleanup; } - if (create_info->key_block_size - || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) { + if (create_info->key_block_size) { /* Determine the page_zip.ssize corresponding to the requested page size (key_block_size) in kilobytes. */ @@ -6978,38 +7006,39 @@ ha_innobase::create( } if (!srv_file_per_table) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE" + " requires innodb_file_per_table."); flags = 0; } if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_format >" - " Antelope."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_format > Antelope."); flags = 0; } if (!flags) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring" - " KEY_BLOCK_SIZE=%lu.", - create_info->key_block_size); + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring" + " KEY_BLOCK_SIZE=%lu.", + create_info->key_block_size); } } row_type = form->s->row_type; if (flags) { - /* if KEY_BLOCK_SIZE was specified on this statement and - ROW_FORMAT was not, automatically change ROW_FORMAT to COMPRESSED.*/ - if ( (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) - && !(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* if ROW_FORMAT is set to default, + automatically change it to COMPRESSED.*/ + if (row_type == ROW_TYPE_DEFAULT) { row_type = ROW_TYPE_COMPRESSED; } else if (row_type != ROW_TYPE_COMPRESSED) { /* ROW_FORMAT other than COMPRESSED @@ -7019,8 +7048,7 @@ ha_innobase::create( such combinations can be obtained with ALTER TABLE anyway. */ push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" " unless ROW_FORMAT=COMPRESSED.", @@ -7045,33 +7073,24 @@ ha_innobase::create( } switch (row_type) { - const char* row_format_name; case ROW_TYPE_REDUNDANT: break; case ROW_TYPE_COMPRESSED: case ROW_TYPE_DYNAMIC: - row_format_name - = row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - if (!srv_file_per_table) { push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_per_table.", + get_row_format_name(row_type)); } else if (file_format < DICT_TF_FORMAT_ZIP) { push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_format > Antelope.", + get_row_format_name(row_type)); } else { flags |= DICT_TF_COMPACT | (DICT_TF_FORMAT_ZIP @@ -7083,10 +7102,10 @@ ha_innobase::create( case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); case ROW_TYPE_DEFAULT: case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; @@ -7269,6 +7288,14 @@ ha_innobase::discard_or_import_tablespace( err = row_discard_tablespace_for_mysql(dict_table->name, trx); } else { err = row_import_tablespace_for_mysql(dict_table->name, trx); + + /* in expanded import mode re-initialize auto_increment again */ + if ((err == DB_SUCCESS) && srv_expand_import && + (table->found_next_number_field != NULL)) { + dict_table_autoinc_lock(dict_table); + innobase_initialize_autoinc(); + dict_table_autoinc_unlock(dict_table); + } } err = convert_error_code_to_mysql(err, dict_table->flags, NULL); @@ -7734,6 +7761,7 @@ ha_innobase::estimate_rows_upper_bound(void) dict_index_t* index; ulonglong estimate; ulonglong local_data_file_length; + ulint stat_n_leaf_pages; DBUG_ENTER("estimate_rows_upper_bound"); @@ -7753,10 +7781,12 @@ ha_innobase::estimate_rows_upper_bound(void) index = dict_table_get_first_index(prebuilt->table); - ut_a(index->stat_n_leaf_pages > 0); + stat_n_leaf_pages = index->stat_n_leaf_pages; + + ut_a(stat_n_leaf_pages > 0); local_data_file_length = - ((ulonglong) index->stat_n_leaf_pages) * UNIV_PAGE_SIZE; + ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; /* Calculate a minimum length for a clustered index record and from @@ -7959,13 +7989,12 @@ ha_innobase::info_low( ib_table = prebuilt->table; if (flag & HA_STATUS_TIME) { - if ((called_from_analyze || innobase_stats_on_metadata) - && !share->ib_table->is_corrupt) { + if ((called_from_analyze || innobase_stats_on_metadata) && !share->ib_table->is_corrupt) { /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) - && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { + && called_from_analyze) { /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ /* they need to be created. */ dict_index_t* index; @@ -7987,7 +8016,8 @@ ha_innobase::info_low( prebuilt->trx->op_info = "updating table statistics"; dict_update_statistics(ib_table, - (thd_sql_command(user_thd) == SQLCOM_ANALYZE)?TRUE:FALSE); + FALSE /* update even if stats + are initialized */, called_from_analyze); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8006,6 +8036,9 @@ ha_innobase::info_low( } if (flag & HA_STATUS_VARIABLE) { + + dict_table_stats_lock(ib_table, RW_S_LATCH); + n_rows = ib_table->stat_n_rows; /* Because we do not protect stat_n_rows by any mutex in a @@ -8055,12 +8088,14 @@ ha_innobase::info_low( ib_table->stat_sum_of_other_index_sizes) * UNIV_PAGE_SIZE; + dict_table_stats_unlock(ib_table, RW_S_LATCH); + /* Since fsp_get_available_space_in_free_extents() is acquiring latches inside InnoDB, we do not call it if we are asked by MySQL to avoid locking. Another reason to avoid the call is that it uses quite a lot of CPU. See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK) { + if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock) { /* We do not update delete_length if no locking is requested so the "old" value can remain. delete_length is initialized to 0 in @@ -8070,21 +8105,13 @@ ha_innobase::info_low( /* Avoid accessing the tablespace if innodb_crash_recovery is set to a high value. */ stats.delete_length = 0; - } else if (srv_stats_update_need_lock) { - - /* lock the data dictionary to avoid races with - ibd_file_missing and tablespace_discarded */ - row_mysql_lock_data_dictionary(prebuilt->trx); - - /* ib_table->space must be an existent tablespace */ - if (!ib_table->ibd_file_missing - && !ib_table->tablespace_discarded) { + } else { + ullint avail_space; - stats.delete_length = - fsp_get_available_space_in_free_extents( - ib_table->space) * 1024; - } else { + avail_space = fsp_get_available_space_in_free_extents( + ib_table->space); + if (avail_space == ULLINT_UNDEFINED) { THD* thd; thd = ha_thd(); @@ -8101,9 +8128,9 @@ ha_innobase::info_low( ib_table->name); stats.delete_length = 0; + } else { + stats.delete_length = avail_space * 1024; } - - row_mysql_unlock_data_dictionary(prebuilt->trx); } stats.check_time = 0; @@ -8132,6 +8159,8 @@ ha_innobase::info_low( table->s->keys); } + dict_table_stats_lock(ib_table, RW_S_LATCH); + for (i = 0; i < table->s->keys; i++) { ulong j; /* We could get index quickly through internal @@ -8169,8 +8198,6 @@ ha_innobase::info_low( break; } - dict_index_stat_mutex_enter(index); - if (index->stat_n_diff_key_vals[j + 1] == 0) { rec_per_key = stats.records; @@ -8179,8 +8206,6 @@ ha_innobase::info_low( index->stat_n_diff_key_vals[j + 1]); } - dict_index_stat_mutex_exit(index); - /* Since MySQL seems to favor table scans too much over index searches, we pretend index selectivity is 2 times better than @@ -8197,6 +8222,8 @@ ha_innobase::info_low( (ulong) rec_per_key; } } + + dict_table_stats_unlock(ib_table, RW_S_LATCH); } if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { @@ -11176,6 +11203,11 @@ static MYSQL_SYSVAR_ULONG(page_size, innobase_page_size, "###EXPERIMENTAL###: The universal page size of the database. Changing for created database is not supported. Use on your own risk!", NULL, NULL, (1 << 14), (1 << 12), (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0); +static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!", + NULL, NULL, (1 << 9)/*512*/, (1 << 9)/*512*/, (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0); + static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, PLUGIN_VAR_READONLY, "The common part for InnoDB table spaces.", @@ -11394,6 +11426,16 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.", + NULL, NULL, 0, 0, INT_MAX32, 0); + +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable buffer_pool_shm checksum validation (enabled by default).", + NULL, NULL, TRUE); + static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, "Helps in performance tuning in heavily concurrent environments.", @@ -11604,17 +11646,19 @@ innodb_adaptive_checkpoint_update( void* var_ptr, const void* save) { - *(long *)var_ptr= (*(long *)save) % 3; + *(long *)var_ptr= (*(long *)save) % 4; } const char *adaptive_checkpoint_names[]= { "none", /* 0 */ "reflex", /* 1 */ "estimate", /* 2 */ + "keep_average", /* 3 */ /* For compatibility of the older patch */ - "0", /* 3 ("none" + 3) */ - "1", /* 4 ("reflex" + 3) */ - "2", /* 5 ("estimate" + 3) */ + "0", /* 4 ("none" + 3) */ + "1", /* 5 ("reflex" + 3) */ + "2", /* 6 ("estimate" + 3) */ + "3", /* 7 ("keep_average" + 4) */ NullS }; TYPELIB adaptive_checkpoint_typelib= @@ -11624,7 +11668,7 @@ TYPELIB adaptive_checkpoint_typelib= }; static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint, PLUGIN_VAR_RQCMDARG, - "Enable/Disable flushing along modified age. (none, reflex, [estimate])", + "Enable/Disable flushing along modified age. (none, reflex, [estimate], keep_average)", NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib); static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit, @@ -11663,9 +11707,12 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(page_size), + MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_shm_key), + MYSQL_SYSVAR(buffer_pool_shm_checksum), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 3a32ed9cf36..c54b4a17fd2 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1012,12 +1012,13 @@ ha_innobase::prepare_drop_index( index->to_be_dropped = TRUE; } - /* If FOREIGN_KEY_CHECK = 1 you may not drop an index defined + /* If FOREIGN_KEY_CHECKS = 1 you may not drop an index defined for a foreign key constraint because InnoDB requires that both - tables contain indexes for the constraint. Note that CREATE - INDEX id ON table does a CREATE INDEX and DROP INDEX, and we - can ignore here foreign keys because a new index for the - foreign key has already been created. + tables contain indexes for the constraint. Such index can + be dropped only if FOREIGN_KEY_CHECKS is set to 0. + Note that CREATE INDEX id ON table does a CREATE INDEX and + DROP INDEX, and we can ignore here foreign keys because a + new index for the foreign key has already been created. We check for the foreign key constraints after marking the candidate indexes for deletion, because when we check for an diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h index 38b97411340..e68f12d0fec 100644 --- a/storage/xtradb/handler/innodb_patch_info.h +++ b/storage/xtradb/handler/innodb_patch_info.h @@ -47,5 +47,6 @@ struct innodb_enhancement { {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h index 5e6a76c7d21..dde3a0bab69 100644 --- a/storage/xtradb/include/btr0btr.h +++ b/storage/xtradb/include/btr0btr.h @@ -94,26 +94,35 @@ btr_root_get( Gets a buffer page and declares its latching order level. */ UNIV_INLINE buf_block_t* -btr_block_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr); /*!< in: mtr */ +btr_block_get_func( +/*===============*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in/out: mtr */ + __attribute__((nonnull)); +/** Gets a buffer page and declares its latching order level. +@param space tablespace identifier +@param zip_size compressed page size in bytes or 0 for uncompressed pages +@param page_no page number +@param mode latch mode +@param mtr mini-transaction handle +@return the block descriptor */ +# define btr_block_get(space,zip_size,page_no,mode,mtr) \ + btr_block_get_func(space,zip_size,page_no,mode,__FILE__,__LINE__,mtr) +/** Gets a buffer page and declares its latching order level. +@param space tablespace identifier +@param zip_size compressed page size in bytes or 0 for uncompressed pages +@param page_no page number +@param mode latch mode +@param mtr mini-transaction handle +@return the uncompressed page frame */ +# define btr_page_get(space,zip_size,page_no,mode,mtr) \ + buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr)) #endif /* !UNIV_HOTBACKUP */ /**************************************************************//** Gets the index id field of a page. diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic index c9c38f3c3b3..e19c863300a 100644 --- a/storage/xtradb/include/btr0btr.ic +++ b/storage/xtradb/include/btr0btr.ic @@ -39,18 +39,21 @@ Created 6/2/1994 Heikki Tuuri Gets a buffer page and declares its latching order level. */ UNIV_INLINE buf_block_t* -btr_block_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr) /*!< in: mtr */ +btr_block_get_func( +/*===============*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in/out: mtr */ { buf_block_t* block; - block = buf_page_get(space, zip_size, page_no, mode, mtr); + block = buf_page_get_gen(space, zip_size, page_no, mode, + NULL, BUF_GET, file, line, mtr); ut_a(srv_pass_corrupt_table || block); @@ -63,23 +66,6 @@ btr_block_get( } /**************************************************************//** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr) /*!< in: mtr */ -{ - return(buf_block_get_frame(btr_block_get(space, zip_size, page_no, - mode, mtr))); -} - -/**************************************************************//** Sets the index id field of a page. */ UNIV_INLINE void diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 7f6bff11f84..1e1dc0f580a 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -520,7 +520,7 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or error */ +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint btr_store_big_rec_extern_fields( diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index d6cc63bb6af..e06927f42f0 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #ifndef UNIV_HOTBACKUP #include "os0proc.h" +#include "srv0srv.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -1301,7 +1302,10 @@ struct buf_block_struct{ /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. */ /* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ + :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index d18b3ecb1b0..7baacdd6055 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -318,7 +318,8 @@ void dict_table_replace_index_in_foreign_list( /*=====================================*/ dict_table_t* table, /*!< in/out: table */ - dict_index_t* index); /*!< in: index to be replaced */ + dict_index_t* index, /*!< in: index to be replaced */ + const trx_t* trx); /*!< in: transaction handle */ /*********************************************************************//** Checks if a index is defined for a foreign key constraint. Index is a part of a foreign key constraint if the index is referenced by foreign key @@ -1053,20 +1054,13 @@ Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN void -dict_update_statistics_low( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex, /*!< in: TRUE if the caller has the - dictionary mutex */ - ibool sync); -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ -UNIV_INTERN -void dict_update_statistics( /*===================*/ - dict_table_t* table, /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool only_calc_if_missing_stats, /*!< in: only + update/recalc the stats if they have + not been initialized yet, otherwise + do nothing */ ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ @@ -1081,21 +1075,25 @@ void dict_mutex_exit_for_mysql(void); /*===========================*/ /**********************************************************************//** -Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. -index->id is used to pick the right mutex and it should not change -before dict_index_stat_mutex_exit() is called on this index. */ +Lock the appropriate latch to protect a given table's statistics. +table->id is used to pick the corresponding latch from a global array of +latches. */ UNIV_INTERN void -dict_index_stat_mutex_enter( -/*========================*/ - const dict_index_t* index); /*!< in: index */ +dict_table_stats_lock( +/*==================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or + RW_X_LATCH */ /**********************************************************************//** -Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */ +Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void -dict_index_stat_mutex_exit( -/*=======================*/ - const dict_index_t* index); /*!< in: index */ +dict_table_stats_unlock( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or + RW_X_LATCH */ /********************************************************************//** Checks if the database name in two table names is the same. @return TRUE if same db name */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 07c80ef8609..a262ec8f9cc 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -737,6 +737,15 @@ fil_page_get_type( /*==============*/ const byte* page); /*!< in: file page */ +/*******************************************************************//** +Returns TRUE if a single-table tablespace is being deleted. +@return TRUE if being deleted */ +UNIV_INTERN +ibool +fil_tablespace_is_being_deleted( +/*============================*/ + ulint id); /*!< in: space id */ + /************************************************************************* Return local hash table informations. */ diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index b17c21a45ef..492c767acc4 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -49,6 +49,28 @@ hash_table_t* hash_create( /*========*/ ulint n); /*!< in: number of array cells */ + +/*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n); + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n); + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table); + #ifndef UNIV_HOTBACKUP /*************************************************************//** Creates a mutex array to protect a hash table. */ @@ -328,6 +350,33 @@ do {\ }\ } while (0) +/********************************************************************//** +Align nodes with moving location.*/ +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222;\ +\ + if ((TABLE)->array[i2222].node) \ + (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \ + + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\ + node2222 = HASH_GET_FIRST((TABLE), i2222);\ +\ + while (node2222) {\ + if (node2222->PTR_NAME) \ + node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \ + + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\ +\ + node2222 = node2222->PTR_NAME;\ + }\ + }\ +} while (0) + /************************************************************//** Gets the mutex index for a fold value in a hash table. @return mutex number */ diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 8fce4ef96bc..2b4b34f2600 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -672,6 +672,9 @@ extern log_t* log_sys; when mysqld is first time started on the restored database, it can print helpful info for the user */ +#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64 + /* extend to record log_block_size + of XtraDB. 0 means default 512 */ #define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE /* this 4-byte field is TRUE when the writing of an archived log file diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index eeab8a2b5d9..cbbec2cf55e 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -107,7 +107,7 @@ whole block gets written. This should be true even in most cases of a crash: if this fails for a log block, then it is equivalent to a media failure in the log. */ -#define OS_FILE_LOG_BLOCK_SIZE 512 +#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size /** Options for file_create @{ */ #define OS_FILE_OPEN 51 @@ -188,6 +188,8 @@ extern ulint os_n_file_reads; extern ulint os_n_file_writes; extern ulint os_n_fsyncs; +extern ulint srv_log_block_size; + /* File types for directory entry data type */ enum os_file_type_enum{ diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index fd46bd7db87..582cef6f803 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -32,6 +32,11 @@ Created 9/30/1995 Heikki Tuuri #ifdef UNIV_LINUX #include <sys/ipc.h> #include <sys/shm.h> +#else +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H +#include <sys/ipc.h> +#include <sys/shm.h> +# endif #endif typedef void* os_process_t; @@ -70,6 +75,29 @@ os_mem_free_large( ulint size); /*!< in: size returned by os_mem_alloc_large() */ + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new); + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size); /*!< in: size returned by + os_shm_alloc() */ #ifndef UNIV_NONINL #include "os0proc.ic" #endif diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 2cf9ea0ec84..7366e2c3402 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -196,7 +196,7 @@ ulint os_event_wait_time( /*===============*/ os_event_t event, /*!< in: event to wait */ - ulint time); /*!< in: timeout in microseconds, or + ulint wtime); /*!< in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ #ifdef __WIN__ /**********************************************************//** diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h index 9f93565ddb7..810973e61a7 100644 --- a/storage/xtradb/include/row0ins.h +++ b/storage/xtradb/include/row0ins.h @@ -84,9 +84,10 @@ ulint row_ins_index_entry( /*================*/ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - ibool foreign,/*!< in: TRUE=check foreign key constraints */ + ibool foreign,/*!< in: TRUE=check foreign key constraints + (foreign=FALSE only during CREATE INDEX) */ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** Inserts a row to a table. This is a high-level function used in diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h index 4e2de9bd2ec..ea14cd64213 100644 --- a/storage/xtradb/include/row0upd.h +++ b/storage/xtradb/include/row0upd.h @@ -126,8 +126,8 @@ UNIV_INTERN void row_upd_index_entry_sys_field( /*==========================*/ - const dtuple_t* entry, /*!< in: index entry, where the memory buffers - for sys fields are already allocated: + dtuple_t* entry, /*!< in/out: index entry, where the memory + buffers for sys fields are already allocated: the function just copies the new values to them */ dict_index_t* index, /*!< in: clustered index */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 8148dffd13b..f4c9704741c 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -159,6 +159,10 @@ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; +extern uint srv_buffer_pool_shm_key; +extern ibool srv_buffer_pool_shm_is_reused; +extern ibool srv_buffer_pool_shm_checksum; + extern ibool srv_thread_concurrency_timer_based; extern ulint srv_n_file_io_threads; diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h index 8abf15da9c1..7eb7e96bd50 100644 --- a/storage/xtradb/include/srv0start.h +++ b/storage/xtradb/include/srv0start.h @@ -131,4 +131,7 @@ extern enum srv_shutdown_state srv_shutdown_state; /** Log 'spaces' have id's >= this */ #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL +/** reserved for extra system tables */ +#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL + #endif diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 9ef9485b611..2637189f37e 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -470,7 +470,7 @@ trx_sys_file_format_id_to_name( /* Space id and page no where the trx system file copy resides */ #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ -#define TRX_DOUBLEWRITE_SPACE 1 /* the doublewrite buffer tablespace if used */ +#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */ #define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */ #include "fsp0fsp.h" #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic index c7b09d4aec2..5e0f07c8b9d 100644 --- a/storage/xtradb/include/trx0sys.ic +++ b/storage/xtradb/include/trx0sys.ic @@ -81,7 +81,7 @@ trx_sys_sys_space( { if (srv_doublewrite_file) { /* several spaces are reserved */ - return((ibool)(space <= TRX_SYS_SPACE_MAX)); + return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE)); } else { return((ibool)(space == TRX_SYS_SPACE)); } diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 487c291ef80..918ee221094 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 13 -#define PERCONA_INNODB_VERSION 11.6 +#define INNODB_VERSION_BUGFIX 14 +#define PERCONA_INNODB_VERSION 12.5 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -369,6 +369,9 @@ typedef unsigned long long int ullint; /* Maximum value for ib_uint64_t */ #define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL)) +/* The 'undefined' value for ullint */ +#define ULLINT_UNDEFINED ((ullint)(-1)) + /* This 'ibool' type is used within Innobase. Remember that different included headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */ #define ibool ulint diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 7b15c052978..245dfc226c3 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -257,5 +257,48 @@ do { \ ut_a(ut_list_node_313 == NULL); \ } while (0) +/********************************************************************//** +Align nodes with moving location. +@param NAME the name of the list +@param TYPE node type +@param BASE base node (not a pointer to it) +@param OFFSET offset moved */ +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ +do { \ + ulint ut_list_i_313; \ + TYPE* ut_list_node_313; \ + \ + if ((BASE).start) \ + (BASE).start = (void*)((byte*)((BASE).start) \ + + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((BASE).end) \ + (BASE).end = (void*)((byte*)((BASE).end) \ + + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ + \ + ut_list_node_313 = (BASE).start; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + if ((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\ + + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\ + + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ + ut_list_node_313 = (ut_list_node_313->NAME).next; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ + \ + ut_list_node_313 = (BASE).end; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ +} while (0) + #endif diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index 3f14d84ac72..c39f60bd4b9 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1184,6 +1184,9 @@ log_group_file_header_flush( /* Wipe over possible label of ibbackup --restore */ memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); + mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE, + srv_log_block_size); + dest_offset = nth_file * group->file_size; #ifdef UNIV_DEBUG @@ -1777,9 +1780,7 @@ log_group_checkpoint( ulint i; ut_ad(mutex_own(&(log_sys->mutex))); -#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE -# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE" -#endif + ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); buf = group->checkpoint_buf; diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 2aaffe4c031..5702f47ad5f 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -2899,6 +2899,7 @@ recv_init_crash_recovery(void) /*==========================*/ { ut_a(!recv_needed_recovery); + ut_a(!srv_buffer_pool_shm_is_reused); recv_needed_recovery = TRUE; @@ -2956,6 +2957,7 @@ recv_recovery_from_checkpoint_start_func( log_group_t* max_cp_group; log_group_t* up_to_date_group; ulint max_cp_field; + ulint log_hdr_log_block_size; ib_uint64_t checkpoint_lsn; ib_uint64_t checkpoint_no; ib_uint64_t old_scanned_lsn; @@ -3057,6 +3059,20 @@ recv_recovery_from_checkpoint_start_func( log_hdr_buf, max_cp_group); } + log_hdr_log_block_size + = mach_read_from_4(log_hdr_buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE); + if (log_hdr_log_block_size == 0) { + /* 0 means default value */ + log_hdr_log_block_size = 512; + } + if (log_hdr_log_block_size != srv_log_block_size) { + fprintf(stderr, + "InnoDB: Error: The block size of ib_logfile (%lu) " + "is not equal to innodb_log_block_size.\n", + log_hdr_log_block_size); + return(DB_ERROR); + } + #ifdef UNIV_LOG_ARCHIVE group = UT_LIST_GET_FIRST(log_sys->log_groups); diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index 48922886f23..4567d96b6f4 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -229,3 +229,173 @@ os_mem_free_large( } #endif } + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new) +{ + void* ptr; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + ulint size; + int shmid; + + *is_new = FALSE; + fprintf(stderr, + "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n", + key, key); +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX + if (!os_use_large_pages || !os_large_page_size) { + goto skip; + } + + /* Align block size to os_large_page_size */ + ut_ad(ut_is_2pow(os_large_page_size)); + size = ut_2pow_round(*n + (os_large_page_size - 1), + os_large_page_size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: HugeTLB: The shared memory segment exists.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", + size, errno); + goto skip; + } else { + fprintf(stderr, + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", + size, errno); + goto skip; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: HugeTLB: A new shared memory segment has been created .\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + return(ptr); + } +skip: + *is_new = FALSE; +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ +# ifdef HAVE_GETPAGESIZE + size = getpagesize(); +# else + size = UNIV_PAGE_SIZE; +# endif + /* Align block size to system page size */ + ut_ad(ut_is_2pow(size)); + size = *n = ut_2pow_round(*n + (size - 1), size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } else { + fprintf(stderr, + "InnoDB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: A new shared memory segment has been created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + } +end: +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); + ptr = NULL; +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + return(ptr); +} + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size) /*!< in: size returned by + os_shm_alloc() */ +{ + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + os_fast_mutex_unlock(&ut_list_mutex); + +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + if (!shmdt(ptr)) { + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + ut_total_allocated_memory -= size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_FREE(ptr, size); + } +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ +} diff --git a/storage/xtradb/os/os0sync.c b/storage/xtradb/os/os0sync.c index f9ab58c2ee4..dba997927cb 100644 --- a/storage/xtradb/os/os0sync.c +++ b/storage/xtradb/os/os0sync.c @@ -442,12 +442,12 @@ os_event_wait_time( return(1000000); /* dummy value to eliminate compiler warn. */ } #else - int err; - int ret = 0; - ulint tmp; + int err; + int ret = 0; + ulint tmp; ib_int64_t old_count; - struct timeval tv_start; - struct timespec timeout; + struct timeval tv_start; + struct timespec timeout; if (wtime == OS_SYNC_INFINITE_TIME) { os_event_wait(event); diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index d4925e46f97..3372e1480b5 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -1787,7 +1787,7 @@ ulint row_ins_duplicate_error_in_clust( /*=============================*/ btr_cur_t* cursor, /*!< in: B-tree cursor */ - dtuple_t* entry, /*!< in: entry to insert */ + const dtuple_t* entry, /*!< in: entry to insert */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { @@ -1983,7 +1983,7 @@ row_ins_index_entry_low( depending on whether we wish optimistic or pessimistic descent down the index tree */ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ que_thr_t* thr) /*!< in: query thread */ { @@ -2164,9 +2164,10 @@ ulint row_ins_index_entry( /*================*/ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - ibool foreign,/*!< in: TRUE=check foreign key constraints */ + ibool foreign,/*!< in: TRUE=check foreign key constraints + (foreign=FALSE only during CREATE INDEX) */ que_thr_t* thr) /*!< in: query thread */ { ulint err; diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 65102851bdf..402a168f5dd 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -2052,7 +2052,7 @@ row_merge_drop_index( /* Replace this index with another equivalent index for all foreign key constraints on this table where this index is used */ - dict_table_replace_index_in_foreign_list(table, index); + dict_table_replace_index_in_foreign_list(table, index, trx); dict_index_remove_from_cache(table, index); trx->op_info = ""; diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index e9b272abab4..775ea222544 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -875,7 +875,8 @@ row_update_statistics_if_needed( if (counter > 2000000000 || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { - dict_update_statistics(table, TRUE); + dict_update_statistics(table, FALSE /* update even if stats + are initialized */, TRUE); } } @@ -3014,7 +3015,8 @@ next_rec: dict_table_autoinc_lock(table); dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); - dict_update_statistics(table, TRUE); + dict_update_statistics(table, FALSE /* update even if stats are + initialized */, TRUE); trx_commit_for_mysql(trx); diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 82ad30bb390..1ced125b7bd 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -106,6 +106,18 @@ row_sel_sec_rec_is_for_blob( ulint len; byte buf[DICT_MAX_INDEX_COL_LEN]; + ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { + /* The externally stored field was not written yet. + This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ + return(FALSE); + } + len = btr_copy_externally_stored_field_prefix(buf, sizeof buf, zip_size, clust_field, clust_len); diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 444003ba3f0..0dc550b93f5 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -371,8 +371,8 @@ UNIV_INTERN void row_upd_index_entry_sys_field( /*==========================*/ - const dtuple_t* entry, /*!< in: index entry, where the memory buffers - for sys fields are already allocated: + dtuple_t* entry, /*!< in/out: index entry, where the memory + buffers for sys fields are already allocated: the function just copies the new values to them */ dict_index_t* index, /*!< in: clustered index */ @@ -1587,12 +1587,12 @@ static ulint row_upd_clust_rec_by_insert( /*========================*/ - upd_node_t* node, /*!< in: row update node */ + upd_node_t* node, /*!< in/out: row update node */ dict_index_t* index, /*!< in: clustered index of the record */ que_thr_t* thr, /*!< in: query thread */ ibool check_ref,/*!< in: TRUE if index may be referenced in a foreign key constraint */ - mtr_t* mtr) /*!< in: mtr; gets committed here */ + mtr_t* mtr) /*!< in/out: mtr; gets committed here */ { mem_heap_t* heap = NULL; btr_pcur_t* pcur; diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 637b9e2df28..9602a958faf 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -211,6 +211,11 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; +/* key value for shm */ +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; +UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; +UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; + /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; @@ -221,6 +226,9 @@ UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; UNIV_INTERN ulint srv_page_size_shift = 0; UNIV_INTERN ulint srv_page_size = 0; +/* The log block size */ +UNIV_INTERN ulint srv_log_block_size = 0; + /* User settable value of the number of pages that must be present in the buffer cache and accessed sequentially for InnoDB to trigger a readahead request. */ @@ -2683,15 +2691,26 @@ srv_master_thread( ulint n_pages_purged = 0; ulint n_bytes_merged; ulint n_pages_flushed; + ulint n_pages_flushed_prev = 0; ulint n_bytes_archived; ulint n_tables_to_drop; ulint n_ios; ulint n_ios_old; ulint n_ios_very_old; ulint n_pend_ios; + ulint next_itr_time; + ulint prev_adaptive_checkpoint = ULINT_UNDEFINED; + ulint inner_loop = 0; ibool skip_sleep = FALSE; ulint i; + struct t_prev_flush_info_struct { + ulint count; + unsigned space:32; + unsigned offset:32; + ib_uint64_t oldest_modification; + } prev_flush_info; + ib_uint64_t lsn_old; ib_uint64_t oldest_lsn; @@ -2741,15 +2760,20 @@ loop: srv_last_log_flush_time = time(NULL); skip_sleep = FALSE; + next_itr_time = ut_time_ms() + 1000; + for (i = 0; i < 10; i++) { + ulint cur_time = ut_time_ms(); + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; if (!skip_sleep) { + if (next_itr_time > cur_time) { - os_event_wait_time(srv_shutdown_event, 1000000); + os_event_wait_time(srv_shutdown_event, ut_min(1000000, (next_itr_time - cur_time) * 1000)); srv_main_sleeps++; /* @@ -2766,6 +2790,10 @@ loop: */ } + /* Each iteration should happen at 1 second interval. */ + next_itr_time = ut_time_ms() + 1000; + } + skip_sleep = FALSE; /* ALTER TABLE in MySQL requires on Unix that the table handler @@ -2828,6 +2856,7 @@ loop: mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; } else if (srv_adaptive_flushing) { /* Try to keep the rate of flushing of dirty @@ -2853,6 +2882,7 @@ loop: mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; } else if (srv_adaptive_checkpoint == 1) { /* adaptive_flushing option is prior to adaptive_checkpoint option, for now */ @@ -2896,6 +2926,7 @@ loop: mutex_exit(&(log_sys->mutex)); } } + prev_adaptive_checkpoint = 1; } else if (srv_adaptive_checkpoint == 2) { /* Try to keep modified age not to exceed @@ -2978,11 +3009,124 @@ retry_flush_batch: mutex_exit(&(log_sys->mutex)); } } + prev_adaptive_checkpoint = 2; + } else if (srv_adaptive_checkpoint == 3) { + buf_page_t* bpage; + ib_uint64_t lsn; + + mutex_enter(&(log_sys->mutex)); + oldest_lsn = buf_pool_get_oldest_modification(); + lsn = log_sys->lsn; + mutex_exit(&(log_sys->mutex)); + + /* upper loop/sec. (x10) */ + next_itr_time -= 900; /* 1000 - 900 == 100 */ + inner_loop++; + if (inner_loop < 10) { + i--; + } else { + inner_loop = 0; + } + + if (prev_adaptive_checkpoint == 3) { + lint n_flush; + lint blocks_sum, new_blocks_sum, flushed_blocks_sum; + + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; + + /* prev_flush_info should be the previous loop's */ + { + lint blocks_num, new_blocks_num, flushed_blocks_num; + ibool found; + + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list); + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + new_blocks_num = 0; + + found = FALSE; + while (bpage != NULL) { + if (prev_flush_info.space == bpage->space + && prev_flush_info.offset == bpage->offset + && prev_flush_info.oldest_modification + == bpage->oldest_modification) { + found = TRUE; + break; + } + bpage = UT_LIST_GET_NEXT(flush_list, bpage); + new_blocks_num++; + } + if (!found) { + new_blocks_num = blocks_num; + } + + flushed_blocks_num = new_blocks_num + prev_flush_info.count + - blocks_num; + if (flushed_blocks_num < 0) { + flushed_blocks_num = 0; + } + + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + + prev_flush_info.count = UT_LIST_GET_LEN(buf_pool->flush_list); + if (bpage) { + prev_flush_info.space = bpage->space; + prev_flush_info.offset = bpage->offset; + prev_flush_info.oldest_modification = bpage->oldest_modification; + } else { + prev_flush_info.space = 0; + prev_flush_info.offset = 0; + prev_flush_info.oldest_modification = 0; + } + + new_blocks_sum += new_blocks_num; + flushed_blocks_sum += flushed_blocks_num; + blocks_sum += blocks_num; + } + + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async; + if (flushed_blocks_sum > n_pages_flushed_prev) { + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev); + } + + if (n_flush > 0) { + n_flush++; + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, n_flush, + oldest_lsn + (lsn - lsn_old)); + } else { + n_pages_flushed = 0; + } + } else { + /* store previous first pages of the flush_list */ + { + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + + prev_flush_info.count = UT_LIST_GET_LEN(buf_pool->flush_list); + if (bpage) { + prev_flush_info.space = bpage->space; + prev_flush_info.offset = bpage->offset; + prev_flush_info.oldest_modification = bpage->oldest_modification; + } else { + prev_flush_info.space = 0; + prev_flush_info.offset = 0; + prev_flush_info.oldest_modification = 0; + } + } + n_pages_flushed = 0; + } + lsn_old = lsn; + prev_adaptive_checkpoint = 3; } else { mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; + } + + if (n_pages_flushed == ULINT_UNDEFINED) { + n_pages_flushed_prev = 0; + } else { + n_pages_flushed_prev = n_pages_flushed; } if (srv_activity_count == old_activity_count) { diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 27804e78a32..cef045d72e1 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1439,8 +1439,25 @@ innobase_start_or_create_for_mysql(void) fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files); + /* Print time to initialize the buffer pool */ + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Initializing buffer pool, size ="); + + if (srv_buf_pool_size >= 1024 * 1024 * 1024) { + fprintf(stderr, + " %.1fG\n", + ((double) srv_buf_pool_size) / (1024 * 1024 * 1024)); + } else { + fprintf(stderr, + " %.1fM\n", + ((double) srv_buf_pool_size) / (1024 * 1024)); + } + ret = buf_pool_init(); + ut_print_timestamp(stderr); + if (ret == NULL) { fprintf(stderr, "InnoDB: Fatal error: cannot allocate the memory" @@ -1449,6 +1466,9 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + fprintf(stderr, + " InnoDB: Completed initialization of buffer pool\n"); + #ifdef UNIV_DEBUG /* We have observed deadlocks with a 5MB buffer pool but the actual lower limit could very well be a little higher. */ @@ -1639,14 +1659,6 @@ innobase_start_or_create_for_mysql(void) trx_sys_file_format_init(); - if (create_new_doublewrite_file) { - mtr_start(&mtr); - fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); - mtr_commit(&mtr); - - trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); - } - if (create_new_db) { mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); @@ -1654,6 +1666,15 @@ innobase_start_or_create_for_mysql(void) mtr_commit(&mtr); trx_sys_create(); + + if (create_new_doublewrite_file) { + mtr_start(&mtr); + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); + mtr_commit(&mtr); + + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); + } + dict_create(); srv_startup_is_before_trx_rollback_phase = FALSE; @@ -1691,6 +1712,13 @@ innobase_start_or_create_for_mysql(void) recv_recovery_from_archive_finish(); #endif /* UNIV_LOG_ARCHIVE */ } else { + char* save_srv_doublewrite_file = NULL; + + if (create_new_doublewrite_file) { + /* doublewrite_file cannot be used for recovery yet. */ + save_srv_doublewrite_file = srv_doublewrite_file; + srv_doublewrite_file = NULL; + } /* Check if we support the max format that is stamped on the system tablespace. @@ -1716,6 +1744,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. */ + /* buffer_pool_shm should not be reused when recovery was needed. */ + if (!srv_buffer_pool_shm_is_reused) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had @@ -1777,6 +1807,17 @@ innobase_start_or_create_for_mysql(void) we have finished the recovery process so that the image of TRX_SYS_PAGE_NO is not stale. */ trx_sys_file_format_tag_init(); + + if (create_new_doublewrite_file) { + /* restore the value */ + srv_doublewrite_file = save_srv_doublewrite_file; + + mtr_start(&mtr); + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); + mtr_commit(&mtr); + + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); + } } if (!create_new_db && sum_of_new_sizes > 0) { diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index f1018522fdf..0bdffcb98b0 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -334,10 +334,13 @@ rw_lock_validate( /*=============*/ rw_lock_t* lock) /*!< in: rw-lock */ { + ulint waiters; + lint lock_word; + ut_a(lock); - ulint waiters = rw_lock_get_waiters(lock); - lint lock_word = lock->lock_word; + waiters = rw_lock_get_waiters(lock); + lock_word = lock->lock_word; ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); @@ -986,7 +989,7 @@ rw_lock_debug_print( rwt = info->lock_type; - fprintf(stderr, "Locked: thread %ld file %s line %ld ", + fprintf(stderr, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { diff --git a/storage/xtradb/thr/thr0loc.c b/storage/xtradb/thr/thr0loc.c index 5b9e83be920..e702b37afb4 100644 --- a/storage/xtradb/thr/thr0loc.c +++ b/storage/xtradb/thr/thr0loc.c @@ -72,6 +72,24 @@ struct thr_local_struct{ /** The value of thr_local_struct::magic_n */ #define THR_LOCAL_MAGIC_N 1231234 +#ifdef UNIV_DEBUG +/*******************************************************************//** +Validates thread local data. +@return TRUE if valid */ +static +ibool +thr_local_validate( +/*===============*/ + const thr_local_t* local) /*!< in: data to validate */ +{ + ut_ad(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(local->slot_no == ULINT_UNDEFINED + || local->slot_no < OS_THREAD_MAX_N); + ut_ad(local->in_ibuf == FALSE || local->in_ibuf == TRUE); + return(TRUE); +} +#endif /* UNIV_DEBUG */ + /*******************************************************************//** Returns the local storage struct for a thread. @return local storage */ @@ -92,7 +110,8 @@ try_again: local = NULL; HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - thr_local_t*, local,, os_thread_eq(local->id, id)); + thr_local_t*, local, ut_ad(thr_local_validate(local)), + os_thread_eq(local->id, id)); if (local == NULL) { mutex_exit(&thr_local_mutex); @@ -103,7 +122,7 @@ try_again: goto try_again; } - ut_ad(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(local)); return(local); } @@ -189,7 +208,7 @@ thr_local_create(void) local->id = os_thread_get_curr_id(); local->handle = os_thread_get_curr(); local->magic_n = THR_LOCAL_MAGIC_N; - + local->slot_no = ULINT_UNDEFINED; local->in_ibuf = FALSE; mutex_enter(&thr_local_mutex); @@ -217,7 +236,8 @@ thr_local_free( /* Look for the local struct in the hash table */ HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - thr_local_t*, local,, os_thread_eq(local->id, id)); + thr_local_t*, local, ut_ad(thr_local_validate(local)), + os_thread_eq(local->id, id)); if (local == NULL) { mutex_exit(&thr_local_mutex); @@ -231,6 +251,7 @@ thr_local_free( mutex_exit(&thr_local_mutex); ut_a(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(local)); mem_free(local); } @@ -273,6 +294,7 @@ thr_local_close(void) local = HASH_GET_NEXT(hash, prev_local); ut_a(prev_local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(prev_local)); mem_free(prev_local); } } diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index c0b9a73a68c..c11ebb6397e 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -404,6 +404,42 @@ table_cache_create_empty_row( return(row); } +#ifdef UNIV_DEBUG +/*******************************************************************//** +Validates a row in the locks cache. +@return TRUE if valid */ +static +ibool +i_s_locks_row_validate( +/*===================*/ + const i_s_locks_row_t* row) /*!< in: row to validate */ +{ + ut_ad(row->lock_trx_id != 0); + ut_ad(row->lock_mode != NULL); + ut_ad(row->lock_type != NULL); + ut_ad(row->lock_table != NULL); + ut_ad(row->lock_table_id != 0); + + if (row->lock_space == ULINT_UNDEFINED) { + /* table lock */ + ut_ad(!strcmp("TABLE", row->lock_type)); + ut_ad(row->lock_index == NULL); + ut_ad(row->lock_data == NULL); + ut_ad(row->lock_page == ULINT_UNDEFINED); + ut_ad(row->lock_rec == ULINT_UNDEFINED); + } else { + /* record lock */ + ut_ad(!strcmp("RECORD", row->lock_type)); + ut_ad(row->lock_index != NULL); + ut_ad(row->lock_data != NULL); + ut_ad(row->lock_page != ULINT_UNDEFINED); + ut_ad(row->lock_rec != ULINT_UNDEFINED); + } + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + /*******************************************************************//** Fills i_s_trx_row_t object. If memory can not be allocated then FALSE is returned. @@ -431,18 +467,15 @@ fill_trx_row( row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); + row->requested_lock_row = requested_lock_row; + ut_ad(requested_lock_row == NULL + || i_s_locks_row_validate(requested_lock_row)); if (trx->wait_lock != NULL) { - ut_a(requested_lock_row != NULL); - - row->requested_lock_row = requested_lock_row; row->trx_wait_started = (ib_time_t) trx->wait_started; } else { - ut_a(requested_lock_row == NULL); - - row->requested_lock_row = NULL; row->trx_wait_started = 0; } @@ -725,6 +758,7 @@ fill_locks_row( row->lock_table_id = lock_get_table_id(lock); row->hash_chain.value = row; + ut_ad(i_s_locks_row_validate(row)); return(TRUE); } @@ -745,6 +779,9 @@ fill_lock_waits_row( relevant blocking lock row in innodb_locks */ { + ut_ad(i_s_locks_row_validate(requested_lock_row)); + ut_ad(i_s_locks_row_validate(blocking_lock_row)); + row->requested_lock_row = requested_lock_row; row->blocking_lock_row = blocking_lock_row; @@ -816,6 +853,7 @@ locks_row_eq_lock( or ULINT_UNDEFINED if the lock is a table lock */ { + ut_ad(i_s_locks_row_validate(row)); #ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T return(0); #else @@ -873,7 +911,7 @@ search_innodb_locks( /* auxiliary variable */ hash_chain, /* assertion on every traversed item */ - , + ut_ad(i_s_locks_row_validate(hash_chain->value)), /* this determines if we have found the lock */ locks_row_eq_lock(hash_chain->value, lock, heap_no)); @@ -913,6 +951,7 @@ add_lock_to_cache( dst_row = search_innodb_locks(cache, lock, heap_no); if (dst_row != NULL) { + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } #endif @@ -950,6 +989,7 @@ add_lock_to_cache( } /* for()-loop */ #endif + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } diff --git a/storage/xtradb/trx/trx0undo.c b/storage/xtradb/trx/trx0undo.c index 6f06de593a9..9ed83b5d5c1 100644 --- a/storage/xtradb/trx/trx0undo.c +++ b/storage/xtradb/trx/trx0undo.c @@ -1408,7 +1408,7 @@ trx_undo_lists_init( page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS + i * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, &mtr); - if (page_no != FIL_NULL) { + if (page_no != 0 && page_no != FIL_NULL) { srv_extra_undoslots = TRUE; fprintf(stderr, "InnoDB: Error: innodb_extra_undoslots option is disabled, but it was enabled before.\n" |