diff options
author | unknown <knielsen@knielsen-hq.org> | 2010-10-19 17:03:26 +0200 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2010-10-19 17:03:26 +0200 |
commit | 462d14d42bb33aa5e23b5cb301ba28f56b4ca8e5 (patch) | |
tree | 1c76d5df039efabd8da14ea421682596d6e5990e /storage | |
parent | 745cc74c3301f193c7b82c31d11c3bf0c842be9e (diff) | |
parent | c6ccd3f34693198883d905b98638db49ea6e6a76 (diff) | |
download | mariadb-git-462d14d42bb33aa5e23b5cb301ba28f56b4ca8e5.tar.gz |
Merge XtraDB from Percona-server-5.1.51-12 into MariaDB.
Diffstat (limited to 'storage')
35 files changed, 785 insertions, 278 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index 5ebcf1e87a2..43f87a1baf5 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,58 @@ +2010-08-24 The InnoDB Team + + * handler/ha_innodb.c, dict/dict0dict.c: + Fix Bug #55832 selects crash too easily when innodb_force_recovery>3 + +2010-08-03 The InnoDB Team + + * include/dict0dict.h, include/dict0dict.ic, row/row0mysql.c: + Fix bug #54678, InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock + +2010-08-03 The InnoDB Team + + * dict/dict0load.c, handler/ha_innodb.cc, include/db0err.h, + include/dict0load.h, include/dict0mem.h, include/que0que.h, + row/row0merge.c, row/row0mysql.c: + Fix Bug#54582 stack overflow when opening many tables linked + with foreign keys at once + +2010-08-03 The InnoDB Team + + * include/ut0mem.h, ut/ut0mem.c: + Fix Bug #55627 segv in ut_free pars_lexer_close innobase_shutdown + innodb-use-sys-malloc=0 + +2010-08-01 The InnoDB Team + + * handler/ha_innodb.cc + Fix Bug #55382 Assignment with SELECT expressions takes unexpected + S locks in READ COMMITTED +>>>>>>> MERGE-SOURCE + +2010-07-27 The InnoDB Team + + * include/mem0pool.h, mem/mem0mem.c, mem/mem0pool.c, srv/srv0start.c: + Fix Bug#55581 shutdown with innodb-use-sys-malloc=0: assert + mutex->magic_n == MUTEX_MAGIC_N. + +2010-06-30 The InnoDB Team + + * btr/btr0sea.c, ha/ha0ha.c, handler/ha_innodb.cc, include/btr0sea.h: + Fix Bug#54311 Crash on CHECK PARTITION after concurrent LOAD DATA + and adaptive_hash_index=OFF + +2010-06-29 The InnoDB Team + * row/row0row.c, row/row0undo.c, row/row0upd.c: + Fix Bug#54408 txn rollback after recovery: row0umod.c:673 + dict_table_get_format(index->table) + +2010-06-29 The InnoDB Team + + * btr/btr0cur.c, include/btr0cur.h, + include/row0mysql.h, row/row0merge.c, row/row0sel.c: + Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC + or COMPRESSED columns + 2010-06-24 The InnoDB Team * handler/ha_innodb.cc: diff --git a/storage/xtradb/Makefile.am b/storage/xtradb/Makefile.am index 9b8c2e52383..7dffdacbcf1 100644 --- a/storage/xtradb/Makefile.am +++ b/storage/xtradb/Makefile.am @@ -326,7 +326,7 @@ libxtradb_a_SOURCES= \ ut/ut0vec.c \ ut/ut0wqueue.c -libxtradb_a_CXXFLAGS= $(AM_CFLAGS) +libxtradb_a_CXXFLAGS= $(AM_CXXFLAGS) libxtradb_a_CFLAGS= $(AM_CFLAGS) EXTRA_LTLIBRARIES= ha_xtradb.la diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 12c36d08c38..3fc2b48162a 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -5208,7 +5208,7 @@ btr_copy_externally_stored_field( /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( @@ -5238,6 +5238,18 @@ btr_rec_copy_externally_stored_field( data = rec_get_nth_field(rec, offsets, no, &local_len); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { + /* The externally stored field was not written yet. + This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ + return(NULL); + } + return(btr_copy_externally_stored_field(len, data, zip_size, local_len, heap)); } diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c index 1e8b4971ccb..6628333d32a 100644 --- a/storage/xtradb/btr/btr0sea.c +++ b/storage/xtradb/btr/btr0sea.c @@ -46,6 +46,7 @@ Created 2/17/1996 Heikki Tuuri /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ UNIV_INTERN char btr_search_enabled = TRUE; +UNIV_INTERN ibool btr_search_fully_disabled = FALSE; /** Mutex protecting btr_search_enabled */ static mutex_t btr_search_enabled_mutex; @@ -201,12 +202,19 @@ btr_search_disable(void) mutex_enter(&btr_search_enabled_mutex); rw_lock_x_lock(&btr_search_latch); + /* Disable access to hash index, also tell ha_insert_for_fold() + stop adding new nodes to hash index, but still allow updating + existing nodes */ btr_search_enabled = FALSE; /* Clear all block->is_hashed flags and remove all entries from btr_search_sys->hash_index. */ buf_pool_drop_hash_index(); + /* hash index has been cleaned up, disallow any operation to + the hash index */ + btr_search_fully_disabled = TRUE; + /* btr_search_enabled_mutex should guarantee this. */ ut_ad(!btr_search_enabled); @@ -225,6 +233,7 @@ btr_search_enable(void) rw_lock_x_lock(&btr_search_latch); btr_search_enabled = TRUE; + btr_search_fully_disabled = FALSE; rw_lock_x_unlock(&btr_search_latch); mutex_exit(&btr_search_enabled_mutex); @@ -1488,7 +1497,7 @@ btr_search_build_page_hash_index( rw_lock_x_lock(&btr_search_latch); - if (UNIV_UNLIKELY(!btr_search_enabled)) { + if (UNIV_UNLIKELY(btr_search_fully_disabled)) { goto exit_func; } diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index 94a67c1759c..55ff207cf11 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -792,7 +792,7 @@ buf_block_reuse( ptrdiff_t frame_offset) { /* block_init */ - block->frame = ((byte*)(block->frame) + frame_offset); + block->frame += frame_offset; UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); @@ -809,7 +809,7 @@ buf_block_reuse( #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ if (block->page.zip.data) - block->page.zip.data = ((byte*)(block->page.zip.data) + frame_offset); + block->page.zip.data += frame_offset; block->is_hashed = FALSE; @@ -845,6 +845,8 @@ buf_chunk_init( although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + srv_buffer_pool_shm_is_reused = FALSE; + if (srv_buffer_pool_shm_key) { /* zip_hash size */ zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; @@ -870,39 +872,46 @@ buf_chunk_init( ut_a(buf_pool->n_chunks == 1); fprintf(stderr, - "InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n" - "InnoDB: This option may not be safe to keep consistency of datafiles.\n" - "InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n" - "InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n"); + "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n" + "InnoDB: Do not change the following between restarts of the server while this option is being used:\n" + "InnoDB: * the mysqld executable between restarts of the server.\n" + "InnoDB: * the value of innodb_buffer_pool_size.\n" + "InnoDB: * the value of innodb_page_size.\n" + "InnoDB: * datafiles created by InnoDB during this session.\n" + "InnoDB: Otherwise, data corruption in datafiles may result.\n"); /* FIXME: This is vague id still */ - binary_id = (ulint) ((char*)mtr_commit - (char *)btr_root_get) - + (ulint) ((char *)os_get_os_version - (char *)buf_calc_page_new_checksum) - + (ulint) ((char *)page_dir_find_owner_slot - (char *)dfield_data_is_binary_equal) - + (ulint) ((char *)que_graph_publish - (char *)dict_casedn_str) - + (ulint) ((char *)read_view_oldest_copy_or_open_new - (char *)fil_space_get_version) - + (ulint) ((char *)rec_get_n_extern_new - (char *)fsp_get_size_low) - + (ulint) ((char *)row_get_trx_id_offset - (char *)ha_create_func) - + (ulint) ((char *)srv_set_io_thread_op_info - (char *)thd_is_replication_slave_thread) - + (ulint) ((char *)mutex_create_func - (char *)ibuf_inside) - + (ulint) ((char *)trx_set_detailed_error - (char *)lock_check_trx_id_sanity) - + (ulint) ((char *)ut_time - (char *)mem_heap_strdup); + binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get) + + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum) + + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal) + + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str) + + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version) + + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low) + + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func) + + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread) + + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside) + + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity) + + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup); chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } - +init_again: #ifdef UNIV_SET_MEM_TO_ZERO if (is_new) { memset(chunk->mem, '\0', chunk->mem_size); } #endif + /* for ut_fold_binary_32(), these values should be 32-bit aligned */ + ut_a(sizeof(buf_shm_info_t) % 4 == 0); + ut_a((ulint)chunk->mem % 4 == 0); + ut_a(chunk->mem_size % 4 == 0); shm_info = chunk->mem; - zip_hash_tmp = (hash_table_t*)((char *)chunk->mem + chunk->mem_size - zip_hash_mem_size); + zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size); if (is_new) { strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); @@ -932,16 +941,6 @@ buf_chunk_init( "InnoDB: Error: The shared memory was not initialized yet.\n"); return(NULL); } - if (!shm_info->clean) { - fprintf(stderr, - "InnoDB: Error: The shared memory was not shut down cleanly.\n"); - return(NULL); - } - if (!shm_info->reusable) { - fprintf(stderr, - "InnoDB: Error: The shared memory has unrecoverable contents.\n"); - return(NULL); - } if (shm_info->buf_pool_size != srv_buf_pool_size) { fprintf(stderr, "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", @@ -954,14 +953,34 @@ buf_chunk_init( shm_info->page_size, srv_page_size); return(NULL); } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Warning: The shared memory has unrecoverable contents.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Warning: The shared memory was not shut down cleanly.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); ut_a(shm_info->zip_hash_n == zip_hash_n); /* check checksum */ - checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), - chunk->mem_size - sizeof(buf_shm_info_t)); - if (shm_info->checksum != checksum) { + if (srv_buffer_pool_shm_checksum) { + checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + checksum = BUF_NO_CHECKSUM_MAGIC; + } + + if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC + && shm_info->checksum != checksum) { fprintf(stderr, "InnoDB: Error: checksum of the shared memory is not match. " "(stored=%lu calculated=%lu)\n", @@ -979,6 +998,8 @@ buf_chunk_init( } else { /* adjust offset is done later */ hash_create_reuse(zip_hash_tmp); + + srv_buffer_pool_shm_is_reused = TRUE; } } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); @@ -992,7 +1013,7 @@ buf_chunk_init( /* Allocate the block descriptors from the start of the memory block. */ if (srv_buffer_pool_shm_key) { - chunk->blocks = (buf_block_t*)((char*)chunk->mem + sizeof(buf_shm_info_t)); + chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t)); } else { chunk->blocks = chunk->mem; } @@ -1039,10 +1060,10 @@ buf_chunk_init( } chunk->size = shm_info->chunk_backup.size; - phys_offset = (char*)frame - ((char*)chunk->mem + shm_info->frame_offset); - logi_offset = (char *)frame - (char *)chunk->blocks[0].frame; + phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset); + logi_offset = frame - chunk->blocks[0].frame; previous_frame_address = chunk->blocks[0].frame; - blocks_offset = (char *)chunk->blocks - (char *)shm_info->chunk_backup.blocks; + blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks; if (phys_offset || logi_offset || blocks_offset) { fprintf(stderr, @@ -1053,10 +1074,10 @@ buf_chunk_init( "InnoDB: Pysical offset : %ld (%#lx)\n" "InnoDB: Logical offset (frames) : %ld (%#lx)\n" "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", - (char *)chunk->mem + shm_info->frame_offset, + (byte*)chunk->mem + shm_info->frame_offset, chunk->blocks[0].frame, frame, - (ulong) phys_offset, (ulong) phys_offset, (ulong) logi_offset, (ulong) logi_offset, - (ulong) blocks_offset, (ulong) blocks_offset); + (long) phys_offset, (ulong) phys_offset, (long) logi_offset, (ulong) logi_offset, + (long) blocks_offset, (ulong) blocks_offset); } else { fprintf(stderr, "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); @@ -1066,24 +1087,24 @@ buf_chunk_init( fprintf(stderr, "InnoDB: Aligning physical offset..."); - memmove(frame, ((char*)chunk->mem + shm_info->frame_offset), + memmove(frame, (byte*)chunk->mem + shm_info->frame_offset, chunk->size * UNIV_PAGE_SIZE); fprintf(stderr, " Done.\n"); } + /* buf_block_t */ + block = chunk->blocks; + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + if (logi_offset || blocks_offset) { fprintf(stderr, "InnoDB: Aligning logical offset..."); - /* buf_block_t */ - block = chunk->blocks; - - for (i = chunk->size; i--; ) { - buf_block_reuse(block, logi_offset); - block++; - } /* buf_pool_t buf_pool_backup */ UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, @@ -1094,8 +1115,8 @@ buf_chunk_init( previous_frame_address, logi_offset, blocks_offset); if (shm_info->buf_pool_backup.LRU_old) shm_info->buf_pool_backup.LRU_old = - (buf_page_t*)((char*)(shm_info->buf_pool_backup.LRU_old) - + (((byte*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old) + + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) ? logi_offset : blocks_offset)); UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, @@ -1141,7 +1162,7 @@ buf_chunk_init( } if (shm_info) { - shm_info->frame_offset = (char*)chunk->blocks[0].frame - (char*)chunk->mem; + shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem; } return(chunk); @@ -1396,10 +1417,10 @@ buf_pool_init(void) if (srv_buffer_pool_shm_key) { buf_shm_info_t* shm_info; - ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); shm_info = chunk->mem; - buf_pool->zip_hash = (hash_table_t*)((char*)chunk->mem + shm_info->zip_hash_offset); + buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset); if(shm_info->is_new) { shm_info->is_new = FALSE; /* initialization was finished */ @@ -1504,7 +1525,7 @@ buf_pool_free(void) chunk = buf_pool->chunks; shm_info = chunk->mem; - ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); /* validation the shared memory segment doesn't have unrecoverable contents. */ /* Currently, validation became not needed */ @@ -1514,8 +1535,12 @@ buf_pool_free(void) memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); if (srv_fast_shutdown < 2) { - shm_info->checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), - chunk->mem_size - sizeof(buf_shm_info_t)); + if (srv_buffer_pool_shm_checksum) { + shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + shm_info->checksum = BUF_NO_CHECKSUM_MAGIC; + } shm_info->clean = TRUE; } diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 14ec1720873..79c7c0d3bbe 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -2228,6 +2228,26 @@ end: return(ret); } + +typedef struct { + ib_uint32_t space_id; + ib_uint32_t page_no; +} dump_record_t; + +static int dump_record_cmp(const void *a, const void *b) +{ + const dump_record_t *rec1 = (dump_record_t *) a; + const dump_record_t *rec2 = (dump_record_t *) b; + + if (rec1->space_id < rec2->space_id) + return -1; + if (rec1->space_id > rec2->space_id) + return 1; + if (rec1->page_no < rec2->page_no) + return -1; + return rec1->page_no > rec2->page_no; +} + /********************************************************************//** Read the pages based on the specific file.*/ UNIV_INTERN @@ -2245,25 +2265,34 @@ buf_LRU_file_restore(void) ulint req = 0; ibool terminated = FALSE; ibool ret = FALSE; - - buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE); - buffer = ut_align(buffer_base, UNIV_PAGE_SIZE); - if (!buffer) { - fprintf(stderr, - " InnoDB: cannot allocate buffer.\n"); - goto end; - } + dump_record_t* records; + ulint size; + ulint size_high; + ulint length; dump_file = os_file_create_simple_no_error_handling( LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success); - if (!success) { + if (!success || !os_file_get_size(dump_file, &size, &size_high)) { os_file_get_last_error(TRUE); fprintf(stderr, " InnoDB: cannot open %s\n", LRU_DUMP_FILE); goto end; } + if (size == 0 || size_high > 0 || size % 8) { + fprintf(stderr, " InnoDB: broken LRU dump file\n"); + goto end; + } + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE); + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE); + records = ut_malloc(size); + if (!buffer || !records) { + fprintf(stderr, + " InnoDB: cannot allocate buffer.\n"); + goto end; + } buffers = 0; + length = 0; while (!terminated) { success = os_file_read(dump_file, buffer, (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, @@ -2272,15 +2301,14 @@ buf_LRU_file_restore(void) if (!success) { fprintf(stderr, " InnoDB: cannot read page %lu of %s," - " or meet unexpected terminal.", + " or meet unexpected terminal.\n", buffers, LRU_DUMP_FILE); goto end; } for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) { - ulint space_id, zip_size, page_no; - ulint err; - ib_int64_t tablespace_version; + ulint space_id; + ulint page_no; space_id = mach_read_from_4(buffer + offset * 4); page_no = mach_read_from_4(buffer + (offset + 1) * 4); @@ -2290,31 +2318,61 @@ buf_LRU_file_restore(void) break; } - if (offset % 16 == 15) { - os_aio_simulated_wake_handler_threads(); - buf_flush_free_margin(FALSE); + records[length].space_id = space_id; + records[length].page_no = page_no; + length++; + if (length * 8 >= size) { + fprintf(stderr, + " InnoDB: could not find the " + "end-of-file marker after reading " + "the expected %lu bytes from the " + "LRU dump file.\n" + " InnoDB: this could be caused by a " + "broken or incomplete file.\n" + " InnoDB: trying to process what has " + "been read so far.\n", + size); + terminated= TRUE; + break; } + } + buffers++; + } - zip_size = fil_space_get_zip_size(space_id); - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - continue; - } + qsort(records, length, sizeof(dump_record_t), dump_record_cmp); - if (fil_area_is_exist(space_id, zip_size, page_no, 0, - zip_size ? zip_size : UNIV_PAGE_SIZE)) { + for (offset = 0; offset < length; offset++) { + ulint space_id; + ulint page_no; + ulint zip_size; + ulint err; + ib_int64_t tablespace_version; - tablespace_version = fil_space_get_version(space_id); + space_id = records[offset].space_id; + page_no = records[offset].page_no; - req++; - reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER, - space_id, zip_size, TRUE, - tablespace_version, page_no, NULL); - buf_LRU_stat_inc_io(); - } + if (offset % 16 == 15) { + os_aio_simulated_wake_handler_threads(); + buf_flush_free_margin(FALSE); } - buffers++; + zip_size = fil_space_get_zip_size(space_id); + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + continue; + } + + if (fil_area_is_exist(space_id, zip_size, page_no, 0, + zip_size ? zip_size : UNIV_PAGE_SIZE)) { + + tablespace_version = fil_space_get_version(space_id); + + req++; + reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE + | OS_AIO_SIMULATED_WAKE_LATER, + space_id, zip_size, TRUE, + tablespace_version, page_no, NULL); + buf_LRU_stat_inc_io(); + } } os_aio_simulated_wake_handler_threads(); @@ -2330,6 +2388,8 @@ end: os_file_close(dump_file); if (buffer_base) ut_free(buffer_base); + if (records) + ut_free(records); return(ret); } diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index 258bf77d1fc..a6d0e11740a 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -1245,13 +1245,13 @@ dict_create_index_step( goto function_exit; } - if (srv_use_sys_stats_table) { + if (srv_use_sys_stats_table + && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { node->state = INDEX_BUILD_STATS_COLS; } else { node->state = INDEX_CREATE_INDEX_TREE; } } - if (node->state == INDEX_BUILD_STATS_COLS) { if (node->stats_no <= dict_index_get_n_unique(node->index)) { diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index f383d250388..1d0517f5cc7 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -4527,7 +4527,7 @@ dict_update_statistics_low( return; } - if (srv_use_sys_stats_table && !sync) { + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) { /* reload statistics from SYS_STATS table */ if (dict_reload_statistics(table, &sum_of_index_sizes)) { /* success */ @@ -4555,53 +4555,54 @@ dict_update_statistics_low( return; } - do { + do { if (table->is_corrupt) { ut_a(srv_pass_corrupt_table); return; } - if (UNIV_LIKELY - (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE - || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO - && dict_index_is_clust(index)))) { - ulint size; - size = btr_get_size(index, BTR_TOTAL_SIZE); - - index->stat_index_size = size; - - sum_of_index_sizes += size; - - size = btr_get_size(index, BTR_N_LEAF_PAGES); - - if (size == 0) { - /* The root node of the tree is a leaf */ - size = 1; - } - - index->stat_n_leaf_pages = size; - - btr_estimate_number_of_different_key_vals(index); - } else { - /* If we have set a high innodb_force_recovery - level, do not calculate statistics, as a badly - corrupted index can cause a crash in it. - Initialize some bogus index cardinality - statistics, so that the data can be queried in - various means, also via secondary indexes. */ - ulint i; - - sum_of_index_sizes++; - index->stat_index_size = index->stat_n_leaf_pages = 1; - - for (i = dict_index_get_n_unique(index); i; ) { - index->stat_n_diff_key_vals[i--] = 1; - } - } + if (UNIV_LIKELY + (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE + || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO + && dict_index_is_clust(index)))) { + ulint size; + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + + btr_estimate_number_of_different_key_vals(index); + } else { + /* If we have set a high innodb_force_recovery + level, do not calculate statistics, as a badly + corrupted index can cause a crash in it. + Initialize some bogus index cardinality + statistics, so that the data can be queried in + various means, also via secondary indexes. */ + ulint i; + + sum_of_index_sizes++; + index->stat_index_size = index->stat_n_leaf_pages = 1; + + for (i = dict_index_get_n_unique(index); i; ) { + index->stat_n_diff_key_vals[i--] = 1; + } + } + index = dict_table_get_next_index(index); - } while(index); + } while (index); - if (srv_use_sys_stats_table) { + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { /* store statistics to SYS_STATS table */ dict_store_statistics(table); } diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 881f3d91e3c..43c0810fe67 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -1084,6 +1084,8 @@ dict_load_table_on_id( ut_ad(mutex_own(&(dict_sys->mutex))); + table = NULL; + /* NOTE that the operation of this function is protected by the dictionary mutex, and therefore no deadlocks can occur with other dictionary operations. */ @@ -1110,15 +1112,17 @@ dict_load_table_on_id( BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_is_on_user_rec(&pcur)) { /* Not found */ + goto func_exit; + } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + /* Find the first record that is not delete marked */ + while (rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) { + goto func_exit; + } + rec = btr_pcur_get_rec(&pcur); } /*---------------------------------------------------*/ @@ -1131,19 +1135,14 @@ dict_load_table_on_id( /* Check if the table id in record is the one searched for */ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto func_exit; } /* Now we get the table name from the record */ field = rec_get_nth_field_old(rec, 1, &len); /* Load the table definition to memory */ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); - +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index e9dc8185be6..cae91d4037c 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -3043,6 +3043,10 @@ fil_open_single_table_tablespace( if (srv_expand_import && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { + ibool file_is_corrupt = FALSE; + byte* buf3; + byte* descr_page; + ibool descr_is_corrupt = FALSE; dulint old_id[31]; dulint new_id[31]; ulint root_page[31]; @@ -3052,16 +3056,37 @@ fil_open_single_table_tablespace( ulint i; int len; ib_uint64_t current_lsn; - ulint size_low, size_high, size; - ib_int64_t size_bytes; + ulint size_low, size_high, size, free_limit; + ib_int64_t size_bytes, free_limit_bytes; dict_table_t* table; dict_index_t* index; fil_system_t* system; fil_node_t* node = NULL; fil_space_t* space; + buf3 = ut_malloc(2 * UNIV_PAGE_SIZE); + descr_page = ut_align(buf3, UNIV_PAGE_SIZE); + current_lsn = log_get_lsn(); + /* check the header page's consistency */ + if (buf_page_is_corrupted(page, + dict_table_flags_to_zip_size(space_flags))) { + fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath); + file_is_corrupt = TRUE; + descr_is_corrupt = TRUE; + } + + /* store as first descr page */ + memcpy(descr_page, page, UNIV_PAGE_SIZE); + + /* get free limit (page number) of the table space */ +/* these should be same to the definition in fsp0fsp.c */ +#define FSP_HEADER_OFFSET FIL_PAGE_DATA +#define FSP_FREE_LIMIT 12 + free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page); + free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE; + /* overwrite fsp header */ fsp_header_init_fields(page, id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id); @@ -3086,6 +3111,12 @@ fil_open_single_table_tablespace( size_bytes = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low; + if (size_bytes < free_limit_bytes) { + free_limit_bytes = size_bytes; + fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath); + file_is_corrupt = TRUE; + } + /* get cruster index information */ table = dict_table_get_low(name); index = dict_table_get_first_index(table); @@ -3107,16 +3138,19 @@ fil_open_single_table_tablespace( info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success); if (!success) { fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path); + file_is_corrupt = TRUE; goto skip_info; } success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE); if (!success) { fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path); + file_is_corrupt = TRUE; goto skip_info; } if (mach_read_from_4(page) != 0x78706f72UL || mach_read_from_4(page + 4) != 0x74696e66UL) { fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path); + file_is_corrupt = TRUE; goto skip_info; } @@ -3153,20 +3187,29 @@ skip_info: fprintf(stderr, "InnoDB: Progress in %%:"); - for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) { + for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) { ulint checksum_field; ulint old_checksum_field; + ibool page_is_corrupt; success = os_file_read(file, page, (ulint)(offset & 0xFFFFFFFFUL), (ulint)(offset >> 32), UNIV_PAGE_SIZE); - /* skip inconsistent pages, it may be free page. */ + page_is_corrupt = FALSE; + + /* check consistency */ if (memcmp(page + FIL_PAGE_LSN + 4, page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { - goto skip_write; + page_is_corrupt = TRUE; + } + + if (mach_read_from_4(page + FIL_PAGE_OFFSET) + != offset / UNIV_PAGE_SIZE) { + + page_is_corrupt = TRUE; } checksum_field = mach_read_from_4(page @@ -3182,7 +3225,7 @@ skip_info: && old_checksum_field != buf_calc_page_old_checksum(page)) { - goto skip_write; + page_is_corrupt = TRUE; } if (!srv_fast_checksum @@ -3191,7 +3234,7 @@ skip_info: && checksum_field != buf_calc_page_new_checksum(page)) { - goto skip_write; + page_is_corrupt = TRUE; } if (srv_fast_checksum @@ -3202,6 +3245,77 @@ skip_info: && checksum_field != buf_calc_page_new_checksum(page)) { + page_is_corrupt = TRUE; + } + + /* if it is free page, inconsistency is acceptable */ + if (!offset) { + /* header page*/ + /* it should be overwritten already */ + ut_a(!page_is_corrupt); + + } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) { + /* descr page (not header) */ + if (page_is_corrupt) { + file_is_corrupt = TRUE; + descr_is_corrupt = TRUE; + } else { + ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES); + descr_is_corrupt = FALSE; + } + + /* store as descr page */ + memcpy(descr_page, page, UNIV_PAGE_SIZE); + + } else if (descr_is_corrupt) { + /* unknown state of the page */ + if (page_is_corrupt) { + file_is_corrupt = TRUE; + } + + } else { + /* check free page or not */ + /* These definitions should be same to fsp0fsp.c */ +#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE) + +#define XDES_BITMAP (FLST_NODE_SIZE + 12) +#define XDES_BITS_PER_PAGE 2 +#define XDES_FREE_BIT 0 +#define XDES_SIZE \ + (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE)) +#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) + + /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/ + /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/ + byte* descr; + ulint index; + ulint byte_index; + ulint bit_index; + + descr = descr_page + XDES_ARR_OFFSET + + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE); + + index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE); + byte_index = index / 8; + bit_index = index % 8; + + if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) { + /* free page */ + if (page_is_corrupt) { + goto skip_write; + } + } else { + /* not free */ + if (page_is_corrupt) { + file_is_corrupt = TRUE; + } + } + } + + if (page_is_corrupt) { + fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE); + + /* cannot treat corrupt page */ goto skip_write; } @@ -3294,11 +3408,11 @@ skip_info: } skip_write: - if (size_bytes - && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes) - != ((offset * 100) / size_bytes)) { + if (free_limit_bytes + && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes) + != ((offset * 100) / free_limit_bytes)) { fprintf(stderr, " %lu", - (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes)); + (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)); } } @@ -3379,6 +3493,26 @@ skip_write: node->size = size; } mutex_exit(&(system->mutex)); + + ut_free(buf3); + + if (file_is_corrupt) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: file ", + stderr); + ut_print_filename(stderr, filepath); + fprintf(stderr, " seems to be corrupt.\n" + "InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n" + "InnoDB: ##### CAUTION #####\n" + "InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n" + "InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n" + "InnoDB: ###################\n"); + success = FALSE; + + ut_free(buf2); + + goto func_exit; + } } ut_free(buf2); diff --git a/storage/xtradb/ha/ha0ha.c b/storage/xtradb/ha/ha0ha.c index e28b972e61a..7f11917de0a 100644 --- a/storage/xtradb/ha/ha0ha.c +++ b/storage/xtradb/ha/ha0ha.c @@ -31,9 +31,7 @@ Created 8/22/1994 Heikki Tuuri #ifdef UNIV_DEBUG # include "buf0buf.h" #endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG -# include "btr0sea.h" -#endif /* UNIV_SYNC_DEBUG */ +#include "btr0sea.h" #include "page0page.h" /*************************************************************//** @@ -127,7 +125,8 @@ ha_clear( /*************************************************************//** Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node -is inserted. +is inserted. If btr_search_enabled is set to FALSE, we will only allow +updating existing nodes, but no new node is allowed to be added. @return TRUE if succeed, FALSE if no more memory could be allocated */ UNIV_INTERN ibool @@ -174,6 +173,7 @@ ha_insert_for_fold_func( prev_block->n_pointers--; block->n_pointers++; } + ut_ad(!btr_search_fully_disabled); # endif /* !UNIV_HOTBACKUP */ prev_node->block = block; @@ -186,6 +186,13 @@ ha_insert_for_fold_func( prev_node = prev_node->next; } + /* We are in the process of disabling hash index, do not add + new chain node */ + if (!btr_search_enabled) { + ut_ad(!btr_search_fully_disabled); + return(TRUE); + } + /* We have to allocate a new chain node */ node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t)); diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 70516deb005..0f4fc55d895 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -161,7 +161,7 @@ hash_create_init( offset = (sizeof(hash_table_t) + 7) / 8; offset *= 8; - table->array = (hash_cell_t*)(((char*)table) + offset); + table->array = (hash_cell_t*)(((byte*)table) + offset); table->n_cells = prime; # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG table->adaptive = FALSE; @@ -187,7 +187,7 @@ hash_create_reuse( offset = (sizeof(hash_table_t) + 7) / 8; offset *= 8; - table->array = (hash_cell_t*)(((char*)table) + offset); + table->array = (hash_cell_t*)(((byte*)table) + offset); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 3fcd5106386..9e634a07ad4 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -194,6 +194,7 @@ static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_use_sys_stats_table = FALSE; +static my_bool innobase_buffer_pool_shm_checksum = TRUE; static char* internal_innobase_data_file_path = NULL; @@ -2426,6 +2427,7 @@ innobase_change_buffering_inited_ok: srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; srv_use_checksums = (ibool) innobase_use_checksums; srv_fast_checksum = (ibool) innobase_fast_checksum; + srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum; #ifdef HAVE_LARGE_PAGES if ((os_use_large_pages = (ibool) my_use_large_pages)) @@ -2562,6 +2564,7 @@ skip_overwrite: /* Get the current high water mark format. */ innobase_file_format_check = (char*) trx_sys_file_format_max_get(); + btr_search_fully_disabled = (!btr_search_enabled); DBUG_RETURN(FALSE); error: DBUG_RETURN(TRUE); @@ -7962,7 +7965,7 @@ ha_innobase::info( /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ - if (srv_use_sys_stats_table + if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ /* they need to be created. */ @@ -8058,17 +8061,17 @@ ha_innobase::info( are asked by MySQL to avoid locking. Another reason to avoid the call is that it uses quite a lot of CPU. See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK) { - /* We do not update delete_length if no - locking is requested so the "old" value can - remain. delete_length is initialized to 0 in - the ha_statistics' constructor. */ - } else if (UNIV_UNLIKELY - (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) { - /* Avoid accessing the tablespace if - innodb_crash_recovery is set to a high value. */ - stats.delete_length = 0; - } else if (srv_stats_update_need_lock) { + if (flag & HA_STATUS_NO_LOCK) { + /* We do not update delete_length if no + locking is requested so the "old" value can + remain. delete_length is initialized to 0 in + the ha_statistics' constructor. */ + } else if (UNIV_UNLIKELY + (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) { + /* Avoid accessing the tablespace if + innodb_crash_recovery is set to a high value. */ + stats.delete_length = 0; + } else if (srv_stats_update_need_lock) { /* lock the data dictionary to avoid races with ibd_file_missing and tablespace_discarded */ @@ -11382,9 +11385,14 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).", + "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.", NULL, NULL, 0, 0, INT_MAX32, 0); +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable buffer_pool_shm checksum validation (enabled by default).", + NULL, NULL, TRUE); + static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, "Helps in performance tuning in heavily concurrent environments.", @@ -11631,6 +11639,12 @@ static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit, "Limit the allocated memory for dictionary cache. (0: unlimited)", NULL, NULL, 0, 0, LONG_MAX, 0); +static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump, + PLUGIN_VAR_RQCMDARG, + "Time in seconds between automatic buffer pool dumps. " + "0 (the default) disables automatic dumps.", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, PLUGIN_VAR_RQCMDARG, "Pass corruptions of user tables as 'corrupt table' instead of not crashing itself, " @@ -11645,6 +11659,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), MYSQL_SYSVAR(buffer_pool_shm_key), + MYSQL_SYSVAR(buffer_pool_shm_checksum), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), @@ -11722,6 +11737,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(io_capacity), + MYSQL_SYSVAR(auto_lru_dump), MYSQL_SYSVAR(use_purge_thread), MYSQL_SYSVAR(pass_corrupt_table), NULL diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 40d0981990a..e151fdcb563 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -571,7 +571,7 @@ btr_copy_externally_stored_field_prefix( ulint local_len);/*!< in: length of data, in bytes */ /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h index e5f9e129e9b..f6d194319ae 100644 --- a/storage/xtradb/include/btr0sea.h +++ b/storage/xtradb/include/btr0sea.h @@ -201,7 +201,13 @@ btr_search_validate(void); /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ -extern char btr_search_enabled; +extern char btr_search_enabled; + +/** Flag: whether the search system has completed its disabling process, +It is set to TRUE right after buf_pool_drop_hash_index() in +btr_search_disable(), indicating hash index entries are cleaned up. +Protected by btr_search_latch and btr_search_enabled_mutex. */ +extern ibool btr_search_fully_disabled; /** The search info struct in an index */ struct btr_search_struct{ diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index a7854e3038d..e06927f42f0 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1305,7 +1305,7 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */ /* the fold should be relative when srv_buffer_pool_shm_key is enabled */ #define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ - :((ulint) ((char*)ptr - (char*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) + :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index 9dc20cc057f..492c767acc4 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -363,13 +363,13 @@ do {\ NODE_TYPE* node2222;\ \ if ((TABLE)->array[i2222].node) \ - (TABLE)->array[i2222].node = (void*)((char*)(TABLE)->array[i2222].node \ + (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \ + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\ node2222 = HASH_GET_FIRST((TABLE), i2222);\ \ while (node2222) {\ if (node2222->PTR_NAME) \ - node2222->PTR_NAME = (void*)((char*)node2222->PTR_NAME \ + node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \ + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\ \ node2222 = node2222->PTR_NAME;\ diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h index 5e93bf88a47..fa8be296ec9 100644 --- a/storage/xtradb/include/mem0pool.h +++ b/storage/xtradb/include/mem0pool.h @@ -100,18 +100,6 @@ mem_pool_get_reserved( /*==================*/ mem_pool_t* pool); /*!< in: memory pool */ /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN -void -mem_pool_mutex_enter(void); -/*======================*/ -/********************************************************************//** -Releases the mem pool mutex. */ -UNIV_INTERN -void -mem_pool_mutex_exit(void); -/*=====================*/ -/********************************************************************//** Validates a memory pool. @return TRUE if ok */ UNIV_INTERN diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index a604f6e3724..f8fab59ef80 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -630,7 +630,11 @@ struct row_prebuilt_struct { the secondary index, then this is set to TRUE */ unsigned templ_contains_blob:1;/*!< TRUE if the template contains - BLOB column(s) */ + a column with DATA_BLOB == + get_innobase_type_from_mysql_type(); + not to be confused with InnoDB + externally stored columns + (VARCHAR can be off-page too) */ mysql_row_templ_t* mysql_template;/*!< template used to transform rows fast between MySQL and Innobase formats; memory for this template diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 0904a5da1eb..dc455581350 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -157,6 +157,8 @@ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; extern uint srv_buffer_pool_shm_key; +extern ibool srv_buffer_pool_shm_is_reused; +extern ibool srv_buffer_pool_shm_checksum; extern ibool srv_thread_concurrency_timer_based; @@ -340,6 +342,9 @@ extern ulint srv_buf_pool_flushed; reading of a disk page */ extern ulint srv_buf_pool_reads; +/** Time in seconds between automatic buffer pool dumps */ +extern uint srv_auto_lru_dump; + /** Status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; @@ -608,6 +613,16 @@ srv_error_monitor_thread( /*=====================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/*********************************************************************//** +A thread which restores the buffer pool from a dump file on startup and does +periodic buffer pool dumps. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_LRU_dump_restore_thread( +/*====================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ /******************************************************************//** Outputs to a file the output of the InnoDB Monitor. @return FALSE if not all information printed diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 64df60ad6d1..8691e3cf337 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 11 -#define PERCONA_INNODB_VERSION 12.0 +#define INNODB_VERSION_BUGFIX 12 +#define PERCONA_INNODB_VERSION 12.1 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 4305f6571b5..245dfc226c3 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -269,10 +269,10 @@ do { \ TYPE* ut_list_node_313; \ \ if ((BASE).start) \ - (BASE).start = (void*)((char*)((BASE).start) \ + (BASE).start = (void*)((byte*)((BASE).start) \ + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ if ((BASE).end) \ - (BASE).end = (void*)((char*)((BASE).end) \ + (BASE).end = (void*)((byte*)((BASE).end) \ + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ \ ut_list_node_313 = (BASE).start; \ @@ -280,10 +280,10 @@ do { \ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ ut_a(ut_list_node_313); \ if ((ut_list_node_313->NAME).prev) \ - (ut_list_node_313->NAME).prev = (void*)((char*)((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\ + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ if ((ut_list_node_313->NAME).next) \ - (ut_list_node_313->NAME).next = (void *)((char*)((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\ + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ ut_list_node_313 = (ut_list_node_313->NAME).next; \ } \ diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index 7ec4a53e0ea..1ded67d9147 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -4606,7 +4606,7 @@ print_rec: nth_lock++; if (nth_lock >= srv_show_locks_held) { - fputs("TOO LOCKS PRINTED FOR THIS TRX:" + fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:" " SUPPRESSING FURTHER PRINTS\n", file); diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index bbb634addb0..200b3b088a7 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -2901,6 +2901,7 @@ recv_init_crash_recovery(void) /*==========================*/ { ut_a(!recv_needed_recovery); + ut_a(!srv_buffer_pool_shm_is_reused); recv_needed_recovery = TRUE; diff --git a/storage/xtradb/mem/mem0mem.c b/storage/xtradb/mem/mem0mem.c index c0ce8a3e1ac..1dd4db30841 100644 --- a/storage/xtradb/mem/mem0mem.c +++ b/storage/xtradb/mem/mem0mem.c @@ -367,7 +367,7 @@ mem_heap_create_block( block->line = line; #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); if (!mem_block_list_inited) { mem_block_list_inited = TRUE; @@ -376,7 +376,7 @@ mem_heap_create_block( UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif mem_block_set_len(block, len); mem_block_set_type(block, type); @@ -479,11 +479,11 @@ mem_heap_block_free( UT_LIST_REMOVE(list, heap->base, block); #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); UT_LIST_REMOVE(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif ut_ad(heap->total_size >= block->len); @@ -556,7 +556,7 @@ mem_validate_all_blocks(void) { mem_block_t* block; - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); block = UT_LIST_GET_FIRST(mem_block_list); @@ -568,6 +568,6 @@ mem_validate_all_blocks(void) block = UT_LIST_GET_NEXT(mem_block_list, block); } - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); } #endif diff --git a/storage/xtradb/mem/mem0pool.c b/storage/xtradb/mem/mem0pool.c index c4f8af607e0..3291453eeb5 100644 --- a/storage/xtradb/mem/mem0pool.c +++ b/storage/xtradb/mem/mem0pool.c @@ -34,6 +34,7 @@ Created 5/12/1997 Heikki Tuuri #include "ut0lst.h" #include "ut0byte.h" #include "mem0mem.h" +#include "srv0start.h" /* We would like to use also the buffer frames to allocate memory. This would be desirable, because then the memory consumption of the database @@ -121,23 +122,33 @@ mysql@lists.mysql.com */ UNIV_INTERN ulint mem_n_threads_inside = 0; /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN +Reserves the mem pool mutex if we are not in server shutdown. Use +this function only in memory free functions, since only memory +free functions are used during server shutdown. */ +UNIV_INLINE void -mem_pool_mutex_enter(void) -/*======================*/ +mem_pool_mutex_enter( +/*=================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_enter(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_enter(&(pool->mutex)); + } } /********************************************************************//** -Releases the mem pool mutex. */ -UNIV_INTERN +Releases the mem pool mutex if we are not in server shutdown. As +its corresponding mem_pool_mutex_enter() function, use it only +in memory free functions */ +UNIV_INLINE void -mem_pool_mutex_exit(void) -/*=====================*/ +mem_pool_mutex_exit( +/*================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_exit(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_exit(&(pool->mutex)); + } } /********************************************************************//** @@ -567,7 +578,7 @@ mem_area_free( n = ut_2_log(size); - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); mem_n_threads_inside++; ut_a(mem_n_threads_inside == 1); @@ -595,7 +606,7 @@ mem_area_free( pool->reserved += ut_2_exp(n); mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); mem_area_free(new_ptr, pool); @@ -611,7 +622,7 @@ mem_area_free( } mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); ut_ad(mem_pool_validate(pool)); } @@ -630,7 +641,7 @@ mem_pool_validate( ulint free; ulint i; - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); free = 0; @@ -658,7 +669,7 @@ mem_pool_validate( ut_a(free + pool->reserved == pool->size); - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); return(TRUE); } diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index c101db3d179..4567d96b6f4 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -246,12 +246,10 @@ os_shm_alloc( #if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H ulint size; int shmid; -#endif *is_new = FALSE; -#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H fprintf(stderr, - "InnoDB: The shared memory key %#x (%d) is specified.\n", + "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n", key, key); # if defined HAVE_LARGE_PAGES && defined UNIV_LINUX if (!os_use_large_pages || !os_large_page_size) { @@ -268,12 +266,12 @@ os_shm_alloc( if (shmid < 0) { if (errno == EEXIST) { fprintf(stderr, - "InnoDB: HugeTLB: The shared memory segment seems to exist already.\n"); + "InnoDB: HugeTLB: The shared memory segment exists.\n"); shmid = shmget((key_t)key, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W); if (shmid < 0) { fprintf(stderr, - "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", size, errno); goto skip; } else { @@ -282,14 +280,14 @@ os_shm_alloc( } } else { fprintf(stderr, - "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", size, errno); goto skip; } } else { *is_new = TRUE; fprintf(stderr, - "InnoDB: HugeTLB: The new shared memory segment is created.\n"); + "InnoDB: HugeTLB: A new shared memory segment has been created .\n"); } ptr = shmat(shmid, NULL, 0); @@ -325,12 +323,12 @@ skip: if (shmid < 0) { if (errno == EEXIST) { fprintf(stderr, - "InnoDB: The shared memory segment seems to exist already.\n"); + "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n"); shmid = shmget((key_t)key, (size_t)size, SHM_R | SHM_W); if (shmid < 0) { fprintf(stderr, - "InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", size, errno); ptr = NULL; goto end; @@ -340,7 +338,7 @@ skip: } } else { fprintf(stderr, - "InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", size, errno); ptr = NULL; goto end; @@ -348,7 +346,7 @@ skip: } else { *is_new = TRUE; fprintf(stderr, - "InnoDB: The new shared memory segment is created.\n"); + "InnoDB: A new shared memory segment has been created.\n"); } ptr = shmat(shmid, NULL, 0); diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 1b328aed04c..65102851bdf 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -1787,6 +1787,11 @@ row_merge_copy_blobs( (below). */ data = btr_rec_copy_externally_stored_field( mrec, offsets, zip_size, i, &len, heap); + /* Because we have locked the table, any records + written by incomplete transactions must have been + rolled back already. There must not be any incomplete + BLOB columns. */ + ut_a(data); dfield_set_data(field, data, len); } diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c index cb7dfa2b7c9..8e806a14a98 100644 --- a/storage/xtradb/row/row0row.c +++ b/storage/xtradb/row/row0row.c @@ -294,7 +294,13 @@ row_build( ut_ad(dtuple_check_typed(row)); - if (j) { + if (!ext) { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ut_ad(dict_table_get_format(index->table) + < DICT_TF_FORMAT_ZIP); + } else if (j) { *ext = row_ext_create(j, ext_cols, row, dict_table_zip_size(index->table), heap); diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 01e75301e96..a1511e35435 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -416,7 +416,7 @@ row_sel_fetch_columns( field_no))) { /* Copy an externally stored field to the - temporary heap */ + temporary heap, if possible. */ heap = mem_heap_create(1); @@ -425,6 +425,17 @@ row_sel_fetch_columns( dict_table_zip_size(index->table), field_no, &len, heap); + /* data == NULL means that the + externally stored field was not + written yet. This record + should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED + transactions. The InnoDB SQL parser + (the sole caller of this function) + does not implement READ UNCOMMITTED, + and it is not involved during rollback. */ + ut_a(data); ut_a(len != UNIV_SQL_NULL); needs_copy = TRUE; @@ -926,6 +937,7 @@ row_sel_get_clust_rec( when plan->clust_pcur was positioned. The latch will not be released until mtr_commit(mtr). */ + ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets))); row_sel_fetch_columns(index, clust_rec, offsets, UT_LIST_GET_FIRST(plan->columns)); *out_rec = clust_rec; @@ -1628,6 +1640,13 @@ skip_lock: } if (old_vers == NULL) { + /* The record does not exist + in our read view. Skip it, but + first attempt to determine + whether the index segment we + are searching through has been + exhausted. */ + offsets = rec_get_offsets( rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2647,9 +2666,8 @@ Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. -@return TRUE if success, FALSE if could not allocate memory for a BLOB -(though we may also assert in that case) */ -static +@return TRUE on success, FALSE if not all columns could be retrieved */ +static __attribute__((warn_unused_result)) ibool row_sel_store_mysql_rec( /*====================*/ @@ -2719,6 +2737,21 @@ row_sel_store_mysql_rec( dict_table_zip_size(prebuilt->table), templ->rec_field_no, &len, heap); + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active() + or any TRX_ISO_READ_UNCOMMITTED + transactions. */ + + if (extern_field_heap) { + mem_heap_free(extern_field_heap); + } + + return(FALSE); + } + ut_a(len != UNIV_SQL_NULL); } else { /* Field is stored in the row. */ @@ -3136,9 +3169,10 @@ row_sel_pop_cached_row_for_mysql( } /********************************************************************//** -Pushes a row for MySQL to the fetch cache. */ -UNIV_INLINE -void +Pushes a row for MySQL to the fetch cache. +@return TRUE on success, FALSE if the record contains incomplete BLOBs */ +UNIV_INLINE __attribute__((warn_unused_result)) +ibool row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ @@ -3180,10 +3214,11 @@ row_sel_push_cache_row_for_mysql( prebuilt->fetch_cache[ prebuilt->n_fetch_cached], prebuilt, rec, offsets))) { - ut_error; + return(FALSE); } prebuilt->n_fetch_cached++; + return(TRUE); } /*********************************************************************//** @@ -3576,15 +3611,25 @@ row_search_for_mysql( row_sel_try_search_shortcut_for_mysql(). The latch will not be released until mtr_commit(&mtr). */ - ut_ad(!rec_get_deleted_flag(rec, comp)); + ut_ad(!rec_get_deleted_flag(rec, comp)); if (!row_sel_store_mysql_rec(buf, prebuilt, rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - /* We let the main loop to do the - error handling */ - goto shortcut_fails_too_big_rec; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such + records do not exist. Such + records may only be accessed + at the READ UNCOMMITTED + isolation level or when + rolling back a recovered + transaction. Rollback happens + at a lower level, not here. */ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + + /* Proceed as in case SEL_RETRY. */ + break; } mtr_commit(&mtr); @@ -3624,7 +3669,7 @@ release_search_latch_if_needed: default: ut_ad(0); } -shortcut_fails_too_big_rec: + mtr_commit(&mtr); mtr_start(&mtr); } @@ -4218,7 +4263,7 @@ no_gap_lock: rec = old_vers; } - } else { + } else { /* We are looking into a non-clustered index, and to get the right version of the record we have to look also into the clustered index: this @@ -4226,13 +4271,12 @@ no_gap_lock: information via the clustered index record. */ ut_ad(index != clust_index); + ut_ad(!dict_index_is_clust(index)); - ut_ad(!dict_index_is_clust(index)); - - if (!lock_sec_rec_cons_read_sees( - rec, trx->read_view)) { - goto requires_clust_rec; - } + if (!lock_sec_rec_cons_read_sees( + rec, trx->read_view)) { + goto requires_clust_rec; + } } } @@ -4355,13 +4399,13 @@ requires_clust_rec: ULINT_UNDEFINED, &heap); result_rec = rec; } - - /* result_rec can legitimately be delete-marked - now that it has been established that it points to a - clustered index record that exists in the read view. */ + + /* result_rec can legitimately be delete-marked + now that it has been established that it points to a + clustered index record that exists in the read view. */ } else { result_rec = rec; - ut_ad(!rec_get_deleted_flag(rec, comp)); + ut_ad(!rec_get_deleted_flag(rec, comp)); } /* We found a qualifying record 'result_rec'. At this point, @@ -4393,9 +4437,18 @@ requires_clust_rec: not cache rows because there the cursor is a scrollable cursor. */ - row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - offsets); - if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { + if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, + offsets)) { + /* Only fresh inserts may contain incomplete + externally stored columns. Pretend that such + records do not exist. Such records may only be + accessed at the READ UNCOMMITTED isolation + level or when rolling back a recovered + transaction. Rollback happens at a lower + level, not here. */ + ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); + } else if (prebuilt->n_fetch_cached + == MYSQL_FETCH_CACHE_SIZE) { goto got_row; } @@ -4411,9 +4464,17 @@ requires_clust_rec: } else { if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - goto lock_wait_or_error; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such records do + not exist. Such records may only be + accessed at the READ UNCOMMITTED + isolation level or when rolling back a + recovered transaction. Rollback + happens at a lower level, not here. */ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + goto next_rec; } } diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c index 9ef842b5114..fd28a4f6520 100644 --- a/storage/xtradb/row/row0undo.c +++ b/storage/xtradb/row/row0undo.c @@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur( ret = FALSE; } else { + row_ext_t** ext; + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is + no prefix of externally stored columns in the + clustered index record. Build a cache of + column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, - offsets, NULL, &node->ext, node->heap); + offsets, NULL, ext, node->heap); if (node->update) { node->undo_row = dtuple_copy(node->row, node->heap); row_upd_replace(node->undo_row, &node->undo_ext, diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index c1f2bf9ae4a..04c3139fcc7 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -1398,6 +1398,7 @@ row_upd_store_row( dict_index_t* clust_index; rec_t* rec; mem_heap_t* heap = NULL; + row_ext_t** ext; ulint offsets_[REC_OFFS_NORMAL_SIZE]; const ulint* offsets; rec_offs_init(offsets_); @@ -1414,8 +1415,22 @@ row_upd_store_row( offsets = rec_get_offsets(rec, clust_index, offsets_, ULINT_UNDEFINED, &heap); + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is no prefix + of externally stored columns in the clustered index + record. Build a cache of column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored column. + No cache is needed. */ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, - NULL, &node->ext, node->heap); + NULL, ext, node->heap); if (node->is_delete) { node->upd_row = NULL; node->upd_ext = NULL; diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index b9905116603..c1d0f255c64 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -213,6 +213,8 @@ UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; /* key value for shm */ UNIV_INTERN uint srv_buffer_pool_shm_key = 0; +UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; +UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ @@ -307,6 +309,9 @@ UNIV_INTERN ulint srv_buf_pool_flushed = 0; reading of a disk page */ UNIV_INTERN ulint srv_buf_pool_reads = 0; +/** Time in seconds between automatic buffer pool dumps */ +UNIV_INTERN uint srv_auto_lru_dump = 0; + /* structure to pass status variables to MySQL */ UNIV_INTERN export_struc export_vars; @@ -2555,6 +2560,56 @@ loop: OS_THREAD_DUMMY_RETURN; } +/*********************************************************************//** +A thread which restores the buffer pool from a dump file on startup and does +periodic buffer pool dumps. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_LRU_dump_restore_thread( +/*====================*/ + void* arg __attribute__((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + uint auto_lru_dump; + time_t last_dump_time; + time_t time_elapsed; + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "LRU dump/restore thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + + if (srv_auto_lru_dump) + buf_LRU_file_restore(); + + last_dump_time = time(NULL); + +loop: + os_thread_sleep(5000000); + + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { + goto exit_func; + } + + time_elapsed = time(NULL) - last_dump_time; + auto_lru_dump = srv_auto_lru_dump; + if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) { + last_dump_time = time(NULL); + buf_LRU_file_dump(); + } + + goto loop; +exit_func: + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + /*******************************************************************//** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 62ffa366f18..b36faf2d2d7 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -126,9 +126,9 @@ static mutex_t ios_mutex; static ulint ios; /** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 6 + 64]; +static ulint n[SRV_MAX_N_IO_THREADS + 7 + 64]; /** io_handler_thread identifiers */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 64]; +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + 64]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -1719,8 +1719,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. */ - /* TODO: treat more correctly */ - if (!srv_buffer_pool_shm_key) + /* buffer_pool_shm should not be reused when recovery was needed. */ + if (!srv_buffer_pool_shm_is_reused) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had @@ -1835,6 +1835,10 @@ innobase_start_or_create_for_mysql(void) os_thread_create(&srv_monitor_thread, NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS); + /* Create the thread which automaticaly dumps/restore buffer pool */ + os_thread_create(&srv_LRU_dump_restore_thread, NULL, + thread_ids + 5 + SRV_MAX_N_IO_THREADS); + srv_is_being_started = FALSE; if (trx_doublewrite == NULL) { @@ -1859,13 +1863,13 @@ innobase_start_or_create_for_mysql(void) ulint i; os_thread_create(&srv_purge_thread, NULL, thread_ids - + (5 + SRV_MAX_N_IO_THREADS)); + + (6 + SRV_MAX_N_IO_THREADS)); for (i = 0; i < srv_use_purge_thread - 1; i++) { - n[6 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */ + n[7 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */ os_thread_create(&srv_purge_worker_thread, - n + (6 + i + SRV_MAX_N_IO_THREADS), - thread_ids + (6 + i + SRV_MAX_N_IO_THREADS)); + n + (7 + i + SRV_MAX_N_IO_THREADS), + thread_ids + (7 + i + SRV_MAX_N_IO_THREADS)); } } #ifdef UNIV_DEBUG @@ -2214,6 +2218,10 @@ innobase_shutdown_for_mysql(void) log_mem_free(); buf_pool_free(); mem_close(); + + /* ut_free_all_mem() frees all allocated memory not freed yet + in shutdown, and it will also free the ut_list_mutex, so it + should be the last one for all operation */ ut_free_all_mem(); if (os_thread_count != 0 diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index ad4471ada0b..11581a3f2ae 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -541,8 +541,8 @@ start_again: log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n"); + trx_sys_multiple_tablespace_format = TRUE; } - trx_doublewrite_buf_is_being_created = FALSE; } } |