summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
author unknown <knielsen@knielsen-hq.org> 2010-10-19 17:03:26 +0200
committer unknown <knielsen@knielsen-hq.org> 2010-10-19 17:03:26 +0200
commit 462d14d42bb33aa5e23b5cb301ba28f56b4ca8e5 (patch)
tree 1c76d5df039efabd8da14ea421682596d6e5990e /storage
parent 745cc74c3301f193c7b82c31d11c3bf0c842be9e (diff)
parent c6ccd3f34693198883d905b98638db49ea6e6a76 (diff)
download mariadb-git-462d14d42bb33aa5e23b5cb301ba28f56b4ca8e5.tar.gz
Merge XtraDB from Percona-server-5.1.51-12 into MariaDB.
Diffstat (limited to 'storage')
-rw-r--r-- storage/xtradb/ChangeLog 55
-rw-r--r-- storage/xtradb/Makefile.am 2
-rw-r--r-- storage/xtradb/btr/btr0cur.c 14
-rw-r--r-- storage/xtradb/btr/btr0sea.c 11
-rw-r--r-- storage/xtradb/buf/buf0buf.c 135
-rw-r--r-- storage/xtradb/buf/buf0lru.c 122
-rw-r--r-- storage/xtradb/dict/dict0crea.c 4
-rw-r--r-- storage/xtradb/dict/dict0dict.c 83
-rw-r--r-- storage/xtradb/dict/dict0load.c 27
-rw-r--r-- storage/xtradb/fil/fil0fil.c 156
-rw-r--r-- storage/xtradb/ha/ha0ha.c 15
-rw-r--r-- storage/xtradb/ha/hash0hash.c 4
-rw-r--r-- storage/xtradb/handler/ha_innodb.cc 42
-rw-r--r-- storage/xtradb/include/btr0cur.h 2
-rw-r--r-- storage/xtradb/include/btr0sea.h 8
-rw-r--r-- storage/xtradb/include/buf0buf.h 2
-rw-r--r-- storage/xtradb/include/hash0hash.h 4
-rw-r--r-- storage/xtradb/include/mem0pool.h 12
-rw-r--r-- storage/xtradb/include/row0mysql.h 6
-rw-r--r-- storage/xtradb/include/srv0srv.h 15
-rw-r--r-- storage/xtradb/include/univ.i 4
-rw-r--r-- storage/xtradb/include/ut0lst.h 8
-rw-r--r-- storage/xtradb/lock/lock0lock.c 2
-rw-r--r-- storage/xtradb/log/log0recv.c 1
-rw-r--r-- storage/xtradb/mem/mem0mem.c 12
-rw-r--r-- storage/xtradb/mem/mem0pool.c 41
-rw-r--r-- storage/xtradb/os/os0proc.c 20
-rw-r--r-- storage/xtradb/row/row0merge.c 5
-rw-r--r-- storage/xtradb/row/row0row.c 8
-rw-r--r-- storage/xtradb/row/row0sel.c 127
-rw-r--r-- storage/xtradb/row/row0undo.c 18
-rw-r--r-- storage/xtradb/row/row0upd.c 17
-rw-r--r-- storage/xtradb/srv/srv0srv.c 55
-rw-r--r-- storage/xtradb/srv/srv0start.c 24
-rw-r--r-- storage/xtradb/trx/trx0sys.c 2
35 files changed, 785 insertions, 278 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog
index 5ebcf1e87a2..43f87a1baf5 100644
--- a/storage/xtradb/ChangeLog
+++ b/storage/xtradb/ChangeLog
@@ -1,3 +1,58 @@
+2010-08-24 The InnoDB Team
+
+ * handler/ha_innodb.cc, dict/dict0dict.c:
+ Fix Bug #55832 selects crash too easily when innodb_force_recovery>3
+
+2010-08-03 The InnoDB Team
+
+ * include/dict0dict.h, include/dict0dict.ic, row/row0mysql.c:
+ Fix bug #54678, InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock
+
+2010-08-03 The InnoDB Team
+
+ * dict/dict0load.c, handler/ha_innodb.cc, include/db0err.h,
+ include/dict0load.h, include/dict0mem.h, include/que0que.h,
+ row/row0merge.c, row/row0mysql.c:
+ Fix Bug#54582 stack overflow when opening many tables linked
+ with foreign keys at once
+
+2010-08-03 The InnoDB Team
+
+ * include/ut0mem.h, ut/ut0mem.c:
+ Fix Bug #55627 segv in ut_free pars_lexer_close innobase_shutdown
+ innodb-use-sys-malloc=0
+
+2010-08-01 The InnoDB Team
+
+ * handler/ha_innodb.cc
+ Fix Bug #55382 Assignment with SELECT expressions takes unexpected
+ S locks in READ COMMITTED
+
+2010-07-27 The InnoDB Team
+
+ * include/mem0pool.h, mem/mem0mem.c, mem/mem0pool.c, srv/srv0start.c:
+ Fix Bug#55581 shutdown with innodb-use-sys-malloc=0: assert
+ mutex->magic_n == MUTEX_MAGIC_N.
+
+2010-06-30 The InnoDB Team
+
+ * btr/btr0sea.c, ha/ha0ha.c, handler/ha_innodb.cc, include/btr0sea.h:
+ Fix Bug#54311 Crash on CHECK PARTITION after concurrent LOAD DATA
+ and adaptive_hash_index=OFF
+
+2010-06-29 The InnoDB Team
+ * row/row0row.c, row/row0undo.c, row/row0upd.c:
+ Fix Bug#54408 txn rollback after recovery: row0umod.c:673
+ dict_table_get_format(index->table)
+
+2010-06-29 The InnoDB Team
+
+ * btr/btr0cur.c, include/btr0cur.h,
+ include/row0mysql.h, row/row0merge.c, row/row0sel.c:
+ Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC
+ or COMPRESSED columns
+
2010-06-24 The InnoDB Team
* handler/ha_innodb.cc:
diff --git a/storage/xtradb/Makefile.am b/storage/xtradb/Makefile.am
index 9b8c2e52383..7dffdacbcf1 100644
--- a/storage/xtradb/Makefile.am
+++ b/storage/xtradb/Makefile.am
@@ -326,7 +326,7 @@ libxtradb_a_SOURCES= \
ut/ut0vec.c \
ut/ut0wqueue.c
-libxtradb_a_CXXFLAGS= $(AM_CFLAGS)
+libxtradb_a_CXXFLAGS= $(AM_CXXFLAGS)
libxtradb_a_CFLAGS= $(AM_CFLAGS)
EXTRA_LTLIBRARIES= ha_xtradb.la
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c
index 12c36d08c38..3fc2b48162a 100644
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@@ -5208,7 +5208,7 @@ btr_copy_externally_stored_field(
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
@@ -5238,6 +5238,18 @@ btr_rec_copy_externally_stored_field(
data = rec_get_nth_field(rec, offsets, no, &local_len);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ if (UNIV_UNLIKELY
+ (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+ /* The externally stored field was not written yet.
+ This record should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED transactions. */
+ return(NULL);
+ }
+
return(btr_copy_externally_stored_field(len, data,
zip_size, local_len, heap));
}
diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c
index 1e8b4971ccb..6628333d32a 100644
--- a/storage/xtradb/btr/btr0sea.c
+++ b/storage/xtradb/btr/btr0sea.c
@@ -46,6 +46,7 @@ Created 2/17/1996 Heikki Tuuri
/** Flag: has the search system been enabled?
Protected by btr_search_latch and btr_search_enabled_mutex. */
UNIV_INTERN char btr_search_enabled = TRUE;
+UNIV_INTERN ibool btr_search_fully_disabled = FALSE;
/** Mutex protecting btr_search_enabled */
static mutex_t btr_search_enabled_mutex;
@@ -201,12 +202,19 @@ btr_search_disable(void)
mutex_enter(&btr_search_enabled_mutex);
rw_lock_x_lock(&btr_search_latch);
+ /* Disable access to the hash index, and also tell ha_insert_for_fold()
+ to stop adding new nodes to the hash index, while still allowing
+ existing nodes to be updated. */
btr_search_enabled = FALSE;
/* Clear all block->is_hashed flags and remove all entries
from btr_search_sys->hash_index. */
buf_pool_drop_hash_index();
+ /* hash index has been cleaned up, disallow any operation to
+ the hash index */
+ btr_search_fully_disabled = TRUE;
+
/* btr_search_enabled_mutex should guarantee this. */
ut_ad(!btr_search_enabled);
@@ -225,6 +233,7 @@ btr_search_enable(void)
rw_lock_x_lock(&btr_search_latch);
btr_search_enabled = TRUE;
+ btr_search_fully_disabled = FALSE;
rw_lock_x_unlock(&btr_search_latch);
mutex_exit(&btr_search_enabled_mutex);
@@ -1488,7 +1497,7 @@ btr_search_build_page_hash_index(
rw_lock_x_lock(&btr_search_latch);
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
+ if (UNIV_UNLIKELY(btr_search_fully_disabled)) {
goto exit_func;
}
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index 94a67c1759c..55ff207cf11 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -792,7 +792,7 @@ buf_block_reuse(
ptrdiff_t frame_offset)
{
/* block_init */
- block->frame = ((byte*)(block->frame) + frame_offset);
+ block->frame += frame_offset;
UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
@@ -809,7 +809,7 @@ buf_block_reuse(
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
if (block->page.zip.data)
- block->page.zip.data = ((byte*)(block->page.zip.data) + frame_offset);
+ block->page.zip.data += frame_offset;
block->is_hashed = FALSE;
@@ -845,6 +845,8 @@ buf_chunk_init(
although it already should be. */
mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
+ srv_buffer_pool_shm_is_reused = FALSE;
+
if (srv_buffer_pool_shm_key) {
/* zip_hash size */
zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
@@ -870,39 +872,46 @@ buf_chunk_init(
ut_a(buf_pool->n_chunks == 1);
fprintf(stderr,
- "InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n"
- "InnoDB: This option may not be safe to keep consistency of datafiles.\n"
- "InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n"
- "InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n");
+ "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
+ "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
+ "InnoDB: * the mysqld executable between restarts of the server.\n"
+ "InnoDB: * the value of innodb_buffer_pool_size.\n"
+ "InnoDB: * the value of innodb_page_size.\n"
+ "InnoDB: * datafiles created by InnoDB during this session.\n"
+ "InnoDB: Otherwise, data corruption in datafiles may result.\n");
/* FIXME: This is vague id still */
- binary_id = (ulint) ((char*)mtr_commit - (char *)btr_root_get)
- + (ulint) ((char *)os_get_os_version - (char *)buf_calc_page_new_checksum)
- + (ulint) ((char *)page_dir_find_owner_slot - (char *)dfield_data_is_binary_equal)
- + (ulint) ((char *)que_graph_publish - (char *)dict_casedn_str)
- + (ulint) ((char *)read_view_oldest_copy_or_open_new - (char *)fil_space_get_version)
- + (ulint) ((char *)rec_get_n_extern_new - (char *)fsp_get_size_low)
- + (ulint) ((char *)row_get_trx_id_offset - (char *)ha_create_func)
- + (ulint) ((char *)srv_set_io_thread_op_info - (char *)thd_is_replication_slave_thread)
- + (ulint) ((char *)mutex_create_func - (char *)ibuf_inside)
- + (ulint) ((char *)trx_set_detailed_error - (char *)lock_check_trx_id_sanity)
- + (ulint) ((char *)ut_time - (char *)mem_heap_strdup);
+ binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
+ + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
+ + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
+ + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
+ + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
+ + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
+ + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
+ + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
+ + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
+ + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
+ + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
return(NULL);
}
-
+init_again:
#ifdef UNIV_SET_MEM_TO_ZERO
if (is_new) {
memset(chunk->mem, '\0', chunk->mem_size);
}
#endif
+ /* for ut_fold_binary_32(), these values should be 32-bit aligned */
+ ut_a(sizeof(buf_shm_info_t) % 4 == 0);
+ ut_a((ulint)chunk->mem % 4 == 0);
+ ut_a(chunk->mem_size % 4 == 0);
shm_info = chunk->mem;
- zip_hash_tmp = (hash_table_t*)((char *)chunk->mem + chunk->mem_size - zip_hash_mem_size);
+ zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
if (is_new) {
strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
@@ -932,16 +941,6 @@ buf_chunk_init(
"InnoDB: Error: The shared memory was not initialized yet.\n");
return(NULL);
}
- if (!shm_info->clean) {
- fprintf(stderr,
- "InnoDB: Error: The shared memory was not shut down cleanly.\n");
- return(NULL);
- }
- if (!shm_info->reusable) {
- fprintf(stderr,
- "InnoDB: Error: The shared memory has unrecoverable contents.\n");
- return(NULL);
- }
if (shm_info->buf_pool_size != srv_buf_pool_size) {
fprintf(stderr,
"InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
@@ -954,14 +953,34 @@ buf_chunk_init(
shm_info->page_size, srv_page_size);
return(NULL);
}
+ if (!shm_info->reusable) {
+ fprintf(stderr,
+ "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
+ "InnoDB: The shared memory segment is initialized.\n");
+ is_new = TRUE;
+ goto init_again;
+ }
+ if (!shm_info->clean) {
+ fprintf(stderr,
+ "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
+ "InnoDB: The shared memory segment is initialized.\n");
+ is_new = TRUE;
+ goto init_again;
+ }
ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
ut_a(shm_info->zip_hash_n == zip_hash_n);
/* check checksum */
- checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t),
- chunk->mem_size - sizeof(buf_shm_info_t));
- if (shm_info->checksum != checksum) {
+ if (srv_buffer_pool_shm_checksum) {
+ checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
+ chunk->mem_size - sizeof(buf_shm_info_t));
+ } else {
+ checksum = BUF_NO_CHECKSUM_MAGIC;
+ }
+
+ if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
+ && shm_info->checksum != checksum) {
fprintf(stderr,
"InnoDB: Error: checksum of the shared memory is not match. "
"(stored=%lu calculated=%lu)\n",
@@ -979,6 +998,8 @@ buf_chunk_init(
} else {
/* adjust offset is done later */
hash_create_reuse(zip_hash_tmp);
+
+ srv_buffer_pool_shm_is_reused = TRUE;
}
} else {
chunk->mem = os_mem_alloc_large(&chunk->mem_size);
@@ -992,7 +1013,7 @@ buf_chunk_init(
/* Allocate the block descriptors from
the start of the memory block. */
if (srv_buffer_pool_shm_key) {
- chunk->blocks = (buf_block_t*)((char*)chunk->mem + sizeof(buf_shm_info_t));
+ chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
} else {
chunk->blocks = chunk->mem;
}
@@ -1039,10 +1060,10 @@ buf_chunk_init(
}
chunk->size = shm_info->chunk_backup.size;
- phys_offset = (char*)frame - ((char*)chunk->mem + shm_info->frame_offset);
- logi_offset = (char *)frame - (char *)chunk->blocks[0].frame;
+ phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
+ logi_offset = frame - chunk->blocks[0].frame;
previous_frame_address = chunk->blocks[0].frame;
- blocks_offset = (char *)chunk->blocks - (char *)shm_info->chunk_backup.blocks;
+ blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
if (phys_offset || logi_offset || blocks_offset) {
fprintf(stderr,
@@ -1053,10 +1074,10 @@ buf_chunk_init(
"InnoDB: Pysical offset : %ld (%#lx)\n"
"InnoDB: Logical offset (frames) : %ld (%#lx)\n"
"InnoDB: Logical offset (blocks) : %ld (%#lx)\n",
- (char *)chunk->mem + shm_info->frame_offset,
+ (byte*)chunk->mem + shm_info->frame_offset,
chunk->blocks[0].frame, frame,
- (ulong) phys_offset, (ulong) phys_offset, (ulong) logi_offset, (ulong) logi_offset,
- (ulong) blocks_offset, (ulong) blocks_offset);
+ (long) phys_offset, (ulong) phys_offset, (long) logi_offset, (ulong) logi_offset,
+ (long) blocks_offset, (ulong) blocks_offset);
} else {
fprintf(stderr,
"InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
@@ -1066,24 +1087,24 @@ buf_chunk_init(
fprintf(stderr,
"InnoDB: Aligning physical offset...");
- memmove(frame, ((char*)chunk->mem + shm_info->frame_offset),
+ memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
chunk->size * UNIV_PAGE_SIZE);
fprintf(stderr,
" Done.\n");
}
+ /* buf_block_t */
+ block = chunk->blocks;
+ for (i = chunk->size; i--; ) {
+ buf_block_reuse(block, logi_offset);
+ block++;
+ }
+
if (logi_offset || blocks_offset) {
fprintf(stderr,
"InnoDB: Aligning logical offset...");
- /* buf_block_t */
- block = chunk->blocks;
-
- for (i = chunk->size; i--; ) {
- buf_block_reuse(block, logi_offset);
- block++;
- }
/* buf_pool_t buf_pool_backup */
UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
@@ -1094,8 +1115,8 @@ buf_chunk_init(
previous_frame_address, logi_offset, blocks_offset);
if (shm_info->buf_pool_backup.LRU_old)
shm_info->buf_pool_backup.LRU_old =
- (buf_page_t*)((char*)(shm_info->buf_pool_backup.LRU_old)
- + (((byte*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
+ (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
+ + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
? logi_offset : blocks_offset));
UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
@@ -1141,7 +1162,7 @@ buf_chunk_init(
}
if (shm_info) {
- shm_info->frame_offset = (char*)chunk->blocks[0].frame - (char*)chunk->mem;
+ shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
}
return(chunk);
@@ -1396,10 +1417,10 @@ buf_pool_init(void)
if (srv_buffer_pool_shm_key) {
buf_shm_info_t* shm_info;
- ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t));
+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
shm_info = chunk->mem;
- buf_pool->zip_hash = (hash_table_t*)((char*)chunk->mem + shm_info->zip_hash_offset);
+ buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
if(shm_info->is_new) {
shm_info->is_new = FALSE; /* initialization was finished */
@@ -1504,7 +1525,7 @@ buf_pool_free(void)
chunk = buf_pool->chunks;
shm_info = chunk->mem;
- ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t));
+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
/* validation the shared memory segment doesn't have unrecoverable contents. */
/* Currently, validation became not needed */
@@ -1514,8 +1535,12 @@ buf_pool_free(void)
memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
if (srv_fast_shutdown < 2) {
- shm_info->checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t),
- chunk->mem_size - sizeof(buf_shm_info_t));
+ if (srv_buffer_pool_shm_checksum) {
+ shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
+ chunk->mem_size - sizeof(buf_shm_info_t));
+ } else {
+ shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
+ }
shm_info->clean = TRUE;
}
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index 14ec1720873..79c7c0d3bbe 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -2228,6 +2228,26 @@ end:
return(ret);
}
+
+/** One entry of the LRU dump: identifies a page by the tablespace it
+belongs to and its page number within that tablespace. */
+typedef struct {
+	ib_uint32_t	space_id;	/*!< tablespace id */
+	ib_uint32_t	page_no;	/*!< page number */
+} dump_record_t;
+
+/********************************************************************//**
+Comparison callback for qsort() over an array of dump_record_t.
+Orders the records by space_id first and by page_no second, both
+ascending.
+@return -1 if *a sorts before *b, 1 if it sorts after, 0 if both
+records name the same page */
+static int dump_record_cmp(const void *a, const void *b)
+{
+	/* Keep the const qualifier in the casts: the comparator must not
+	modify the elements, and dropping const here would trigger
+	cast-qualifier warnings. */
+	const dump_record_t *rec1 = (const dump_record_t *) a;
+	const dump_record_t *rec2 = (const dump_record_t *) b;
+
+	if (rec1->space_id < rec2->space_id) {
+		return -1;
+	}
+	if (rec1->space_id > rec2->space_id) {
+		return 1;
+	}
+
+	/* Same tablespace: fall back to the page number. */
+	if (rec1->page_no < rec2->page_no) {
+		return -1;
+	}
+	return rec1->page_no > rec2->page_no;
+}
+
/********************************************************************//**
Read the pages based on the specific file.*/
UNIV_INTERN
@@ -2245,25 +2265,34 @@ buf_LRU_file_restore(void)
ulint req = 0;
ibool terminated = FALSE;
ibool ret = FALSE;
-
- buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
- buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
- if (!buffer) {
- fprintf(stderr,
- " InnoDB: cannot allocate buffer.\n");
- goto end;
- }
+ dump_record_t* records;
+ ulint size;
+ ulint size_high;
+ ulint length;
dump_file = os_file_create_simple_no_error_handling(
LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
+ if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
os_file_get_last_error(TRUE);
fprintf(stderr,
" InnoDB: cannot open %s\n", LRU_DUMP_FILE);
goto end;
}
+ if (size == 0 || size_high > 0 || size % 8) {
+ fprintf(stderr, " InnoDB: broken LRU dump file\n");
+ goto end;
+ }
+ buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
+ buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
+ records = ut_malloc(size);
+ if (!buffer || !records) {
+ fprintf(stderr,
+ " InnoDB: cannot allocate buffer.\n");
+ goto end;
+ }
buffers = 0;
+ length = 0;
while (!terminated) {
success = os_file_read(dump_file, buffer,
(buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
@@ -2272,15 +2301,14 @@ buf_LRU_file_restore(void)
if (!success) {
fprintf(stderr,
" InnoDB: cannot read page %lu of %s,"
- " or meet unexpected terminal.",
+ " or meet unexpected terminal.\n",
buffers, LRU_DUMP_FILE);
goto end;
}
for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
- ulint space_id, zip_size, page_no;
- ulint err;
- ib_int64_t tablespace_version;
+ ulint space_id;
+ ulint page_no;
space_id = mach_read_from_4(buffer + offset * 4);
page_no = mach_read_from_4(buffer + (offset + 1) * 4);
@@ -2290,31 +2318,61 @@ buf_LRU_file_restore(void)
break;
}
- if (offset % 16 == 15) {
- os_aio_simulated_wake_handler_threads();
- buf_flush_free_margin(FALSE);
+ records[length].space_id = space_id;
+ records[length].page_no = page_no;
+ length++;
+ if (length * 8 >= size) {
+ fprintf(stderr,
+ " InnoDB: could not find the "
+ "end-of-file marker after reading "
+ "the expected %lu bytes from the "
+ "LRU dump file.\n"
+ " InnoDB: this could be caused by a "
+ "broken or incomplete file.\n"
+ " InnoDB: trying to process what has "
+ "been read so far.\n",
+ size);
+ terminated= TRUE;
+ break;
}
+ }
+ buffers++;
+ }
- zip_size = fil_space_get_zip_size(space_id);
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- continue;
- }
+ qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
- if (fil_area_is_exist(space_id, zip_size, page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+ for (offset = 0; offset < length; offset++) {
+ ulint space_id;
+ ulint page_no;
+ ulint zip_size;
+ ulint err;
+ ib_int64_t tablespace_version;
- tablespace_version = fil_space_get_version(space_id);
+ space_id = records[offset].space_id;
+ page_no = records[offset].page_no;
- req++;
- reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER,
- space_id, zip_size, TRUE,
- tablespace_version, page_no, NULL);
- buf_LRU_stat_inc_io();
- }
+ if (offset % 16 == 15) {
+ os_aio_simulated_wake_handler_threads();
+ buf_flush_free_margin(FALSE);
}
- buffers++;
+ zip_size = fil_space_get_zip_size(space_id);
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+ continue;
+ }
+
+ if (fil_area_is_exist(space_id, zip_size, page_no, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+
+ tablespace_version = fil_space_get_version(space_id);
+
+ req++;
+ reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+ | OS_AIO_SIMULATED_WAKE_LATER,
+ space_id, zip_size, TRUE,
+ tablespace_version, page_no, NULL);
+ buf_LRU_stat_inc_io();
+ }
}
os_aio_simulated_wake_handler_threads();
@@ -2330,6 +2388,8 @@ end:
os_file_close(dump_file);
if (buffer_base)
ut_free(buffer_base);
+ if (records)
+ ut_free(records);
return(ret);
}
diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c
index 258bf77d1fc..a6d0e11740a 100644
--- a/storage/xtradb/dict/dict0crea.c
+++ b/storage/xtradb/dict/dict0crea.c
@@ -1245,13 +1245,13 @@ dict_create_index_step(
goto function_exit;
}
- if (srv_use_sys_stats_table) {
+ if (srv_use_sys_stats_table
+ && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
node->state = INDEX_BUILD_STATS_COLS;
} else {
node->state = INDEX_CREATE_INDEX_TREE;
}
}
-
if (node->state == INDEX_BUILD_STATS_COLS) {
if (node->stats_no <= dict_index_get_n_unique(node->index)) {
diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c
index f383d250388..1d0517f5cc7 100644
--- a/storage/xtradb/dict/dict0dict.c
+++ b/storage/xtradb/dict/dict0dict.c
@@ -4527,7 +4527,7 @@ dict_update_statistics_low(
return;
}
- if (srv_use_sys_stats_table && !sync) {
+ if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) {
/* reload statistics from SYS_STATS table */
if (dict_reload_statistics(table, &sum_of_index_sizes)) {
/* success */
@@ -4555,53 +4555,54 @@ dict_update_statistics_low(
return;
}
- do {
+ do {
if (table->is_corrupt) {
ut_a(srv_pass_corrupt_table);
return;
}
- if (UNIV_LIKELY
- (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
- || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
- && dict_index_is_clust(index)))) {
- ulint size;
- size = btr_get_size(index, BTR_TOTAL_SIZE);
-
- index->stat_index_size = size;
-
- sum_of_index_sizes += size;
-
- size = btr_get_size(index, BTR_N_LEAF_PAGES);
-
- if (size == 0) {
- /* The root node of the tree is a leaf */
- size = 1;
- }
-
- index->stat_n_leaf_pages = size;
-
- btr_estimate_number_of_different_key_vals(index);
- } else {
- /* If we have set a high innodb_force_recovery
- level, do not calculate statistics, as a badly
- corrupted index can cause a crash in it.
- Initialize some bogus index cardinality
- statistics, so that the data can be queried in
- various means, also via secondary indexes. */
- ulint i;
-
- sum_of_index_sizes++;
- index->stat_index_size = index->stat_n_leaf_pages = 1;
-
- for (i = dict_index_get_n_unique(index); i; ) {
- index->stat_n_diff_key_vals[i--] = 1;
- }
- }
+ if (UNIV_LIKELY
+ (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ ulint size;
+ size = btr_get_size(index, BTR_TOTAL_SIZE);
+
+ index->stat_index_size = size;
+
+ sum_of_index_sizes += size;
+
+ size = btr_get_size(index, BTR_N_LEAF_PAGES);
+
+ if (size == 0) {
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
+ } else {
+ /* If we have set a high innodb_force_recovery
+ level, do not calculate statistics, as a badly
+ corrupted index can cause a crash in it.
+ Initialize some bogus index cardinality
+ statistics, so that the data can be queried in
+ various means, also via secondary indexes. */
+ ulint i;
+
+ sum_of_index_sizes++;
+ index->stat_index_size = index->stat_n_leaf_pages = 1;
+
+ for (i = dict_index_get_n_unique(index); i; ) {
+ index->stat_n_diff_key_vals[i--] = 1;
+ }
+ }
+
index = dict_table_get_next_index(index);
- } while(index);
+ } while (index);
- if (srv_use_sys_stats_table) {
+ if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
/* store statistics to SYS_STATS table */
dict_store_statistics(table);
}
diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c
index 881f3d91e3c..43c0810fe67 100644
--- a/storage/xtradb/dict/dict0load.c
+++ b/storage/xtradb/dict/dict0load.c
@@ -1084,6 +1084,8 @@ dict_load_table_on_id(
ut_ad(mutex_own(&(dict_sys->mutex)));
+ table = NULL;
+
/* NOTE that the operation of this function is protected by
the dictionary mutex, and therefore no deadlocks can occur
with other dictionary operations. */
@@ -1110,15 +1112,17 @@ dict_load_table_on_id(
BTR_SEARCH_LEAF, &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* Not found */
+ goto func_exit;
+ }
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
+ /* Find the first record that is not delete marked */
+ while (rec_get_deleted_flag(rec, 0)) {
+ if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+ goto func_exit;
+ }
+ rec = btr_pcur_get_rec(&pcur);
}
/*---------------------------------------------------*/
@@ -1131,19 +1135,14 @@ dict_load_table_on_id(
/* Check if the table id in record is the one searched for */
if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
+ goto func_exit;
}
/* Now we get the table name from the record */
field = rec_get_nth_field_old(rec, 1, &len);
/* Load the table definition to memory */
table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len));
-
+func_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
mem_heap_free(heap);
diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c
index e9dc8185be6..cae91d4037c 100644
--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@@ -3043,6 +3043,10 @@ fil_open_single_table_tablespace(
if (srv_expand_import
&& (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
+ ibool file_is_corrupt = FALSE;
+ byte* buf3;
+ byte* descr_page;
+ ibool descr_is_corrupt = FALSE;
dulint old_id[31];
dulint new_id[31];
ulint root_page[31];
@@ -3052,16 +3056,37 @@ fil_open_single_table_tablespace(
ulint i;
int len;
ib_uint64_t current_lsn;
- ulint size_low, size_high, size;
- ib_int64_t size_bytes;
+ ulint size_low, size_high, size, free_limit;
+ ib_int64_t size_bytes, free_limit_bytes;
dict_table_t* table;
dict_index_t* index;
fil_system_t* system;
fil_node_t* node = NULL;
fil_space_t* space;
+ buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
+ descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
+
current_lsn = log_get_lsn();
+ /* check the header page's consistency */
+ if (buf_page_is_corrupted(page,
+ dict_table_flags_to_zip_size(space_flags))) {
+ fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
+ file_is_corrupt = TRUE;
+ descr_is_corrupt = TRUE;
+ }
+
+ /* store as first descr page */
+ memcpy(descr_page, page, UNIV_PAGE_SIZE);
+
+ /* get free limit (page number) of the table space */
+/* These should be the same as the definitions in fsp0fsp.c. */
+#define FSP_HEADER_OFFSET FIL_PAGE_DATA
+#define FSP_FREE_LIMIT 12
+ free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
+ free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
+
/* overwrite fsp header */
fsp_header_init_fields(page, id, flags);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
@@ -3086,6 +3111,12 @@ fil_open_single_table_tablespace(
size_bytes = (((ib_int64_t)size_high) << 32)
+ (ib_int64_t)size_low;
+ if (size_bytes < free_limit_bytes) {
+ free_limit_bytes = size_bytes;
+ fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
+ file_is_corrupt = TRUE;
+ }
+
/* get clustered index information */
table = dict_table_get_low(name);
index = dict_table_get_first_index(table);
@@ -3107,16 +3138,19 @@ fil_open_single_table_tablespace(
info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
if (!success) {
fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
+ file_is_corrupt = TRUE;
goto skip_info;
}
success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
if (!success) {
fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
+ file_is_corrupt = TRUE;
goto skip_info;
}
if (mach_read_from_4(page) != 0x78706f72UL
|| mach_read_from_4(page + 4) != 0x74696e66UL) {
fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
+ file_is_corrupt = TRUE;
goto skip_info;
}
@@ -3153,20 +3187,29 @@ skip_info:
fprintf(stderr, "InnoDB: Progress in %%:");
- for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) {
+ for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
ulint checksum_field;
ulint old_checksum_field;
+ ibool page_is_corrupt;
success = os_file_read(file, page,
(ulint)(offset & 0xFFFFFFFFUL),
(ulint)(offset >> 32), UNIV_PAGE_SIZE);
- /* skip inconsistent pages, it may be free page. */
+ page_is_corrupt = FALSE;
+
+ /* check consistency */
if (memcmp(page + FIL_PAGE_LSN + 4,
page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
- goto skip_write;
+ page_is_corrupt = TRUE;
+ }
+
+ if (mach_read_from_4(page + FIL_PAGE_OFFSET)
+ != offset / UNIV_PAGE_SIZE) {
+
+ page_is_corrupt = TRUE;
}
checksum_field = mach_read_from_4(page
@@ -3182,7 +3225,7 @@ skip_info:
&& old_checksum_field
!= buf_calc_page_old_checksum(page)) {
- goto skip_write;
+ page_is_corrupt = TRUE;
}
if (!srv_fast_checksum
@@ -3191,7 +3234,7 @@ skip_info:
&& checksum_field
!= buf_calc_page_new_checksum(page)) {
- goto skip_write;
+ page_is_corrupt = TRUE;
}
if (srv_fast_checksum
@@ -3202,6 +3245,77 @@ skip_info:
&& checksum_field
!= buf_calc_page_new_checksum(page)) {
+ page_is_corrupt = TRUE;
+ }
+
+ /* if it is free page, inconsistency is acceptable */
+ if (!offset) {
+ /* header page*/
+ /* it should be overwritten already */
+ ut_a(!page_is_corrupt);
+
+ } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
+ /* descr page (not header) */
+ if (page_is_corrupt) {
+ file_is_corrupt = TRUE;
+ descr_is_corrupt = TRUE;
+ } else {
+ ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
+ descr_is_corrupt = FALSE;
+ }
+
+ /* store as descr page */
+ memcpy(descr_page, page, UNIV_PAGE_SIZE);
+
+ } else if (descr_is_corrupt) {
+ /* unknown state of the page */
+ if (page_is_corrupt) {
+ file_is_corrupt = TRUE;
+ }
+
+ } else {
+ /* check free page or not */
+ /* These definitions should be the same as those in fsp0fsp.c */
+#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
+
+#define XDES_BITMAP (FLST_NODE_SIZE + 12)
+#define XDES_BITS_PER_PAGE 2
+#define XDES_FREE_BIT 0
+#define XDES_SIZE \
+ (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
+#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
+ /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
+ /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
+ byte* descr;
+ ulint index;
+ ulint byte_index;
+ ulint bit_index;
+
+ descr = descr_page + XDES_ARR_OFFSET
+ + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
+
+ index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
+ byte_index = index / 8;
+ bit_index = index % 8;
+
+ if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
+ /* free page */
+ if (page_is_corrupt) {
+ goto skip_write;
+ }
+ } else {
+ /* not free */
+ if (page_is_corrupt) {
+ file_is_corrupt = TRUE;
+ }
+ }
+ }
+
+ if (page_is_corrupt) {
+ fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
+
+ /* cannot treat corrupt page */
goto skip_write;
}
@@ -3294,11 +3408,11 @@ skip_info:
}
skip_write:
- if (size_bytes
- && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes)
- != ((offset * 100) / size_bytes)) {
+ if (free_limit_bytes
+ && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
+ != ((offset * 100) / free_limit_bytes)) {
fprintf(stderr, " %lu",
- (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes));
+ (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
}
}
@@ -3379,6 +3493,26 @@ skip_write:
node->size = size;
}
mutex_exit(&(system->mutex));
+
+ ut_free(buf3);
+
+ if (file_is_corrupt) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: file ",
+ stderr);
+ ut_print_filename(stderr, filepath);
+ fprintf(stderr, " seems to be corrupt.\n"
+ "InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n"
+ "InnoDB: ##### CAUTION #####\n"
+ "InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n"
+ "InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n"
+ "InnoDB: ###################\n");
+ success = FALSE;
+
+ ut_free(buf2);
+
+ goto func_exit;
+ }
}
ut_free(buf2);
diff --git a/storage/xtradb/ha/ha0ha.c b/storage/xtradb/ha/ha0ha.c
index e28b972e61a..7f11917de0a 100644
--- a/storage/xtradb/ha/ha0ha.c
+++ b/storage/xtradb/ha/ha0ha.c
@@ -31,9 +31,7 @@ Created 8/22/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-# include "btr0sea.h"
-#endif /* UNIV_SYNC_DEBUG */
+#include "btr0sea.h"
#include "page0page.h"
/*************************************************************//**
@@ -127,7 +125,8 @@ ha_clear(
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
-is inserted.
+is inserted. If btr_search_enabled is set to FALSE, we will only allow
+updating existing nodes, but no new node is allowed to be added.
@return TRUE if succeed, FALSE if no more memory could be allocated */
UNIV_INTERN
ibool
@@ -174,6 +173,7 @@ ha_insert_for_fold_func(
prev_block->n_pointers--;
block->n_pointers++;
}
+ ut_ad(!btr_search_fully_disabled);
# endif /* !UNIV_HOTBACKUP */
prev_node->block = block;
@@ -186,6 +186,13 @@ ha_insert_for_fold_func(
prev_node = prev_node->next;
}
+ /* We are in the process of disabling the hash index; do not add
+ a new chain node */
+ if (!btr_search_enabled) {
+ ut_ad(!btr_search_fully_disabled);
+ return(TRUE);
+ }
+
/* We have to allocate a new chain node */
node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c
index 70516deb005..0f4fc55d895 100644
--- a/storage/xtradb/ha/hash0hash.c
+++ b/storage/xtradb/ha/hash0hash.c
@@ -161,7 +161,7 @@ hash_create_init(
offset = (sizeof(hash_table_t) + 7) / 8;
offset *= 8;
- table->array = (hash_cell_t*)(((char*)table) + offset);
+ table->array = (hash_cell_t*)(((byte*)table) + offset);
table->n_cells = prime;
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
table->adaptive = FALSE;
@@ -187,7 +187,7 @@ hash_create_reuse(
offset = (sizeof(hash_table_t) + 7) / 8;
offset *= 8;
- table->array = (hash_cell_t*)(((char*)table) + offset);
+ table->array = (hash_cell_t*)(((byte*)table) + offset);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
}
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 3fcd5106386..9e634a07ad4 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -194,6 +194,7 @@ static my_bool innobase_rollback_on_timeout = FALSE;
static my_bool innobase_create_status_file = FALSE;
static my_bool innobase_stats_on_metadata = TRUE;
static my_bool innobase_use_sys_stats_table = FALSE;
+static my_bool innobase_buffer_pool_shm_checksum = TRUE;
static char* internal_innobase_data_file_path = NULL;
@@ -2426,6 +2427,7 @@ innobase_change_buffering_inited_ok:
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
srv_use_checksums = (ibool) innobase_use_checksums;
srv_fast_checksum = (ibool) innobase_fast_checksum;
+ srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
#ifdef HAVE_LARGE_PAGES
if ((os_use_large_pages = (ibool) my_use_large_pages))
@@ -2562,6 +2564,7 @@ skip_overwrite:
/* Get the current high water mark format. */
innobase_file_format_check = (char*) trx_sys_file_format_max_get();
+ btr_search_fully_disabled = (!btr_search_enabled);
DBUG_RETURN(FALSE);
error:
DBUG_RETURN(TRUE);
@@ -7962,7 +7965,7 @@ ha_innobase::info(
/* In sql_show we call with this flag: update
then statistics so that they are up-to-date */
- if (srv_use_sys_stats_table
+ if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)
&& thd_sql_command(user_thd) == SQLCOM_ANALYZE) {
/* If the indexes on the table don't have enough rows in SYS_STATS system table, */
/* they need to be created. */
@@ -8058,17 +8061,17 @@ ha_innobase::info(
are asked by MySQL to avoid locking. Another reason to
avoid the call is that it uses quite a lot of CPU.
See Bug#38185. */
- if (flag & HA_STATUS_NO_LOCK) {
- /* We do not update delete_length if no
- locking is requested so the "old" value can
- remain. delete_length is initialized to 0 in
- the ha_statistics' constructor. */
- } else if (UNIV_UNLIKELY
- (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
- /* Avoid accessing the tablespace if
- innodb_crash_recovery is set to a high value. */
- stats.delete_length = 0;
- } else if (srv_stats_update_need_lock) {
+ if (flag & HA_STATUS_NO_LOCK) {
+ /* We do not update delete_length if no
+ locking is requested so the "old" value can
+ remain. delete_length is initialized to 0 in
+ the ha_statistics' constructor. */
+ } else if (UNIV_UNLIKELY
+ (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
+ /* Avoid accessing the tablespace if
+ innodb_crash_recovery is set to a high value. */
+ stats.delete_length = 0;
+ } else if (srv_stats_update_need_lock) {
/* lock the data dictionary to avoid races with
ibd_file_missing and tablespace_discarded */
@@ -11382,9 +11385,14 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).",
+ "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
NULL, NULL, 0, 0, INT_MAX32, 0);
+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable buffer_pool_shm checksum validation (enabled by default).",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
PLUGIN_VAR_RQCMDARG,
"Helps in performance tuning in heavily concurrent environments.",
@@ -11631,6 +11639,12 @@ static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
"Limit the allocated memory for dictionary cache. (0: unlimited)",
NULL, NULL, 0, 0, LONG_MAX, 0);
+static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
+ PLUGIN_VAR_RQCMDARG,
+ "Time in seconds between automatic buffer pool dumps. "
+ "0 (the default) disables automatic dumps.",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
PLUGIN_VAR_RQCMDARG,
"Pass corruptions of user tables as 'corrupt table' instead of not crashing itself, "
@@ -11645,6 +11659,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
MYSQL_SYSVAR(buffer_pool_shm_key),
+ MYSQL_SYSVAR(buffer_pool_shm_checksum),
MYSQL_SYSVAR(checksums),
MYSQL_SYSVAR(fast_checksum),
MYSQL_SYSVAR(commit_concurrency),
@@ -11722,6 +11737,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(read_ahead_threshold),
MYSQL_SYSVAR(io_capacity),
+ MYSQL_SYSVAR(auto_lru_dump),
MYSQL_SYSVAR(use_purge_thread),
MYSQL_SYSVAR(pass_corrupt_table),
NULL
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index 40d0981990a..e151fdcb563 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -571,7 +571,7 @@ btr_copy_externally_stored_field_prefix(
ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h
index e5f9e129e9b..f6d194319ae 100644
--- a/storage/xtradb/include/btr0sea.h
+++ b/storage/xtradb/include/btr0sea.h
@@ -201,7 +201,13 @@ btr_search_validate(void);
/** Flag: has the search system been enabled?
Protected by btr_search_latch and btr_search_enabled_mutex. */
-extern char btr_search_enabled;
+extern char btr_search_enabled;
+
+/** Flag: whether the search system has completed its disabling process.
+It is set to TRUE right after buf_pool_drop_hash_index() in
+btr_search_disable(), indicating hash index entries are cleaned up.
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+extern ibool btr_search_fully_disabled;
/** The search info struct in an index */
struct btr_search_struct{
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index a7854e3038d..e06927f42f0 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1305,7 +1305,7 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
?((ulint) (ptr) / UNIV_PAGE_SIZE)\
- :((ulint) ((char*)ptr - (char*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
+ :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h
index 9dc20cc057f..492c767acc4 100644
--- a/storage/xtradb/include/hash0hash.h
+++ b/storage/xtradb/include/hash0hash.h
@@ -363,13 +363,13 @@ do {\
NODE_TYPE* node2222;\
\
if ((TABLE)->array[i2222].node) \
- (TABLE)->array[i2222].node = (void*)((char*)(TABLE)->array[i2222].node \
+ (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
+ (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
node2222 = HASH_GET_FIRST((TABLE), i2222);\
\
while (node2222) {\
if (node2222->PTR_NAME) \
- node2222->PTR_NAME = (void*)((char*)node2222->PTR_NAME \
+ node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
+ ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
\
node2222 = node2222->PTR_NAME;\
diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h
index 5e93bf88a47..fa8be296ec9 100644
--- a/storage/xtradb/include/mem0pool.h
+++ b/storage/xtradb/include/mem0pool.h
@@ -100,18 +100,6 @@ mem_pool_get_reserved(
/*==================*/
mem_pool_t* pool); /*!< in: memory pool */
/********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_enter(void);
-/*======================*/
-/********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_exit(void);
-/*=====================*/
-/********************************************************************//**
Validates a memory pool.
@return TRUE if ok */
UNIV_INTERN
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index a604f6e3724..f8fab59ef80 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -630,7 +630,11 @@ struct row_prebuilt_struct {
the secondary index, then this is
set to TRUE */
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- BLOB column(s) */
+ a column with DATA_BLOB ==
+ get_innobase_type_from_mysql_type();
+ not to be confused with InnoDB
+ externally stored columns
+ (VARCHAR can be off-page too) */
mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 0904a5da1eb..dc455581350 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -157,6 +157,8 @@ extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
extern uint srv_buffer_pool_shm_key;
+extern ibool srv_buffer_pool_shm_is_reused;
+extern ibool srv_buffer_pool_shm_checksum;
extern ibool srv_thread_concurrency_timer_based;
@@ -340,6 +342,9 @@ extern ulint srv_buf_pool_flushed;
reading of a disk page */
extern ulint srv_buf_pool_reads;
+/** Time in seconds between automatic buffer pool dumps */
+extern uint srv_auto_lru_dump;
+
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
@@ -608,6 +613,16 @@ srv_error_monitor_thread(
/*=====================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+/*********************************************************************//**
+A thread which restores the buffer pool from a dump file on startup and does
+periodic buffer pool dumps.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_LRU_dump_restore_thread(
+/*====================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 64df60ad6d1..8691e3cf337 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 0
-#define INNODB_VERSION_BUGFIX 11
-#define PERCONA_INNODB_VERSION 12.0
+#define INNODB_VERSION_BUGFIX 12
+#define PERCONA_INNODB_VERSION 12.1
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h
index 4305f6571b5..245dfc226c3 100644
--- a/storage/xtradb/include/ut0lst.h
+++ b/storage/xtradb/include/ut0lst.h
@@ -269,10 +269,10 @@ do { \
TYPE* ut_list_node_313; \
\
if ((BASE).start) \
- (BASE).start = (void*)((char*)((BASE).start) \
+ (BASE).start = (void*)((byte*)((BASE).start) \
+ (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
if ((BASE).end) \
- (BASE).end = (void*)((char*)((BASE).end) \
+ (BASE).end = (void*)((byte*)((BASE).end) \
+ (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
\
ut_list_node_313 = (BASE).start; \
@@ -280,10 +280,10 @@ do { \
for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
ut_a(ut_list_node_313); \
if ((ut_list_node_313->NAME).prev) \
- (ut_list_node_313->NAME).prev = (void*)((char*)((ut_list_node_313->NAME).prev) \
+ (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
+ (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
if ((ut_list_node_313->NAME).next) \
- (ut_list_node_313->NAME).next = (void *)((char*)((ut_list_node_313->NAME).next) \
+ (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
+ (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
ut_list_node_313 = (ut_list_node_313->NAME).next; \
} \
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index 7ec4a53e0ea..1ded67d9147 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -4606,7 +4606,7 @@ print_rec:
nth_lock++;
if (nth_lock >= srv_show_locks_held) {
- fputs("TOO LOCKS PRINTED FOR THIS TRX:"
+ fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
" SUPPRESSING FURTHER PRINTS\n",
file);
diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c
index bbb634addb0..200b3b088a7 100644
--- a/storage/xtradb/log/log0recv.c
+++ b/storage/xtradb/log/log0recv.c
@@ -2901,6 +2901,7 @@ recv_init_crash_recovery(void)
/*==========================*/
{
ut_a(!recv_needed_recovery);
+ ut_a(!srv_buffer_pool_shm_is_reused);
recv_needed_recovery = TRUE;
diff --git a/storage/xtradb/mem/mem0mem.c b/storage/xtradb/mem/mem0mem.c
index c0ce8a3e1ac..1dd4db30841 100644
--- a/storage/xtradb/mem/mem0mem.c
+++ b/storage/xtradb/mem/mem0mem.c
@@ -367,7 +367,7 @@ mem_heap_create_block(
block->line = line;
#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
+ mutex_enter(&(mem_comm_pool->mutex));
if (!mem_block_list_inited) {
mem_block_list_inited = TRUE;
@@ -376,7 +376,7 @@ mem_heap_create_block(
UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);
- mem_pool_mutex_exit();
+ mutex_exit(&(mem_comm_pool->mutex));
#endif
mem_block_set_len(block, len);
mem_block_set_type(block, type);
@@ -479,11 +479,11 @@ mem_heap_block_free(
UT_LIST_REMOVE(list, heap->base, block);
#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
+ mutex_enter(&(mem_comm_pool->mutex));
UT_LIST_REMOVE(mem_block_list, mem_block_list, block);
- mem_pool_mutex_exit();
+ mutex_exit(&(mem_comm_pool->mutex));
#endif
ut_ad(heap->total_size >= block->len);
@@ -556,7 +556,7 @@ mem_validate_all_blocks(void)
{
mem_block_t* block;
- mem_pool_mutex_enter();
+ mutex_enter(&(mem_comm_pool->mutex));
block = UT_LIST_GET_FIRST(mem_block_list);
@@ -568,6 +568,6 @@ mem_validate_all_blocks(void)
block = UT_LIST_GET_NEXT(mem_block_list, block);
}
- mem_pool_mutex_exit();
+ mutex_exit(&(mem_comm_pool->mutex));
}
#endif
diff --git a/storage/xtradb/mem/mem0pool.c b/storage/xtradb/mem/mem0pool.c
index c4f8af607e0..3291453eeb5 100644
--- a/storage/xtradb/mem/mem0pool.c
+++ b/storage/xtradb/mem/mem0pool.c
@@ -34,6 +34,7 @@ Created 5/12/1997 Heikki Tuuri
#include "ut0lst.h"
#include "ut0byte.h"
#include "mem0mem.h"
+#include "srv0start.h"
/* We would like to use also the buffer frames to allocate memory. This
would be desirable, because then the memory consumption of the database
@@ -121,23 +122,33 @@ mysql@lists.mysql.com */
UNIV_INTERN ulint mem_n_threads_inside = 0;
/********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
+Reserves the mem pool mutex if we are not in server shutdown. Use
+this function only in memory free functions, since only memory
+free functions are used during server shutdown. */
+UNIV_INLINE
void
-mem_pool_mutex_enter(void)
-/*======================*/
+mem_pool_mutex_enter(
+/*=================*/
+ mem_pool_t* pool) /*!< in: memory pool */
{
- mutex_enter(&(mem_comm_pool->mutex));
+ if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
+ mutex_enter(&(pool->mutex));
+ }
}
/********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
+Releases the mem pool mutex if we are not in server shutdown. As with
+the corresponding mem_pool_mutex_enter() function, use it only
+in memory free functions. */
+UNIV_INLINE
void
-mem_pool_mutex_exit(void)
-/*=====================*/
+mem_pool_mutex_exit(
+/*================*/
+ mem_pool_t* pool) /*!< in: memory pool */
{
- mutex_exit(&(mem_comm_pool->mutex));
+ if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
+ mutex_exit(&(pool->mutex));
+ }
}
/********************************************************************//**
@@ -567,7 +578,7 @@ mem_area_free(
n = ut_2_log(size);
- mutex_enter(&(pool->mutex));
+ mem_pool_mutex_enter(pool);
mem_n_threads_inside++;
ut_a(mem_n_threads_inside == 1);
@@ -595,7 +606,7 @@ mem_area_free(
pool->reserved += ut_2_exp(n);
mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
+ mem_pool_mutex_exit(pool);
mem_area_free(new_ptr, pool);
@@ -611,7 +622,7 @@ mem_area_free(
}
mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
+ mem_pool_mutex_exit(pool);
ut_ad(mem_pool_validate(pool));
}
@@ -630,7 +641,7 @@ mem_pool_validate(
ulint free;
ulint i;
- mutex_enter(&(pool->mutex));
+ mem_pool_mutex_enter(pool);
free = 0;
@@ -658,7 +669,7 @@ mem_pool_validate(
ut_a(free + pool->reserved == pool->size);
- mutex_exit(&(pool->mutex));
+ mem_pool_mutex_exit(pool);
return(TRUE);
}
diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c
index c101db3d179..4567d96b6f4 100644
--- a/storage/xtradb/os/os0proc.c
+++ b/storage/xtradb/os/os0proc.c
@@ -246,12 +246,10 @@ os_shm_alloc(
#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
ulint size;
int shmid;
-#endif
*is_new = FALSE;
-#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
fprintf(stderr,
- "InnoDB: The shared memory key %#x (%d) is specified.\n",
+ "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n",
key, key);
# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
if (!os_use_large_pages || !os_large_page_size) {
@@ -268,12 +266,12 @@ os_shm_alloc(
if (shmid < 0) {
if (errno == EEXIST) {
fprintf(stderr,
- "InnoDB: HugeTLB: The shared memory segment seems to exist already.\n");
+ "InnoDB: HugeTLB: The shared memory segment exists.\n");
shmid = shmget((key_t)key, (size_t)size,
SHM_HUGETLB | SHM_R | SHM_W);
if (shmid < 0) {
fprintf(stderr,
- "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n",
+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
size, errno);
goto skip;
} else {
@@ -282,14 +280,14 @@ os_shm_alloc(
}
} else {
fprintf(stderr,
- "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n",
+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
size, errno);
goto skip;
}
} else {
*is_new = TRUE;
fprintf(stderr,
- "InnoDB: HugeTLB: The new shared memory segment is created.\n");
+ "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
}
ptr = shmat(shmid, NULL, 0);
@@ -325,12 +323,12 @@ skip:
if (shmid < 0) {
if (errno == EEXIST) {
fprintf(stderr,
- "InnoDB: The shared memory segment seems to exist already.\n");
+ "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
shmid = shmget((key_t)key, (size_t)size,
SHM_R | SHM_W);
if (shmid < 0) {
fprintf(stderr,
- "InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n",
+ "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
size, errno);
ptr = NULL;
goto end;
@@ -340,7 +338,7 @@ skip:
}
} else {
fprintf(stderr,
- "InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n",
+ "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
size, errno);
ptr = NULL;
goto end;
@@ -348,7 +346,7 @@ skip:
} else {
*is_new = TRUE;
fprintf(stderr,
- "InnoDB: The new shared memory segment is created.\n");
+ "InnoDB: A new shared memory segment has been created.\n");
}
ptr = shmat(shmid, NULL, 0);
diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c
index 1b328aed04c..65102851bdf 100644
--- a/storage/xtradb/row/row0merge.c
+++ b/storage/xtradb/row/row0merge.c
@@ -1787,6 +1787,11 @@ row_merge_copy_blobs(
(below). */
data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap);
+ /* Because we have locked the table, any records
+ written by incomplete transactions must have been
+ rolled back already. There must not be any incomplete
+ BLOB columns. */
+ ut_a(data);
dfield_set_data(field, data, len);
}
diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c
index cb7dfa2b7c9..8e806a14a98 100644
--- a/storage/xtradb/row/row0row.c
+++ b/storage/xtradb/row/row0row.c
@@ -294,7 +294,13 @@ row_build(
ut_ad(dtuple_check_typed(row));
- if (j) {
+ if (!ext) {
+ /* REDUNDANT and COMPACT formats store a local
+ 768-byte prefix of each externally stored
+ column. No cache is needed. */
+ ut_ad(dict_table_get_format(index->table)
+ < DICT_TF_FORMAT_ZIP);
+ } else if (j) {
*ext = row_ext_create(j, ext_cols, row,
dict_table_zip_size(index->table),
heap);
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index 01e75301e96..a1511e35435 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -416,7 +416,7 @@ row_sel_fetch_columns(
field_no))) {
/* Copy an externally stored field to the
- temporary heap */
+ temporary heap, if possible. */
heap = mem_heap_create(1);
@@ -425,6 +425,17 @@ row_sel_fetch_columns(
dict_table_zip_size(index->table),
field_no, &len, heap);
+ /* data == NULL means that the
+ externally stored field was not
+ written yet. This record
+ should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED
+ transactions. The InnoDB SQL parser
+ (the sole caller of this function)
+ does not implement READ UNCOMMITTED,
+ and it is not involved during rollback. */
+ ut_a(data);
ut_a(len != UNIV_SQL_NULL);
needs_copy = TRUE;
@@ -926,6 +937,7 @@ row_sel_get_clust_rec(
when plan->clust_pcur was positioned. The latch will not be
released until mtr_commit(mtr). */
+ ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
row_sel_fetch_columns(index, clust_rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
*out_rec = clust_rec;
@@ -1628,6 +1640,13 @@ skip_lock:
}
if (old_vers == NULL) {
+ /* The record does not exist
+ in our read view. Skip it, but
+ first attempt to determine
+ whether the index segment we
+ are searching through has been
+ exhausted. */
+
offsets = rec_get_offsets(
rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -2647,9 +2666,8 @@ Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
ibool
row_sel_store_mysql_rec(
/*====================*/
@@ -2719,6 +2737,21 @@ row_sel_store_mysql_rec(
dict_table_zip_size(prebuilt->table),
templ->rec_field_no, &len, heap);
+ if (UNIV_UNLIKELY(!data)) {
+ /* The externally stored field
+ was not written yet. This
+ record should only be seen by
+ recv_recovery_rollback_active()
+ or any TRX_ISO_READ_UNCOMMITTED
+ transactions. */
+
+ if (extern_field_heap) {
+ mem_heap_free(extern_field_heap);
+ }
+
+ return(FALSE);
+ }
+
ut_a(len != UNIV_SQL_NULL);
} else {
/* Field is stored in the row. */
@@ -3136,9 +3169,10 @@ row_sel_pop_cached_row_for_mysql(
}
/********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
row_sel_push_cache_row_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
@@ -3180,10 +3214,11 @@ row_sel_push_cache_row_for_mysql(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
prebuilt, rec, offsets))) {
- ut_error;
+ return(FALSE);
}
prebuilt->n_fetch_cached++;
+ return(TRUE);
}
/*********************************************************************//**
@@ -3576,15 +3611,25 @@ row_search_for_mysql(
row_sel_try_search_shortcut_for_mysql().
The latch will not be released until
mtr_commit(&mtr). */
- ut_ad(!rec_get_deleted_flag(rec, comp));
+ ut_ad(!rec_get_deleted_flag(rec, comp));
if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- /* We let the main loop to do the
- error handling */
- goto shortcut_fails_too_big_rec;
+ /* Only fresh inserts may contain
+ incomplete externally stored
+ columns. Pretend that such
+ records do not exist. Such
+ records may only be accessed
+ at the READ UNCOMMITTED
+ isolation level or when
+ rolling back a recovered
+ transaction. Rollback happens
+ at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+
+ /* Proceed as in case SEL_RETRY. */
+ break;
}
mtr_commit(&mtr);
@@ -3624,7 +3669,7 @@ release_search_latch_if_needed:
default:
ut_ad(0);
}
-shortcut_fails_too_big_rec:
+
mtr_commit(&mtr);
mtr_start(&mtr);
}
@@ -4218,7 +4263,7 @@ no_gap_lock:
rec = old_vers;
}
- } else {
+ } else {
/* We are looking into a non-clustered index,
and to get the right version of the record we
have to look also into the clustered index: this
@@ -4226,13 +4271,12 @@ no_gap_lock:
information via the clustered index record. */
ut_ad(index != clust_index);
+ ut_ad(!dict_index_is_clust(index));
- ut_ad(!dict_index_is_clust(index));
-
- if (!lock_sec_rec_cons_read_sees(
- rec, trx->read_view)) {
- goto requires_clust_rec;
- }
+ if (!lock_sec_rec_cons_read_sees(
+ rec, trx->read_view)) {
+ goto requires_clust_rec;
+ }
}
}
@@ -4355,13 +4399,13 @@ requires_clust_rec:
ULINT_UNDEFINED, &heap);
result_rec = rec;
}
-
- /* result_rec can legitimately be delete-marked
- now that it has been established that it points to a
- clustered index record that exists in the read view. */
+
+ /* result_rec can legitimately be delete-marked
+ now that it has been established that it points to a
+ clustered index record that exists in the read view. */
} else {
result_rec = rec;
- ut_ad(!rec_get_deleted_flag(rec, comp));
+ ut_ad(!rec_get_deleted_flag(rec, comp));
}
/* We found a qualifying record 'result_rec'. At this point,
@@ -4393,9 +4437,18 @@ requires_clust_rec:
not cache rows because there the cursor is a scrollable
cursor. */
- row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
- if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+ if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+ offsets)) {
+ /* Only fresh inserts may contain incomplete
+ externally stored columns. Pretend that such
+ records do not exist. Such records may only be
+ accessed at the READ UNCOMMITTED isolation
+ level or when rolling back a recovered
+ transaction. Rollback happens at a lower
+ level, not here. */
+ ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+ } else if (prebuilt->n_fetch_cached
+ == MYSQL_FETCH_CACHE_SIZE) {
goto got_row;
}
@@ -4411,9 +4464,17 @@ requires_clust_rec:
} else {
if (!row_sel_store_mysql_rec(buf, prebuilt,
result_rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- goto lock_wait_or_error;
+ /* Only fresh inserts may contain
+ incomplete externally stored
+ columns. Pretend that such records do
+ not exist. Such records may only be
+ accessed at the READ UNCOMMITTED
+ isolation level or when rolling back a
+ recovered transaction. Rollback
+ happens at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+ goto next_rec;
}
}
diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c
index 9ef842b5114..fd28a4f6520 100644
--- a/storage/xtradb/row/row0undo.c
+++ b/storage/xtradb/row/row0undo.c
@@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur(
ret = FALSE;
} else {
+ row_ext_t** ext;
+
+ if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+ /* In DYNAMIC or COMPRESSED format, there is
+ no prefix of externally stored columns in the
+ clustered index record. Build a cache of
+ column prefixes. */
+ ext = &node->ext;
+ } else {
+ /* REDUNDANT and COMPACT formats store a local
+ 768-byte prefix of each externally stored
+ column. No cache is needed. */
+ ext = NULL;
+ node->ext = NULL;
+ }
+
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, NULL, &node->ext, node->heap);
+ offsets, NULL, ext, node->heap);
if (node->update) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index c1f2bf9ae4a..04c3139fcc7 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -1398,6 +1398,7 @@ row_upd_store_row(
dict_index_t* clust_index;
rec_t* rec;
mem_heap_t* heap = NULL;
+ row_ext_t** ext;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
rec_offs_init(offsets_);
@@ -1414,8 +1415,22 @@ row_upd_store_row(
offsets = rec_get_offsets(rec, clust_index, offsets_,
ULINT_UNDEFINED, &heap);
+
+ if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+ /* In DYNAMIC or COMPRESSED format, there is no prefix
+ of externally stored columns in the clustered index
+ record. Build a cache of column prefixes. */
+ ext = &node->ext;
+ } else {
+ /* REDUNDANT and COMPACT formats store a local
+ 768-byte prefix of each externally stored column.
+ No cache is needed. */
+ ext = NULL;
+ node->ext = NULL;
+ }
+
node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- NULL, &node->ext, node->heap);
+ NULL, ext, node->heap);
if (node->is_delete) {
node->upd_row = NULL;
node->upd_ext = NULL;
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index b9905116603..c1d0f255c64 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -213,6 +213,8 @@ UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
/* key value for shm */
UNIV_INTERN uint srv_buffer_pool_shm_key = 0;
+UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE;
+UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE;
/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
instead. */
@@ -307,6 +309,9 @@ UNIV_INTERN ulint srv_buf_pool_flushed = 0;
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads = 0;
+/** Time in seconds between automatic buffer pool dumps */
+UNIV_INTERN uint srv_auto_lru_dump = 0;
+
/* structure to pass status variables to MySQL */
UNIV_INTERN export_struc export_vars;
@@ -2555,6 +2560,56 @@ loop:
OS_THREAD_DUMMY_RETURN;
}
+/*********************************************************************//**
+Thread that restores the buffer pool from a dump file at startup if
+srv_auto_lru_dump is nonzero, then dumps it about every srv_auto_lru_dump s.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_LRU_dump_restore_thread(
+/*====================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ uint auto_lru_dump;
+ time_t last_dump_time;
+ time_t time_elapsed;
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "LRU dump/restore thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif
+
+ if (srv_auto_lru_dump)
+ buf_LRU_file_restore();
+
+ last_dump_time = time(NULL);
+
+loop:
+ os_thread_sleep(5000000); /* presumably microseconds, i.e. 5 s -- confirm */
+
+ if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+ goto exit_func;
+ }
+
+ time_elapsed = time(NULL) - last_dump_time;
+ auto_lru_dump = srv_auto_lru_dump; /* re-read each pass; 0 disables dumps */
+ if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
+ last_dump_time = time(NULL);
+ buf_LRU_file_dump();
+ }
+
+ goto loop;
+exit_func:
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
/*******************************************************************//**
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index 62ffa366f18..b36faf2d2d7 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -126,9 +126,9 @@ static mutex_t ios_mutex;
static ulint ios;
/** io_handler_thread parameters for thread identification */
-static ulint n[SRV_MAX_N_IO_THREADS + 6 + 64];
+static ulint n[SRV_MAX_N_IO_THREADS + 7 + 64];
/** io_handler_thread identifiers */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 64];
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + 64];
/** We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@@ -1719,8 +1719,8 @@ innobase_start_or_create_for_mysql(void)
Note that this is not as heavy weight as it seems. At
this point there will be only ONE page in the buf_LRU
and there must be no page in the buf_flush list. */
- /* TODO: treat more correctly */
- if (!srv_buffer_pool_shm_key)
+ /* buffer_pool_shm should not be reused when recovery was needed. */
+ if (!srv_buffer_pool_shm_is_reused)
buf_pool_invalidate();
/* We always try to do a recovery, even if the database had
@@ -1835,6 +1835,10 @@ innobase_start_or_create_for_mysql(void)
os_thread_create(&srv_monitor_thread, NULL,
thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ /* Create the thread which automatically dumps/restores the buffer pool */
+ os_thread_create(&srv_LRU_dump_restore_thread, NULL,
+ thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+
srv_is_being_started = FALSE;
if (trx_doublewrite == NULL) {
@@ -1859,13 +1863,13 @@ innobase_start_or_create_for_mysql(void)
ulint i;
os_thread_create(&srv_purge_thread, NULL, thread_ids
- + (5 + SRV_MAX_N_IO_THREADS));
+ + (6 + SRV_MAX_N_IO_THREADS));
for (i = 0; i < srv_use_purge_thread - 1; i++) {
- n[6 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */
+ n[7 + i + SRV_MAX_N_IO_THREADS] = i; /* using as index for arrays in purge_sys */
os_thread_create(&srv_purge_worker_thread,
- n + (6 + i + SRV_MAX_N_IO_THREADS),
- thread_ids + (6 + i + SRV_MAX_N_IO_THREADS));
+ n + (7 + i + SRV_MAX_N_IO_THREADS),
+ thread_ids + (7 + i + SRV_MAX_N_IO_THREADS));
}
}
#ifdef UNIV_DEBUG
@@ -2214,6 +2218,10 @@ innobase_shutdown_for_mysql(void)
log_mem_free();
buf_pool_free();
mem_close();
+
+ /* ut_free_all_mem() frees all allocated memory not yet freed
+ during shutdown, and it also frees the ut_list_mutex, so it
+ should be the last operation of all */
ut_free_all_mem();
if (os_thread_count != 0
diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c
index ad4471ada0b..11581a3f2ae 100644
--- a/storage/xtradb/trx/trx0sys.c
+++ b/storage/xtradb/trx/trx0sys.c
@@ -541,8 +541,8 @@ start_again:
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
+ trx_sys_multiple_tablespace_format = TRUE;
}
-
trx_doublewrite_buf_is_being_created = FALSE;
}
}