diff options
author | Michael Widenius <monty@askmonty.org> | 2011-02-28 19:39:30 +0200 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2011-02-28 19:39:30 +0200 |
commit | 3358cdd5048671ee6cbbf50c291f7e0d0fda8e1e (patch) | |
tree | da0e622896425203d23ecdfd1bc77b57e3502edf /storage/xtradb | |
parent | 869f5d0e81d5cbecaec3605f292fbb363b9ccbf6 (diff) | |
parent | f83e594218a6d19da2fa1ea2a01d860c30fe2913 (diff) | |
download | mariadb-git-3358cdd5048671ee6cbbf50c291f7e0d0fda8e1e.tar.gz |
Merge with 5.1 to get in changes from MySQL 5.1.55
Diffstat (limited to 'storage/xtradb')
61 files changed, 2364 insertions, 758 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index 0cd8ac8a7e6..bf003b810d2 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,13 +1,96 @@ +2011-01-06 The InnoDB Team + * row/row0merge.c: + Fix Bug#59312 Examine MAX_FULL_NAME_LEN in InnoDB to address + possible insufficient name buffer + +2011-01-06 The InnoDB Team + * dict/dict0dict.c, handler/ha_innodb.cc, handler/i_s.cc, + include/univ.i: + Fix Bug#58643 InnoDB: too long table name + +2011-01-06 The InnoDB Team + * handler/i_s.cc, include/trx0i_s.h, trx/trx0i_s.c: + Fix Bug#55397 cannot select from innodb_trx when trx_query contains + blobs that aren't strings + +2011-01-04 The InnoDB Team + * dict/dict0dict.c: + Fix Bug#59197 double quote in field comment prevents foreign + key constraint creation + +2010-12-21 The InnoDB Team + * include/btr0cur.h, include/row0upd.h, btr/btr0cur.c, + row/row0umod.c, row/row0upd.c: + Fix Bug#55284 Double free of off-page columns due to lock wait + while updating PRIMARY KEY + +2010-12-21 The InnoDB Team + + * include/data0data.h, include/data0data.ic, include/row0upd.h, + btr/btr0cur.c, row/row0purge.c, row/row0umod.c, row/row0upd.c, + innodb.result, innodb.test: + Fix Bug#58912 InnoDB unnecessarily avoids update-in-place + on column prefix indexes + +2010-12-09 The InnoDB Team + + * buf/buf0lru.c: + Fix Bug#57600 output of I/O sum[%lu] can go negative + +2010-11-11 The InnoDB Team + + * thr/thr0loc.c, trx/trx0i_s.c: + Fix Bug#57802 Empty ASSERTION parameter passed to the HASH_SEARCH macro + +2010-11-10 The InnoDB Team + + * dict/dict0dict.c, handler/handler0alter.cc, include/dict0dict.h + row/row0merge.c: + Fix Bug#55084 InnoDB crash and corruption after ALTER TABLE + +2010-11-10 The InnoDB Team + + * srv/srv0start.c: + Fix Bug#48026 Log start and end of InnoDB buffer pool + initialization to the error log + +2010-11-03 The InnoDB Team + + * include/btr0btr.h, include/btr0btr.ic, dict/dict0crea.c: + Fix Bug#57947 InnoDB diagnostics shows btr_block_get calls + instead of real callers + +2010-11-03 The InnoDB Team + + * fil/fil0fil.c, fsp/fsp0fsp.c, handler/ha_innodb.cc, + include/fil0fil.h, include/univ.i: + Fix Bug #54538 - use of exclusive innodb dictionary lock limits + performance. + +2010-11-02 The InnoDB Team + + * btr/btr0cur.c, dict/dict0dict.c, dict/dict0load.c, + handler/ha_innodb.cc, include/dict0dict.h, row/row0mysql.c, + innodb_bug53046.result, innodb_bug53046.test: + Fix Bug#53046 dict_update_statistics_low can still be run + concurrently on same table + +2010-11-02 The InnoDB Team + + * row/row0sel.c: + Fix Bug#57799 READ UNCOMMITTED access failure of off-page + DYNAMIC or COMPRESSED columns again + 2010-10-24 The InnoDB Team * row/row0mysql.c - Fix Bug #57700 Latching order violation in + Fix Bug#57700 Latching order violation in row_truncate_table_for_mysql() 2010-10-20 The InnoDB Team * dict/dict0load.c - Fix Bug #57616 Sig 11 in dict_load_table() when failed to load + Fix Bug#57616 Sig 11 in dict_load_table() when failed to load index or foreign key 2010-10-19 The InnoDB Team @@ -17,7 +100,7 @@ include/ibuf0ibuf.h, include/row0mysql.h, row/row0mysql.c, row/row0sel.c, innodb_bug56680.test, innodb_bug56680.result: - Fix Bug #56680 InnoDB may return wrong results from a + Fix Bug#56680 InnoDB may return wrong results from a case-insensitive covering index 2010-10-18 The InnoDB Team @@ -72,7 +155,7 @@ 2010-08-03 The InnoDB Team * include/dict0dict.h, include/dict0dict.ic, row/row0mysql.c: - Fix bug #54678, InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock + Fix Bug#54678 InnoDB, TRUNCATE, ALTER, I_S SELECT, crash or deadlock 2010-08-03 The InnoDB Team @@ -146,7 +229,7 @@ * dict/dict0load.c, fil/fil0fil.c: Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, - old maximum was 0 (introduced in Bug #53578 fix) + old maximum was 0 (introduced in Bug#53578 fix) 2010-06-16 The InnoDB Team diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 4d6cd8d80fc..55204691400 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -675,7 +675,7 @@ btr_page_get_father_node_ptr_func( " to fix the\n" "InnoDB: corruption. If the crash happens at " "the database startup, see\n" - "InnoDB: " REFMAN "forcing-recovery.html about\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n" "InnoDB: forcing recovery. " "Then dump + drop + reimport.\n", stderr); diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index daa94a549a0..321504a2b25 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -1124,7 +1124,7 @@ btr_cur_ins_lock_and_undo( not zero, the parameters index and thr should be specified */ btr_cur_t* cursor, /*!< in: cursor on page after which to insert */ - const dtuple_t* entry, /*!< in: entry to insert */ + dtuple_t* entry, /*!< in/out: entry to insert */ que_thr_t* thr, /*!< in: query thread or NULL */ mtr_t* mtr, /*!< in/out: mini-transaction */ ibool* inherit)/*!< out: TRUE if the inserted new record maybe @@ -1933,7 +1933,8 @@ btr_cur_update_in_place( NOT call it if index is secondary */ if (!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(NULL, index, update)) { + || row_upd_changes_ord_field_binary(NULL, NULL, + index, update)) { /* Remove possible hash index pointer to this record */ btr_search_update_hash_on_delete(cursor); @@ -2685,27 +2686,24 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { - dict_index_t* index; - buf_block_t* block; roll_ptr_t roll_ptr; ulint err; - rec_t* rec; page_zip_des_t* page_zip; trx_t* trx; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - rec = btr_cur_get_rec(cursor); - index = cursor->index; + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + ut_ad(buf_block_get_frame(block) == page_align(rec)); + ut_ad(page_is_leaf(page_align(rec))); #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { @@ -2717,13 +2715,12 @@ btr_cur_del_mark_set_clust_rec( ut_ad(dict_index_is_clust(index)); ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - err = lock_clust_rec_modify_check_and_lock(flags, - btr_cur_get_block(cursor), + err = lock_clust_rec_modify_check_and_lock(flags, block, rec, index, offsets, thr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, @@ -2731,11 +2728,9 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { - goto func_exit; + return(err); } - block = btr_cur_get_block(cursor); - if (block->is_hashed) { rw_lock_x_lock(&btr_search_latch); } @@ -2758,10 +2753,6 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } return(err); } @@ -3582,11 +3573,9 @@ btr_estimate_number_of_different_key_vals( effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); if (!effective_pages) { - dict_index_stat_mutex_enter(index); for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; } - dict_index_stat_mutex_exit(index); return; } else if (effective_pages > index->stat_n_leaf_pages) { effective_pages = index->stat_n_leaf_pages; @@ -3728,8 +3717,6 @@ btr_estimate_number_of_different_key_vals( also the pages used for external storage of fields (those pages are included in index->stat_n_leaf_pages) */ - dict_index_stat_mutex_enter(index); - for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = ((n_diff[j] @@ -3768,8 +3755,6 @@ btr_estimate_number_of_different_key_vals( } } - dict_index_stat_mutex_exit(index); - mem_free(n_diff); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); @@ -3862,108 +3847,36 @@ btr_cur_set_ownership_of_extern_field( } /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr) /*!< in: mtr, or NULL if not logged */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint n; - ulint j; ulint i; - ibool change_ownership = FALSE; - ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + ut_ad(rec_offs_any_extern(offsets)); + ut_ad(mtr); - if (!rec_offs_any_extern(offsets)) { - - return(FALSE); - } - - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - /* Check it is not in updated fields */ - - if (update) { - for (j = 0; j < upd_get_n_fields(update); - j++) { - if (upd_get_nth_field(update, j) - ->field_no == i) { - - goto updated; - } - } - } - + for (i = 0; i < rec_offs_n_fields(offsets); i++) { + if (rec_offs_nth_extern(offsets, i) + && !upd_get_field_by_field_no(update, i)) { btr_cur_set_ownership_of_extern_field( page_zip, rec, index, offsets, i, FALSE, mtr); - - change_ownership = TRUE; -updated: - ; } } - - return(change_ownership); -} - -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update) /*!< in: update vector */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - dfield_t* dfield = dtuple_get_nth_field(entry, i); - byte* data; - ulint len; - ulint j; - - if (!dfield_is_ext(dfield)) { - continue; - } - - /* Check if it is in updated fields */ - - for (j = 0; j < upd_get_n_fields(update); j++) { - if (upd_get_nth_field(update, j)->field_no == i) { - - goto is_updated; - } - } - - data = dfield_get_data(dfield); - len = dfield_get_len(dfield); - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - |= BTR_EXTERN_INHERITED_FLAG; - -is_updated: - ; - } } /*******************************************************************//** @@ -4002,29 +3915,6 @@ btr_cur_unmark_extern_fields( } /*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry) /*!< in/out: clustered index entry */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - dfield_t* dfield = dtuple_get_nth_field(entry, i); - - if (dfield_is_ext(dfield)) { - byte* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN] - &= ~BTR_EXTERN_OWNER_FLAG; - } - } -} - -/*******************************************************************//** Flags the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must mark as extern storage in a record inserted for an update. @@ -4182,7 +4072,7 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or error */ +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint btr_store_big_rec_extern_fields( diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index 1e9624fce50..452e5b0f526 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "trx0trx.h" #include "srv0start.h" +#include "que0que.h" +#include "read0read.h" +#include "row0row.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -310,6 +314,30 @@ read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ +/* Buffer pool shared memory segment information */ +typedef struct buf_shm_info_struct buf_shm_info_t; + +struct buf_shm_info_struct { + char head_str[8]; + ulint binary_id; + ibool is_new; /* during initializing */ + ibool clean; /* clean shutdowned and free */ + ibool reusable; /* reusable */ + ulint buf_pool_size; /* backup value */ + ulint page_size; /* backup value */ + ulint frame_offset; /* offset of the first frame based on chunk->mem */ + ulint zip_hash_offset; + ulint zip_hash_n; + + ulint checksum; + + buf_pool_t buf_pool_backup; + buf_chunk_t chunk_backup; + + ib_uint64_t dummy; +}; + +#define BUF_SHM_INFO_HEAD "XTRA_SHM" #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** @@ -429,7 +457,7 @@ buf_page_is_corrupted( "you may have copied the InnoDB\n" "InnoDB: tablespace but not the InnoDB " "log files. See\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: for more information.\n", (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET), @@ -756,6 +784,45 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } +static +void +buf_block_reuse( +/*============*/ + buf_block_t* block, + ptrdiff_t frame_offset) +{ + /* block_init */ + block->frame += frame_offset; + + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); + + block->index = NULL; + +#ifdef UNIV_DEBUG + /* recreate later */ + block->page.in_page_hash = FALSE; + block->page.in_zip_hash = FALSE; +#endif /* UNIV_DEBUG */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers = 0; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + + if (block->page.zip.data) + block->page.zip.data += frame_offset; + + block->is_hashed = FALSE; + + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); + ut_ad(rw_lock_validate(&(block->lock))); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); +#endif /* UNIV_SYNC_DEBUG */ +} + /********************************************************************//** Allocates a chunk of buffer frames. @return chunk, or NULL on failure */ @@ -768,26 +835,188 @@ buf_chunk_init( { buf_block_t* block; byte* frame; + ulint zip_hash_n = 0; + ulint zip_hash_mem_size = 0; + hash_table_t* zip_hash_tmp = NULL; ulint i; + buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + + srv_buffer_pool_shm_is_reused = FALSE; + + if (srv_buffer_pool_shm_key) { + /* zip_hash size */ + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + } + /* Reserve space for the block descriptors. */ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + mem_size += zip_hash_mem_size; + } chunk->mem_size = mem_size; + + if (srv_buffer_pool_shm_key) { + ulint binary_id; + ibool is_new; + + ut_a(buf_pool->n_chunks == 1); + + fprintf(stderr, + "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n" + "InnoDB: Do not change the following between restarts of the server while this option is being used:\n" + "InnoDB: * the mysqld executable between restarts of the server.\n" + "InnoDB: * the value of innodb_buffer_pool_size.\n" + "InnoDB: * the value of innodb_page_size.\n" + "InnoDB: * datafiles created by InnoDB during this session.\n" + "InnoDB: Otherwise, data corruption in datafiles may result.\n"); + + /* FIXME: This is vague id still */ + binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get) + + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum) + + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal) + + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str) + + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version) + + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low) + + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func) + + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread) + + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside) + + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity) + + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup); + + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); + + if (UNIV_UNLIKELY(chunk->mem == NULL)) { + return(NULL); + } +init_again: +#ifdef UNIV_SET_MEM_TO_ZERO + if (is_new) { + memset(chunk->mem, '\0', chunk->mem_size); + } +#endif + /* for ut_fold_binary_32(), these values should be 32-bit aligned */ + ut_a(sizeof(buf_shm_info_t) % 4 == 0); + ut_a((ulint)chunk->mem % 4 == 0); + ut_a(chunk->mem_size % 4 == 0); + + shm_info = chunk->mem; + + zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size); + + if (is_new) { + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); + shm_info->binary_id = binary_id; + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */ + shm_info->buf_pool_size = srv_buf_pool_size; + shm_info->page_size = srv_page_size; + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; + shm_info->zip_hash_n = zip_hash_n; + } else { + ulint checksum; + + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); + return(NULL); + } + if (shm_info->binary_id != binary_id) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); + return(NULL); + } + if (shm_info->is_new) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not initialized yet.\n"); + return(NULL); + } + if (shm_info->buf_pool_size != srv_buf_pool_size) { + fprintf(stderr, + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", + shm_info->buf_pool_size, srv_buf_pool_size); + return(NULL); + } + if (shm_info->page_size != srv_page_size) { + fprintf(stderr, + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", + shm_info->page_size, srv_page_size); + return(NULL); + } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Warning: The shared memory has unrecoverable contents.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Warning: The shared memory was not shut down cleanly.\n" + "InnoDB: The shared memory segment is initialized.\n"); + is_new = TRUE; + goto init_again; + } + + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); + ut_a(shm_info->zip_hash_n == zip_hash_n); + + /* check checksum */ + if (srv_buffer_pool_shm_checksum) { + checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + checksum = BUF_NO_CHECKSUM_MAGIC; + } + + if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC + && shm_info->checksum != checksum) { + fprintf(stderr, + "InnoDB: Error: checksum of the shared memory is not match. " + "(stored=%lu calculated=%lu)\n", + shm_info->checksum, checksum); + return(NULL); + } + + /* flag to use the segment. */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + } + + /* init zip_hash contents */ + if (is_new) { + hash_create_init(zip_hash_tmp, zip_hash_n); + } else { + /* adjust offset is done later */ + hash_create_reuse(zip_hash_tmp); + + srv_buffer_pool_shm_is_reused = TRUE; + } + } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } + } /* Allocate the block descriptors from the start of the memory block. */ + if (srv_buffer_pool_shm_key) { + chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t)); + } else { chunk->blocks = chunk->mem; + } /* Align a pointer to the first frame. Note that when os_large_page_size is smaller than UNIV_PAGE_SIZE, @@ -795,8 +1024,13 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + /* reserve zip_hash space and always -1 for reproductibity */ + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; + } else { chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - (frame != chunk->mem); + } /* Subtract the space needed for block descriptors. */ { @@ -810,6 +1044,99 @@ buf_chunk_init( chunk->size = size; } + if (shm_info && !(shm_info->is_new)) { + /* convert the shared memory segment for reuse */ + ptrdiff_t phys_offset; + ptrdiff_t logi_offset; + ptrdiff_t blocks_offset; + void* previous_frame_address; + + if (chunk->size < shm_info->chunk_backup.size) { + fprintf(stderr, + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" + "InnoDB: Retrying may avoid this situation.\n"); + shm_info->clean = TRUE; /* release the flag for retrying */ + return(NULL); + } + + chunk->size = shm_info->chunk_backup.size; + phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset); + logi_offset = frame - chunk->blocks[0].frame; + previous_frame_address = chunk->blocks[0].frame; + blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks; + + if (phys_offset || logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" + "InnoDB: Previous frames in address : %p\n" + "InnoDB: Previous frames were located : %p\n" + "InnoDB: Current frames should be located: %p\n" + "InnoDB: Pysical offset : %ld (%#lx)\n" + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", + (byte*)chunk->mem + shm_info->frame_offset, + chunk->blocks[0].frame, frame, + (long) phys_offset, (long) phys_offset, + (long) logi_offset, (long) logi_offset, + (long) blocks_offset, (long) blocks_offset); + } else { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); + } + + if (phys_offset) { + fprintf(stderr, + "InnoDB: Aligning physical offset..."); + + memmove(frame, (byte*)chunk->mem + shm_info->frame_offset, + chunk->size * UNIV_PAGE_SIZE); + + fprintf(stderr, + " Done.\n"); + } + + /* buf_block_t */ + block = chunk->blocks; + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + + if (logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Aligning logical offset..."); + + + /* buf_pool_t buf_pool_backup */ + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, + previous_frame_address, logi_offset, blocks_offset); + if (shm_info->buf_pool_backup.LRU_old) + shm_info->buf_pool_backup.LRU_old = + (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old) + + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + ? logi_offset : blocks_offset)); + + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, + previous_frame_address, logi_offset, blocks_offset); + + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, + previous_frame_address, logi_offset, blocks_offset); + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], + previous_frame_address, logi_offset, blocks_offset); + } + + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, + previous_frame_address, logi_offset, blocks_offset); + + fprintf(stderr, + " Done.\n"); + } + } else { /* Init block structs and assign frames for them. Then we assign the frames to the first blocks (we already mapped the memory above). */ @@ -833,6 +1160,11 @@ buf_chunk_init( block++; frame += UNIV_PAGE_SIZE; } + } + + if (shm_info) { + shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem; + } return(chunk); } @@ -1014,6 +1346,8 @@ buf_chunk_free( UNIV_MEM_UNDESC(block); } + ut_a(!srv_buffer_pool_shm_key); + os_mem_free_large(chunk->mem, chunk->mem_size); } @@ -1063,7 +1397,10 @@ buf_pool_init(void) srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE; buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ + if (!srv_buffer_pool_shm_key) { buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); + } buf_pool->last_printout_time = time(NULL); @@ -1078,6 +1415,86 @@ buf_pool_init(void) --------------------------- */ /* All fields are initialized by mem_zalloc(). */ + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); + shm_info = chunk->mem; + + buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset); + + if(shm_info->is_new) { + shm_info->is_new = FALSE; /* initialization was finished */ + } else { + buf_block_t* block = chunk->blocks; + buf_page_t* b; + + /* shm_info->buf_pool_backup should be converted */ + /* at buf_chunk_init(). So copy simply. */ + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; + buf_pool->free = shm_info->buf_pool_backup.free; + buf_pool->LRU = shm_info->buf_pool_backup.LRU; + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; + } + + for (i = 0; i < chunk->size; i++, block++) { + if (buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE) { + ut_d(block->page.in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold( + block->page.space, + block->page.offset), + &block->page); + } + } + + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; + b = UT_LIST_GET_NEXT(zip_list, b)) { + ut_ad(!b->in_flush_list); + ut_ad(b->in_LRU_list); + + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, b->offset), b); + } + + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; + b = UT_LIST_GET_NEXT(flush_list, b)) { + ut_ad(b->in_flush_list); + ut_ad(b->in_LRU_list); + + switch (buf_page_get_state(b)) { + case BUF_BLOCK_ZIP_DIRTY: + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, + b->offset), b); + break; + case BUF_BLOCK_FILE_PAGE: + /* uncompressed page */ + break; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + ut_error; + break; + } + } + + + } + } + mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); buf_pool_mutex_exit(); @@ -1102,6 +1519,34 @@ buf_pool_free(void) buf_chunk_t* chunk; buf_chunk_t* chunks; + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(buf_pool->n_chunks == 1); + + chunk = buf_pool->chunks; + shm_info = chunk->mem; + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); + + /* validation the shared memory segment doesn't have unrecoverable contents. */ + /* Currently, validation became not needed */ + shm_info->reusable = TRUE; + + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); + + if (srv_fast_shutdown < 2) { + if (srv_buffer_pool_shm_checksum) { + shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + } else { + shm_info->checksum = BUF_NO_CHECKSUM_MAGIC; + } + shm_info->clean = TRUE; + } + + os_shm_free(chunk->mem, chunk->mem_size); + } else { chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; @@ -1110,10 +1555,13 @@ buf_pool_free(void) would fail at shutdown. */ os_mem_free_large(chunk->mem, chunk->mem_size); } + } mem_free(buf_pool->chunks); hash_table_free(buf_pool->page_hash); + if (!srv_buffer_pool_shm_key) { hash_table_free(buf_pool->zip_hash); + } mem_free(buf_pool); buf_pool = NULL; } @@ -1308,6 +1756,11 @@ try_again: //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); + if (srv_buffer_pool_shm_key) { + /* Cannot support shrink */ + goto func_done; + } + shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1551,6 +2004,11 @@ void buf_pool_resize(void) /*=================*/ { + if (srv_buffer_pool_shm_key) { + /* Cannot support resize */ + return; + } + //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -2196,7 +2654,7 @@ buf_page_get_gen( ulint fix_type; ibool must_read; ulint retries = 0; - mutex_t* block_mutex; + mutex_t* block_mutex= 0; trx_t* trx = NULL; ulint sec; ulint ms; @@ -3542,7 +4000,7 @@ corrupt: "InnoDB: TABLE to scan your" " table for corruption.\n" "InnoDB: See also " - REFMAN "forcing-recovery.html\n" + REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space) diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 0ef91137aef..5db6f816aab 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -1811,9 +1811,9 @@ buf_flush_validate_low(void) ut_a(om > 0); if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + buf_page_t* rpage; ut_a(rnode); - buf_page_t* rpage = *rbt_value(buf_page_t*, - rnode); + rpage = *rbt_value(buf_page_t*, rnode); ut_a(rpage); ut_a(rpage == bpage); rnode = rbt_next(buf_pool->flush_rbt, rnode); diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 94828940fd4..92a645ef2f5 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -2093,6 +2093,7 @@ buf_LRU_stat_update(void) /*=====================*/ { buf_LRU_stat_t* item; + buf_LRU_stat_t cur_stat; /* If we haven't started eviction yet then don't update stats. */ if (buf_pool->freed_page_clock == 0) { @@ -2107,12 +2108,19 @@ buf_LRU_stat_update(void) buf_LRU_stat_arr_ind++; buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL; - /* Add the current value and subtract the obsolete entry. */ - buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io; - buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip; + /* Add the current value and subtract the obsolete entry. + Since buf_LRU_stat_cur is not protected by any mutex, + it can be changing between adding to buf_LRU_stat_sum + and copying to item. Assign it to local variables to make + sure the same value assign to the buf_LRU_stat_sum + and item */ + cur_stat = buf_LRU_stat_cur; + + buf_LRU_stat_sum.io += cur_stat.io - item->io; + buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip; /* Put current entry in the array. */ - memcpy(item, &buf_LRU_stat_cur, sizeof *item); + memcpy(item, &cur_stat, sizeof *item); //buf_pool_mutex_exit(); mutex_exit(&buf_pool_mutex); @@ -2265,7 +2273,7 @@ buf_LRU_file_restore(void) ulint req = 0; ibool terminated = FALSE; ibool ret = FALSE; - dump_record_t* records= 0; + dump_record_t* records = NULL; ulint size; ulint size_high; ulint length; diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index 992a7000eda..c63ff57be97 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -894,7 +894,7 @@ dict_truncate_index_tree( appropriate field in the SYS_INDEXES record: this mini-transaction marks the B-tree totally truncated */ - btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, mtr); + btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr); btr_free_root(space, zip_size, root_page_no, mtr); create: diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 40380067bd2..638ab55703b 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -81,9 +81,18 @@ UNIV_INTERN rw_lock_t dict_operation_lock; /** Identifies generated InnoDB foreign key names */ static char dict_ibfk[] = "_ibfk_"; -/** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ -#define DICT_INDEX_STAT_MUTEX_SIZE 32 -static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; +/** array of rw locks protecting +dict_table_t::stat_initialized +dict_table_t::stat_n_rows (*) +dict_table_t::stat_clustered_index_size +dict_table_t::stat_sum_of_other_index_sizes +dict_table_t::stat_modified_counter (*) +dict_table_t::indexes*::stat_n_diff_key_vals[] +dict_table_t::indexes*::stat_index_size +dict_table_t::indexes*::stat_n_leaf_pages +(*) those are not always protected for performance reasons */ +#define DICT_TABLE_STATS_LATCHES_SIZE 64 +static rw_lock_t dict_table_stats_latches[DICT_TABLE_STATS_LATCHES_SIZE]; /*******************************************************************//** Tries to find column names for the index and sets the col field of the @@ -244,43 +253,65 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } -/** Get the mutex that protects index->stat_n_diff_key_vals[] */ -#define GET_INDEX_STAT_MUTEX(index) \ - (&dict_index_stat_mutex[ut_fold_dulint(index->id) \ - % DICT_INDEX_STAT_MUTEX_SIZE]) +/** Get the latch that protects the stats of a given table */ +#define GET_TABLE_STATS_LATCH(table) \ + (&dict_table_stats_latches[ut_fold_dulint(table->id) \ + % DICT_TABLE_STATS_LATCHES_SIZE]) /**********************************************************************//** -Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. -index->id is used to pick the right mutex and it should not change -before dict_index_stat_mutex_exit() is called on this index. */ +Lock the appropriate latch to protect a given table's statistics. +table->id is used to pick the corresponding latch from a global array of +latches. */ UNIV_INTERN void -dict_index_stat_mutex_enter( -/*========================*/ - const dict_index_t* index) /*!< in: index */ +dict_table_stats_lock( +/*==================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or + RW_X_LATCH */ { - ut_ad(index != NULL); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - ut_ad(!index->to_be_dropped); + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - mutex_enter(GET_INDEX_STAT_MUTEX(index)); + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_lock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_X_LATCH: + rw_lock_x_lock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } } /**********************************************************************//** -Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */ +Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void -dict_index_stat_mutex_exit( -/*=======================*/ - const dict_index_t* index) /*!< in: index */ +dict_table_stats_unlock( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or + RW_X_LATCH */ { - ut_ad(index != NULL); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - ut_ad(!index->to_be_dropped); + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - mutex_exit(GET_INDEX_STAT_MUTEX(index)); + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_unlock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_X_LATCH: + rw_lock_x_unlock(GET_TABLE_STATS_LATCH(table)); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } } /********************************************************************//** @@ -671,8 +702,8 @@ dict_init(void) mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH); - for (i = 0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { - mutex_create(&dict_index_stat_mutex[i], SYNC_INDEX_TREE); + for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { + rw_lock_create(&dict_table_stats_latches[i], SYNC_INDEX_TREE); } } @@ -704,13 +735,12 @@ dict_table_get( mutex_exit(&(dict_sys->mutex)); - if (table != NULL) { - if (!table->stat_initialized && !table->is_corrupt) { - /* If table->ibd_file_missing == TRUE, this will - print an error message and return without doing - anything. */ - dict_update_statistics(table, FALSE); - } + if (table != NULL && !table->is_corrupt) { + /* If table->ibd_file_missing == TRUE, this will + print an error message and return without doing + anything. */ + dict_update_statistics(table, TRUE /* only update stats + if they have not been initialized */, FALSE); } return(table); @@ -907,7 +937,7 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - char old_name[MAX_TABLE_NAME_LEN + 1]; + char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -919,7 +949,7 @@ dict_table_rename_in_cache( ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: too long table name: '%s', " "max length is %d\n", table->name, - MAX_TABLE_NAME_LEN); + MAX_FULL_NAME_LEN); ut_error; } @@ -969,11 +999,11 @@ dict_table_rename_in_cache( ut_fold_string(old_name), table); if (strlen(new_name) > strlen(table->name)) { - /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid memory fragmentation, we assume a repeated calls of ut_realloc() with the same size do not cause fragmentation */ - ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); - table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN); + table->name = ut_realloc(table->name, MAX_FULL_NAME_LEN + 1); } memcpy(table->name, new_name, strlen(new_name) + 1); @@ -2726,7 +2756,7 @@ dict_scan_to( quote = '\0'; } else if (quote) { /* Within quotes: do nothing. */ - } else if (*ptr == '`' || *ptr == '"') { + } else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') { /* Starting quote: remember the quote character. */ quote = *ptr; } else { @@ -4366,11 +4396,9 @@ next_rec: btr_pcur_close(&pcur); mtr_commit(&mtr); - dict_index_stat_mutex_enter(index); for (i = 0; i <= n_cols; i++) { index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; } - dict_index_stat_mutex_exit(index); } /*===========================================*/ @@ -4424,11 +4452,9 @@ dict_store_statistics( n_cols = dict_index_get_n_unique(index); stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); - dict_index_stat_mutex_enter(index); for (i = 0; i <= n_cols; i++) { stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; } - dict_index_stat_mutex_exit(index); sys_stats = dict_sys->sys_stats; sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); @@ -4504,12 +4530,13 @@ Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN void -dict_update_statistics_low( -/*=======================*/ +dict_update_statistics( +/*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex __attribute__((unused)), - /*!< in: TRUE if the caller has the - dictionary mutex */ + ibool only_calc_if_missing_stats, /*!< in: only + update/recalc the stats if they have + not been initialized yet, otherwise + do nothing */ ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; @@ -4528,6 +4555,8 @@ dict_update_statistics_low( } if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) { + dict_table_stats_lock(table, RW_X_LATCH); + /* reload statistics from SYS_STATS table */ if (dict_reload_statistics(table, &sum_of_index_sizes)) { /* success */ @@ -4537,6 +4566,8 @@ dict_update_statistics_low( #endif goto end; } + + dict_table_stats_unlock(table, RW_X_LATCH); } #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", @@ -4555,6 +4586,13 @@ dict_update_statistics_low( return; } + dict_table_stats_lock(table, RW_X_LATCH); + + if (only_calc_if_missing_stats && table->stat_initialized) { + dict_table_stats_unlock(table, RW_X_LATCH); + return; + } + do { if (table->is_corrupt) { ut_a(srv_pass_corrupt_table); @@ -4609,13 +4647,9 @@ dict_update_statistics_low( end: index = dict_table_get_first_index(table); - dict_index_stat_mutex_enter(index); - table->stat_n_rows = index->stat_n_diff_key_vals[ dict_index_get_n_unique(index)]; - dict_index_stat_mutex_exit(index); - table->stat_clustered_index_size = index->stat_index_size; table->stat_sum_of_other_index_sizes = sum_of_index_sizes @@ -4624,19 +4658,8 @@ end: table->stat_initialized = TRUE; table->stat_modified_counter = 0; -} -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ -UNIV_INTERN -void -dict_update_statistics( -/*===================*/ - dict_table_t* table, /*!< in/out: table */ - ibool sync) -{ - dict_update_statistics_low(table, FALSE, sync); + dict_table_stats_unlock(table, RW_X_LATCH); } /**********************************************************************//** @@ -4716,8 +4739,9 @@ dict_table_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); - if (srv_stats_auto_update) - dict_update_statistics_low(table, TRUE, FALSE); + dict_update_statistics(table, FALSE /* update even if initialized */, FALSE); + + dict_table_stats_lock(table, RW_S_LATCH); fprintf(stderr, "--------------------------------------\n" @@ -4746,6 +4770,8 @@ dict_table_print_low( index = UT_LIST_GET_NEXT(indexes, index); } + dict_table_stats_unlock(table, RW_S_LATCH); + foreign = UT_LIST_GET_FIRST(table->foreign_list); while (foreign != NULL) { @@ -4794,8 +4820,6 @@ dict_index_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); - dict_index_stat_mutex_enter(index); - if (index->n_user_defined_cols > 0) { n_vals = index->stat_n_diff_key_vals[ index->n_user_defined_cols]; @@ -4803,8 +4827,6 @@ dict_index_print_low( n_vals = index->stat_n_diff_key_vals[1]; } - dict_index_stat_mutex_exit(index); - fprintf(stderr, " INDEX: name %s, id %lu %lu, fields %lu/%lu," " uniq %lu, type %lu\n" @@ -5148,7 +5170,8 @@ void dict_table_replace_index_in_foreign_list( /*=====================================*/ dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in: index to be replaced */ + dict_index_t* index, /*!< in: index to be replaced */ + const trx_t* trx) /*!< in: transaction handle */ { dict_foreign_t* foreign; @@ -5159,7 +5182,13 @@ dict_table_replace_index_in_foreign_list( if (foreign->foreign_index == index) { dict_index_t* new_index = dict_foreign_find_equiv_index(foreign); - ut_a(new_index); + + /* There must exist an alternative index if + check_foreigns (FOREIGN_KEY_CHECKS) is on, + since ha_innobase::prepare_drop_index had done + the check before we reach here. */ + + ut_a(new_index || !trx->check_foreigns); foreign->foreign_index = new_index; } @@ -5293,8 +5322,8 @@ dict_close(void) mem_free(dict_sys); dict_sys = NULL; - for (i = 0; i < DICT_INDEX_STAT_MUTEX_SIZE; i++) { - mutex_free(&dict_index_stat_mutex[i]); + for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { + rw_lock_free(&dict_table_stats_latches[i]); } } diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 19de5bf8264..edd77e2530f 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -222,8 +222,9 @@ loop: /* The table definition was corrupt if there is no index */ - if (srv_stats_auto_update && dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE, FALSE); + if (dict_table_get_first_index(table)) { + dict_update_statistics(table, FALSE /* update + even if initialized */, FALSE); } dict_table_print_low(table); diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index a8520187013..dd15576edf7 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -336,14 +336,15 @@ fil_get_space_id_for_table( /*******************************************************************//** Frees a space object from the tablespace memory cache. Closes the files in the chain but does not delete them. There must not be any pending i/o's or -flushes on the files. */ +flushes on the files. +@return TRUE on success */ static ibool fil_space_free( /*===========*/ - /* out: TRUE if success */ - ulint id, /* in: space id */ - ibool own_mutex);/* in: TRUE if own system->mutex */ + ulint id, /* in: space id */ + ibool x_latched); /* in: TRUE if caller has space->latch + in X mode */ /********************************************************************//** Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when @@ -617,7 +618,7 @@ fil_node_create( UT_LIST_ADD_LAST(chain, space->chain, node); - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { + if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { fil_system->max_assigned_id = id; } @@ -1130,6 +1131,7 @@ try_again: space = fil_space_get_by_name(name); if (UNIV_LIKELY_NULL(space)) { + ibool success; ulint namesake_id; ut_print_timestamp(stderr); @@ -1168,9 +1170,10 @@ try_again: namesake_id = space->id; - mutex_exit(&fil_system->mutex); + success = fil_space_free(namesake_id, FALSE); + ut_a(success); - fil_space_free(namesake_id, FALSE); + mutex_exit(&fil_system->mutex); goto try_again; } @@ -1205,6 +1208,7 @@ try_again: space->mark = FALSE; if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID) && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { if (!fil_system->space_id_reuse_warned) { fil_system->space_id_reuse_warned = TRUE; @@ -1290,7 +1294,7 @@ fil_assign_new_space_id( (ulong) SRV_LOG_SPACE_FIRST_ID); } - success = (id < SRV_LOG_SPACE_FIRST_ID); + success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID); if (success) { *space_id = fil_system->max_assigned_id = id; @@ -1323,15 +1327,14 @@ fil_space_free( /*===========*/ /* out: TRUE if success */ ulint id, /* in: space id */ - ibool own_mutex) /* in: TRUE if own system->mutex */ + ibool x_latched) /* in: TRUE if caller has space->latch + in X mode */ { fil_space_t* space; fil_space_t* namespace; fil_node_t* fil_node; - if (!own_mutex) { - mutex_enter(&fil_system->mutex); - } + ut_ad(mutex_own(&fil_system->mutex)); space = fil_space_get_by_id(id); @@ -1342,8 +1345,6 @@ fil_space_free( " from the cache but\n" "InnoDB: it is not there.\n", (ulong) id); - mutex_exit(&fil_system->mutex); - return(FALSE); } @@ -1378,8 +1379,8 @@ fil_space_free( ut_a(0 == UT_LIST_GET_LEN(space->chain)); - if (!own_mutex) { - mutex_exit(&fil_system->mutex); + if (x_latched) { + rw_lock_x_unlock(&space->latch); } rw_lock_free(&(space->latch)); @@ -1626,25 +1627,27 @@ fil_close_all_files(void) /*=====================*/ { fil_space_t* space; - fil_node_t* node; mutex_enter(&fil_system->mutex); space = UT_LIST_GET_FIRST(fil_system->space_list); while (space != NULL) { + fil_node_t* node; fil_space_t* prev_space = space; - node = UT_LIST_GET_FIRST(space->chain); + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { - while (node != NULL) { if (node->open) { fil_node_close_file(node, fil_system); } - node = UT_LIST_GET_NEXT(chain, node); } + space = UT_LIST_GET_NEXT(space_list, space); - fil_space_free(prev_space->id, TRUE); + + fil_space_free(prev_space->id, FALSE); } mutex_exit(&fil_system->mutex); @@ -1666,6 +1669,10 @@ fil_set_max_space_id_if_bigger( ut_error; } + if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) { + return; + } + mutex_enter(&fil_system->mutex); if (fil_system->max_assigned_id < max_id) { @@ -1684,6 +1691,7 @@ static ulint fil_write_lsn_and_arch_no_to_file( /*==============================*/ + ulint space_id, ulint sum_of_sizes, /*!< in: combined size of previous files in space, in database pages */ ib_uint64_t lsn, /*!< in: lsn to write */ @@ -1693,14 +1701,16 @@ fil_write_lsn_and_arch_no_to_file( byte* buf1; byte* buf; + ut_a(trx_sys_sys_space(space_id)); + buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); buf = ut_align(buf1, UNIV_PAGE_SIZE); - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mem_free(buf1); @@ -1736,7 +1746,7 @@ fil_write_flushed_lsn_to_data_files( always open. */ if (space->purpose == FIL_TABLESPACE - && space->id == 0) { + && trx_sys_sys_space(space->id)) { sum_of_sizes = 0; node = UT_LIST_GET_FIRST(space->chain); @@ -1744,7 +1754,7 @@ fil_write_flushed_lsn_to_data_files( mutex_exit(&fil_system->mutex); err = fil_write_lsn_and_arch_no_to_file( - sum_of_sizes, lsn, arch_log_no); + space->id, sum_of_sizes, lsn, arch_log_no); if (err != DB_SUCCESS) { return(err); @@ -2264,6 +2274,19 @@ try_again: path = mem_strdup(space->name); mutex_exit(&fil_system->mutex); + + /* Important: We rely on the data dictionary mutex to ensure + that a race is not possible here. It should serialize the tablespace + drop/free. We acquire an X latch only to avoid a race condition + when accessing the tablespace instance via: + + fsp_get_available_space_in_free_extents(). + + There our main motivation is to reduce the contention on the + dictionary mutex. */ + + rw_lock_x_lock(&space->latch); + #ifndef UNIV_HOTBACKUP /* Invalidate in the buffer pool all pages belonging to the tablespace. Since we have set space->is_being_deleted = TRUE, readahead @@ -2276,7 +2299,11 @@ try_again: #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ - success = fil_space_free(id, FALSE); + mutex_enter(&fil_system->mutex); + + success = fil_space_free(id, TRUE); + + mutex_exit(&fil_system->mutex); if (success) { success = os_file_delete(path); @@ -2284,6 +2311,8 @@ try_again: if (!success) { success = os_file_delete_if_exists(path); } + } else { + rw_lock_x_unlock(&space->latch); } if (success) { @@ -2311,6 +2340,31 @@ try_again: return(FALSE); } +/*******************************************************************//** +Returns TRUE if a single-table tablespace is being deleted. +@return TRUE if being deleted */ +UNIV_INTERN +ibool +fil_tablespace_is_being_deleted( +/*============================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ibool is_being_deleted; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + + is_being_deleted = space->is_being_deleted; + + mutex_exit(&fil_system->mutex); + + return(is_being_deleted); +} + #ifndef UNIV_HOTBACKUP /*******************************************************************//** Discards a single-table tablespace. The tablespace must be cached in the @@ -3313,7 +3367,7 @@ skip_info: } if (page_is_corrupt) { - fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE); + fprintf(stderr, " [errp:%ld]", (long) (offset / UNIV_PAGE_SIZE)); /* cannot treat corrupt page */ goto skip_write; @@ -5342,7 +5396,7 @@ fil_page_get_type( return(mach_read_from_2(page + FIL_PAGE_TYPE)); } -/******************************************************************** +/****************************************************************//** Initializes the tablespace memory cache. */ UNIV_INTERN void diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c index cd28186109f..44ebe6819b7 100644 --- a/storage/xtradb/fsp/fsp0fsp.c +++ b/storage/xtradb/fsp/fsp0fsp.c @@ -3126,13 +3126,63 @@ fsp_get_available_space_in_free_extents( ut_ad(!mutex_own(&kernel_mutex)); + /* The convoluted mutex acquire is to overcome latching order + issues: The problem is that the fil_mutex is at a lower level + than the tablespace latch and the buffer pool mutex. We have to + first prevent any operations on the file system by acquiring the + dictionary mutex. Then acquire the tablespace latch to obey the + latching order and then release the dictionary mutex. That way we + ensure that the tablespace instance can't be freed while we are + examining its contents (see fil_space_free()). + + However, there is one further complication, we release the fil_mutex + when we need to invalidate the the pages in the buffer pool and we + reacquire the fil_mutex when deleting and freeing the tablespace + instance in fil0fil.c. Here we need to account for that situation + too. */ + + mutex_enter(&dict_sys->mutex); + + /* At this stage there is no guarantee that the tablespace even + exists in the cache. */ + + if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) { + + mutex_exit(&dict_sys->mutex); + + return(ULLINT_UNDEFINED); + } + mtr_start(&mtr); latch = fil_space_get_latch(space, &flags); + + /* This should ensure that the tablespace instance can't be freed + by another thread. However, the tablespace pages can still be freed + from the buffer pool. We need to check for that again. */ + zip_size = dict_table_flags_to_zip_size(flags); mtr_x_lock(latch, &mtr); + mutex_exit(&dict_sys->mutex); + + /* At this point it is possible for the tablespace to be deleted and + its pages removed from the buffer pool. We need to check for that + situation. However, the tablespace instance can't be deleted because + our latching above should ensure that. */ + + if (fil_tablespace_is_being_deleted(space)) { + + mtr_commit(&mtr); + + return(ULLINT_UNDEFINED); + } + + /* From here on even if the user has dropped the tablespace, the + pages _must_ still exist in the buffer pool and the tablespace + instance _must_ be in the file system hash table. */ + space_header = fsp_get_space_header(space, zip_size, &mtr); size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr); @@ -3305,7 +3355,7 @@ fseg_free_page_low( "InnoDB: database!\n", (ulong) page); crash: fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); ut_error; } diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 30c304dafcd..0f4fc55d895 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -128,6 +128,70 @@ hash_create( } /*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + return(offset + sizeof(hash_cell_t) * prime); +} + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((byte*)table) + offset); + table->n_cells = prime; +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + table->adaptive = FALSE; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + table->n_mutexes = 0; + table->mutexes = NULL; + table->heaps = NULL; + table->heap = NULL; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); + + /* Initialize the cell array */ + hash_table_clear(table); +} + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table) +{ + ulint offset; + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((byte*)table) + offset); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); +} + +/*************************************************************//** Frees a hash table. */ UNIV_INTERN void diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index ee060f61c75..23eb6ab67e4 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -149,6 +149,7 @@ static ulong innobase_read_io_threads; static ulong innobase_write_io_threads; static ulong innobase_page_size; +static ulong innobase_log_block_size; static my_bool innobase_thread_concurrency_timer_based; static long long innobase_buffer_pool_size, innobase_log_file_size; @@ -194,6 +195,7 @@ static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_use_sys_stats_table = FALSE; +static my_bool innobase_buffer_pool_shm_checksum = TRUE; static char* internal_innobase_data_file_path = NULL; @@ -2114,6 +2116,32 @@ innobase_init( goto error; } + srv_log_block_size = 0; + if (innobase_log_block_size != (1 << 9)) { /*!=512*/ + uint n_shift; + + fprintf(stderr, + "InnoDB: Warning: innodb_log_block_size has been changed from default value 512. (###EXPERIMENTAL### operation)\n"); + for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) { + if (innobase_log_block_size == ((ulong)1 << n_shift)) { + srv_log_block_size = (1 << n_shift); + fprintf(stderr, + "InnoDB: The log block size is set to %lu.\n", + srv_log_block_size); + break; + } + } + } else { + srv_log_block_size = 512; + } + + if (!srv_log_block_size) { + fprintf(stderr, + "InnoDB: Error: %lu is not valid value for innodb_log_block_size.\n", + innobase_log_block_size); + goto error; + } + #ifndef MYSQL_SERVER innodb_overwrite_relay_log_info = FALSE; #endif @@ -2417,7 +2445,7 @@ innobase_change_buffering_inited_ok: srv_n_write_io_threads = (ulint) innobase_write_io_threads; srv_read_ahead &= 3; - srv_adaptive_checkpoint %= 3; + srv_adaptive_checkpoint %= 4; srv_force_recovery = (ulint) innobase_force_recovery; @@ -2426,6 +2454,7 @@ innobase_change_buffering_inited_ok: srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; srv_use_checksums = (ibool) innobase_use_checksums; srv_fast_checksum = (ibool) innobase_fast_checksum; + srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum; #ifdef HAVE_LARGE_PAGES if ((os_use_large_pages = (ibool) my_use_large_pages)) @@ -3871,6 +3900,7 @@ retry: of length ref_length! */ if (!row_table_got_default_clust_index(ib_table)) { + prebuilt->clust_index_was_generated = FALSE; if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { @@ -6399,6 +6429,16 @@ create_table_def( DBUG_RETURN(HA_ERR_GENERIC); } + /* MySQL does the name length check. But we do additional check + on the name length here */ + if (strlen(table_name) > MAX_FULL_NAME_LEN) { + push_warning_printf( + (THD*) trx->mysql_thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_TABLE_NAME, + "InnoDB: Table Name or Database Name is too long"); + DBUG_RETURN(ER_TABLE_NAME); + } + n_cols = form->s->fields; /* We pass 0 as the space id, and determine at a lower level the space @@ -6683,6 +6723,60 @@ create_clustered_index_when_no_primary( } /*****************************************************************//** +Return a display name for the row format +@return row format name */ +UNIV_INTERN +const char* +get_row_format_name( +/*================*/ + enum row_type row_format) /*!< in: Row Format */ +{ + switch (row_format) { + case ROW_TYPE_COMPACT: + return("COMPACT"); + case ROW_TYPE_COMPRESSED: + return("COMPRESSED"); + case ROW_TYPE_DYNAMIC: + return("DYNAMIC"); + case ROW_TYPE_REDUNDANT: + return("REDUNDANT"); + case ROW_TYPE_DEFAULT: + return("DEFAULT"); + case ROW_TYPE_FIXED: + return("FIXED"); + case ROW_TYPE_PAGE: + case ROW_TYPE_NOT_USED: + break; + } + return("NOT USED"); +} + +/** If file-per-table is missing, issue warning and set ret false */ +#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE \ + if (!srv_file_per_table) { \ + push_warning_printf( \ + thd, MYSQL_ERROR::WARN_LEVEL_WARN, \ + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: ROW_FORMAT=%s requires" \ + " innodb_file_per_table.", \ + get_row_format_name(row_format)); \ + ret = FALSE; \ + } + +/** If file-format is Antelope, issue warning and set ret false */ +#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \ + if (srv_file_format < DICT_TF_FORMAT_ZIP) { \ + push_warning_printf( \ + thd, MYSQL_ERROR::WARN_LEVEL_WARN, \ + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: ROW_FORMAT=%s requires" \ + " innodb_file_format > Antelope.", \ + get_row_format_name(row_format)); \ + ret = FALSE; \ + } + + +/*****************************************************************//** Validates the create options. We may build on this function in future. For now, it checks two specifiers: KEY_BLOCK_SIZE and ROW_FORMAT @@ -6697,9 +6791,9 @@ create_options_are_valid( columns and indexes */ HA_CREATE_INFO* create_info) /*!< in: create info. */ { - ibool kbs_specified = FALSE; + ibool kbs_specified = FALSE; ibool ret = TRUE; - + enum row_type row_format = form->s->row_type; ut_ad(thd != NULL); @@ -6711,10 +6805,8 @@ create_options_are_valid( ut_ad(form != NULL); ut_ad(create_info != NULL); - /* First check if KEY_BLOCK_SIZE was specified. */ - if (create_info->key_block_size - || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) { - + /* First check if a non-zero KEY_BLOCK_SIZE was specified. */ + if (create_info->key_block_size) { kbs_specified = TRUE; switch (create_info->key_block_size) { case 1: @@ -6722,127 +6814,71 @@ create_options_are_valid( case 4: case 8: case 16: - /* Valid value. */ - break; - default: - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid" - " KEY_BLOCK_SIZE = %lu." - " Valid values are" - " [1, 2, 4, 8, 16]", - create_info->key_block_size); - ret = FALSE; - } - } - - /* If KEY_BLOCK_SIZE was specified, check for its - dependencies. */ - if (kbs_specified && !srv_file_per_table) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); - ret = FALSE; - } - - if (kbs_specified && srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_format >" - " Antelope."); - ret = FALSE; - } - - /* Now check for ROW_FORMAT specifier. */ - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - /* These two ROW_FORMATs require srv_file_per_table - and srv_file_format > Antelope */ + /* Valid KEY_BLOCK_SIZE, check its dependencies. */ if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - ret = FALSE; + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_per_table."); + ret = FALSE; } - if (srv_file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - ret = FALSE; - } - - /* Cannot specify KEY_BLOCK_SIZE with - ROW_FORMAT = DYNAMIC. - However, we do allow COMPRESSED to be - specified with KEY_BLOCK_SIZE. */ - if (kbs_specified - && form->s->row_type == ROW_TYPE_DYNAMIC) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: cannot specify" - " ROW_FORMAT = DYNAMIC with" - " KEY_BLOCK_SIZE."); + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_format > Antelope."); ret = FALSE; } - break; - - case ROW_TYPE_REDUNDANT: - case ROW_TYPE_COMPACT: - case ROW_TYPE_DEFAULT: - /* Default is COMPACT. */ - row_format_name - = form->s->row_type == ROW_TYPE_REDUNDANT - ? "REDUNDANT" - : "COMPACT"; - - /* Cannot specify KEY_BLOCK_SIZE with these - format specifiers. */ - if (kbs_specified) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: cannot specify" - " ROW_FORMAT = %s with" - " KEY_BLOCK_SIZE.", - row_format_name); - ret = FALSE; - } - - break; - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid ROW_FORMAT specifier."); + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: invalid KEY_BLOCK_SIZE = %lu." + " Valid values are [1, 2, 4, 8, 16]", + create_info->key_block_size); ret = FALSE; - + break; } } + + /* Check for a valid Innodb ROW_FORMAT specifier and + other incompatibilities. */ + switch (row_format) { + case ROW_TYPE_COMPRESSED: + CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE; + CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; + break; + case ROW_TYPE_DYNAMIC: + CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE; + CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; + /* fall through since dynamic also shuns KBS */ + case ROW_TYPE_COMPACT: + case ROW_TYPE_REDUNDANT: + if (kbs_specified) { + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: cannot specify ROW_FORMAT = %s" + " with KEY_BLOCK_SIZE.", + get_row_format_name(row_format)); + ret = FALSE; + } + break; + case ROW_TYPE_DEFAULT: + break; + case ROW_TYPE_FIXED: + case ROW_TYPE_PAGE: + case ROW_TYPE_NOT_USED: + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, \ + "InnoDB: invalid ROW_FORMAT specifier."); + ret = FALSE; + break; + } return(ret); } @@ -6891,7 +6927,7 @@ ha_innobase::create( const ulint file_format = srv_file_format; const char* stmt; size_t stmt_len; - enum row_type row_type; + enum row_type row_format; DBUG_ENTER("ha_innobase::create"); @@ -6967,8 +7003,7 @@ ha_innobase::create( goto cleanup; } - if (create_info->key_block_size - || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) { + if (create_info->key_block_size) { /* Determine the page_zip.ssize corresponding to the requested page size (key_block_size) in kilobytes. */ @@ -6989,40 +7024,40 @@ ha_innobase::create( } if (!srv_file_per_table) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_per_table."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_per_table."); flags = 0; } if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE" - " requires innodb_file_format >" - " Antelope."); + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE requires" + " innodb_file_format > Antelope."); flags = 0; } if (!flags) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring" - " KEY_BLOCK_SIZE=%lu.", - create_info->key_block_size); + push_warning_printf( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.", + create_info->key_block_size); } } - row_type = form->s->row_type; + row_format = form->s->row_type; if (flags) { - /* if KEY_BLOCK_SIZE was specified on this statement and - ROW_FORMAT was not, automatically change ROW_FORMAT to COMPRESSED.*/ - if ( (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) - && !(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { - row_type = ROW_TYPE_COMPRESSED; - } else if (row_type != ROW_TYPE_COMPRESSED) { + /* if ROW_FORMAT is set to default, + automatically change it to COMPRESSED.*/ + if (row_format == ROW_TYPE_DEFAULT) { + row_format = ROW_TYPE_COMPRESSED; + } else if (row_format != ROW_TYPE_COMPRESSED) { /* ROW_FORMAT other than COMPRESSED ignores KEY_BLOCK_SIZE. It does not make sense to reject conflicting @@ -7030,8 +7065,7 @@ ha_innobase::create( such combinations can be obtained with ALTER TABLE anyway. */ push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" " unless ROW_FORMAT=COMPRESSED.", @@ -7040,7 +7074,7 @@ ha_innobase::create( } } else { /* flags == 0 means no KEY_BLOCK_SIZE.*/ - if (row_type == ROW_TYPE_COMPRESSED) { + if (row_format == ROW_TYPE_COMPRESSED) { /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE implies half the maximum KEY_BLOCK_SIZE. */ @@ -7055,49 +7089,40 @@ ha_innobase::create( } } - switch (row_type) { - const char* row_format_name; + switch (row_format) { case ROW_TYPE_REDUNDANT: break; case ROW_TYPE_COMPRESSED: case ROW_TYPE_DYNAMIC: - row_format_name - = row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - if (!srv_file_per_table) { push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_per_table.", + get_row_format_name(row_format)); } else if (file_format < DICT_TF_FORMAT_ZIP) { push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, + thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_format > Antelope.", + get_row_format_name(row_format)); } else { flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } /* fall through */ case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_PAGE: + push_warning( + thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); case ROW_TYPE_DEFAULT: case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; @@ -7211,23 +7236,25 @@ ha_innobase::create( setup at this stage and so we use thd. */ /* We need to copy the AUTOINC value from the old table if - this is an ALTER TABLE or CREATE INDEX because CREATE INDEX - does a table copy too. */ + this is an ALTER|OPTIMIZE TABLE or CREATE INDEX because CREATE INDEX + does a table copy too. If query was one of : + + CREATE TABLE ...AUTO_INCREMENT = x; or + ALTER TABLE...AUTO_INCREMENT = x; or + OPTIMIZE TABLE t; or + CREATE INDEX x on t(...); + + Find out a table definition from the dictionary and get + the current value of the auto increment field. Set a new + value to the auto increment field if the value is greater + than the maximum value in the column. */ if (((create_info->used_fields & HA_CREATE_USED_AUTO) || thd_sql_command(thd) == SQLCOM_ALTER_TABLE + || thd_sql_command(thd) == SQLCOM_OPTIMIZE || thd_sql_command(thd) == SQLCOM_CREATE_INDEX) && create_info->auto_increment_value > 0) { - /* Query was one of : - CREATE TABLE ...AUTO_INCREMENT = x; or - ALTER TABLE...AUTO_INCREMENT = x; or - CREATE INDEX x on t(...); - Find out a table definition from the dictionary and get - the current value of the auto increment field. Set a new - value to the auto increment field if the value is greater - than the maximum value in the column. */ - auto_inc_value = create_info->auto_increment_value; dict_table_autoinc_lock(innobase_table); @@ -7280,6 +7307,14 @@ ha_innobase::discard_or_import_tablespace( err = row_discard_tablespace_for_mysql(dict_table->name, trx); } else { err = row_import_tablespace_for_mysql(dict_table->name, trx); + + /* in expanded import mode re-initialize auto_increment again */ + if ((err == DB_SUCCESS) && srv_expand_import && + (table->found_next_number_field != NULL)) { + dict_table_autoinc_lock(dict_table); + innobase_initialize_autoinc(); + dict_table_autoinc_unlock(dict_table); + } } err = convert_error_code_to_mysql(err, dict_table->flags, NULL); @@ -7745,6 +7780,7 @@ ha_innobase::estimate_rows_upper_bound(void) dict_index_t* index; ulonglong estimate; ulonglong local_data_file_length; + ulint stat_n_leaf_pages; DBUG_ENTER("estimate_rows_upper_bound"); @@ -7764,10 +7800,12 @@ ha_innobase::estimate_rows_upper_bound(void) index = dict_table_get_first_index(prebuilt->table); - ut_a(index->stat_n_leaf_pages > 0); + stat_n_leaf_pages = index->stat_n_leaf_pages; + + ut_a(stat_n_leaf_pages > 0); local_data_file_length = - ((ulonglong) index->stat_n_leaf_pages) * UNIV_PAGE_SIZE; + ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; /* Calculate a minimum length for a clustered index record and from @@ -7970,13 +8008,12 @@ ha_innobase::info_low( ib_table = prebuilt->table; if (flag & HA_STATUS_TIME) { - if ((called_from_analyze || innobase_stats_on_metadata) - && !share->ib_table->is_corrupt) { + if ((called_from_analyze || innobase_stats_on_metadata) && !share->ib_table->is_corrupt) { /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) - && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { + && called_from_analyze) { /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ /* they need to be created. */ dict_index_t* index; @@ -7998,7 +8035,8 @@ ha_innobase::info_low( prebuilt->trx->op_info = "updating table statistics"; dict_update_statistics(ib_table, - (thd_sql_command(user_thd) == SQLCOM_ANALYZE)?TRUE:FALSE); + FALSE /* update even if stats + are initialized */, called_from_analyze); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8017,6 +8055,9 @@ ha_innobase::info_low( } if (flag & HA_STATUS_VARIABLE) { + + dict_table_stats_lock(ib_table, RW_S_LATCH); + n_rows = ib_table->stat_n_rows; /* Because we do not protect stat_n_rows by any mutex in a @@ -8066,12 +8107,14 @@ ha_innobase::info_low( ib_table->stat_sum_of_other_index_sizes) * UNIV_PAGE_SIZE; + dict_table_stats_unlock(ib_table, RW_S_LATCH); + /* Since fsp_get_available_space_in_free_extents() is acquiring latches inside InnoDB, we do not call it if we are asked by MySQL to avoid locking. Another reason to avoid the call is that it uses quite a lot of CPU. See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK) { + if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock) { /* We do not update delete_length if no locking is requested so the "old" value can remain. delete_length is initialized to 0 in @@ -8081,21 +8124,13 @@ ha_innobase::info_low( /* Avoid accessing the tablespace if innodb_crash_recovery is set to a high value. */ stats.delete_length = 0; - } else if (srv_stats_update_need_lock) { - - /* lock the data dictionary to avoid races with - ibd_file_missing and tablespace_discarded */ - row_mysql_lock_data_dictionary(prebuilt->trx); + } else { + ullint avail_space; - /* ib_table->space must be an existent tablespace */ - if (!ib_table->ibd_file_missing - && !ib_table->tablespace_discarded) { - - stats.delete_length = - fsp_get_available_space_in_free_extents( - ib_table->space) * 1024; - } else { + avail_space = fsp_get_available_space_in_free_extents( + ib_table->space); + if (avail_space == ULLINT_UNDEFINED) { THD* thd; thd = ha_thd(); @@ -8112,9 +8147,9 @@ ha_innobase::info_low( ib_table->name); stats.delete_length = 0; + } else { + stats.delete_length = avail_space * 1024; } - - row_mysql_unlock_data_dictionary(prebuilt->trx); } stats.check_time = 0; @@ -8143,6 +8178,8 @@ ha_innobase::info_low( table->s->keys); } + dict_table_stats_lock(ib_table, RW_S_LATCH); + for (i = 0; i < table->s->keys; i++) { ulong j; /* We could get index quickly through internal @@ -8180,8 +8217,6 @@ ha_innobase::info_low( break; } - dict_index_stat_mutex_enter(index); - if (index->stat_n_diff_key_vals[j + 1] == 0) { rec_per_key = stats.records; @@ -8190,8 +8225,6 @@ ha_innobase::info_low( index->stat_n_diff_key_vals[j + 1]); } - dict_index_stat_mutex_exit(index); - /* Since MySQL seems to favor table scans too much over index searches, we pretend index selectivity is 2 times better than @@ -8208,6 +8241,8 @@ ha_innobase::info_low( (ulong) rec_per_key; } } + + dict_table_stats_unlock(ib_table, RW_S_LATCH); } if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { @@ -11187,6 +11222,11 @@ static MYSQL_SYSVAR_ULONG(page_size, innobase_page_size, "###EXPERIMENTAL###: The universal page size of the database. Changing for created database is not supported. Use on your own risk!", NULL, NULL, (1 << 14), (1 << 12), (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0); +static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!", + NULL, NULL, (1 << 9)/*512*/, (1 << 9)/*512*/, (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0); + static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, PLUGIN_VAR_READONLY, "The common part for InnoDB table spaces.", @@ -11405,6 +11445,16 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.", + NULL, NULL, 0, 0, INT_MAX32, 0); + +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable buffer_pool_shm checksum validation (enabled by default).", + NULL, NULL, TRUE); + static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, "Helps in performance tuning in heavily concurrent environments.", @@ -11615,17 +11665,19 @@ innodb_adaptive_checkpoint_update( void* var_ptr, const void* save) { - *(long *)var_ptr= (*(long *)save) % 3; + *(long *)var_ptr= (*(long *)save) % 4; } const char *adaptive_checkpoint_names[]= { "none", /* 0 */ "reflex", /* 1 */ "estimate", /* 2 */ + "keep_average", /* 3 */ /* For compatibility of the older patch */ - "0", /* 3 ("none" + 3) */ - "1", /* 4 ("reflex" + 3) */ - "2", /* 5 ("estimate" + 3) */ + "0", /* 4 ("none" + 3) */ + "1", /* 5 ("reflex" + 3) */ + "2", /* 6 ("estimate" + 3) */ + "3", /* 7 ("keep_average" + 4) */ NullS }; TYPELIB adaptive_checkpoint_typelib= @@ -11635,7 +11687,7 @@ TYPELIB adaptive_checkpoint_typelib= }; static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint, PLUGIN_VAR_RQCMDARG, - "Enable/Disable flushing along modified age. (none, reflex, [estimate])", + "Enable/Disable flushing along modified age. (none, reflex, [estimate], keep_average)", NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib); static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit, @@ -11674,9 +11726,12 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table, static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(page_size), + MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_shm_key), + MYSQL_SYSVAR(buffer_pool_shm_checksum), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 3a32ed9cf36..c54b4a17fd2 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1012,12 +1012,13 @@ ha_innobase::prepare_drop_index( index->to_be_dropped = TRUE; } - /* If FOREIGN_KEY_CHECK = 1 you may not drop an index defined + /* If FOREIGN_KEY_CHECKS = 1 you may not drop an index defined for a foreign key constraint because InnoDB requires that both - tables contain indexes for the constraint. Note that CREATE - INDEX id ON table does a CREATE INDEX and DROP INDEX, and we - can ignore here foreign keys because a new index for the - foreign key has already been created. + tables contain indexes for the constraint. Such index can + be dropped only if FOREIGN_KEY_CHECKS is set to 0. + Note that CREATE INDEX id ON table does a CREATE INDEX and + DROP INDEX, and we can ignore here foreign keys because a + new index for the foreign key has already been created. We check for the foreign key constraints after marking the candidate indexes for deletion, because when we check for an diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 0793daf7650..dead374e782 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -1451,8 +1451,16 @@ fill_innodb_trx_from_cache( row->trx_mysql_thread_id)); /* trx_query */ - OK(field_store_string(fields[IDX_TRX_QUERY], - row->trx_query)); + if (row->trx_query) { + /* store will do appropriate character set + conversion check */ + fields[IDX_TRX_QUERY]->store( + row->trx_query, strlen(row->trx_query), + row->trx_query_cs); + fields[IDX_TRX_QUERY]->set_notnull(); + } else { + fields[IDX_TRX_QUERY]->set_null(); + } OK(schema_table_store_record(thd, table)); } @@ -1709,16 +1717,7 @@ fill_innodb_locks_from_cache( for (i = 0; i < rows_num; i++) { i_s_locks_row_t* row; - - /* note that the decoded database or table name is - never expected to be longer than NAME_LEN; - NAME_LEN for database name - 2 for surrounding quotes around database name - NAME_LEN for table name - 2 for surrounding quotes around table name - 1 for the separating dot (.) - 9 for the #mysql50# prefix */ - char buf[2 * NAME_LEN + 14]; + char buf[MAX_FULL_NAME_LEN + 1]; const char* bufend; char lock_trx_id[TRX_ID_MAX_LEN + 1]; diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h index 38b97411340..e68f12d0fec 100644 --- a/storage/xtradb/handler/innodb_patch_info.h +++ b/storage/xtradb/handler/innodb_patch_info.h @@ -47,5 +47,6 @@ struct innodb_enhancement { {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h index 5e6a76c7d21..dde3a0bab69 100644 --- a/storage/xtradb/include/btr0btr.h +++ b/storage/xtradb/include/btr0btr.h @@ -94,26 +94,35 @@ btr_root_get( Gets a buffer page and declares its latching order level. */ UNIV_INLINE buf_block_t* -btr_block_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr); /*!< in: mtr */ +btr_block_get_func( +/*===============*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in/out: mtr */ + __attribute__((nonnull)); +/** Gets a buffer page and declares its latching order level. +@param space tablespace identifier +@param zip_size compressed page size in bytes or 0 for uncompressed pages +@param page_no page number +@param mode latch mode +@param mtr mini-transaction handle +@return the block descriptor */ +# define btr_block_get(space,zip_size,page_no,mode,mtr) \ + btr_block_get_func(space,zip_size,page_no,mode,__FILE__,__LINE__,mtr) +/** Gets a buffer page and declares its latching order level. +@param space tablespace identifier +@param zip_size compressed page size in bytes or 0 for uncompressed pages +@param page_no page number +@param mode latch mode +@param mtr mini-transaction handle +@return the uncompressed page frame */ +# define btr_page_get(space,zip_size,page_no,mode,mtr) \ + buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr)) #endif /* !UNIV_HOTBACKUP */ /**************************************************************//** Gets the index id field of a page. diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic index c9c38f3c3b3..e19c863300a 100644 --- a/storage/xtradb/include/btr0btr.ic +++ b/storage/xtradb/include/btr0btr.ic @@ -39,18 +39,21 @@ Created 6/2/1994 Heikki Tuuri Gets a buffer page and declares its latching order level. */ UNIV_INLINE buf_block_t* -btr_block_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr) /*!< in: mtr */ +btr_block_get_func( +/*===============*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in/out: mtr */ { buf_block_t* block; - block = buf_page_get(space, zip_size, page_no, mode, mtr); + block = buf_page_get_gen(space, zip_size, page_no, mode, + NULL, BUF_GET, file, line, mtr); ut_a(srv_pass_corrupt_table || block); @@ -63,23 +66,6 @@ btr_block_get( } /**************************************************************//** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - mtr_t* mtr) /*!< in: mtr */ -{ - return(buf_block_get_frame(btr_block_get(space, zip_size, page_no, - mode, mtr))); -} - -/**************************************************************//** Sets the index id field of a page. */ UNIV_INLINE void diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 7f6bff11f84..b477ad0320a 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -332,10 +332,14 @@ ulint btr_cur_del_mark_set_clust_rec( /*===========================*/ ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor */ + buf_block_t* block, /*!< in/out: buffer block of the record */ + rec_t* rec, /*!< in/out: record */ + dict_index_t* index, /*!< in: clustered index of the record */ + const ulint* offsets,/*!< in: rec_get_offsets(rec) */ ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr) /*!< in: mtr */ + __attribute__((nonnull)); /***********************************************************//** Sets a secondary index record delete mark to TRUE or FALSE. @return DB_SUCCESS, DB_LOCK_WAIT, or error number */ @@ -481,46 +485,28 @@ btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ /*******************************************************************//** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the +Marks non-updated off-page fields as disowned by this record. The ownership +must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed -to free the field. -@return TRUE if BLOB ownership was transferred */ +to free the field. */ UNIV_INTERN -ibool -btr_cur_mark_extern_inherited_fields( -/*=================================*/ +void +btr_cur_disown_inherited_fields( +/*============================*/ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed part will be updated, or NULL */ rec_t* rec, /*!< in/out: record in a clustered index */ dict_index_t* index, /*!< in: index of the page */ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ const upd_t* update, /*!< in: update vector */ - mtr_t* mtr); /*!< in: mtr, or NULL if not logged */ -/*******************************************************************//** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ -UNIV_INTERN -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted to clustered index */ - const upd_t* update); /*!< in: update vector */ -/*******************************************************************//** -Marks all extern fields in a dtuple as owned by the record. */ -UNIV_INTERN -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry); /*!< in/out: clustered index entry */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + __attribute__((nonnull(2,3,4,5,6))); /*******************************************************************//** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or error */ +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint btr_store_big_rec_extern_fields( diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index d6cc63bb6af..e06927f42f0 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #ifndef UNIV_HOTBACKUP #include "os0proc.h" +#include "srv0srv.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -1301,7 +1302,10 @@ struct buf_block_struct{ /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. */ /* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ + :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h index f9fce3f3657..cab8d790ac1 100644 --- a/storage/xtradb/include/data0data.h +++ b/storage/xtradb/include/data0data.h @@ -154,14 +154,19 @@ dfield_dup( dfield_t* field, /*!< in/out: data field */ mem_heap_t* heap); /*!< in: memory heap where allocated */ /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2);/*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ + __attribute__((nonnull, warn_unused_result)); /*********************************************************************//** Tests if dfield data length and content is equal to the given. @return TRUE if equal */ diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic index da79aa33702..74e0f7d09a0 100644 --- a/storage/xtradb/include/data0data.ic +++ b/storage/xtradb/include/data0data.ic @@ -229,20 +229,30 @@ dfield_dup( } /*********************************************************************//** -Tests if data length and content is equal for two dfields. -@return TRUE if equal */ +Tests if two data fields are equal. +If len==0, tests the data length and content for equality. +If len>0, tests the first len bytes of the content for equality. +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( /*==========================*/ const dfield_t* field1, /*!< in: field */ - const dfield_t* field2) /*!< in: field */ + const dfield_t* field2, /*!< in: field */ + ulint len) /*!< in: maximum prefix to compare, + or 0 to compare the whole field length */ { - ulint len; + ulint len2 = len; - len = field1->len; + if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) { + len = field1->len; + } + + if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) { + len2 = field2->len; + } - return(len == field2->len + return(len == len2 && (len == UNIV_SQL_NULL || !memcmp(field1->data, field2->data, len))); } diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index d18b3ecb1b0..7baacdd6055 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -318,7 +318,8 @@ void dict_table_replace_index_in_foreign_list( /*=====================================*/ dict_table_t* table, /*!< in/out: table */ - dict_index_t* index); /*!< in: index to be replaced */ + dict_index_t* index, /*!< in: index to be replaced */ + const trx_t* trx); /*!< in: transaction handle */ /*********************************************************************//** Checks if a index is defined for a foreign key constraint. Index is a part of a foreign key constraint if the index is referenced by foreign key @@ -1053,20 +1054,13 @@ Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN void -dict_update_statistics_low( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex, /*!< in: TRUE if the caller has the - dictionary mutex */ - ibool sync); -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ -UNIV_INTERN -void dict_update_statistics( /*===================*/ - dict_table_t* table, /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool only_calc_if_missing_stats, /*!< in: only + update/recalc the stats if they have + not been initialized yet, otherwise + do nothing */ ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ @@ -1081,21 +1075,25 @@ void dict_mutex_exit_for_mysql(void); /*===========================*/ /**********************************************************************//** -Lock the appropriate mutex to protect index->stat_n_diff_key_vals[]. -index->id is used to pick the right mutex and it should not change -before dict_index_stat_mutex_exit() is called on this index. */ +Lock the appropriate latch to protect a given table's statistics. +table->id is used to pick the corresponding latch from a global array of +latches. */ UNIV_INTERN void -dict_index_stat_mutex_enter( -/*========================*/ - const dict_index_t* index); /*!< in: index */ +dict_table_stats_lock( +/*==================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or + RW_X_LATCH */ /**********************************************************************//** -Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */ +Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void -dict_index_stat_mutex_exit( -/*=======================*/ - const dict_index_t* index); /*!< in: index */ +dict_table_stats_unlock( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or + RW_X_LATCH */ /********************************************************************//** Checks if the database name in two table names is the same. @return TRUE if same db name */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 07c80ef8609..a262ec8f9cc 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -737,6 +737,15 @@ fil_page_get_type( /*==============*/ const byte* page); /*!< in: file page */ +/*******************************************************************//** +Returns TRUE if a single-table tablespace is being deleted. +@return TRUE if being deleted */ +UNIV_INTERN +ibool +fil_tablespace_is_being_deleted( +/*============================*/ + ulint id); /*!< in: space id */ + /************************************************************************* Return local hash table informations. */ diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index b17c21a45ef..492c767acc4 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -49,6 +49,28 @@ hash_table_t* hash_create( /*========*/ ulint n); /*!< in: number of array cells */ + +/*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n); + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n); + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table); + #ifndef UNIV_HOTBACKUP /*************************************************************//** Creates a mutex array to protect a hash table. */ @@ -328,6 +350,33 @@ do {\ }\ } while (0) +/********************************************************************//** +Align nodes with moving location.*/ +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222;\ +\ + if ((TABLE)->array[i2222].node) \ + (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \ + + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\ + node2222 = HASH_GET_FIRST((TABLE), i2222);\ +\ + while (node2222) {\ + if (node2222->PTR_NAME) \ + node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \ + + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\ +\ + node2222 = node2222->PTR_NAME;\ + }\ + }\ +} while (0) + /************************************************************//** Gets the mutex index for a fold value in a hash table. @return mutex number */ diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 8fce4ef96bc..2b4b34f2600 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -672,6 +672,9 @@ extern log_t* log_sys; when mysqld is first time started on the restored database, it can print helpful info for the user */ +#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64 + /* extend to record log_block_size + of XtraDB. 0 means default 512 */ #define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE /* this 4-byte field is TRUE when the writing of an archived log file diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index eeab8a2b5d9..cbbec2cf55e 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -107,7 +107,7 @@ whole block gets written. This should be true even in most cases of a crash: if this fails for a log block, then it is equivalent to a media failure in the log. */ -#define OS_FILE_LOG_BLOCK_SIZE 512 +#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size /** Options for file_create @{ */ #define OS_FILE_OPEN 51 @@ -188,6 +188,8 @@ extern ulint os_n_file_reads; extern ulint os_n_file_writes; extern ulint os_n_fsyncs; +extern ulint srv_log_block_size; + /* File types for directory entry data type */ enum os_file_type_enum{ diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index fd46bd7db87..582cef6f803 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -32,6 +32,11 @@ Created 9/30/1995 Heikki Tuuri #ifdef UNIV_LINUX #include <sys/ipc.h> #include <sys/shm.h> +#else +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H +#include <sys/ipc.h> +#include <sys/shm.h> +# endif #endif typedef void* os_process_t; @@ -70,6 +75,29 @@ os_mem_free_large( ulint size); /*!< in: size returned by os_mem_alloc_large() */ + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new); + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size); /*!< in: size returned by + os_shm_alloc() */ #ifndef UNIV_NONINL #include "os0proc.ic" #endif diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 2cf9ea0ec84..7366e2c3402 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -196,7 +196,7 @@ ulint os_event_wait_time( /*===============*/ os_event_t event, /*!< in: event to wait */ - ulint time); /*!< in: timeout in microseconds, or + ulint wtime); /*!< in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ #ifdef __WIN__ /**********************************************************//** diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h index 9f93565ddb7..810973e61a7 100644 --- a/storage/xtradb/include/row0ins.h +++ b/storage/xtradb/include/row0ins.h @@ -84,9 +84,10 @@ ulint row_ins_index_entry( /*================*/ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - ibool foreign,/*!< in: TRUE=check foreign key constraints */ + ibool foreign,/*!< in: TRUE=check foreign key constraints + (foreign=FALSE only during CREATE INDEX) */ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** Inserts a row to a table. This is a high-level function used in diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h index 4e2de9bd2ec..b61e6b6dca1 100644 --- a/storage/xtradb/include/row0upd.h +++ b/storage/xtradb/include/row0upd.h @@ -126,8 +126,8 @@ UNIV_INTERN void row_upd_index_entry_sys_field( /*==========================*/ - const dtuple_t* entry, /*!< in: index entry, where the memory buffers - for sys fields are already allocated: + dtuple_t* entry, /*!< in/out: index entry, where the memory + buffers for sys fields are already allocated: the function just copies the new values to them */ dict_index_t* index, /*!< in: clustered index */ @@ -286,10 +286,13 @@ row_upd_changes_ord_field_binary( row and the data values in update are not known when this function is called, e.g., at compile time */ + const row_ext_t*ext, /*!< NULL, or prefixes of the externally + stored columns in the old row */ dict_index_t* index, /*!< in: index of the record */ - const upd_t* update);/*!< in: update vector for the row; NOTE: the + const upd_t* update) /*!< in: update vector for the row; NOTE: the field numbers in this MUST be clustered index positions! */ + __attribute__((nonnull(3,4), warn_unused_result)); /***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering @@ -462,11 +465,16 @@ struct upd_node_struct{ #define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be inserted, old record is already delete marked */ -#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered +#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be + inserted, old record is already + delete-marked; non-updated BLOBs + should be inherited by the new record + and disowned by the old record */ +#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered index record was changed, or this is a delete operation: should update all the secondary index records */ -#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be +#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be looked at and updated if an ordering field changed */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 8148dffd13b..f4c9704741c 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -159,6 +159,10 @@ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; +extern uint srv_buffer_pool_shm_key; +extern ibool srv_buffer_pool_shm_is_reused; +extern ibool srv_buffer_pool_shm_checksum; + extern ibool srv_thread_concurrency_timer_based; extern ulint srv_n_file_io_threads; diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h index 8abf15da9c1..7eb7e96bd50 100644 --- a/storage/xtradb/include/srv0start.h +++ b/storage/xtradb/include/srv0start.h @@ -131,4 +131,7 @@ extern enum srv_shutdown_state srv_shutdown_state; /** Log 'spaces' have id's >= this */ #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL +/** reserved for extra system tables */ +#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL + #endif diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h index 7bd4e1b88c8..48d41038ea4 100644 --- a/storage/xtradb/include/trx0i_s.h +++ b/storage/xtradb/include/trx0i_s.h @@ -110,6 +110,8 @@ struct i_s_trx_row_struct { /*!< thd_get_thread_id() */ const char* trx_query; /*!< MySQL statement being executed in the transaction */ + struct charset_info_st* trx_query_cs; /*!< charset encode the MySQL + statement */ }; /** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */ diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 9ef9485b611..2637189f37e 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -470,7 +470,7 @@ trx_sys_file_format_id_to_name( /* Space id and page no where the trx system file copy resides */ #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ -#define TRX_DOUBLEWRITE_SPACE 1 /* the doublewrite buffer tablespace if used */ +#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */ #define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */ #include "fsp0fsp.h" #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic index c7b09d4aec2..5e0f07c8b9d 100644 --- a/storage/xtradb/include/trx0sys.ic +++ b/storage/xtradb/include/trx0sys.ic @@ -81,7 +81,7 @@ trx_sys_sys_space( { if (srv_doublewrite_file) { /* several spaces are reserved */ - return((ibool)(space <= TRX_SYS_SPACE_MAX)); + return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE)); } else { return((ibool)(space == TRX_SYS_SPACE)); } diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 487c291ef80..111bda1a3c5 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 13 -#define PERCONA_INNODB_VERSION 11.6 +#define INNODB_VERSION_BUGFIX 15 +#define PERCONA_INNODB_VERSION 12.5 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -303,6 +303,18 @@ number does not include a terminating '\0'. InnoDB probably can handle longer names internally */ #define MAX_TABLE_NAME_LEN 192 +/* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is +the MySQL's NAME_LEN, see check_and_convert_db_name(). */ +#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN + +/* MAX_FULL_NAME_LEN defines the full name path including the +database name and table name. In addition, 14 bytes is added for: + 2 for surrounding quotes around table name + 1 for the separating dot (.) + 9 for the #mysql50# prefix */ +#define MAX_FULL_NAME_LEN \ + (MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14) + /* UNIVERSAL TYPE DEFINITIONS ========================== @@ -369,6 +381,9 @@ typedef unsigned long long int ullint; /* Maximum value for ib_uint64_t */ #define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL)) +/* The 'undefined' value for ullint */ +#define ULLINT_UNDEFINED ((ullint)(-1)) + /* This 'ibool' type is used within Innobase. Remember that different included headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */ #define ibool ulint diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 7b15c052978..245dfc226c3 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -257,5 +257,48 @@ do { \ ut_a(ut_list_node_313 == NULL); \ } while (0) +/********************************************************************//** +Align nodes with moving location. +@param NAME the name of the list +@param TYPE node type +@param BASE base node (not a pointer to it) +@param OFFSET offset moved */ +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ +do { \ + ulint ut_list_i_313; \ + TYPE* ut_list_node_313; \ + \ + if ((BASE).start) \ + (BASE).start = (void*)((byte*)((BASE).start) \ + + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((BASE).end) \ + (BASE).end = (void*)((byte*)((BASE).end) \ + + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ + \ + ut_list_node_313 = (BASE).start; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + if ((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\ + + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\ + + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ + ut_list_node_313 = (ut_list_node_313->NAME).next; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ + \ + ut_list_node_313 = (BASE).end; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ +} while (0) + #endif diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h index a770f671cfc..0f8b955b098 100644 --- a/storage/xtradb/include/ut0vec.h +++ b/storage/xtradb/include/ut0vec.h @@ -94,6 +94,25 @@ ib_vector_get( ulint n); /*!< in: element index to get */ /****************************************************************//** +Get last element. The vector must not be empty. +@return last element */ +UNIV_INLINE +void* +ib_vector_get_last( +/*===============*/ + ib_vector_t* vec); /*!< in: vector */ + +/****************************************************************//** +Set the n'th element. */ +UNIV_INLINE +void +ib_vector_set( +/*==========*/ + ib_vector_t* vec, /*!< in/out: vector */ + ulint n, /*!< in: element index to set */ + void* elem); /*!< in: data element */ + +/****************************************************************//** Remove the last element from the vector. */ UNIV_INLINE void* diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic index 02e881f9bca..34c858868ce 100644 --- a/storage/xtradb/include/ut0vec.ic +++ b/storage/xtradb/include/ut0vec.ic @@ -51,6 +51,35 @@ ib_vector_get( } /****************************************************************//** +Get last element. The vector must not be empty. +@return last element */ +UNIV_INLINE +void* +ib_vector_get_last( +/*===============*/ + ib_vector_t* vec) /*!< in: vector */ +{ + ut_a(vec->used > 0); + + return(vec->data[vec->used - 1]); +} + +/****************************************************************//** +Set the n'th element. */ +UNIV_INLINE +void +ib_vector_set( +/*==========*/ + ib_vector_t* vec, /*!< in/out: vector */ + ulint n, /*!< in: element index to set */ + void* elem) /*!< in: data element */ +{ + ut_a(n < vec->used); + + vec->data[n] = elem; +} + +/****************************************************************//** Remove the last element from the vector. @return last vector element */ UNIV_INLINE diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index 1ded67d9147..4fcb5b2c522 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -3632,6 +3632,80 @@ lock_table_create( } /*************************************************************//** +Pops autoinc lock requests from the transaction's autoinc_locks. We +handle the case where there are gaps in the array and they need to +be popped off the stack. */ +UNIV_INLINE +void +lock_table_pop_autoinc_locks( +/*=========================*/ + trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* Skip any gaps, gaps are NULL lock entries in the + trx->autoinc_locks vector. */ + + do { + ib_vector_pop(trx->autoinc_locks); + + if (ib_vector_is_empty(trx->autoinc_locks)) { + return; + } + + } while (ib_vector_get_last(trx->autoinc_locks) == NULL); +} + +/*************************************************************//** +Removes an autoinc lock request from the transaction's autoinc_locks. */ +UNIV_INLINE +void +lock_table_remove_autoinc_lock( +/*===========================*/ + lock_t* lock, /*!< in: table lock */ + trx_t* trx) /*!< in/out: transaction that owns the lock */ +{ + lock_t* autoinc_lock; + lint i = ib_vector_size(trx->autoinc_locks) - 1; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* With stored functions and procedures the user may drop + a table within the same "statement". This special case has + to be handled by deleting only those AUTOINC locks that were + held by the table being dropped. */ + + autoinc_lock = ib_vector_get(trx->autoinc_locks, i); + + /* This is the default fast case. */ + + if (autoinc_lock == lock) { + lock_table_pop_autoinc_locks(trx); + } else { + /* The last element should never be NULL */ + ut_a(autoinc_lock != NULL); + + /* Handle freeing the locks from within the stack. */ + + while (--i >= 0) { + autoinc_lock = ib_vector_get(trx->autoinc_locks, i); + + if (UNIV_LIKELY(autoinc_lock == lock)) { + ib_vector_set(trx->autoinc_locks, i, NULL); + return; + } + } + + /* Must find the autoinc lock. */ + ut_error; + } +} + +/*************************************************************//** Removes a table lock request from the queue and the trx list of locks; this is a low-level function which does NOT check if waiting requests can now be granted. */ @@ -3670,10 +3744,8 @@ lock_table_remove_low( if (!lock_get_wait(lock) && !ib_vector_is_empty(trx->autoinc_locks)) { - lock_t* autoinc_lock; - autoinc_lock = ib_vector_pop(trx->autoinc_locks); - ut_a(autoinc_lock == lock); + lock_table_remove_autoinc_lock(lock, trx); } ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index 3f14d84ac72..c39f60bd4b9 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1184,6 +1184,9 @@ log_group_file_header_flush( /* Wipe over possible label of ibbackup --restore */ memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); + mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE, + srv_log_block_size); + dest_offset = nth_file * group->file_size; #ifdef UNIV_DEBUG @@ -1777,9 +1780,7 @@ log_group_checkpoint( ulint i; ut_ad(mutex_own(&(log_sys->mutex))); -#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE -# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE" -#endif + ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); buf = group->checkpoint_buf; diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 2aaffe4c031..895067c700a 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -2260,7 +2260,7 @@ recv_report_corrupt_log( "InnoDB: far enough in recovery! Please run CHECK TABLE\n" "InnoDB: on your InnoDB tables to check that they are ok!\n" "InnoDB: If mysqld crashes after this recovery, look at\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); fflush(stderr); @@ -2899,6 +2899,7 @@ recv_init_crash_recovery(void) /*==========================*/ { ut_a(!recv_needed_recovery); + ut_a(!srv_buffer_pool_shm_is_reused); recv_needed_recovery = TRUE; @@ -2956,6 +2957,7 @@ recv_recovery_from_checkpoint_start_func( log_group_t* max_cp_group; log_group_t* up_to_date_group; ulint max_cp_field; + ulint log_hdr_log_block_size; ib_uint64_t checkpoint_lsn; ib_uint64_t checkpoint_no; ib_uint64_t old_scanned_lsn; @@ -2966,9 +2968,10 @@ recv_recovery_from_checkpoint_start_func( #endif /* UNIV_LOG_ARCHIVE */ byte* buf; byte* log_hdr_buf; - byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; + byte *log_hdr_buf_base; ulint err; + log_hdr_buf_base= alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); #ifdef UNIV_LOG_ARCHIVE @@ -3057,6 +3060,20 @@ recv_recovery_from_checkpoint_start_func( log_hdr_buf, max_cp_group); } + log_hdr_log_block_size + = mach_read_from_4(log_hdr_buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE); + if (log_hdr_log_block_size == 0) { + /* 0 means default value */ + log_hdr_log_block_size = 512; + } + if (log_hdr_log_block_size != srv_log_block_size) { + fprintf(stderr, + "InnoDB: Error: The block size of ib_logfile (%lu) " + "is not equal to innodb_log_block_size.\n", + log_hdr_log_block_size); + return(DB_ERROR); + } + #ifdef UNIV_LOG_ARCHIVE group = UT_LIST_GET_FIRST(log_sys->log_groups); diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index 48922886f23..4567d96b6f4 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -229,3 +229,173 @@ os_mem_free_large( } #endif } + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new) +{ + void* ptr; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + ulint size; + int shmid; + + *is_new = FALSE; + fprintf(stderr, + "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n", + key, key); +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX + if (!os_use_large_pages || !os_large_page_size) { + goto skip; + } + + /* Align block size to os_large_page_size */ + ut_ad(ut_is_2pow(os_large_page_size)); + size = ut_2pow_round(*n + (os_large_page_size - 1), + os_large_page_size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: HugeTLB: The shared memory segment exists.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", + size, errno); + goto skip; + } else { + fprintf(stderr, + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", + size, errno); + goto skip; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: HugeTLB: A new shared memory segment has been created .\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + return(ptr); + } +skip: + *is_new = FALSE; +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ +# ifdef HAVE_GETPAGESIZE + size = getpagesize(); +# else + size = UNIV_PAGE_SIZE; +# endif + /* Align block size to system page size */ + ut_ad(ut_is_2pow(size)); + size = *n = ut_2pow_round(*n + (size - 1), size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } else { + fprintf(stderr, + "InnoDB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: A new shared memory segment has been created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + } +end: +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); + ptr = NULL; +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + return(ptr); +} + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size) /*!< in: size returned by + os_shm_alloc() */ +{ + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + os_fast_mutex_unlock(&ut_list_mutex); + +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + if (!shmdt(ptr)) { + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + ut_total_allocated_memory -= size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_FREE(ptr, size); + } +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ +} diff --git a/storage/xtradb/os/os0sync.c b/storage/xtradb/os/os0sync.c index f9ab58c2ee4..dba997927cb 100644 --- a/storage/xtradb/os/os0sync.c +++ b/storage/xtradb/os/os0sync.c @@ -442,12 +442,12 @@ os_event_wait_time( return(1000000); /* dummy value to eliminate compiler warn. */ } #else - int err; - int ret = 0; - ulint tmp; + int err; + int ret = 0; + ulint tmp; ib_int64_t old_count; - struct timeval tv_start; - struct timespec timeout; + struct timeval tv_start; + struct timespec timeout; if (wtime == OS_SYNC_INFINITE_TIME) { os_event_wait(event); diff --git a/storage/xtradb/plug.in b/storage/xtradb/plug.in index b68b59725d4..2e0c873094a 100644 --- a/storage/xtradb/plug.in +++ b/storage/xtradb/plug.in @@ -140,17 +140,24 @@ MYSQL_PLUGIN_ACTIONS(xtradb, [ ) AC_MSG_CHECKING(whether Solaris libc atomic functions are available) - # either define HAVE_IB_SOLARIS_ATOMICS or not - AC_CHECK_FUNCS(atomic_cas_ulong \ + # Define HAVE_IB_SOLARIS_ATOMICS if _all_ of the following + # functions are present. + AC_CHECK_FUNCS(atomic_add_long_nv \ atomic_cas_32 \ atomic_cas_64 \ - atomic_add_long_nv \ - atomic_swap_uchar, - - AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1], - [Define to 1 if Solaris libc atomic functions \ - are available]) - ) + atomic_cas_ulong \ + atomic_swap_uchar) + + if test "${ac_cv_func_atomic_add_long_nv}" = "yes" -a \ + "${ac_cv_func_atomic_cas_32}" = "yes" -a \ + "${ac_cv_func_atomic_cas_64}" = "yes" -a \ + "${ac_cv_func_atomic_cas_ulong}" = "yes" -a \ + "${ac_cv_func_atomic_swap_uchar}" = "yes" ; then + + AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1], + [Define to 1 if Solaris libc atomic functions are available] + ) + fi AC_MSG_CHECKING(whether pthread_t can be used by Solaris libc atomic functions) # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index d4925e46f97..3372e1480b5 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -1787,7 +1787,7 @@ ulint row_ins_duplicate_error_in_clust( /*=============================*/ btr_cur_t* cursor, /*!< in: B-tree cursor */ - dtuple_t* entry, /*!< in: entry to insert */ + const dtuple_t* entry, /*!< in: entry to insert */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { @@ -1983,7 +1983,7 @@ row_ins_index_entry_low( depending on whether we wish optimistic or pessimistic descent down the index tree */ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ que_thr_t* thr) /*!< in: query thread */ { @@ -2164,9 +2164,10 @@ ulint row_ins_index_entry( /*================*/ dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to insert */ + dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ - ibool foreign,/*!< in: TRUE=check foreign key constraints */ + ibool foreign,/*!< in: TRUE=check foreign key constraints + (foreign=FALSE only during CREATE INDEX) */ que_thr_t* thr) /*!< in: query thread */ { ulint err; diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 65102851bdf..782c029fbcc 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -2052,7 +2052,7 @@ row_merge_drop_index( /* Replace this index with another equivalent index for all foreign key constraints on this table where this index is used */ - dict_table_replace_index_in_foreign_list(table, index); + dict_table_replace_index_in_foreign_list(table, index, trx); dict_index_remove_from_cache(table, index); trx->op_info = ""; @@ -2350,7 +2350,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - char old_name[MAX_TABLE_NAME_LEN + 1]; + char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2365,7 +2365,7 @@ row_merge_rename_tables( ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: too long table name: '%s', " "max length is %d\n", old_table->name, - MAX_TABLE_NAME_LEN); + MAX_FULL_NAME_LEN); ut_error; } diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index e9b272abab4..205aead8efe 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -574,7 +574,7 @@ handle_new_error: "InnoDB: If the mysqld server crashes" " after the startup or when\n" "InnoDB: you dump the tables, look at\n" - "InnoDB: " REFMAN "forcing-recovery.html" + "InnoDB: " REFMAN "forcing-innodb-recovery.html" " for help.\n", stderr); break; case DB_FOREIGN_EXCEED_MAX_CASCADE: @@ -875,7 +875,8 @@ row_update_statistics_if_needed( if (counter > 2000000000 || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { - dict_update_statistics(table, TRUE); + dict_update_statistics(table, FALSE /* update even if stats + are initialized */, TRUE); } } @@ -3014,7 +3015,8 @@ next_rec: dict_table_autoinc_lock(table); dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); - dict_update_statistics(table, TRUE); + dict_update_statistics(table, FALSE /* update even if stats are + initialized */, TRUE); trx_commit_for_mysql(trx); diff --git a/storage/xtradb/row/row0purge.c b/storage/xtradb/row/row0purge.c index 31b255cf2d4..8bf2ae0f458 100644 --- a/storage/xtradb/row/row0purge.c +++ b/storage/xtradb/row/row0purge.c @@ -413,7 +413,7 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(NULL, node->index, + if (row_upd_changes_ord_field_binary(NULL, NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 82ad30bb390..1ced125b7bd 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -106,6 +106,18 @@ row_sel_sec_rec_is_for_blob( ulint len; byte buf[DICT_MAX_INDEX_COL_LEN]; + ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { + /* The externally stored field was not written yet. + This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ + return(FALSE); + } + len = btr_copy_externally_stored_field_prefix(buf, sizeof buf, zip_size, clust_field, clust_len); diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c index 5998dadd16d..562f8093c38 100644 --- a/storage/xtradb/row/row0umod.c +++ b/storage/xtradb/row/row0umod.c @@ -668,19 +668,18 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + if (row_upd_changes_ord_field_binary( + node->row, node->ext, node->index, node->update)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in - row_upd_clust_rec_by_insert(), in - row_ins_index_entry_low() before - btr_store_big_rec_extern_fields() - has written the externally stored columns - (BLOBs) of the new clustered index entry. */ + row_upd_clust_rec_by_insert() before + the updated externally stored columns (BLOBs) + of the new clustered index entry were + written. */ /* The table must be in DYNAMIC or COMPRESSED format. REDUNDANT and COMPACT formats diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 444003ba3f0..e1c78949603 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -371,8 +371,8 @@ UNIV_INTERN void row_upd_index_entry_sys_field( /*==========================*/ - const dtuple_t* entry, /*!< in: index entry, where the memory buffers - for sys fields are already allocated: + dtuple_t* entry, /*!< in/out: index entry, where the memory + buffers for sys fields are already allocated: the function just copies the new values to them */ dict_index_t* index, /*!< in: clustered index */ @@ -1198,20 +1198,21 @@ row_upd_changes_ord_field_binary( row and the data values in update are not known when this function is called, e.g., at compile time */ + const row_ext_t*ext, /*!< NULL, or prefixes of the externally + stored columns in the old row */ dict_index_t* index, /*!< in: index of the record */ const upd_t* update) /*!< in: update vector for the row; NOTE: the field numbers in this MUST be clustered index positions! */ { - ulint n_unique; - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; + ulint n_unique; + ulint i; + const dict_index_t* clust_index; - ut_ad(update && index); + ut_ad(update); + ut_ad(index); n_unique = dict_index_get_n_unique(index); - n_upd_fields = upd_get_n_fields(update); clust_index = dict_table_get_first_index(index->table); @@ -1219,33 +1220,72 @@ row_upd_changes_ord_field_binary( const dict_field_t* ind_field; const dict_col_t* col; - ulint col_pos; ulint col_no; + const upd_field_t* upd_field; + const dfield_t* dfield; + dfield_t dfield_ext; + ulint dfield_len= 0; + const byte* buf; ind_field = dict_index_get_nth_field(index, i); col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); col_no = dict_col_get_no(col); - for (j = 0; j < n_upd_fields; j++) { + upd_field = upd_get_field_by_field_no( + update, dict_col_get_clust_pos(col, clust_index)); - const upd_field_t* upd_field - = upd_get_nth_field(update, j); + if (upd_field == NULL) { + continue; + } + + if (row == NULL) { + ut_ad(ext == NULL); + return(TRUE); + } - /* Note that if the index field is a column prefix - then it may be that row does not contain an externally - stored part of the column value, and we cannot compare - the datas */ + dfield = dtuple_get_nth_field(row, col_no); - if (col_pos == upd_field->field_no - && (row == NULL - || ind_field->prefix_len > 0 - || !dfield_datas_are_binary_equal( - dtuple_get_nth_field(row, col_no), - &(upd_field->new_val)))) { + /* This treatment of column prefix indexes is loosely + based on row_build_index_entry(). */ - return(TRUE); + if (UNIV_LIKELY(ind_field->prefix_len == 0) + || dfield_is_null(dfield)) { + /* do nothing special */ + } else if (UNIV_LIKELY_NULL(ext)) { + /* See if the column is stored externally. */ + buf = row_ext_lookup(ext, col_no, &dfield_len); + + ut_ad(col->ord_part); + + if (UNIV_LIKELY_NULL(buf)) { + if (UNIV_UNLIKELY(buf == field_ref_zero)) { + /* This should never happen, but + we try to fail safe here. */ + ut_ad(0); + return(TRUE); + } + + goto copy_dfield; } + } else if (dfield_is_ext(dfield)) { + dfield_len = dfield_get_len(dfield); + ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE); + dfield_len -= BTR_EXTERN_FIELD_REF_SIZE; + ut_a(dict_index_is_clust(index) + || ind_field->prefix_len <= dfield_len); + buf = dfield_get_data(dfield); +copy_dfield: + ut_a(dfield_len > 0); + dfield_copy(&dfield_ext, dfield); + dfield_set_data(&dfield_ext, buf, dfield_len); + dfield = &dfield_ext; + } + + if (!dfield_datas_are_binary_equal( + dfield, &upd_field->new_val, + ind_field->prefix_len)) { + + return(TRUE); } } @@ -1329,7 +1369,7 @@ row_upd_changes_first_fields_binary( if (col_pos == upd_field->field_no && !dfield_datas_are_binary_equal( dtuple_get_nth_field(entry, i), - &(upd_field->new_val))) { + &upd_field->new_val, 0)) { return(TRUE); } @@ -1568,14 +1608,99 @@ row_upd_sec_step( ut_ad(!dict_index_is_clust(node->index)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { + || row_upd_changes_ord_field_binary(node->row, node->ext, + node->index, node->update)) { return(row_upd_sec_index_entry(node, thr)); } return(DB_SUCCESS); } +#ifdef UNIV_DEBUG +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update) +#else /* UNIV_DEBUG */ +# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ + row_upd_clust_rec_by_insert_inherit_func(entry,update) +#endif /* UNIV_DEBUG */ +/*******************************************************************//** +Mark non-updated off-page columns inherited when the primary key is +updated. We must mark them as inherited in entry, so that they are not +freed in a rollback. A limited version of this function used to be +called btr_cur_mark_dtuple_inherited_extern(). +@return TRUE if any columns were inherited */ +static __attribute__((warn_unused_result)) +ibool +row_upd_clust_rec_by_insert_inherit_func( +/*=====================================*/ +#ifdef UNIV_DEBUG + const rec_t* rec, /*!< in: old record, or NULL */ + const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */ +#endif /* UNIV_DEBUG */ + dtuple_t* entry, /*!< in/out: updated entry to be + inserted into the clustered index */ + const upd_t* update) /*!< in: update vector */ +{ + ibool inherit = FALSE; + ulint i; + + ut_ad(!rec == !offsets); + ut_ad(!rec || rec_offs_any_extern(offsets)); + + for (i = 0; i < dtuple_get_n_fields(entry); i++) { + dfield_t* dfield = dtuple_get_nth_field(entry, i); + byte* data; + ulint len; + + ut_ad(!offsets + || !rec_offs_nth_extern(offsets, i) + == !dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)); + if (!dfield_is_ext(dfield) + || upd_get_field_by_field_no(update, i)) { + continue; + } + +#ifdef UNIV_DEBUG + if (UNIV_LIKELY(rec != NULL)) { + const byte* rec_data + = rec_get_nth_field(rec, offsets, i, &len); + ut_ad(len == dfield_get_len(dfield)); + ut_ad(len != UNIV_SQL_NULL); + ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); + + rec_data += len - BTR_EXTERN_FIELD_REF_SIZE; + + /* The pointer must not be zero. */ + ut_ad(memcmp(rec_data, field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_ad(!(rec_data[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG)); + } +#endif /* UNIV_DEBUG */ + + len = dfield_get_len(dfield); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + data = dfield_get_data(dfield); + data += len - BTR_EXTERN_FIELD_REF_SIZE; + /* The pointer must not be zero. */ + ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); + /* The BLOB must be owned. */ + ut_a(!(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); + + data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; + /* The BTR_EXTERN_INHERITED_FLAG only matters in + rollback. Purge will always free the extern fields of + a delete-marked row. */ + + inherit = TRUE; + } + + return(inherit); +} + /***********************************************************//** Marks the clustered index record deleted and inserts the updated version of the record to the index. This function should be used when the ordering @@ -1587,21 +1712,23 @@ static ulint row_upd_clust_rec_by_insert( /*========================*/ - upd_node_t* node, /*!< in: row update node */ + upd_node_t* node, /*!< in/out: row update node */ dict_index_t* index, /*!< in: clustered index of the record */ que_thr_t* thr, /*!< in: query thread */ ibool check_ref,/*!< in: TRUE if index may be referenced in a foreign key constraint */ - mtr_t* mtr) /*!< in: mtr; gets committed here */ + mtr_t* mtr) /*!< in/out: mtr; gets committed here */ { - mem_heap_t* heap = NULL; + mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* btr_cur; trx_t* trx; dict_table_t* table; dtuple_t* entry; ulint err; - ibool change_ownership = FALSE; + ibool change_ownership = FALSE; + rec_t* rec; + ulint* offsets = NULL; ut_ad(node); ut_ad(dict_index_is_clust(index)); @@ -1611,77 +1738,112 @@ row_upd_clust_rec_by_insert( pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - if (node->state != UPD_NODE_INSERT_CLUSTERED) { - rec_t* rec; - dict_index_t* index; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - rec_offs_init(offsets_); + heap = mem_heap_create(1000); + + entry = row_build_index_entry(node->upd_row, node->upd_ext, + index, heap); + ut_a(entry); - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); + row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); + + switch (node->state) { + default: + ut_error; + case UPD_NODE_INSERT_BLOB: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. Mark the + off-page columns in the entry inherited. */ + + change_ownership = row_upd_clust_rec_by_insert_inherit( + NULL, NULL, entry, node->update); + ut_a(change_ownership); + /* fall through */ + case UPD_NODE_INSERT_CLUSTERED: + /* A lock wait occurred in row_ins_index_entry() in + the previous invocation of this function. */ + break; + case UPD_NODE_UPDATE_CLUSTERED: + /* This is the first invocation of the function where + we update the primary key. Delete-mark the old record + in the clustered index and prepare to insert a new entry. */ + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, NULL, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); + + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + rec, index, offsets, TRUE, thr, mtr); if (err != DB_SUCCESS) { +err_exit: mtr_commit(mtr); + mem_heap_free(heap); return(err); } - /* Mark as not-owned the externally stored fields which the new - row inherits from the delete marked record: purge should not - free those externally stored fields even if the delete marked - record is removed from the index tree, or updated. */ + /* If the the new row inherits externally stored + fields (off-page columns a.k.a. BLOBs) from the + delete-marked old record, mark them disowned by the + old record and owned by the new entry. */ + + if (rec_offs_any_extern(offsets)) { + change_ownership = row_upd_clust_rec_by_insert_inherit( + rec, offsets, entry, node->update); + + if (change_ownership) { + btr_pcur_store_position(pcur, mtr); + } + } - rec = btr_cur_get_rec(btr_cur); - index = dict_table_get_first_index(table); - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - change_ownership = btr_cur_mark_extern_inherited_fields( - btr_cur_get_page_zip(btr_cur), rec, index, offsets, - node->update, mtr); if (check_ref) { /* NOTE that the following call loses the position of pcur ! */ err = row_upd_check_references_constraints( node, pcur, table, index, offsets, thr, mtr); if (err != DB_SUCCESS) { - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); + goto err_exit; } } } mtr_commit(mtr); - if (!heap) { - heap = mem_heap_create(500); - } - node->state = UPD_NODE_INSERT_CLUSTERED; + err = row_ins_index_entry(index, entry, + node->upd_ext ? node->upd_ext->n_ext : 0, + TRUE, thr); + node->state = change_ownership + ? UPD_NODE_INSERT_BLOB + : UPD_NODE_INSERT_CLUSTERED; - entry = row_build_index_entry(node->upd_row, node->upd_ext, - index, heap); - ut_a(entry); + if (err == DB_SUCCESS && change_ownership) { + /* Mark the non-updated fields disowned by the old record. */ - row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); + /* NOTE: this transaction has an x-lock on the record + and therefore other transactions cannot modify the + record when we have no latch on the page. In addition, + we assume that other query threads of the same + transaction do not modify the record in the meantime. + Therefore we can assert that the restoration of the + cursor succeeds. */ - if (change_ownership) { - /* If we return from a lock wait, for example, we may have - extern fields marked as not-owned in entry (marked in the - if-branch above). We must unmark them, take the ownership - back. */ + mtr_start(mtr); - btr_cur_unmark_dtuple_extern_fields(entry); + if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr)) { + ut_error; + } - /* We must mark non-updated extern fields in entry as - inherited, so that a possible rollback will not free them. */ + rec = btr_cur_get_rec(btr_cur); + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, &heap); + ut_ad(page_rec_is_user_rec(rec)); - btr_cur_mark_dtuple_inherited_extern(entry, node->update); + btr_cur_disown_inherited_fields( + btr_cur_get_page_zip(btr_cur), + rec, index, offsets, node->update, mtr); + + mtr_commit(mtr); } - err = row_ins_index_entry(index, entry, - node->upd_ext ? node->upd_ext->n_ext : 0, - TRUE, thr); mem_heap_free(heap); return(err); @@ -1825,8 +1987,9 @@ row_upd_del_mark_clust_rec( /* Mark the clustered index record deleted; we do not have to check locks, because we assume that we have an x-lock on the record */ - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); + err = btr_cur_del_mark_set_clust_rec( + BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr); if (err == DB_SUCCESS && check_ref) { /* NOTE that the following call loses the position of pcur ! */ @@ -1973,7 +2136,8 @@ exit_func: row_upd_store_row(node); - if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { + if (row_upd_changes_ord_field_binary(node->row, node->ext, index, + node->update)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform @@ -2042,7 +2206,8 @@ row_upd( } if (node->state == UPD_NODE_UPDATE_CLUSTERED - || node->state == UPD_NODE_INSERT_CLUSTERED) { + || node->state == UPD_NODE_INSERT_CLUSTERED + || node->state == UPD_NODE_INSERT_BLOB) { log_free_check(); err = row_upd_clust_step(node, thr); diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 637b9e2df28..3184308f573 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -211,6 +211,11 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; +/* key value for shm */ +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; +UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; +UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; + /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; @@ -221,6 +226,9 @@ UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; UNIV_INTERN ulint srv_page_size_shift = 0; UNIV_INTERN ulint srv_page_size = 0; +/* The log block size */ +UNIV_INTERN ulint srv_log_block_size = 0; + /* User settable value of the number of pages that must be present in the buffer cache and accessed sequentially for InnoDB to trigger a readahead request. */ @@ -2683,15 +2691,26 @@ srv_master_thread( ulint n_pages_purged = 0; ulint n_bytes_merged; ulint n_pages_flushed; + ulint n_pages_flushed_prev = 0; ulint n_bytes_archived; ulint n_tables_to_drop; ulint n_ios; ulint n_ios_old; ulint n_ios_very_old; ulint n_pend_ios; + ulint next_itr_time; + ulint prev_adaptive_checkpoint = ULINT_UNDEFINED; + ulint inner_loop = 0; ibool skip_sleep = FALSE; ulint i; + struct t_prev_flush_info_struct { + ulint count; + unsigned space:32; + unsigned offset:32; + ib_uint64_t oldest_modification; + } prev_flush_info; + ib_uint64_t lsn_old; ib_uint64_t oldest_lsn; @@ -2704,6 +2723,7 @@ srv_master_thread( srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); + memset(&prev_flush_info, 0, sizeof(prev_flush_info)); mutex_enter(&kernel_mutex); srv_table_reserve_slot(SRV_MASTER); @@ -2723,6 +2743,8 @@ loop: n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + buf_pool->stat.n_pages_written; + n_pages_flushed= 0; + mutex_enter(&kernel_mutex); /* Store the user activity counter at the start of this loop */ @@ -2741,15 +2763,20 @@ loop: srv_last_log_flush_time = time(NULL); skip_sleep = FALSE; + next_itr_time = ut_time_ms() + 1000; + for (i = 0; i < 10; i++) { + ulint cur_time = ut_time_ms(); + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; if (!skip_sleep) { + if (next_itr_time > cur_time) { - os_event_wait_time(srv_shutdown_event, 1000000); + os_event_wait_time(srv_shutdown_event, ut_min(1000000, (next_itr_time - cur_time) * 1000)); srv_main_sleeps++; /* @@ -2766,6 +2793,10 @@ loop: */ } + /* Each iteration should happen at 1 second interval. */ + next_itr_time = ut_time_ms() + 1000; + } + skip_sleep = FALSE; /* ALTER TABLE in MySQL requires on Unix that the table handler @@ -2828,6 +2859,7 @@ loop: mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; } else if (srv_adaptive_flushing) { /* Try to keep the rate of flushing of dirty @@ -2853,6 +2885,7 @@ loop: mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; } else if (srv_adaptive_checkpoint == 1) { /* adaptive_flushing option is prior to adaptive_checkpoint option, for now */ @@ -2896,6 +2929,7 @@ loop: mutex_exit(&(log_sys->mutex)); } } + prev_adaptive_checkpoint = 1; } else if (srv_adaptive_checkpoint == 2) { /* Try to keep modified age not to exceed @@ -2978,11 +3012,124 @@ retry_flush_batch: mutex_exit(&(log_sys->mutex)); } } + prev_adaptive_checkpoint = 2; + } else if (srv_adaptive_checkpoint == 3) { + buf_page_t* bpage; + ib_uint64_t lsn; + + mutex_enter(&(log_sys->mutex)); + oldest_lsn = buf_pool_get_oldest_modification(); + lsn = log_sys->lsn; + mutex_exit(&(log_sys->mutex)); + /* upper loop/sec. (x10) */ + next_itr_time -= 900; /* 1000 - 900 == 100 */ + inner_loop++; + if (inner_loop < 10) { + i--; + } else { + inner_loop = 0; + } + + if (prev_adaptive_checkpoint == 3) { + lint n_flush; + lint blocks_sum, new_blocks_sum, flushed_blocks_sum; + + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; + + /* prev_flush_info should be the previous loop's */ + { + lint blocks_num, new_blocks_num, flushed_blocks_num; + ibool found; + + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list); + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + new_blocks_num = 0; + + found = FALSE; + while (bpage != NULL) { + if (prev_flush_info.space == bpage->space + && prev_flush_info.offset == bpage->offset + && prev_flush_info.oldest_modification + == bpage->oldest_modification) { + found = TRUE; + break; + } + bpage = UT_LIST_GET_NEXT(flush_list, bpage); + new_blocks_num++; + } + if (!found) { + new_blocks_num = blocks_num; + } + + flushed_blocks_num = new_blocks_num + prev_flush_info.count + - blocks_num; + if (flushed_blocks_num < 0) { + flushed_blocks_num = 0; + } + + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + + prev_flush_info.count = UT_LIST_GET_LEN(buf_pool->flush_list); + if (bpage) { + prev_flush_info.space = bpage->space; + prev_flush_info.offset = bpage->offset; + prev_flush_info.oldest_modification = bpage->oldest_modification; + } else { + prev_flush_info.space = 0; + prev_flush_info.offset = 0; + prev_flush_info.oldest_modification = 0; + } + + new_blocks_sum += new_blocks_num; + flushed_blocks_sum += flushed_blocks_num; + blocks_sum += blocks_num; + } + + n_flush = (lint) (blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async); + if ((ulint) flushed_blocks_sum > n_pages_flushed_prev) { + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev); + } + + if (n_flush > 0) { + n_flush++; + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, n_flush, + oldest_lsn + (lsn - lsn_old)); + } else { + n_pages_flushed = 0; + } + } else { + /* store previous first pages of the flush_list */ + { + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); + + prev_flush_info.count = UT_LIST_GET_LEN(buf_pool->flush_list); + if (bpage) { + prev_flush_info.space = bpage->space; + prev_flush_info.offset = bpage->offset; + prev_flush_info.oldest_modification = bpage->oldest_modification; + } else { + prev_flush_info.space = 0; + prev_flush_info.offset = 0; + prev_flush_info.oldest_modification = 0; + } + } + n_pages_flushed = 0; + } + + lsn_old = lsn; + prev_adaptive_checkpoint = 3; } else { mutex_enter(&(log_sys->mutex)); lsn_old = log_sys->lsn; mutex_exit(&(log_sys->mutex)); + prev_adaptive_checkpoint = ULINT_UNDEFINED; + } + + if (n_pages_flushed == ULINT_UNDEFINED) { + n_pages_flushed_prev = 0; + } else { + n_pages_flushed_prev = n_pages_flushed; } if (srv_activity_count == old_activity_count) { diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 27804e78a32..cef045d72e1 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1439,8 +1439,25 @@ innobase_start_or_create_for_mysql(void) fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files); + /* Print time to initialize the buffer pool */ + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Initializing buffer pool, size ="); + + if (srv_buf_pool_size >= 1024 * 1024 * 1024) { + fprintf(stderr, + " %.1fG\n", + ((double) srv_buf_pool_size) / (1024 * 1024 * 1024)); + } else { + fprintf(stderr, + " %.1fM\n", + ((double) srv_buf_pool_size) / (1024 * 1024)); + } + ret = buf_pool_init(); + ut_print_timestamp(stderr); + if (ret == NULL) { fprintf(stderr, "InnoDB: Fatal error: cannot allocate the memory" @@ -1449,6 +1466,9 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + fprintf(stderr, + " InnoDB: Completed initialization of buffer pool\n"); + #ifdef UNIV_DEBUG /* We have observed deadlocks with a 5MB buffer pool but the actual lower limit could very well be a little higher. */ @@ -1639,14 +1659,6 @@ innobase_start_or_create_for_mysql(void) trx_sys_file_format_init(); - if (create_new_doublewrite_file) { - mtr_start(&mtr); - fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); - mtr_commit(&mtr); - - trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); - } - if (create_new_db) { mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); @@ -1654,6 +1666,15 @@ innobase_start_or_create_for_mysql(void) mtr_commit(&mtr); trx_sys_create(); + + if (create_new_doublewrite_file) { + mtr_start(&mtr); + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); + mtr_commit(&mtr); + + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); + } + dict_create(); srv_startup_is_before_trx_rollback_phase = FALSE; @@ -1691,6 +1712,13 @@ innobase_start_or_create_for_mysql(void) recv_recovery_from_archive_finish(); #endif /* UNIV_LOG_ARCHIVE */ } else { + char* save_srv_doublewrite_file = NULL; + + if (create_new_doublewrite_file) { + /* doublewrite_file cannot be used for recovery yet. */ + save_srv_doublewrite_file = srv_doublewrite_file; + srv_doublewrite_file = NULL; + } /* Check if we support the max format that is stamped on the system tablespace. @@ -1716,6 +1744,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. */ + /* buffer_pool_shm should not be reused when recovery was needed. */ + if (!srv_buffer_pool_shm_is_reused) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had @@ -1777,6 +1807,17 @@ innobase_start_or_create_for_mysql(void) we have finished the recovery process so that the image of TRX_SYS_PAGE_NO is not stale. */ trx_sys_file_format_tag_init(); + + if (create_new_doublewrite_file) { + /* restore the value */ + srv_doublewrite_file = save_srv_doublewrite_file; + + mtr_start(&mtr); + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); + mtr_commit(&mtr); + + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); + } } if (!create_new_db && sum_of_new_sizes > 0) { diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index f1018522fdf..0bdffcb98b0 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -334,10 +334,13 @@ rw_lock_validate( /*=============*/ rw_lock_t* lock) /*!< in: rw-lock */ { + ulint waiters; + lint lock_word; + ut_a(lock); - ulint waiters = rw_lock_get_waiters(lock); - lint lock_word = lock->lock_word; + waiters = rw_lock_get_waiters(lock); + lock_word = lock->lock_word; ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); @@ -986,7 +989,7 @@ rw_lock_debug_print( rwt = info->lock_type; - fprintf(stderr, "Locked: thread %ld file %s line %ld ", + fprintf(stderr, "Locked: thread %lu file %s line %lu ", (ulong) os_thread_pf(info->thread_id), info->file_name, (ulong) info->line); if (rwt == RW_LOCK_SHARED) { diff --git a/storage/xtradb/thr/thr0loc.c b/storage/xtradb/thr/thr0loc.c index 5b9e83be920..e702b37afb4 100644 --- a/storage/xtradb/thr/thr0loc.c +++ b/storage/xtradb/thr/thr0loc.c @@ -72,6 +72,24 @@ struct thr_local_struct{ /** The value of thr_local_struct::magic_n */ #define THR_LOCAL_MAGIC_N 1231234 +#ifdef UNIV_DEBUG +/*******************************************************************//** +Validates thread local data. +@return TRUE if valid */ +static +ibool +thr_local_validate( +/*===============*/ + const thr_local_t* local) /*!< in: data to validate */ +{ + ut_ad(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(local->slot_no == ULINT_UNDEFINED + || local->slot_no < OS_THREAD_MAX_N); + ut_ad(local->in_ibuf == FALSE || local->in_ibuf == TRUE); + return(TRUE); +} +#endif /* UNIV_DEBUG */ + /*******************************************************************//** Returns the local storage struct for a thread. @return local storage */ @@ -92,7 +110,8 @@ try_again: local = NULL; HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - thr_local_t*, local,, os_thread_eq(local->id, id)); + thr_local_t*, local, ut_ad(thr_local_validate(local)), + os_thread_eq(local->id, id)); if (local == NULL) { mutex_exit(&thr_local_mutex); @@ -103,7 +122,7 @@ try_again: goto try_again; } - ut_ad(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(local)); return(local); } @@ -189,7 +208,7 @@ thr_local_create(void) local->id = os_thread_get_curr_id(); local->handle = os_thread_get_curr(); local->magic_n = THR_LOCAL_MAGIC_N; - + local->slot_no = ULINT_UNDEFINED; local->in_ibuf = FALSE; mutex_enter(&thr_local_mutex); @@ -217,7 +236,8 @@ thr_local_free( /* Look for the local struct in the hash table */ HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - thr_local_t*, local,, os_thread_eq(local->id, id)); + thr_local_t*, local, ut_ad(thr_local_validate(local)), + os_thread_eq(local->id, id)); if (local == NULL) { mutex_exit(&thr_local_mutex); @@ -231,6 +251,7 @@ thr_local_free( mutex_exit(&thr_local_mutex); ut_a(local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(local)); mem_free(local); } @@ -273,6 +294,7 @@ thr_local_close(void) local = HASH_GET_NEXT(hash, prev_local); ut_a(prev_local->magic_n == THR_LOCAL_MAGIC_N); + ut_ad(thr_local_validate(prev_local)); mem_free(prev_local); } } diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index c0b9a73a68c..e148234888b 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -404,6 +404,42 @@ table_cache_create_empty_row( return(row); } +#ifdef UNIV_DEBUG +/*******************************************************************//** +Validates a row in the locks cache. +@return TRUE if valid */ +static +ibool +i_s_locks_row_validate( +/*===================*/ + const i_s_locks_row_t* row) /*!< in: row to validate */ +{ + ut_ad(row->lock_trx_id != 0); + ut_ad(row->lock_mode != NULL); + ut_ad(row->lock_type != NULL); + ut_ad(row->lock_table != NULL); + ut_ad(row->lock_table_id != 0); + + if (row->lock_space == ULINT_UNDEFINED) { + /* table lock */ + ut_ad(!strcmp("TABLE", row->lock_type)); + ut_ad(row->lock_index == NULL); + ut_ad(row->lock_data == NULL); + ut_ad(row->lock_page == ULINT_UNDEFINED); + ut_ad(row->lock_rec == ULINT_UNDEFINED); + } else { + /* record lock */ + ut_ad(!strcmp("RECORD", row->lock_type)); + ut_ad(row->lock_index != NULL); + /* row->lock_data == NULL if buf_page_try_get() == NULL */ + ut_ad(row->lock_page != ULINT_UNDEFINED); + ut_ad(row->lock_rec != ULINT_UNDEFINED); + } + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + /*******************************************************************//** Fills i_s_trx_row_t object. If memory can not be allocated then FALSE is returned. @@ -431,18 +467,15 @@ fill_trx_row( row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); + row->requested_lock_row = requested_lock_row; + ut_ad(requested_lock_row == NULL + || i_s_locks_row_validate(requested_lock_row)); if (trx->wait_lock != NULL) { - ut_a(requested_lock_row != NULL); - - row->requested_lock_row = requested_lock_row; row->trx_wait_started = (ib_time_t) trx->wait_started; } else { - ut_a(requested_lock_row == NULL); - - row->requested_lock_row = NULL; row->trx_wait_started = 0; } @@ -461,7 +494,6 @@ fill_trx_row( stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { @@ -475,6 +507,8 @@ fill_trx_row( cache->storage, stmt, stmt_len + 1, MAX_ALLOWED_FOR_STORAGE(cache)); + row->trx_query_cs = innobase_get_charset(trx->mysql_thd); + if (row->trx_query == NULL) { return(FALSE); @@ -725,6 +759,7 @@ fill_locks_row( row->lock_table_id = lock_get_table_id(lock); row->hash_chain.value = row; + ut_ad(i_s_locks_row_validate(row)); return(TRUE); } @@ -745,6 +780,9 @@ fill_lock_waits_row( relevant blocking lock row in innodb_locks */ { + ut_ad(i_s_locks_row_validate(requested_lock_row)); + ut_ad(i_s_locks_row_validate(blocking_lock_row)); + row->requested_lock_row = requested_lock_row; row->blocking_lock_row = blocking_lock_row; @@ -816,6 +854,7 @@ locks_row_eq_lock( or ULINT_UNDEFINED if the lock is a table lock */ { + ut_ad(i_s_locks_row_validate(row)); #ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T return(0); #else @@ -873,7 +912,7 @@ search_innodb_locks( /* auxiliary variable */ hash_chain, /* assertion on every traversed item */ - , + ut_ad(i_s_locks_row_validate(hash_chain->value)), /* this determines if we have found the lock */ locks_row_eq_lock(hash_chain->value, lock, heap_no)); @@ -913,6 +952,7 @@ add_lock_to_cache( dst_row = search_innodb_locks(cache, lock, heap_no); if (dst_row != NULL) { + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } #endif @@ -950,6 +990,7 @@ add_lock_to_cache( } /* for()-loop */ #endif + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } diff --git a/storage/xtradb/trx/trx0undo.c b/storage/xtradb/trx/trx0undo.c index 6f06de593a9..9ed83b5d5c1 100644 --- a/storage/xtradb/trx/trx0undo.c +++ b/storage/xtradb/trx/trx0undo.c @@ -1408,7 +1408,7 @@ trx_undo_lists_init( page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS + i * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, &mtr); - if (page_no != FIL_NULL) { + if (page_no != 0 && page_no != FIL_NULL) { srv_extra_undoslots = TRUE; fprintf(stderr, "InnoDB: Error: innodb_extra_undoslots option is disabled, but it was enabled before.\n" diff --git a/storage/xtradb/ut/ut0dbg.c b/storage/xtradb/ut/ut0dbg.c index 4484e6c36de..7839466e16f 100644 --- a/storage/xtradb/ut/ut0dbg.c +++ b/storage/xtradb/ut/ut0dbg.c @@ -79,7 +79,7 @@ ut_dbg_assertion_failed( " or crashes, even\n" "InnoDB: immediately after the mysqld startup, there may be\n" "InnoDB: corruption in the InnoDB tablespace. Please refer to\n" - "InnoDB: " REFMAN "forcing-recovery.html\n" + "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" "InnoDB: about forcing recovery.\n", stderr); #if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) ut_dbg_stop_threads = TRUE; diff --git a/storage/xtradb/ut/ut0ut.c b/storage/xtradb/ut/ut0ut.c index 498873e290a..fc1bdffcf38 100644 --- a/storage/xtradb/ut/ut0ut.c +++ b/storage/xtradb/ut/ut0ut.c @@ -553,7 +553,7 @@ ut_print_namel( trx ? trx->mysql_thd : NULL, table_id); - fwrite(buf, 1, bufend - buf, f); + (void) fwrite(buf, 1, bufend - buf, f); } /**********************************************************************//** @@ -574,7 +574,7 @@ ut_copy_file( ? (size_t) len : sizeof buf; size_t size = fread(buf, 1, maxs, src); - fwrite(buf, 1, size, dest); + (void) fwrite(buf, 1, size, dest); len -= (long) size; if (size < maxs) { break; |