diff options
Diffstat (limited to 'storage/innobase')
42 files changed, 473 insertions, 6334 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index c4da4b179be..91c5adc4f07 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -74,7 +74,6 @@ SET(INNOBASE_SOURCES gis/gis0sea.cc fts/fts0plugin.cc handler/ha_innodb.cc -# handler/ha_innopart.cc handler/handler0alter.cc handler/i_s.cc ibuf/ibuf0ibuf.cc diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 64a14d0e959..815324825bd 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -4846,7 +4846,6 @@ btr_validate_level( bool ret = true; mtr_t mtr; mem_heap_t* heap = mem_heap_create(256); - fseg_header_t* seg; ulint* offsets = NULL; ulint* offsets2= NULL; #ifdef UNIV_ZIP_DEBUG @@ -4870,7 +4869,6 @@ btr_validate_level( block = btr_root_block_get(index, RW_SX_LATCH, &mtr); page = buf_block_get_frame(block); - seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP; #ifdef UNIV_DEBUG if (dict_index_is_spatial(index)) { @@ -4879,7 +4877,7 @@ btr_validate_level( } #endif - const fil_space_t* space = fil_space_get(index->space); + fil_space_t* space = fil_space_get(index->space); const page_size_t table_page_size( dict_table_page_size(index->table)); const page_size_t space_page_size(space->flags); @@ -4897,9 +4895,7 @@ btr_validate_level( while (level != btr_page_get_level(page, &mtr)) { const rec_t* node_ptr; - if (fseg_page_is_free(seg, - block->page.id.space(), - block->page.id.page_no())) { + if (fseg_page_is_free(space, block->page.id.page_no())) { btr_validate_report1(index, level, block); @@ -4959,11 +4955,6 @@ btr_validate_level( /* Now we are on the desired level. Loop through the pages on that level. */ - if (level == 0) { - /* Leaf pages are managed in their own file segment. */ - seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF; - } - loop: mem_heap_empty(heap); offsets = offsets2 = NULL; @@ -4982,9 +4973,7 @@ loop: ut_a(block->page.id.space() == index->space); - if (fseg_page_is_free(seg, - block->page.id.space(), - block->page.id.page_no())) { + if (fseg_page_is_free(space, block->page.id.page_no())) { btr_validate_report1(index, level, block); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 7bdd03c8a9e..e740370d2f0 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3901,8 +3901,10 @@ any_extern: } /* We limit max record size to 16k even for 64k page size. */ - if (new_rec_size >= REC_MAX_DATA_SIZE) { - err = DB_OVERFLOW; + if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE || + (!dict_table_is_comp(index->table) + && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) { + err = DB_OVERFLOW; goto func_exit; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index b57fba75869..ad93238410e 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1477,17 +1477,15 @@ buf_block_init( rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING); - ut_d(rw_lock_create( - PFS_NOT_INSTRUMENTED, - &block->debug_latch, SYNC_NO_ORDER_CHECK)); + ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch, + SYNC_LEVEL_VARYING)); #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING); - ut_d(rw_lock_create( - buf_block_debug_latch_key, - &block->debug_latch, SYNC_NO_ORDER_CHECK)); + ut_d(rw_lock_create(buf_block_debug_latch_key, + &block->debug_latch, SYNC_LEVEL_VARYING)); #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 8db1878b4ef..1c28a39a62e 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -104,6 +104,7 @@ dict_create_sys_tables_tuple( | ((table->flags & DICT_TF_COMPACT) << 31)); dfield_set_data(dfield, ptr, 4); + /* 5: TYPE (table flags) -----------------------------*/ dfield = dtuple_get_nth_field( entry, DICT_COL__SYS_TABLES__TYPE); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 943a37ef4e0..55429b2680f 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1375,9 +1375,6 @@ dict_table_add_to_cache( } ut_ad(dict_lru_validate()); - - dict_sys->size += mem_heap_get_size(table->heap) - + strlen(table->name.m_name) + 1; } /**********************************************************************//** @@ -1756,9 +1753,6 @@ dict_table_rename_in_cache( HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size += strlen(new_name) - strlen(old_name); - ut_a(dict_sys->size > 0); - /* Update the table_name field in indexes */ for (index = dict_table_get_first_index(table); index != NULL; @@ -2049,7 +2043,6 @@ dict_table_remove_from_cache_low( { dict_foreign_t* foreign; dict_index_t* index; - lint size; ut_ad(table); ut_ad(dict_lru_validate()); @@ -2130,12 +2123,6 @@ dict_table_remove_from_cache_low( UT_DELETE(table->vc_templ); } - size = mem_heap_get_size(table->heap) + strlen(table->name.m_name) + 1; - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - dict_mem_table_free(table); } @@ -2330,9 +2317,10 @@ dict_index_too_big_for_tree( page(16k for 64k page size). No additional sparse page directory entry will be generated for the first few user records. */ - page_rec_max = srv_page_size == UNIV_PAGE_SIZE_MAX - ? REC_MAX_DATA_SIZE - 1 - : page_get_free_space_of_empty(comp) / 2; + page_rec_max = (comp || srv_page_size < UNIV_PAGE_SIZE_MAX) + ? page_get_free_space_of_empty(comp) / 2 + : REDUNDANT_REC_MAX_DATA_SIZE; + page_ptr_max = page_rec_max; /* Each record has a header. */ rec_max_size = comp @@ -2610,8 +2598,6 @@ dict_index_add_to_cache_w_vcol( rw_lock_create(index_tree_rw_lock_key, &new_index->lock, SYNC_INDEX_TREE); - dict_sys->size += mem_heap_get_size(new_index->heap); - dict_mem_index_free(index); return(DB_SUCCESS); @@ -2628,8 +2614,6 @@ dict_index_remove_from_cache_low( ibool lru_evict) /*!< in: TRUE if index being evicted to make room in the table LRU list */ { - lint size; - ut_ad(table && index); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); @@ -2730,12 +2714,6 @@ dict_index_remove_from_cache_low( } } - size = mem_heap_get_size(index->heap); - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - dict_mem_index_free(index); } @@ -4623,7 +4601,6 @@ dict_create_foreign_constraints_low( if (!success) { ib::error() << "Could not find the table " << create_name << " being" << operation << " near to " << orig; - mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_ERROR, "%s table %s with foreign key constraint" @@ -5301,6 +5278,7 @@ try_find_index: " failed. You have more than one on delete or on update clause" " in '%s' near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); + mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" @@ -5309,7 +5287,6 @@ try_find_index: operation, create_name, start_of_latest_foreign, start_of_latest_set); dict_foreign_free(foreign); - mutex_exit(&dict_foreign_err_mutex); return(DB_CANNOT_ADD_CONSTRAINT); } @@ -6872,8 +6849,6 @@ dict_close(void) mutex_free(&dict_foreign_err_mutex); - ut_ad(dict_sys->size == 0); - ut_free(dict_sys); dict_sys = NULL; @@ -7246,6 +7221,41 @@ dict_tf_to_row_format_string( return(0); } +/** Calculate the used memory occupied by the data dictionary +table and index objects. +@return number of bytes occupied. */ +UNIV_INTERN +ulint +dict_sys_get_size() +{ + ulint size = 0; + + ut_ad(dict_sys); + + mutex_enter(&dict_sys->mutex); + + for(ulint i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) { + dict_table_t* table; + + for (table = static_cast<dict_table_t*>(HASH_GET_FIRST(dict_sys->table_hash,i)); + table != NULL; + table = static_cast<dict_table_t*>(HASH_GET_NEXT(name_hash, table))) { + dict_index_t* index; + size += mem_heap_get_size(table->heap) + strlen(table->name.m_name) +1; + + for(index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + size += mem_heap_get_size(index->heap); + } + } + } + + mutex_exit(&dict_sys->mutex); + + return (size); +} + /** Look for any dictionary objects that are found in the given tablespace. @param[in] space_id Tablespace ID to search for. @return true if tablespace is empty. */ diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 55d84bf17df..6193a8f66f5 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -3529,17 +3529,12 @@ dict_load_foreign( here. The child table will be loaded later, along with its foreign key constraint. */ - lint old_size = mem_heap_get_size(ref_table->heap); - ut_a(ref_table != NULL); fk_tables.push_back( mem_heap_strdupl(ref_table->heap, foreign->foreign_table_name_lookup, foreign_table_name_len)); - lint new_size = mem_heap_get_size(ref_table->heap); - dict_sys->size += new_size - old_size; - dict_foreign_remove_from_cache(foreign); DBUG_RETURN(DB_SUCCESS); } diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 9350b5d400d..177a16a2b37 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1335,16 +1335,6 @@ dict_stats_analyze_index_level( mem_heap_free(heap); } -/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */ -enum page_scan_method_t { - /** scan the records on the given page, counting the number - of distinct ones; @see srv_stats_include_delete_marked */ - COUNT_ALL_NON_BORING, - /** quit on the first record that differs from its right neighbor */ - QUIT_ON_FIRST_NON_BORING -}; -/* @} */ - /** Scan a page, reading records from left to right and counting the number of distinct records (looking only at the first n_prefix columns) and the number of external pages pointed by records from this page. @@ -1361,7 +1351,7 @@ be big enough) @param[in] index index of the page @param[in] page the page to scan @param[in] n_prefix look at the first n_prefix columns -@param[in] scan_method scan to the end of the page or not +@param[in] is_leaf whether this is the leaf page @param[out] n_diff number of distinct records encountered @param[out] n_external_pages if this is non-NULL then it will be set to the number of externally stored pages which were encountered @@ -1376,7 +1366,7 @@ dict_stats_scan_page( const dict_index_t* index, const page_t* page, ulint n_prefix, - page_scan_method_t scan_method, + bool is_leaf, ib_uint64_t* n_diff, ib_uint64_t* n_external_pages) { @@ -1388,8 +1378,9 @@ dict_stats_scan_page( Because offsets1,offsets2 should be big enough, this memory heap should never be used. */ mem_heap_t* heap = NULL; + ut_ad(is_leaf == page_is_leaf(page)); const rec_t* (*get_next)(const rec_t*) - = srv_stats_include_delete_marked + = !is_leaf || srv_stats_include_delete_marked ? page_rec_get_next_const : page_rec_get_next_non_del_marked; @@ -1440,7 +1431,7 @@ dict_stats_scan_page( (*n_diff)++; - if (scan_method == QUIT_ON_FIRST_NON_BORING) { + if (!is_leaf) { break; } } @@ -1566,7 +1557,7 @@ dict_stats_analyze_index_below_cur( /* search for the first non-boring record on the page */ offsets_rec = dict_stats_scan_page( &rec, offsets1, offsets2, index, page, n_prefix, - QUIT_ON_FIRST_NON_BORING, n_diff, NULL); + false, n_diff, NULL); /* pages on level > 0 are not allowed to be empty */ ut_a(offsets_rec != NULL); @@ -1611,7 +1602,7 @@ dict_stats_analyze_index_below_cur( offsets_rec = dict_stats_scan_page( &rec, offsets1, offsets2, index, page, n_prefix, - COUNT_ALL_NON_BORING, n_diff, + true, n_diff, n_external_pages); #if 0 diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 6e431a6ee0f..e1b5bcbc325 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -93,13 +93,20 @@ static ib_mutex_t crypt_stat_mutex; extern my_bool srv_background_scrub_data_uncompressed; extern my_bool srv_background_scrub_data_compressed; +/*********************************************************************** +Check if a key needs rotation given a key_state +@param[in] encrypt_mode Encryption mode +@param[in] key_version Current key version +@param[in] latest_key_version Latest key version +@param[in] rotate_key_age when to rotate +@return true if key needs rotation, false if not */ static bool fil_crypt_needs_rotation( - fil_encryption_t encrypt_mode, /*!< in: Encryption - mode */ - uint key_version, /*!< in: Key version */ - uint latest_key_version, /*!< in: Latest key version */ - uint rotate_key_age); /*!< in: When to rotate */ + fil_encryption_t encrypt_mode, + uint key_version, + uint latest_key_version, + uint rotate_key_age) + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************* Init space crypt */ @@ -326,10 +333,17 @@ fil_space_destroy_crypt_data( fil_space_crypt_t **crypt_data) { if (crypt_data != NULL && (*crypt_data) != NULL) { - mutex_enter(&fil_crypt_threads_mutex); - fil_space_crypt_t* c = *crypt_data; - *crypt_data = NULL; - mutex_exit(&fil_crypt_threads_mutex); + fil_space_crypt_t* c; + if (UNIV_LIKELY(fil_crypt_threads_inited)) { + mutex_enter(&fil_crypt_threads_mutex); + c = *crypt_data; + *crypt_data = NULL; + mutex_exit(&fil_crypt_threads_mutex); + } else { + ut_ad(srv_read_only_mode || !srv_was_started); + c = *crypt_data; + *crypt_data = NULL; + } if (c) { c->~fil_space_crypt_t(); ut_free(c); @@ -1582,20 +1596,6 @@ fil_crypt_find_page_to_rotate( return found; } -/*********************************************************************** -Check if a page is uninitialized (doesn't need to be rotated) -@param[in] frame Page to check -@param[in] page_size Page size -@return true if page is uninitialized, false if not. */ -static inline -bool -fil_crypt_is_page_uninitialized( - const byte *frame, - const page_size_t& page_size) -{ - return (buf_page_is_zeroes(frame, page_size)); -} - #define fil_crypt_get_page_throttle(state,offset,mtr,sleeptime_ms) \ fil_crypt_get_page_throttle_func(state, offset, mtr, \ sleeptime_ms, __FILE__, __LINE__) @@ -1709,7 +1709,7 @@ btr_scrub_get_block_and_allocation_status( mtr_start(&local_mtr); - *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ? + *allocation_status = fseg_page_is_free(space, offset) ? BTR_SCRUB_PAGE_FREE : BTR_SCRUB_PAGE_ALLOCATED; @@ -1756,9 +1756,9 @@ fil_crypt_rotate_page( ulint offset = state->offset; ulint sleeptime_ms = 0; fil_space_crypt_t *crypt_data = space->crypt_data; - const page_size_t page_size = page_size_t(space->flags); ut_ad(space->n_pending_ops > 0); + ut_ad(offset > 0); /* In fil_crypt_thread where key rotation is done we have acquired space and checked that this space is not yet @@ -1773,44 +1773,55 @@ fil_crypt_rotate_page( return; } + ut_d(const bool was_free = fseg_page_is_free(space, offset)); + mtr_t mtr; mtr.start(); if (buf_block_t* block = fil_crypt_get_page_throttle(state, offset, &mtr, &sleeptime_ms)) { - mtr.set_named_space(space); - bool modified = false; int needs_scrubbing = BTR_SCRUB_SKIP_PAGE; lsn_t block_lsn = block->page.newest_modification; byte* frame = buf_block_get_frame(block); uint kv = mach_read_from_4(frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - /* check if tablespace is closing after reading page */ - if (!space->is_stopping()) { - - if (kv == 0 && - fil_crypt_is_page_uninitialized(frame, page_size)) { - ; - } else if (fil_crypt_needs_rotation( - crypt_data->encryption, - kv, key_state->key_version, - key_state->rotate_key_age)) { - - modified = true; - - /* force rotation by dummy updating page */ - mlog_write_ulint(frame + - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - space_id, MLOG_4BYTES, &mtr); - - /* statistics */ - state->crypt_stat.pages_modified++; - } else { - if (crypt_data->is_encrypted()) { - if (kv < state->min_key_version_found) { - state->min_key_version_found = kv; - } + if (space->is_stopping()) { + /* The tablespace is closing (in DROP TABLE or + TRUNCATE TABLE or similar): avoid further access */ + } else if (!*reinterpret_cast<uint32_t*>(FIL_PAGE_OFFSET + + frame)) { + /* It looks like this page was never + allocated. Because key rotation is accessing + pages in a pattern that is unlike the normal + B-tree and undo log access pattern, we cannot + invoke fseg_page_is_free() here, because that + could result in a deadlock. If we invoked + fseg_page_is_free() and released the + tablespace latch before acquiring block->lock, + then the fseg_page_is_free() information + could be stale already. */ + ut_ad(was_free); + ut_ad(kv == 0); + ut_ad(page_get_space_id(frame) == 0); + } else if (fil_crypt_needs_rotation( + crypt_data->encryption, + kv, key_state->key_version, + key_state->rotate_key_age)) { + + mtr.set_named_space(space); + modified = true; + + /* force rotation by dummy updating page */ + mlog_write_ulint(frame + FIL_PAGE_SPACE_ID, + space_id, MLOG_4BYTES, &mtr); + + /* statistics */ + state->crypt_stat.pages_modified++; + } else { + if (crypt_data->is_encrypted()) { + if (kv < state->min_key_version_found) { + state->min_key_version_found = kv; } } @@ -1920,7 +1931,8 @@ fil_crypt_rotate_pages( rotate_thread_t* state) { ulint space = state->space->id; - ulint end = state->offset + state->batch; + ulint end = std::min(state->offset + state->batch, + state->space->free_limit); ut_ad(state->space->n_pending_ops > 0); @@ -2375,7 +2387,10 @@ fil_space_crypt_close_tablespace( ib::warn() << "Waited " << now - start << " seconds to drop space: " - << space->name << "."; + << space->name << " (" + << space->id << ") active threads " + << cnt << "flushing=" + << flushing << "."; last = now; } } diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 33b237bd488..77b28d2c01b 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -436,7 +436,8 @@ xdes_get_descriptor_with_space_hdr( && (init_space || space->purpose == FIL_TYPE_TEMPORARY || (srv_startup_is_before_trx_rollback_phase - && space->id <= srv_undo_tablespaces)))); + && (space->id == TRX_SYS_SPACE + || srv_is_undo_tablespace(space->id)))))); ut_ad(size == space->size_in_header); if ((offset >= size) || (offset >= limit)) { @@ -506,6 +507,51 @@ xdes_get_descriptor( sp_header, space, offset, mtr)); } +/** Get the extent descriptor of a page. +The page where the extent descriptor resides is x-locked. If the page +offset is equal to the free limit of the space, we will add new +extents from above the free limit to the space free list, if not free +limit == space size. This adding is necessary to make the descriptor +defined, as they are uninitialized above the free limit. +@param[in] space tablespace +@param[in] page descriptor page offset +@param[in] offset page offset +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return the extent descriptor +@retval NULL if the descriptor is not available */ +MY_ATTRIBUTE((warn_unused_result)) +static +const xdes_t* +xdes_get_descriptor_const( + const fil_space_t* space, + page_no_t page, + page_no_t offset, + const page_size_t& page_size, + mtr_t* mtr) +{ + ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_S_LOCK)); + ut_ad(offset < space->free_limit); + ut_ad(offset < space->size_in_header); + + if (buf_block_t* block = buf_page_get(page_id_t(space->id, page), + page_size, RW_S_LATCH, mtr)) { + buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + + ut_ad(page != 0 || space->free_limit == mach_read_from_4( + FSP_FREE_LIMIT + FSP_HEADER_OFFSET + + block->frame)); + ut_ad(page != 0 || space->size_in_header == mach_read_from_4( + FSP_SIZE + FSP_HEADER_OFFSET + + block->frame)); + + return(block->frame + XDES_ARR_OFFSET + XDES_SIZE + * xdes_calc_descriptor_index(page_size, offset)); + } + + return(NULL); +} + /** Get a pointer to the extent descriptor. The page where the extent descriptor resides is x-locked. @param[in] space tablespace @@ -611,25 +657,31 @@ fsp_space_modify_check( #endif /* UNIV_DEBUG */ /** Initialize a file page. -@param[in] space tablespace @param[in,out] block file page @param[in,out] mtr mini-transaction */ -MY_ATTRIBUTE((nonnull)) static void -fsp_init_file_page( - const fil_space_t* space MY_ATTRIBUTE((unused)), - buf_block_t* block, - mtr_t* mtr) +fsp_init_file_page(buf_block_t* block, mtr_t* mtr) { - ut_d(fsp_space_modify_check(space, mtr)); - ut_ad(space->id == block->page.id.space()); fsp_init_file_page_low(block); mlog_write_initial_log_record(buf_block_get_frame(block), MLOG_INIT_FILE_PAGE2, mtr); } +#ifdef UNIV_DEBUG +static +void +fsp_init_file_page(const fil_space_t* space, buf_block_t* block, mtr_t* mtr) +{ + ut_d(fsp_space_modify_check(space, mtr)); + ut_ad(space->id == block->page.id.space()); + fsp_init_file_page(block, mtr); +} +#else /* UNIV_DEBUG */ +# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr) +#endif + /***********************************************************//** Parses a redo log record of a file page init. @return end of log record or NULL */ @@ -3149,39 +3201,31 @@ fseg_free_page_func( DBUG_VOID_RETURN; } -/**********************************************************************//** -Checks if a single page of a segment is free. -@return true if free */ +/** Determine whether a page is free. +@param[in,out] space tablespace +@param[in] page page number +@return whether the page is marked as free */ bool -fseg_page_is_free( -/*==============*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space_id, /*!< in: space id */ - ulint page) /*!< in: page offset */ +fseg_page_is_free(fil_space_t* space, unsigned page) { + bool is_free; mtr_t mtr; - ibool is_free; - xdes_t* descr; - fseg_inode_t* seg_inode; - - mtr_start(&mtr); - const fil_space_t* space = mtr_x_lock_space(space_id, &mtr); - const page_size_t page_size(space->flags); - - seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr); - - ut_a(seg_inode); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - - descr = xdes_get_descriptor(space, page, page_size, &mtr); - ut_a(descr); + page_size_t page_size(space->flags); + page_no_t dpage = xdes_calc_descriptor_page(page_size, page); - is_free = xdes_mtr_get_bit( - descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr); + mtr.start(); + mtr_s_lock(&space->latch, &mtr); - mtr_commit(&mtr); + if (page >= space->free_limit || page >= space->size_in_header) { + is_free = true; + } else if (const xdes_t* descr = xdes_get_descriptor_const( + space, dpage, page, page_size, &mtr)) { + is_free = xdes_get_bit(descr, XDES_FREE_BIT, + page % FSP_EXTENT_SIZE); + } else { + is_free = true; + } + mtr.commit(); return(is_free); } @@ -3563,28 +3607,3 @@ fseg_header::to_stream(std::ostream& out) const return(out); } #endif /* UNIV_DEBUG */ - -/**********************************************************************//** -Checks if a single page is free. -@return true if free */ -UNIV_INTERN -bool -fsp_page_is_free_func( -/*==============*/ - ulint space_id, /*!< in: space id */ - ulint page_no, /*!< in: page offset */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - const char *file, - unsigned line) -{ - ut_ad(mtr); - - fil_space_t* space = mtr_x_lock_space(space_id, mtr); - const page_size_t page_size(space->flags); - - xdes_t* descr = xdes_get_descriptor(space, page_no, page_size, mtr); - ut_a(descr); - - return xdes_mtr_get_bit( - descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr); -} diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index faa00407397..02225d9f49f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -351,10 +351,11 @@ thd_destructor_proxy(void *) mysql_mutex_unlock(&thd_destructor_mutex); srv_running = NULL; - if (srv_fast_shutdown == 0) { - while (trx_sys_any_active_transactions()) { - os_thread_sleep(1000); - } + while (srv_fast_shutdown == 0 && + (trx_sys_any_active_transactions() || + (uint)thread_count > srv_n_purge_threads + 1)) { + thd_proc_info(thd, "InnoDB slow shutdown wait"); + os_thread_sleep(1000); } /* Some background threads might generate undo pages that will @@ -631,7 +632,6 @@ static PSI_mutex_info all_innodb_mutexes[] = { # endif /* UNIV_DEBUG */ PSI_KEY(rw_lock_list_mutex), PSI_KEY(rw_lock_mutex), - PSI_KEY(srv_dict_tmpfile_mutex), PSI_KEY(srv_innodb_monitor_mutex), PSI_KEY(srv_misc_tmpfile_mutex), PSI_KEY(srv_monitor_file_mutex), @@ -723,6 +723,7 @@ static PSI_file_info all_innodb_files[] = { static void innodb_remember_check_sysvar_funcs(); mysql_var_check_func check_sysvar_enum; +mysql_var_check_func check_sysvar_int; // should page compression be used by default for new tables static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG, @@ -1745,8 +1746,9 @@ innobase_reset_background_thd(MYSQL_THD thd) ut_ad(THDVAR(thd, background_thread)); /* background purge thread */ + const char *proc_info= thd_proc_info(thd, "reset"); reset_thd(thd); - thd_proc_info(thd, ""); + thd_proc_info(thd, proc_info); } @@ -2164,15 +2166,21 @@ convert_error_code_to_mysql( locally for BLOB fields. Refer to dict_table_get_format(). We limit max record size to 16k for 64k page size. */ bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A); + bool comp = !!(flags & DICT_TF_COMPACT); + ulint free_space = page_get_free_space_of_empty(comp) / 2; + + if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE : + REDUNDANT_REC_MAX_DATA_SIZE)) { + free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE : + REDUNDANT_REC_MAX_DATA_SIZE) - 1; + } + my_printf_error(ER_TOO_BIG_ROWSIZE, - "Row size too large (> %lu). Changing some columns" - " to TEXT or BLOB %smay help. In current row" - " format, BLOB prefix of %d bytes is stored inline.", + "Row size too large (> " ULINTPF "). Changing some columns " + "to TEXT or BLOB %smay help. In current row " + "format, BLOB prefix of %d bytes is stored inline.", MYF(0), - srv_page_size == UNIV_PAGE_SIZE_MAX - ? REC_MAX_DATA_SIZE - 1 - : page_get_free_space_of_empty(flags & - DICT_TF_COMPACT) / 2, + free_space, prefix ? "or using ROW_FORMAT=DYNAMIC or" " ROW_FORMAT=COMPRESSED " @@ -18302,6 +18310,34 @@ innodb_file_format_name_validate( return(1); } +/*************************************************************//** +Don't allow to set innodb_fast_shutdown=0 if purge threads are +already down. +@return 0 if innodb_fast_shutdown can be set */ +static +int +fast_shutdown_validate( +/*=============================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming string */ +{ + if (check_sysvar_int(thd, var, save, value)) { + return(1); + } + + uint new_val = *reinterpret_cast<uint*>(save); + + if (srv_fast_shutdown && !new_val && !srv_running) { + return(1); + } + + return(0); +} + /****************************************************************//** Update the system variable innodb_file_format using the "saved" value. This function is registered as a callback with MySQL. */ @@ -20644,7 +20680,7 @@ static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown, PLUGIN_VAR_OPCMDARG, "Speeds up the shutdown process of the InnoDB storage engine. Possible" " values are 0, 1 (faster) or 2 (fastest - crash-like).", - NULL, NULL, 1, 0, 2, 0); + fast_shutdown_validate, NULL, 1, 0, 2, 0); static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table, PLUGIN_VAR_NOCMDARG, @@ -22940,6 +22976,9 @@ static void innodb_remember_check_sysvar_funcs() /* remember build-in sysvar check functions */ ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM); check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check; + + ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT); + check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check; } /********************************************************************//** diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 82ba8bab6e6..d7f5d36a680 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -170,6 +170,10 @@ public: int index_last(uchar * buf); + /* Copy a cached MySQL row. If requested, also avoids + overwriting non-read columns. */ + void copy_cached_row(uchar *to_rec, const uchar *from_rec, + uint rec_length); int rnd_init(bool scan); int rnd_end(); diff --git a/storage/innobase/handler/ha_innopart.cc b/storage/innobase/handler/ha_innopart.cc deleted file mode 100644 index fb6f4b89a41..00000000000 --- a/storage/innobase/handler/ha_innopart.cc +++ /dev/null @@ -1,4264 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/** @file ha_innopart.cc -Code for native partitioning in InnoDB. - -Created Nov 22, 2013 Mattias Jonsson */ - -#include "univ.i" - -/* Include necessary SQL headers */ -#include <debug_sync.h> -#include <log.h> -#include <strfunc.h> -#include <sql_acl.h> -#include <sql_class.h> -#include <sql_show.h> -#include <sql_table.h> -#include <my_check_opt.h> - -/* Include necessary InnoDB headers */ -#include "btr0sea.h" -#include "dict0dict.h" -#include "dict0stats.h" -#include "lock0lock.h" -#include "row0import.h" -#include "row0merge.h" -#include "row0mysql.h" -#include "row0quiesce.h" -#include "row0sel.h" -#include "row0ins.h" -#include "row0upd.h" -#include "fsp0sysspace.h" -#include "ut0ut.h" - -#include "ha_innodb.h" -#include "ha_innopart.h" -#include "partition_info.h" -#include "key.h" - -#define INSIDE_HA_INNOPART_CC - -/* To be backwards compatible we also fold partition separator on windows. */ -#ifdef _WIN32 -static const char* part_sep = "#p#"; -static const char* sub_sep = "#sp#"; -#else -static const char* part_sep = "#P#"; -static const char* sub_sep = "#SP#"; -#endif /* _WIN32 */ - -/* Partition separator for *nix platforms */ -const char* part_sep_nix = "#P#"; -const char* sub_sep_nix = "#SP#"; - -extern char* innobase_file_format_max; - -Ha_innopart_share::Ha_innopart_share( - TABLE_SHARE* table_share) - : - Partition_share(), - m_table_parts(), - m_index_mapping(), - m_tot_parts(), - m_index_count(), - m_ref_count(), - m_table_share(table_share) -{} - -Ha_innopart_share::~Ha_innopart_share() -{ - ut_ad(m_ref_count == 0); - if (m_table_parts != NULL) { - ut_free(m_table_parts); - m_table_parts = NULL; - } - if (m_index_mapping != NULL) { - ut_free(m_index_mapping); - m_index_mapping = NULL; - } -} - -/** Fold to lower case if windows or lower_case_table_names == 1. -@param[in,out] s String to fold.*/ -void -Ha_innopart_share::partition_name_casedn_str( - char* s) -{ -#ifdef _WIN32 - innobase_casedn_str(s); -#endif -} - -/** Translate and append partition name. -@param[out] to String to write in filesystem charset -@param[in] from Name in system charset -@param[in] sep Separator -@param[in] len Max length of to buffer -@return length of written string. */ -size_t -Ha_innopart_share::append_sep_and_name( - char* to, - const char* from, - const char* sep, - size_t len) -{ - size_t ret; - size_t sep_len = strlen(sep); - - ut_ad(len > sep_len + strlen(from)); - ut_ad(to != NULL); - ut_ad(from != NULL); - ut_ad(from[0] != '\0'); - memcpy(to, sep, sep_len); - - ret = tablename_to_filename(from, to + sep_len, - len - sep_len); - - /* Don't convert to lower case for nix style name. */ - if (strcmp(sep, part_sep_nix) != 0 - && strcmp(sep, sub_sep_nix) != 0) { - - partition_name_casedn_str(to); - } - - return(ret + sep_len); -} - -/** Copy a cached MySQL row. -If requested, also avoids overwriting non-read columns. -@param[out] buf Row in MySQL format. -@param[in] cached_row Which row to copy. */ -inline -void -ha_innopart::copy_cached_row( - uchar* buf, - const uchar* cached_row) -{ - if (m_prebuilt->keep_other_fields_on_keyread) { - row_sel_copy_cached_fields_for_mysql(buf, cached_row, - m_prebuilt); - } else { - memcpy(buf, cached_row, m_rec_length); - } -} - -/** Open one partition. -@param[in] part_id Partition id to open. -@param[in] partition_name Name of internal innodb table to open. -@return false on success else true. */ -bool -Ha_innopart_share::open_one_table_part( - uint part_id, - const char* partition_name) -{ - char norm_name[FN_REFLEN]; - - normalize_table_name(norm_name, partition_name); - m_table_parts[part_id] = - ha_innobase::open_dict_table(partition_name, norm_name, - TRUE, DICT_ERR_IGNORE_NONE); - - if (m_table_parts[part_id] == NULL) { - return(true); - } - - dict_table_t *ib_table = m_table_parts[part_id]; - if ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && m_table_share->fields - != (dict_table_get_n_user_cols(ib_table) - + dict_table_get_n_v_cols(ib_table))) - || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && (m_table_share->fields - != dict_table_get_n_user_cols(ib_table) - + dict_table_get_n_v_cols(ib_table) - 1))) { - ib::warn() << "Partition `" << get_partition_name(part_id) - << "` contains " << dict_table_get_n_user_cols(ib_table) - << " user defined columns in InnoDB, but " - << m_table_share->fields - << " columns in MySQL. Please check" - " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN - "innodb-troubleshooting.html for how to resolve the" - " issue."; - - /* Mark this partition as corrupted, so the drop table - or force recovery can still use it, but not others. - TODO: persist table->corrupted so it will be retained on - restart and out-of-bounds operations will see it. */ - - ib_table->corrupted = true; - dict_table_close(ib_table, FALSE, FALSE); - } - - /* TODO: To save memory, compare with first partition and reuse - the column names etc. in the internal InnoDB meta-data cache. */ - - return(false); -} - -/** Set up the virtual column template for partition table, and points -all m_table_parts[]->vc_templ to it. -@param[in] table MySQL TABLE object -@param[in] ib_table InnoDB dict_table_t -@param[in] table_name Table name (db/table_name) */ -void -Ha_innopart_share::set_v_templ( - TABLE* table, - dict_table_t* ib_table, - const char* name) -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - if (ib_table->n_v_cols > 0) { - for (ulint i = 0; i < m_tot_parts; i++) { - if (m_table_parts[i]->vc_templ == NULL) { - m_table_parts[i]->vc_templ - = UT_NEW_NOKEY(dict_vcol_templ_t()); - m_table_parts[i]->vc_templ->vtempl = NULL; - } else if (m_table_parts[i]->get_ref_count() == 1) { - /* Clean and refresh the template */ - dict_free_vc_templ(m_table_parts[i]->vc_templ); - m_table_parts[i]->vc_templ->vtempl = NULL; - } - - if (m_table_parts[i]->vc_templ->vtempl == NULL) { - innobase_build_v_templ( - table, ib_table, - m_table_parts[i]->vc_templ, - NULL, true, name); - } - } - } -} - -/** Initialize the share with table and indexes per partition. -@param[in] part_info Partition info (partition names to use). -@param[in] table_name Table name (db/table_name). -@return false on success else true. */ -bool -Ha_innopart_share::open_table_parts( - partition_info* part_info, - const char* table_name) -{ - size_t table_name_len; - size_t len; - uint ib_num_index; - uint mysql_num_index; - char partition_name[FN_REFLEN]; - bool index_loaded = true; - -#ifndef DBUG_OFF - if (m_table_share->tmp_table == NO_TMP_TABLE) { - mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data); - } -#endif /* DBUG_OFF */ - m_ref_count++; - if (m_table_parts != NULL) { - ut_ad(m_ref_count > 1); - ut_ad(m_tot_parts > 0); - - /* Increment dict_table_t reference count for all partitions */ - mutex_enter(&dict_sys->mutex); - for (uint i = 0; i < m_tot_parts; i++) { - dict_table_t* table = m_table_parts[i]; - table->acquire(); - ut_ad(table->get_ref_count() >= m_ref_count); - } - mutex_exit(&dict_sys->mutex); - - return(false); - } - ut_ad(m_ref_count == 1); - m_tot_parts = part_info->get_tot_partitions(); - size_t table_parts_size = sizeof(dict_table_t*) * m_tot_parts; - m_table_parts = static_cast<dict_table_t**>( - ut_zalloc(table_parts_size, mem_key_partitioning)); - if (m_table_parts == NULL) { - m_ref_count--; - return(true); - } - - /* Set up the array over all table partitions. */ - table_name_len = strlen(table_name); - memcpy(partition_name, table_name, table_name_len); - List_iterator<partition_element> - part_it(part_info->partitions); - partition_element* part_elem; - uint i = 0; - - while ((part_elem = part_it++)) { - len = append_sep_and_name( - partition_name + table_name_len, - part_elem->partition_name, - part_sep_nix, - FN_REFLEN - table_name_len); - if (part_info->is_sub_partitioned()) { - List_iterator<partition_element> - sub_it(part_elem->subpartitions); - partition_element* sub_elem; - while ((sub_elem = sub_it++)) { - append_sep_and_name( - partition_name - + table_name_len + len, - sub_elem->partition_name, - sub_sep_nix, - FN_REFLEN - table_name_len - len); - if (open_one_table_part(i, partition_name)) { - goto err; - } - i++; - } - } else { - if (open_one_table_part(i, partition_name)) { - goto err; - } - i++; - } - } - ut_ad(i == m_tot_parts); - - /* Create the mapping of mysql index number to innodb indexes. */ - - ib_num_index = (uint) UT_LIST_GET_LEN(m_table_parts[0]->indexes); - mysql_num_index = part_info->table->s->keys; - - /* If there exists inconsistency between MySQL and InnoDB dictionary - (metadata) information, the number of index defined in MySQL - could exceed that in InnoDB, do not build index translation - table in such case. */ - - if (ib_num_index < mysql_num_index) { - ut_ad(0); - goto err; - } - - if (mysql_num_index != 0) { - size_t alloc_size = mysql_num_index * m_tot_parts - * sizeof(*m_index_mapping); - m_index_mapping = static_cast<dict_index_t**>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_index_mapping == NULL) { - - /* Report an error if index_mapping continues to be - NULL and mysql_num_index is a non-zero value. */ - - ib::error() << "Failed to allocate memory for" - " index translation table. Number of" - " Index:" << mysql_num_index; - goto err; - } - } - - /* For each index in the mysql key_info array, fetch its - corresponding InnoDB index pointer into index_mapping - array. */ - - for (ulint idx = 0; idx < mysql_num_index; idx++) { - for (ulint part = 0; part < m_tot_parts; part++) { - ulint count = part * mysql_num_index + idx; - - /* Fetch index pointers into index_mapping according - to mysql index sequence. */ - - m_index_mapping[count] = dict_table_get_index_on_name( - m_table_parts[part], - part_info->table->key_info[idx].name); - - if (m_index_mapping[count] == NULL) { - ib::error() << "Cannot find index `" - << part_info->table->key_info[idx].name - << "` in InnoDB index dictionary" - " partition `" - << get_partition_name(part) << "`."; - index_loaded = false; - break; - } - - /* Double check fetched index has the same - column info as those in mysql key_info. */ - - if (!innobase_match_index_columns( - &part_info->table->key_info[idx], - m_index_mapping[count])) { - ib::error() << "Found index `" - << part_info->table->key_info[idx].name - << "` whose column info does not match" - " that of MySQL."; - index_loaded = false; - break; - } - } - } - if (!index_loaded && m_index_mapping != NULL) { - ut_free(m_index_mapping); - m_index_mapping = NULL; - } - - /* Successfully built the translation table. */ - m_index_count = mysql_num_index; - - return(false); -err: - close_table_parts(); - - return(true); -} - -/** Close all partitions. */ -void -Ha_innopart_share::close_table_parts() -{ -#ifndef DBUG_OFF - if (m_table_share->tmp_table == NO_TMP_TABLE) { - mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data); - } -#endif /* DBUG_OFF */ - m_ref_count--; - if (m_ref_count != 0) { - - /* Decrement dict_table_t reference count for all partitions */ - mutex_enter(&dict_sys->mutex); - for (uint i = 0; i < m_tot_parts; i++) { - dict_table_t* table = m_table_parts[i]; - table->release(); - ut_ad(table->get_ref_count() >= m_ref_count); - } - mutex_exit(&dict_sys->mutex); - - return; - } - - /* Last instance closed, close all table partitions and - free the memory. */ - - mutex_enter(&dict_sys->mutex); - if (m_table_parts != NULL) { - for (uint i = 0; i < m_tot_parts; i++) { - if (m_table_parts[i] != NULL) { - dict_table_close(m_table_parts[i], TRUE, TRUE); - } - } - ut_free(m_table_parts); - m_table_parts = NULL; - } - mutex_exit(&dict_sys->mutex); - if (m_index_mapping != NULL) { - ut_free(m_index_mapping); - m_index_mapping = NULL; - } - - m_tot_parts = 0; - m_index_count = 0; -} - -/** Get index. -Find the index of the specified partition and key number. -@param[in] part_id Partition number. -@param[in] keynr Key number. -@return Index pointer or NULL. */ -inline -dict_index_t* -Ha_innopart_share::get_index( - uint part_id, - uint keynr) -{ - ut_a(part_id < m_tot_parts); - ut_ad(keynr < m_index_count || keynr == MAX_KEY); - if (m_index_mapping == NULL - || keynr >= m_index_count) { - - if (keynr == MAX_KEY) { - return(dict_table_get_first_index( - get_table_part(part_id))); - } - return(NULL); - } - return(m_index_mapping[m_index_count * part_id + keynr]); -} - -/** Get MySQL key number corresponding to InnoDB index. -Calculates the key number used inside MySQL for an Innobase index. We will -first check the "index translation table" for a match of the index to get -the index number. If there does not exist an "index translation table", -or not able to find the index in the translation table, then we will fall back -to the traditional way of looping through dict_index_t list to find a -match. In this case, we have to take into account if we generated a -default clustered index for the table -@param[in] part_id Partition the index belongs to. -@param[in] index Index to return MySQL key number for. -@return the key number used inside MySQL or UINT_MAX if key is not found. */ -inline -uint -Ha_innopart_share::get_mysql_key( - uint part_id, - const dict_index_t* index) -{ - ut_ad(index != NULL); - ut_ad(m_index_mapping != NULL); - ut_ad(m_tot_parts); - - if (index != NULL && m_index_mapping != NULL) { - uint start; - uint end; - - if (part_id < m_tot_parts) { - start = part_id * m_index_count; - end = start + m_index_count; - } else { - start = 0; - end = m_tot_parts * m_index_count; - } - for (uint i = start; i < end; i++) { - if (m_index_mapping[i] == index) { - return(i % m_index_count); - } - } - - /* Print an error message if we cannot find the index - in the "index translation table". */ - - if (index->is_committed()) { - ib::error() << "Cannot find index " - << index->name - << " in InnoDB index translation table."; - } - } - - return(UINT_MAX); -} - -/** Helper function for set bit in bitmap. -@param[in,out] buf Bitmap buffer to update bit in. -@param[in] bit_pos Bit number (index starts at 0). */ -static -inline -void -set_bit( - byte* buf, - size_t pos) -{ - buf[pos/8] |= (0x1 << (pos & 0x7)); -} - -/** Helper function for clear bit in bitmap. -@param[in,out] buf Bitmap buffer to update bit in. -@param[in] bit_pos Bit number (index starts at 0). */ -static -inline -void -clear_bit( - byte* buf, - size_t pos) -{ - buf[pos/8] &= ~(0x1 << (pos & 0x7)); -} - -/** Helper function for get bit in bitmap. -@param[in,out] buf Bitmap buffer. -@param[in] bit_pos Bit number (index starts at 0). -@return byte set to 0x0 or 0x1. -@retval 0x0 bit not set. -@retval 0x1 bet set. */ -static -inline -byte -get_bit( - byte* buf, - size_t pos) -{ - return((buf[pos/8] >> (pos & 0x7)) & 0x1); -} - -/** Helper class for encapsulating new/altered partitions during -ADD/REORG/... PARTITION. */ -class Altered_partitions -{ -private: - /** New partitions during ADD/REORG/... PARTITION. */ - dict_table_t** m_new_table_parts; - - /** Insert nodes per partition. */ - ins_node_t** m_ins_nodes; - - /** sql_stat_start per partition. */ - byte* m_sql_stat_start; - - /** Trx id per partition. */ - trx_id_t* m_trx_ids; - - /** Number of new partitions. */ - size_t m_num_new_parts; - - /** Only need to create the partitions (no open/lock). */ - bool m_only_create; - -public: - Altered_partitions( - uint n_partitions, - bool only_create); - - ~Altered_partitions(); - - bool - initialize(); - - bool - only_create() const - { - return(m_only_create); - } - - /** Set currently used partition. - @param[in] new_part_id Partition id to set. - @param[in] part InnoDB table to use. */ - inline - void - set_part( - ulint new_part_id, - dict_table_t* part) - { - ut_ad(m_new_table_parts[new_part_id] == NULL); - m_new_table_parts[new_part_id] = part; - set_bit(m_sql_stat_start, new_part_id); - } - - /** Get lower level InnoDB table for partition. - @param[in] part_id Partition id. - @return Lower level InnoDB table for the partition id. */ - inline - dict_table_t* - part( - uint part_id) const - { - ut_ad(part_id < m_num_new_parts); - return(m_new_table_parts[part_id]); - } - - /** Set up prebuilt for using a specified partition. - @param[in] prebuilt Prebuilt to update. - @param[in] new_part_id Partition to use. */ - inline - void - get_prebuilt( - row_prebuilt_t* prebuilt, - uint new_part_id) const - { - ut_ad(m_new_table_parts[new_part_id]); - prebuilt->table = m_new_table_parts[new_part_id]; - prebuilt->ins_node = m_ins_nodes[new_part_id]; - prebuilt->trx_id = m_trx_ids[new_part_id]; - prebuilt->sql_stat_start = get_bit(m_sql_stat_start, - new_part_id); - } - - /** Update cached values for a partition from prebuilt. - @param[in] prebuilt Prebuilt to copy from. - @param[in] new_part_id Partition id to copy. */ - inline - void - set_from_prebuilt( - row_prebuilt_t* prebuilt, - uint new_part_id) - { - ut_ad(m_new_table_parts[new_part_id] == prebuilt->table); - m_ins_nodes[new_part_id] = prebuilt->ins_node; - m_trx_ids[new_part_id] = prebuilt->trx_id; - if (prebuilt->sql_stat_start == 0) { - clear_bit(m_sql_stat_start, new_part_id); - } - } -}; - -Altered_partitions::Altered_partitions( - uint n_partitions, - bool only_create) - : - m_new_table_parts(), - m_ins_nodes(), - m_sql_stat_start(), - m_trx_ids(), - m_num_new_parts(n_partitions), - m_only_create(only_create) - {} - -Altered_partitions::~Altered_partitions() -{ - if (m_new_table_parts != NULL) { - for (ulint i = 0; i < m_num_new_parts; i++) { - if (m_new_table_parts[i] != NULL) { - dict_table_close(m_new_table_parts[i], - false, true); - } - } - ut_free(m_new_table_parts); - m_new_table_parts = NULL; - } - if (m_ins_nodes != NULL) { - for (ulint i = 0; i < m_num_new_parts; i++) { - if (m_ins_nodes[i] != NULL) { - ins_node_t* ins = m_ins_nodes[i]; - ut_ad(ins->select == NULL); - que_graph_free_recursive(ins->select); - ins->select = NULL; - if (ins->entry_sys_heap != NULL) { - mem_heap_free(ins->entry_sys_heap); - ins->entry_sys_heap = NULL; - } - } - } - ut_free(m_ins_nodes); - m_ins_nodes = NULL; - } - if (m_sql_stat_start != NULL) { - ut_free(m_sql_stat_start); - m_sql_stat_start = NULL; - } - if (m_trx_ids != NULL) { - ut_free(m_trx_ids); - m_trx_ids = NULL; - } -} - -/** Initialize the object. -@return false on success else true. */ -bool -Altered_partitions::initialize() -{ - size_t alloc_size = sizeof(*m_new_table_parts) * m_num_new_parts; - m_new_table_parts = static_cast<dict_table_t**>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_new_table_parts == NULL) { - return(true); - } - - alloc_size = sizeof(*m_ins_nodes) * m_num_new_parts; - m_ins_nodes = static_cast<ins_node_t**>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_ins_nodes == NULL) { - ut_free(m_new_table_parts); - m_new_table_parts = NULL; - return(true); - } - - alloc_size = sizeof(*m_sql_stat_start) - * UT_BITS_IN_BYTES(m_num_new_parts); - m_sql_stat_start = static_cast<byte*>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_sql_stat_start == NULL) { - ut_free(m_new_table_parts); - m_new_table_parts = NULL; - ut_free(m_ins_nodes); - m_ins_nodes = NULL; - return(true); - } - - alloc_size = sizeof(*m_trx_ids) * m_num_new_parts; - m_trx_ids = static_cast<trx_id_t*>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_trx_ids == NULL) { - ut_free(m_new_table_parts); - m_new_table_parts = NULL; - ut_free(m_ins_nodes); - m_ins_nodes = NULL; - ut_free(m_sql_stat_start); - m_sql_stat_start = NULL; - return(true); - } - - return(false); -} - -/** Construct ha_innopart handler. -@param[in] hton Handlerton. -@param[in] table_arg MySQL Table. -@return a new ha_innopart handler. */ -ha_innopart::ha_innopart( - handlerton* hton, - TABLE_SHARE* table_arg) - : - ha_innobase(hton, table_arg), - Partition_helper(this), - m_ins_node_parts(), - m_upd_node_parts(), - m_blob_heap_parts(), - m_trx_id_parts(), - m_row_read_type_parts(), - m_sql_stat_start_parts(), - m_pcur(), - m_clust_pcur(), - m_new_partitions() -{ - m_int_table_flags &= ~(HA_INNOPART_DISABLED_TABLE_FLAGS); - - /* INNOBASE_SHARE is not used in ha_innopart. - This also flags for ha_innobase that it is a partitioned table. - And make it impossible to use legacy share functionality. */ - - m_share = NULL; -} - -/** Destruct ha_innopart handler. */ -ha_innopart::~ha_innopart() -{} - -/** Returned supported alter table flags. -@param[in] flags Flags to support. -@return Supported flags. */ -uint -ha_innopart::alter_table_flags( - uint flags) -{ - return(HA_PARTITION_FUNCTION_SUPPORTED | HA_FAST_CHANGE_PARTITION); -} - -/** Set the autoinc column max value. -This should only be called once from ha_innobase::open(). -Therefore there's no need for a covering lock. -@param[in] no_lock Ignored! -@return 0 for success or error code. */ -inline -int -ha_innopart::initialize_auto_increment( - bool /* no_lock */) -{ - int error = 0; - ulonglong auto_inc = 0; - const Field* field = table->found_next_number_field; - -#ifndef DBUG_OFF - if (table_share->tmp_table == NO_TMP_TABLE) - { - mysql_mutex_assert_owner(m_part_share->auto_inc_mutex); - } -#endif - - /* Since a table can already be "open" in InnoDB's internal - data dictionary, we only init the autoinc counter once, the - first time the table is loaded. We can safely reuse the - autoinc value from a previous MySQL open. */ - - if (m_part_share->auto_inc_initialized) { - /* Already initialized, nothing to do. */ - return(0); - } - - if (field == NULL) { - ib::info() << "Unable to determine the AUTOINC column name"; - } - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - /* If the recovery level is set so high that writes - are disabled we force the AUTOINC counter to 0 - value effectively disabling writes to the table. - Secondly, we avoid reading the table in case the read - results in failure due to a corrupted table/index. - - We will not return an error to the client, so that the - tables can be dumped with minimal hassle. If an error - were returned in this case, the first attempt to read - the table would fail and subsequent SELECTs would succeed. */ - - } else if (field == NULL) { - /* This is a far more serious error, best to avoid - opening the table and return failure. */ - - my_error(ER_AUTOINC_READ_FAILED, MYF(0)); - error = HA_ERR_AUTOINC_READ_FAILED; - } else { - ib_uint64_t col_max_value = field->get_max_int_value(); - - update_thd(ha_thd()); - - for (uint part = 0; part < m_tot_parts; part++) { - dict_table_t* ib_table - = m_part_share->get_table_part(part); - dict_table_autoinc_lock(ib_table); - ut_ad(ib_table->persistent_autoinc); - ib_uint64_t read_auto_inc - = dict_table_autoinc_read(ib_table); - if (read_auto_inc == 0) { - read_auto_inc = btr_read_autoinc( - dict_table_get_first_index(ib_table)); - - /* At the this stage we do not know the - increment nor the offset, - so use a default increment of 1. */ - - read_auto_inc = innobase_next_autoinc( - read_auto_inc, 1, 1, 0, col_max_value); - dict_table_autoinc_initialize(ib_table, - read_auto_inc); - } - set_if_bigger(auto_inc, read_auto_inc); - dict_table_autoinc_unlock(ib_table); - } - } - -done: - m_part_share->next_auto_inc_val = auto_inc; - m_part_share->auto_inc_initialized = true; - return(error); -} - -/** Opens a partitioned InnoDB table. -Initializes needed data and opens the table which already exists -in an InnoDB database. -@param[in] name Table name (db/tablename) -@param[in] mode Not used -@param[in] test_if_locked Not used -@return 0 or error number. */ -int -ha_innopart::open( - const char* name, - int /*mode*/, - uint /*test_if_locked*/) -{ - dict_table_t* ib_table; - char norm_name[FN_REFLEN]; - - DBUG_ENTER("ha_innopart::open"); - - ut_ad(table); - if (m_part_info == NULL) { - /* Must be during ::clone()! */ - ut_ad(table->part_info != NULL); - m_part_info = table->part_info; - } - - /* Under some cases MySQL seems to call this function while - holding search latch(es). This breaks the latching order as - we acquire dict_sys->mutex below and leads to a deadlock. */ - - normalize_table_name(norm_name, name); - - m_user_thd = NULL; - - /* Get the Ha_innopart_share from the TABLE_SHARE. */ - lock_shared_ha_data(); - m_part_share = static_cast<Ha_innopart_share*>(get_ha_share_ptr()); - if (m_part_share == NULL) { - m_part_share = new (std::nothrow) - Ha_innopart_share(table_share); - if (m_part_share == NULL) { -share_error: - unlock_shared_ha_data(); - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - set_ha_share_ptr(static_cast<Handler_share*>(m_part_share)); - } - if (m_part_share->open_table_parts(m_part_info, name) - || m_part_share->populate_partition_name_hash(m_part_info)) { - goto share_error; - } - if (m_part_share->auto_inc_mutex == NULL - && table->found_next_number_field != NULL) { - if (m_part_share->init_auto_inc_mutex(table_share)) { - goto share_error; - } - } - unlock_shared_ha_data(); - - /* Will be allocated if it is needed in ::update_row(). */ - m_upd_buf = NULL; - m_upd_buf_size = 0; - - /* Get pointer to a table object in InnoDB dictionary cache. */ - ib_table = m_part_share->get_table_part(0); - - m_pcur_parts = NULL; - m_clust_pcur_parts = NULL; - m_pcur_map = NULL; - - /* TODO: Handle mismatching #P# vs #p# in upgrading to new DD instead! - See bug#58406, The problem exists when moving partitioned tables - between Windows and Unix-like platforms. InnoDB always folds the name - on windows, partitioning never folds partition (and #P# separator). - I.e. non of it follows lower_case_table_names correctly :( */ - - if (open_partitioning(m_part_share)) - { - close(); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - /* Currently we track statistics for all partitions, but for - the secondary indexes we only use the biggest partition. */ - - for (uint part_id = 0; part_id < m_tot_parts; part_id++) { - innobase_copy_frm_flags_from_table_share( - m_part_share->get_table_part(part_id), - table->s); - dict_stats_init(m_part_share->get_table_part(part_id)); - } - - MONITOR_INC(MONITOR_TABLE_OPEN); - - bool no_tablespace; - THD* thd = ha_thd(); - - /* TODO: Should we do this check for every partition during ::open()? */ - /* TODO: refactor this in ha_innobase so it can increase code reuse. */ - if (dict_table_is_discarded(ib_table)) { - - ib_senderrf(thd, - IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - /* Allow an open because a proper DISCARD should have set - all the flags and index root page numbers to FIL_NULL that - should prevent any DML from running but it should allow DDL - operations. */ - - no_tablespace = false; - - } else if (ib_table->ibd_file_missing) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, - ER_TABLESPACE_MISSING, norm_name); - - /* This means we have no idea what happened to the tablespace - file, best to play it safe. */ - - no_tablespace = true; - } else { - no_tablespace = false; - } - - if (!thd_tablespace_op(thd) && no_tablespace) { - set_my_errno(ENOENT); - - lock_shared_ha_data(); - m_part_share->close_table_parts(); - unlock_shared_ha_data(); - m_part_share = NULL; - - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); - } - - m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength); - - m_prebuilt->default_rec = table->s->default_values; - ut_ad(m_prebuilt->default_rec); - - DBUG_ASSERT(table != NULL); - m_prebuilt->m_mysql_table = table; - - if (ib_table->n_v_cols > 0) { - mutex_enter(&dict_sys->mutex); - m_part_share->set_v_templ(table, ib_table, name); - mutex_exit(&dict_sys->mutex); - } - - /* Looks like MySQL-3.23 sometimes has primary key number != 0. */ - m_primary_key = table->s->primary_key; - key_used_on_scan = m_primary_key; - - /* Allocate a buffer for a 'row reference'. A row reference is - a string of bytes of length ref_length which uniquely specifies - a row in our table. Note that MySQL may also compare two row - references for equality by doing a simple memcmp on the strings - of length ref_length! */ - - if (!row_table_got_default_clust_index(ib_table)) { - - m_prebuilt->clust_index_was_generated = FALSE; - - if (UNIV_UNLIKELY(m_primary_key >= MAX_KEY)) { - table_name_t table_name; - table_name.m_name = const_cast<char*>(name); - ib::error() << "Table " << table_name - << " has a primary key in InnoDB data" - " dictionary, but not in MySQL!"; - - /* This mismatch could cause further problems - if not attended, bring this to the user's attention - by printing a warning in addition to log a message - in the errorlog. */ - - push_warning_printf(thd, Sql_condition::SL_WARNING, - ER_NO_SUCH_INDEX, - "Table %s has a" - " primary key in InnoDB data" - " dictionary, but not in" - " MySQL!", name); - - /* If m_primary_key >= MAX_KEY, its (m_primary_key) - value could be out of bound if continue to index - into key_info[] array. Find InnoDB primary index, - and assign its key_length to ref_length. - In addition, since MySQL indexes are sorted starting - with primary index, unique index etc., initialize - ref_length to the first index key length in - case we fail to find InnoDB cluster index. - - Please note, this will not resolve the primary - index mismatch problem, other side effects are - possible if users continue to use the table. - However, we allow this table to be opened so - that user can adopt necessary measures for the - mismatch while still being accessible to the table - date. */ - - if (table->key_info == NULL) { - ut_ad(table->s->keys == 0); - ref_length = 0; - } else { - ref_length = table->key_info[0].key_length; - } - - /* Find corresponding cluster index - key length in MySQL's key_info[] array. */ - - for (uint i = 0; i < table->s->keys; i++) { - dict_index_t* index; - index = innopart_get_index(0, i); - if (dict_index_is_clust(index)) { - ref_length = - table->key_info[i].key_length; - } - } - ut_a(ref_length); - ref_length += PARTITION_BYTES_IN_POS; - } else { - /* MySQL allocates the buffer for ref. - key_info->key_length includes space for all key - columns + one byte for each column that may be - NULL. ref_length must be as exact as possible to - save space, because all row reference buffers are - allocated based on ref_length. */ - - ref_length = table->key_info[m_primary_key].key_length; - ref_length += PARTITION_BYTES_IN_POS; - } - } else { - if (m_primary_key != MAX_KEY) { - table_name_t table_name; - table_name.m_name = const_cast<char*>(name); - ib::error() << "Table " << table_name - << " has no primary key in InnoDB data" - " dictionary, but has one in MySQL! If you" - " created the table with a MySQL version <" - " 3.23.54 and did not define a primary key," - " but defined a unique key with all non-NULL" - " columns, then MySQL internally treats that" - " key as the primary key. You can fix this" - " error by dump + DROP + CREATE + reimport" - " of the table."; - - /* This mismatch could cause further problems - if not attended, bring this to the user attention - by printing a warning in addition to log a message - in the errorlog. */ - - push_warning_printf(thd, Sql_condition::SL_WARNING, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has no" - " primary key in InnoDB data" - " dictionary, but has one in" - " MySQL!", name); - } - - m_prebuilt->clust_index_was_generated = TRUE; - - ref_length = DATA_ROW_ID_LEN; - ref_length += PARTITION_BYTES_IN_POS; - - /* If we automatically created the clustered index, then - MySQL does not know about it, and MySQL must NOT be aware - of the index used on scan, to make it avoid checking if we - update the column of the index. That is why we assert below - that key_used_on_scan is the undefined value MAX_KEY. - The column is the row id in the automatical generation case, - and it will never be updated anyway. */ - - if (key_used_on_scan != MAX_KEY) { - table_name_t table_name; - table_name.m_name = const_cast<char*>(name); - ib::warn() << "Table " << table_name - << " key_used_on_scan is " - << key_used_on_scan << " even though there is" - " no primary key inside InnoDB."; - } - } - - /* Index block size in InnoDB: used by MySQL in query optimization. */ - stats.block_size = UNIV_PAGE_SIZE; - - if (m_prebuilt->table != NULL) { - /* We update the highest file format in the system table - space, if this table has higher file format setting. */ - - trx_sys_file_format_max_upgrade( - (const char**) &innobase_file_format_max, - dict_table_get_format(m_prebuilt->table)); - } - - /* Only if the table has an AUTOINC column. */ - if (m_prebuilt->table != NULL - && !m_prebuilt->table->ibd_file_missing - && table->found_next_number_field != NULL) { - int error; - - /* Since a table can already be "open" in InnoDB's internal - data dictionary, we only init the autoinc counter once, the - first time the table is loaded, - see ha_innopart::initialize_auto_increment. - We can safely reuse the autoinc value from a previous MySQL - open. */ - - lock_auto_increment(); - error = initialize_auto_increment(false); - unlock_auto_increment(); - if (error != 0) { - close(); - DBUG_RETURN(error); - } - } - -#ifdef HA_INNOPART_SUPPORTS_FULLTEXT - /* Set plugin parser for fulltext index. */ - for (uint i = 0; i < table->s->keys; i++) { - if (table->key_info[i].flags & HA_USES_PARSER) { - dict_index_t* index = innobase_get_index(i); - plugin_ref parser = table->key_info[i].parser; - - ut_ad(index->type & DICT_FTS); - index->parser = - static_cast<st_mysql_ftparser *>( - plugin_decl(parser)->info); - - DBUG_EXECUTE_IF("fts_instrument_use_default_parser", - index->parser = &fts_default_parser;); - } - } -#endif /* HA_INNOPART_SUPPORTS_FULLTEXT */ - - size_t alloc_size = sizeof(*m_ins_node_parts) * m_tot_parts; - m_ins_node_parts = static_cast<ins_node_t**>( - ut_zalloc(alloc_size, mem_key_partitioning)); - - alloc_size = sizeof(*m_upd_node_parts) * m_tot_parts; - m_upd_node_parts = static_cast<upd_node_t**>( - ut_zalloc(alloc_size, mem_key_partitioning)); - - alloc_blob_heap_array(); - - alloc_size = sizeof(*m_trx_id_parts) * m_tot_parts; - m_trx_id_parts = static_cast<trx_id_t*>( - ut_zalloc(alloc_size, mem_key_partitioning)); - - alloc_size = sizeof(*m_row_read_type_parts) * m_tot_parts; - m_row_read_type_parts = static_cast<ulint*>( - ut_zalloc(alloc_size, mem_key_partitioning)); - - alloc_size = UT_BITS_IN_BYTES(m_tot_parts); - m_sql_stat_start_parts = static_cast<uchar*>( - ut_zalloc(alloc_size, mem_key_partitioning)); - if (m_ins_node_parts == NULL - || m_upd_node_parts == NULL - || m_blob_heap_parts == NULL - || m_trx_id_parts == NULL - || m_row_read_type_parts == NULL - || m_sql_stat_start_parts == NULL) { - close(); // Frees all the above. - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); - - DBUG_RETURN(0); -} - -/** Get a cloned ha_innopart handler. -@param[in] name Table name. -@param[in] mem_root MySQL mem_root to use. -@return new ha_innopart handler. */ -handler* -ha_innopart::clone( - const char* name, - MEM_ROOT* mem_root) -{ - ha_innopart* new_handler; - - DBUG_ENTER("ha_innopart::clone"); - - new_handler = dynamic_cast<ha_innopart*>(handler::clone(name, - mem_root)); - if (new_handler != NULL) { - ut_ad(new_handler->m_prebuilt != NULL); - - new_handler->m_prebuilt->select_lock_type = - m_prebuilt->select_lock_type; - } - - DBUG_RETURN(new_handler); -} - -/** Clear used ins_nodes and upd_nodes. */ -void ha_innopart::clear_ins_upd_nodes() -{ - /* Free memory from insert nodes. */ - if (m_ins_node_parts != NULL) { - for (uint i = 0; i < m_tot_parts; i++) { - if (m_ins_node_parts[i] != NULL) { - ins_node_t* ins = m_ins_node_parts[i]; - if (ins->select != NULL) { - que_graph_free_recursive(ins->select); - ins->select = NULL; - } - - if (ins->entry_sys_heap != NULL) { - mem_heap_free(ins->entry_sys_heap); - ins->entry_sys_heap = NULL; - } - m_ins_node_parts[i] = NULL; - } - } - } - - /* Free memory from update nodes. */ - if (m_upd_node_parts != NULL) { - for (uint i = 0; i < m_tot_parts; i++) { - if (m_upd_node_parts[i] != NULL) { - upd_node_t* upd = m_upd_node_parts[i]; - if (upd->cascade_top) { - mem_heap_free(upd->cascade_heap); - upd->cascade_top = false; - upd->cascade_heap = NULL; - } - if (upd->in_mysql_interface) { - btr_pcur_free_for_mysql(upd->pcur); - upd->in_mysql_interface = FALSE; - } - - if (upd->select != NULL) { - que_graph_free_recursive(upd->select); - upd->select = NULL; - } - if (upd->heap != NULL) { - mem_heap_free(upd->heap); - upd->heap = NULL; - } - m_upd_node_parts[i] = NULL; - } - } - } -} - -/** Closes a handle to an InnoDB table. -@return 0 */ -int -ha_innopart::close() -{ - DBUG_ENTER("ha_innopart::close"); - - ut_ad(m_pcur_parts == NULL); - ut_ad(m_clust_pcur_parts == NULL); - close_partitioning(); - - ut_ad(m_part_share != NULL); - if (m_part_share != NULL) { - lock_shared_ha_data(); - m_part_share->close_table_parts(); - unlock_shared_ha_data(); - m_part_share = NULL; - } - clear_ins_upd_nodes(); - free_blob_heap_array(); - - /* Prevent double close of m_prebuilt->table. The real one was done - done in m_part_share->close_table_parts(). */ - m_prebuilt->table = NULL; - row_prebuilt_free(m_prebuilt, FALSE); - - if (m_upd_buf != NULL) { - ut_ad(m_upd_buf_size != 0); - /* Allocated with my_malloc! */ - my_free(m_upd_buf); - m_upd_buf = NULL; - m_upd_buf_size = 0; - } - - if (m_ins_node_parts != NULL) { - ut_free(m_ins_node_parts); - m_ins_node_parts = NULL; - } - if (m_upd_node_parts != NULL) { - ut_free(m_upd_node_parts); - m_upd_node_parts = NULL; - } - if (m_trx_id_parts != NULL) { - ut_free(m_trx_id_parts); - m_trx_id_parts = NULL; - } - if (m_row_read_type_parts != NULL) { - ut_free(m_row_read_type_parts); - m_row_read_type_parts = NULL; - } - if (m_sql_stat_start_parts != NULL) { - ut_free(m_sql_stat_start_parts); - m_sql_stat_start_parts = NULL; - } - - MONITOR_INC(MONITOR_TABLE_CLOSE); - - /* Tell InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - DBUG_RETURN(0); -} - -/** Change active partition. -Copies needed info into m_prebuilt from the partition specific memory. -@param[in] part_id Partition to set as active. */ -void -ha_innopart::set_partition( - uint part_id) -{ - DBUG_ENTER("ha_innopart::set_partition"); - - DBUG_PRINT("ha_innopart", ("partition id: %u", part_id)); - - if (part_id >= m_tot_parts) { - ut_ad(0); - DBUG_VOID_RETURN; - } - if (m_pcur_parts != NULL) { - m_prebuilt->pcur = &m_pcur_parts[m_pcur_map[part_id]]; - } - if (m_clust_pcur_parts != NULL) { - m_prebuilt->clust_pcur = - &m_clust_pcur_parts[m_pcur_map[part_id]]; - } - m_prebuilt->ins_node = m_ins_node_parts[part_id]; - m_prebuilt->upd_node = m_upd_node_parts[part_id]; - - /* For unordered scan and table scan, use blob_heap from first - partition as we need exactly one blob. */ - m_prebuilt->blob_heap = m_blob_heap_parts[m_ordered ? part_id : 0]; - -#ifdef UNIV_DEBUG - if (m_prebuilt->blob_heap != NULL) { - DBUG_PRINT("ha_innopart", ("validating blob_heap: %p", - m_prebuilt->blob_heap)); - mem_heap_validate(m_prebuilt->blob_heap); - } -#endif - - m_prebuilt->trx_id = m_trx_id_parts[part_id]; - m_prebuilt->row_read_type = m_row_read_type_parts[part_id]; - m_prebuilt->sql_stat_start = get_bit(m_sql_stat_start_parts, part_id); - m_prebuilt->table = m_part_share->get_table_part(part_id); - m_prebuilt->index = innopart_get_index(part_id, active_index); - - DBUG_VOID_RETURN; -} - -/** Update active partition. -Copies needed info from m_prebuilt into the partition specific memory. -@param[in] part_id Partition to set as active. */ -void -ha_innopart::update_partition( - uint part_id) -{ - DBUG_ENTER("ha_innopart::update_partition"); - DBUG_PRINT("ha_innopart", ("partition id: %u", part_id)); - - if (part_id >= m_tot_parts) { - ut_ad(0); - DBUG_VOID_RETURN; - } - m_ins_node_parts[part_id] = m_prebuilt->ins_node; - m_upd_node_parts[part_id] = m_prebuilt->upd_node; - -#ifdef UNIV_DEBUG - if (m_prebuilt->blob_heap != NULL) { - DBUG_PRINT("ha_innopart", ("validating blob_heap: %p", - m_prebuilt->blob_heap)); - mem_heap_validate(m_prebuilt->blob_heap); - } -#endif - - /* For unordered scan and table scan, use blob_heap from first - partition as we need exactly one blob anytime. */ - m_blob_heap_parts[m_ordered ? part_id : 0] = m_prebuilt->blob_heap; - - m_trx_id_parts[part_id] = m_prebuilt->trx_id; - m_row_read_type_parts[part_id] = m_prebuilt->row_read_type; - if (m_prebuilt->sql_stat_start == 0) { - clear_bit(m_sql_stat_start_parts, part_id); - } - m_last_part = part_id; - DBUG_VOID_RETURN; -} - -/** Was the last returned row semi consistent read. -In an UPDATE or DELETE, if the row under the cursor was locked by -another transaction, and the engine used an optimistic read of the last -committed row value under the cursor, then the engine returns 1 from -this function. MySQL must NOT try to update this optimistic value. If -the optimistic value does not match the WHERE condition, MySQL can -decide to skip over this row. This can be used to avoid unnecessary -lock waits. - -If this method returns true, it will also signal the storage -engine that the next read will be a locking re-read of the row. -@see handler.h and row0mysql.h -@return true if last read was semi consistent else false. */ -bool -ha_innopart::was_semi_consistent_read() -{ - return(m_row_read_type_parts[m_last_part] - == ROW_READ_DID_SEMI_CONSISTENT); -} - -/** Try semi consistent read. -Tell the engine whether it should avoid unnecessary lock waits. -If yes, in an UPDATE or DELETE, if the row under the cursor was locked -by another transaction, the engine may try an optimistic read of -the last committed row value under the cursor. -@see handler.h and row0mysql.h -@param[in] yes Should semi-consistent read be used. */ -void -ha_innopart::try_semi_consistent_read( - bool yes) -{ - ha_innobase::try_semi_consistent_read(yes); - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - m_row_read_type_parts[i] = m_prebuilt->row_read_type; - } -} - -/** Removes a lock on a row. -Removes a new lock set on a row, if it was not read optimistically. -This can be called after a row has been read in the processing of -an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */ -void -ha_innopart::unlock_row() -{ - ut_ad(m_last_part < m_tot_parts); - set_partition(m_last_part); - ha_innobase::unlock_row(); - update_partition(m_last_part); -} - -/** Write a row in partition. -Stores a row in an InnoDB database, to the table specified in this -handle. -@param[in] part_id Partition to write to. -@param[in] record A row in MySQL format. -@return 0 or error code. */ -int -ha_innopart::write_row_in_part( - uint part_id, - uchar* record) -{ - int error; - Field* saved_next_number_field = table->next_number_field; - DBUG_ENTER("ha_innopart::write_row_in_part"); - set_partition(part_id); - - /* Prevent update_auto_increment to be called - again in ha_innobase::write_row(). */ - - table->next_number_field = NULL; - - /* TODO: try to avoid creating a new dtuple - (in row_get_prebuilt_insert_row()) for each partition). - Might be needed due to ins_node implementation. */ - - error = ha_innobase::write_row(record); - update_partition(part_id); - table->next_number_field = saved_next_number_field; - DBUG_RETURN(error); -} - -/** Update a row in partition. -Updates a row given as a parameter to a new value. -@param[in] part_id Partition to update row in. -@param[in] old_row Old row in MySQL format. -@param[in] new_row New row in MySQL format. -@return 0 or error number. */ -int -ha_innopart::update_row_in_part( - uint part_id, - const uchar* old_row, - uchar* new_row) -{ - int error; - DBUG_ENTER("ha_innopart::update_row_in_part"); - - set_partition(part_id); - error = ha_innobase::update_row(old_row, new_row); - update_partition(part_id); - DBUG_RETURN(error); -} - -/** Deletes a row in partition. -@param[in] part_id Partition to delete from. -@param[in] record Row to delete in MySQL format. -@return 0 or error number. */ -int -ha_innopart::delete_row_in_part( - uint part_id, - const uchar* record) -{ - int error; - DBUG_ENTER("ha_innopart::delete_row_in_part"); - m_err_rec = NULL; - - m_last_part = part_id; - set_partition(part_id); - error = ha_innobase::delete_row(record); - update_partition(part_id); - DBUG_RETURN(error); -} - -/** Initializes a handle to use an index. -@param[in] keynr Key (index) number. -@param[in] sorted True if result MUST be sorted according to index. -@return 0 or error number. */ -int -ha_innopart::index_init( - uint keynr, - bool sorted) -{ - int error; - uint part_id = m_part_info->get_first_used_partition(); - DBUG_ENTER("ha_innopart::index_init"); - - active_index = keynr; - if (part_id == MY_BIT_NONE) { - DBUG_RETURN(0); - } - - error = ph_index_init_setup(keynr, sorted); - if (error != 0) { - DBUG_RETURN(error); - } - - if (sorted) { - error = init_record_priority_queue(); - if (error != 0) { - /* Needs cleanup in case it returns error. */ - destroy_record_priority_queue(); - DBUG_RETURN(error); - } - /* Disable prefetch. - The prefetch buffer is not partitioning aware, so it may return - rows from a different partition if either the prefetch buffer is - full, or it is non-empty and the partition is exhausted. */ - m_prebuilt->m_no_prefetch = true; - } - - /* For scan across partitions, the keys needs to be materialized */ - m_prebuilt->m_read_virtual_key = true; - - error = change_active_index(part_id, keynr); - if (error != 0) { - destroy_record_priority_queue(); - DBUG_RETURN(error); - } - - DBUG_EXECUTE_IF("partition_fail_index_init", { - destroy_record_priority_queue(); - DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND); - }); - - DBUG_RETURN(0); -} - -/** End index cursor. -@return 0 or error code. */ -int -ha_innopart::index_end() -{ - uint part_id = m_part_info->get_first_used_partition(); - DBUG_ENTER("ha_innopart::index_end"); - - if (part_id == MY_BIT_NONE) { - /* Never initialized any index. */ - active_index = MAX_KEY; - DBUG_RETURN(0); - } - if (m_ordered) { - destroy_record_priority_queue(); - m_prebuilt->m_no_prefetch = false; - } - m_prebuilt->m_read_virtual_key = false; - - DBUG_RETURN(ha_innobase::index_end()); -} - -/* Partitioning support functions. */ - -/** Setup the ordered record buffer and the priority queue. -@param[in] used_parts Number of used partitions in query. -@return false for success else true. */ -int -ha_innopart::init_record_priority_queue_for_parts( - uint used_parts) -{ - size_t alloc_size; - void* buf; - - DBUG_ENTER("ha_innopart::init_record_priority_queue_for_parts"); - ut_ad(used_parts >= 1); - /* TODO: Don't use this if only one partition is used! */ - //ut_ad(used_parts > 1); - - /* We could reuse current m_prebuilt->pcur/clust_pcur for the first - used partition, but it would complicate and affect performance, - so we trade some extra memory instead. */ - - m_pcur = m_prebuilt->pcur; - m_clust_pcur = m_prebuilt->clust_pcur; - - /* If we searching for secondary key or doing a write/update - we will need two pcur, one for the active (secondary) index and - one for the clustered index. */ - - bool need_clust_index = - m_curr_key_info[1] != NULL - || get_lock_type() != F_RDLCK; - - /* pcur and clust_pcur per partition. - By using zalloc, we do not need to initialize the pcur's! */ - - alloc_size = used_parts * sizeof(btr_pcur_t); - if (need_clust_index) { - alloc_size *= 2; - } - buf = ut_zalloc(alloc_size, mem_key_partitioning); - if (buf == NULL) { - DBUG_RETURN(true); - } - m_pcur_parts = static_cast<btr_pcur_t*>(buf); - if (need_clust_index) { - m_clust_pcur_parts = &m_pcur_parts[used_parts]; - } - /* mapping from part_id to pcur. */ - alloc_size = m_tot_parts * sizeof(*m_pcur_map); - buf = ut_zalloc(alloc_size, mem_key_partitioning); - if (buf == NULL) { - DBUG_RETURN(true); - } - m_pcur_map = static_cast<uint16_t*>(buf); - { - uint16_t pcur_count = 0; - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - m_pcur_map[i] = pcur_count++; - } - } - - DBUG_RETURN(false); -} - -/** Destroy the ordered record buffer and the priority queue. */ -inline -void -ha_innopart::destroy_record_priority_queue_for_parts() -{ - DBUG_ENTER("ha_innopart::destroy_record_priority_queue"); - if (m_pcur_parts != NULL) { - uint used_parts; - used_parts = bitmap_bits_set(&m_part_info->read_partitions); - for (uint i = 0; i < used_parts; i++) { - btr_pcur_free(&m_pcur_parts[i]); - if (m_clust_pcur_parts != NULL) { - btr_pcur_free(&m_clust_pcur_parts[i]); - } - } - ut_free(m_pcur_parts); - m_clust_pcur_parts = NULL; - m_pcur_parts = NULL; - /* Reset the original m_prebuilt->pcur. */ - m_prebuilt->pcur = m_pcur; - m_prebuilt->clust_pcur = m_clust_pcur; - } - if (m_pcur_map != NULL) { - ut_free(m_pcur_map); - m_pcur_map = NULL; - } - DBUG_VOID_RETURN; -} - -/** Print error information. -@param[in] error Error code (MySQL). -@param[in] errflag Flags. */ -void -ha_innopart::print_error( - int error, - myf errflag) -{ - DBUG_ENTER("ha_innopart::print_error"); - if (print_partition_error(error, errflag)) { - ha_innobase::print_error(error, errflag); - } - - DBUG_VOID_RETURN; -} - -/** Can error be ignored. -@param[in] error Error code to check. -@return true if ignorable else false. */ -bool -ha_innopart::is_ignorable_error( - int error) -{ - if (ha_innobase::is_ignorable_error(error) - || error == HA_ERR_NO_PARTITION_FOUND - || error == HA_ERR_NOT_IN_LOCK_PARTITIONS) { - - return(true); - } - return(false); -} - -/** Get the index for the current partition -@param[in] keynr MySQL index number. -@return InnoDB index or NULL. */ -inline -dict_index_t* -ha_innopart::innobase_get_index( - uint keynr) -{ - uint part_id = m_last_part; - if (part_id >= m_tot_parts) { - ut_ad(0); - part_id = 0; - } - return(innopart_get_index(part_id, keynr)); -} - -/** Get the index for a handle. -Does not change active index. -@param[in] keynr Use this index; MAX_KEY means always clustered index, -even if it was internally generated by InnoDB. -@param[in] part_id From this partition. -@return NULL or index instance. */ -inline -dict_index_t* -ha_innopart::innopart_get_index( - uint part_id, - uint keynr) -{ - KEY* key = NULL; - dict_index_t* index = NULL; - - DBUG_ENTER("innopart_get_index"); - - if (keynr != MAX_KEY && table->s->keys > 0) { - key = table->key_info + keynr; - - index = m_part_share->get_index(part_id, keynr); - - if (index != NULL) { - ut_a(ut_strcmp(index->name, key->name) == 0); - } else { - /* Can't find index with keynr in the translation - table. Only print message if the index translation - table exists. */ - - ib::warn() << "InnoDB could not find index " - << (key ? key->name : "NULL") - << " key no " << keynr << " for table " - << m_prebuilt->table->name - << " through its index translation table"; - - index = dict_table_get_index_on_name(m_prebuilt->table, - key->name); - } - } else { - /* Get the generated index. */ - ut_ad(keynr == MAX_KEY); - index = dict_table_get_first_index( - m_part_share->get_table_part(part_id)); - } - - if (index == NULL) { - ib::error() << "InnoDB could not find key n:o " - << keynr << " with name " << (key ? key->name : "NULL") - << " from dict cache for table " - << m_prebuilt->table->name << " partition n:o " - << part_id; - } - - DBUG_RETURN(index); -} - -/** Changes the active index of a handle. -@param[in] part_id Use this partition. -@param[in] keynr Use this index; MAX_KEY means always clustered index, -even if it was internally generated by InnoDB. -@return 0 or error number. */ -int -ha_innopart::change_active_index( - uint part_id, - uint keynr) -{ - DBUG_ENTER("ha_innopart::change_active_index"); - - ut_ad(m_user_thd == ha_thd()); - ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd)); - - active_index = keynr; - set_partition(part_id); - - if (UNIV_UNLIKELY(m_prebuilt->index == NULL)) { - ib::warn() << "change_active_index(" << part_id - << "," << keynr << ") failed"; - m_prebuilt->index_usable = FALSE; - DBUG_RETURN(1); - } - - m_prebuilt->index_usable = row_merge_is_index_usable(m_prebuilt->trx, - m_prebuilt->index); - - if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) { - if (dict_index_is_corrupted(m_prebuilt->index)) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof table_name, - m_prebuilt->index->table->name.m_name); - - push_warning_printf( - m_user_thd, Sql_condition::SL_WARNING, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s for table %s is" - " marked as corrupted" - " (partition %u)", - m_prebuilt->index->name(), table_name, part_id); - DBUG_RETURN(HA_ERR_INDEX_CORRUPT); - } else { - push_warning_printf( - m_user_thd, Sql_condition::SL_WARNING, - HA_ERR_TABLE_DEF_CHANGED, - "InnoDB: insufficient history for index %u", - keynr); - } - - /* The caller seems to ignore this. Thus, we must check - this again in row_search_for_mysql(). */ - - DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); - } - - ut_a(m_prebuilt->search_tuple != NULL); - - /* If too expensive, cache the keynr and only update search_tuple when - keynr changes. Remember that the clustered index is also used for - MAX_KEY. */ - dtuple_set_n_fields(m_prebuilt->search_tuple, - m_prebuilt->index->n_fields); - - dict_index_copy_types(m_prebuilt->search_tuple, m_prebuilt->index, - m_prebuilt->index->n_fields); - - /* MySQL changes the active index for a handle also during some - queries, for example SELECT MAX(a), SUM(a) first retrieves the - MAX() and then calculates the sum. Previously we played safe - and used the flag ROW_MYSQL_WHOLE_ROW below, but that caused - unnecessary copying. Starting from MySQL-4.1 we use a more - efficient flag here. */ - - /* TODO: Is this really needed? - Will it not be built in index_read? */ - - build_template(false); - - DBUG_RETURN(0); -} - -/** Return first record in index from a partition. -@param[in] part Partition to read from. -@param[out] record First record in index in the partition. -@return error number or 0. */ -int -ha_innopart::index_first_in_part( - uint part, - uchar* record) -{ - int error; - DBUG_ENTER("ha_innopart::index_first_in_part"); - - set_partition(part); - error = ha_innobase::index_first(record); - update_partition(part); - - DBUG_RETURN(error); -} - -/** Return next record in index from a partition. -@param[in] part Partition to read from. -@param[out] record Last record in index in the partition. -@return error number or 0. */ -int -ha_innopart::index_next_in_part( - uint part, - uchar* record) -{ - DBUG_ENTER("ha_innopart::index_next_in_part"); - - int error; - - set_partition(part); - error = ha_innobase::index_next(record); - update_partition(part); - - ut_ad(m_ordered_scan_ongoing - || m_ordered_rec_buffer == NULL - || m_prebuilt->used_in_HANDLER - || m_part_spec.start_part >= m_part_spec.end_part); - - DBUG_RETURN(error); -} - -/** Return next same record in index from a partition. -This routine is used to read the next record, but only if the key is -the same as supplied in the call. -@param[in] part Partition to read from. -@param[out] record Last record in index in the partition. -@param[in] key Key to match. -@param[in] length Length of key. -@return error number or 0. */ -int -ha_innopart::index_next_same_in_part( - uint part, - uchar* record, - const uchar* key, - uint length) -{ - int error; - - set_partition(part); - error = ha_innobase::index_next_same(record, key, length); - update_partition(part); - return(error); -} - -/** Return last record in index from a partition. -@param[in] part Partition to read from. -@param[out] record Last record in index in the partition. -@return error number or 0. */ -int -ha_innopart::index_last_in_part( - uint part, - uchar* record) -{ - int error; - - set_partition(part); - error = ha_innobase::index_last(record); - update_partition(part); - return(error); -} - -/** Return previous record in index from a partition. -@param[in] part Partition to read from. -@param[out] record Last record in index in the partition. -@return error number or 0. */ -int -ha_innopart::index_prev_in_part( - uint part, - uchar* record) -{ - int error; - - set_partition(part); - error = ha_innobase::index_prev(record); - update_partition(part); - - ut_ad(m_ordered_scan_ongoing - || m_ordered_rec_buffer == NULL - || m_prebuilt->used_in_HANDLER - || m_part_spec.start_part >= m_part_spec.end_part); - - return(error); -} - -/** Start index scan and return first record from a partition. -This routine starts an index scan using a start key. The calling -function will check the end key on its own. -@param[in] part Partition to read from. -@param[out] record First matching record in index in the partition. -@param[in] key Key to match. -@param[in] keypart_map Which part of the key to use. -@param[in] find_flag Key condition/direction to use. -@return error number or 0. */ -int -ha_innopart::index_read_map_in_part( - uint part, - uchar* record, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag) -{ - int error; - - set_partition(part); - error = ha_innobase::index_read_map( - record, - key, - keypart_map, - find_flag); - update_partition(part); - return(error); -} - -/** Start index scan and return first record from a partition. -This routine starts an index scan using a start key. The calling -function will check the end key on its own. -@param[in] part Partition to read from. -@param[out] record First matching record in index in the partition. -@param[in] index Index to read from. -@param[in] key Key to match. -@param[in] keypart_map Which part of the key to use. -@param[in] find_flag Key condition/direction to use. -@return error number or 0. */ -int -ha_innopart::index_read_idx_map_in_part( - uint part, - uchar* record, - uint index, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag) -{ - int error; - - set_partition(part); - error = ha_innobase::index_read_idx_map( - record, - index, - key, - keypart_map, - find_flag); - update_partition(part); - return(error); -} - -/** Return last matching record in index from a partition. -@param[in] part Partition to read from. -@param[out] record Last matching record in index in the partition. -@param[in] key Key to match. -@param[in] keypart_map Which part of the key to use. -@return error number or 0. */ -int -ha_innopart::index_read_last_map_in_part( - uint part, - uchar* record, - const uchar* key, - key_part_map keypart_map) -{ - int error; - set_partition(part); - error = ha_innobase::index_read_last_map(record, key, keypart_map); - update_partition(part); - return(error); -} - -/** Start index scan and return first record from a partition. -This routine starts an index scan using a start and end key. -@param[in] part Partition to read from. -@param[in,out] record First matching record in index in the partition, -if NULL use table->record[0] as return buffer. -@param[in] start_key Start key to match. -@param[in] end_key End key to match. -@param[in] eq_range Is equal range, start_key == end_key. -@param[in] sorted Return rows in sorted order. -@return error number or 0. */ -int -ha_innopart::read_range_first_in_part( - uint part, - uchar* record, - const key_range* start_key, - const key_range* end_key, - bool eq_range, - bool sorted) -{ - int error; - uchar* read_record = record; - set_partition(part); - if (read_record == NULL) { - read_record = table->record[0]; - } - if (m_start_key.key != NULL) { - error = ha_innobase::index_read( - read_record, - m_start_key.key, - m_start_key.length, - m_start_key.flag); - } else { - error = ha_innobase::index_first(read_record); - } - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } else if (error == 0 && !in_range_check_pushed_down) { - /* compare_key uses table->record[0], so we - need to copy the data if not already there. */ - - if (record != NULL) { - copy_cached_row(table->record[0], read_record); - } - if (compare_key(end_range) > 0) { - /* must use ha_innobase:: due to set/update_partition - could overwrite states if ha_innopart::unlock_row() - was used. */ - ha_innobase::unlock_row(); - error = HA_ERR_END_OF_FILE; - } - } - update_partition(part); - return(error); -} - -/** Return next record in index range scan from a partition. -@param[in] part Partition to read from. -@param[in,out] record First matching record in index in the partition, -if NULL use table->record[0] as return buffer. -@return error number or 0. */ -int -ha_innopart::read_range_next_in_part( - uint part, - uchar* record) -{ - int error; - uchar* read_record = record; - - set_partition(part); - if (read_record == NULL) { - read_record = table->record[0]; - } - - /* TODO: Implement ha_innobase::read_range*? - So it will return HA_ERR_END_OF_FILE or - HA_ERR_KEY_NOT_FOUND when passing end_range. */ - - error = ha_innobase::index_next(read_record); - if (error == 0 && !in_range_check_pushed_down) { - /* compare_key uses table->record[0], so we - need to copy the data if not already there. */ - - if (record != NULL) { - copy_cached_row(table->record[0], read_record); - } - if (compare_key(end_range) > 0) { - /* must use ha_innobase:: due to set/update_partition - could overwrite states if ha_innopart::unlock_row() - was used. */ - ha_innobase::unlock_row(); - error = HA_ERR_END_OF_FILE; - } - } - update_partition(part); - - return(error); -} - -/** Initialize a table scan in a specific partition. -@param[in] part_id Partition to initialize. -@param[in] scan True if table/index scan false otherwise (for rnd_pos) -@return 0 or error number. */ -int -ha_innopart::rnd_init_in_part( - uint part_id, - bool scan) -{ - int err; - - if (m_prebuilt->clust_index_was_generated) { - err = change_active_index(part_id, MAX_KEY); - } else { - err = change_active_index(part_id, m_primary_key); - } - - m_start_of_scan = 1; - - /* Don't use semi-consistent read in random row reads (by position). - This means we must disable semi_consistent_read if scan is false. */ - - if (!scan) { - try_semi_consistent_read(false); - } - - return(err); -} - -/** Ends a table scan. -@param[in] part_id Partition to end table scan in. -@param[in] scan True for scan else random access. -@return 0 or error number. */ -int -ha_innopart::rnd_end_in_part( - uint part_id, - bool scan) -{ - return(index_end()); -} - -/** Read next row in partition. -Reads the next row in a table scan (also used to read the FIRST row -in a table scan). -@param[in] part_id Partition to end table scan in. -@param[out] buf Returns the row in this buffer, in MySQL format. -@return 0, HA_ERR_END_OF_FILE or error number. */ -int -ha_innopart::rnd_next_in_part( - uint part_id, - uchar* buf) -{ - int error; - - DBUG_ENTER("ha_innopart::rnd_next_in_part"); - - set_partition(part_id); - if (m_start_of_scan) { - error = ha_innobase::index_first(buf); - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - m_start_of_scan = 0; - } else { - ha_statistic_increment(&SSV::ha_read_rnd_next_count); - error = ha_innobase::general_fetch(buf, ROW_SEL_NEXT, 0); - } - - update_partition(part_id); - DBUG_RETURN(error); -} - -/** Get a row from a position. -Fetches a row from the table based on a row reference. -@param[out] buf Returns the row in this buffer, in MySQL format. -@param[in] pos Position, given as primary key value or DB_ROW_ID -(if no primary key) of the row in MySQL format. The length of data in pos has -to be ref_length. -@return 0, HA_ERR_KEY_NOT_FOUND or error code. */ -int -ha_innopart::rnd_pos( - uchar* buf, - uchar* pos) -{ - int error; - uint part_id; - DBUG_ENTER("ha_innopart::rnd_pos"); - ut_ad(PARTITION_BYTES_IN_POS == 2); - DBUG_DUMP("pos", pos, ref_length); - - ha_statistic_increment(&SSV::ha_read_rnd_count); - - ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); - - /* Restore used partition. */ - part_id = uint2korr(pos); - - set_partition(part_id); - - /* Note that we assume the length of the row reference is fixed - for the table, and it is == ref_length. */ - - error = ha_innobase::index_read(buf, pos + PARTITION_BYTES_IN_POS, - ref_length - PARTITION_BYTES_IN_POS, - HA_READ_KEY_EXACT); - DBUG_PRINT("info", ("part %u index_read returned %d", part_id, error)); - DBUG_DUMP("buf", buf, table_share->reclength); - - update_partition(part_id); - - DBUG_RETURN(error); -} - -/** Return position for cursor in last used partition. -Stores a reference to the current row to 'ref' field of the handle. Note -that in the case where we have generated the clustered index for the -table, the function parameter is illogical: we MUST ASSUME that 'record' -is the current 'position' of the handle, because if row ref is actually -the row id internally generated in InnoDB, then 'record' does not contain -it. We just guess that the row id must be for the record where the handle -was positioned the last time. -@param[out] ref_arg Pointer to buffer where to write the position. -@param[in] record Record to position for. */ -void -ha_innopart::position_in_last_part( - uchar* ref_arg, - const uchar* record) -{ - if (m_prebuilt->clust_index_was_generated) { - /* No primary key was defined for the table and we - generated the clustered index from row id: the - row reference will be the row id, not any key value - that MySQL knows of. */ - - memcpy(ref_arg, m_prebuilt->row_id, DATA_ROW_ID_LEN); - } else { - - /* Copy primary key as the row reference */ - KEY* key_info = table->key_info + m_primary_key; - key_copy(ref_arg, (uchar*)record, key_info, - key_info->key_length); - } -} - -/** Fill in data_dir_path and tablespace name from internal data -dictionary. -@param part_elem Partition element to fill. -@param ib_table InnoDB table to copy from. */ -void -ha_innopart::update_part_elem( - partition_element* part_elem, - dict_table_t* ib_table) -{ - dict_get_and_save_data_dir_path(ib_table, false); - if (ib_table->data_dir_path != NULL) { - if (part_elem->data_file_name == NULL - || strcmp(ib_table->data_dir_path, - part_elem->data_file_name) != 0) { - - /* Play safe and allocate memory from TABLE and copy - instead of expose the internal data dictionary. */ - part_elem->data_file_name = - strdup_root(&table->mem_root, - ib_table->data_dir_path); - } - } else { - part_elem->data_file_name = NULL; - } - - part_elem->index_file_name = NULL; -} - -/** Update create_info. -Used in SHOW CREATE TABLE et al. -@param[in,out] create_info Create info to update. */ -void -ha_innopart::update_create_info( - HA_CREATE_INFO* create_info) -{ - uint num_subparts = m_part_info->num_subparts; - uint num_parts; - uint part; - dict_table_t* table; - List_iterator<partition_element> - part_it(m_part_info->partitions); - partition_element* part_elem; - partition_element* sub_elem; - DBUG_ENTER("ha_innopart::update_create_info"); - if ((create_info->used_fields & HA_CREATE_USED_AUTO) == 0) { - info(HA_STATUS_AUTO); - create_info->auto_increment_value = stats.auto_increment_value; - } - - num_parts = (num_subparts != 0) ? m_tot_parts / num_subparts : m_tot_parts; - - /* DATA/INDEX DIRECTORY are never applied to the whole partitioned - table, only to its parts. */ - - create_info->data_file_name = NULL; - create_info->index_file_name = NULL; - - /* Since update_create_info() can be called from - mysql_prepare_alter_table() when not all partitions are set up, - we look for that condition first. - If all partitions are not available then simply return, - since it does not need any updated partitioning info. */ - - if (!m_part_info->temp_partitions.is_empty()) { - DBUG_VOID_RETURN; - } - part = 0; - while ((part_elem = part_it++)) { - if (part >= num_parts) { - DBUG_VOID_RETURN; - } - if (m_part_info->is_sub_partitioned()) { - List_iterator<partition_element> - subpart_it(part_elem->subpartitions); - uint subpart = 0; - while ((sub_elem = subpart_it++)) { - if (subpart >= num_subparts) { - DBUG_VOID_RETURN; - } - subpart++; - } - if (subpart != num_subparts) { - DBUG_VOID_RETURN; - } - } - part++; - } - if (part != num_parts) { - DBUG_VOID_RETURN; - } - - /* part_elem->data_file_name should be correct from - the .frm, but may have been changed, so update from SYS_DATAFILES. - index_file_name is ignored, so remove it. */ - - part = 0; - part_it.rewind(); - while ((part_elem = part_it++)) { - if (m_part_info->is_sub_partitioned()) { - List_iterator<partition_element> - subpart_it(part_elem->subpartitions); - while ((sub_elem = subpart_it++)) { - table = m_part_share->get_table_part(part++); - update_part_elem(sub_elem, table); - } - } else { - table = m_part_share->get_table_part(part++); - update_part_elem(part_elem, table); - } - } - DBUG_VOID_RETURN; -} - -/** Set create_info->data_file_name. -@param[in] part_elem Partition to copy from. -@param[in,out] info Create info to set. */ -static -void -set_create_info_dir( - partition_element* part_elem, - HA_CREATE_INFO* info) -{ - if (part_elem->data_file_name != NULL - && part_elem->data_file_name[0] != '\0') { - info->data_file_name = part_elem->data_file_name; - } - if (part_elem->index_file_name != NULL - && part_elem->index_file_name[0] != '\0') { - info->index_file_name = part_elem->index_file_name; - } -} - -/** Set flags and append '/' to remote path if necessary. */ -void -create_table_info_t::set_remote_path_flags() -{ - if (m_remote_path[0] != '\0') { - ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) != 0); - - /* os_file_make_remote_pathname will truncate - everything after the last '/', so append '/' - if it is not the last character. */ - - size_t len = strlen(m_remote_path); - if (m_remote_path[len - 1] != OS_PATH_SEPARATOR) { - m_remote_path[len] = OS_PATH_SEPARATOR; - m_remote_path[len + 1] = '\0'; - } - } else { - ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) == 0); - } -} - -/** Creates a new table to an InnoDB database. -@param[in] name Table name (in filesystem charset). -@param[in] form MySQL Table containing information of -partitions, columns and indexes etc. -@param[in] create_info Additional create information, like -create statement string. -@return 0 or error number. */ -int -ha_innopart::create( - const char* name, - TABLE* form, - HA_CREATE_INFO* create_info) -{ - int error; - /** {database}/{tablename} */ - char table_name[FN_REFLEN]; - /** absolute path of table */ - char remote_path[FN_REFLEN]; - char partition_name[FN_REFLEN]; - char* table_name_end; - size_t table_name_len; - char* partition_name_start; - char table_data_file_name[FN_REFLEN]; - const char* index_file_name; - size_t len; - - create_table_info_t info(ha_thd(), - form, - create_info, - table_name, - remote_path); - - DBUG_ENTER("ha_innopart::create"); - ut_ad(create_info != NULL); - ut_ad(m_part_info == form->part_info); - ut_ad(table_share != NULL); - - /* Not allowed to create temporary partitioned tables. */ - if (create_info != NULL - && (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0) { - my_error(ER_PARTITION_NO_TEMPORARY, MYF(0)); - ut_ad(0); // Can we support partitioned temporary tables? - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - - error = info.initialize(); - if (error != 0) { - DBUG_RETURN(error); - } - - /* Setup and check table level options. */ - error = info.prepare_create_table(name); - if (error != 0) { - DBUG_RETURN(error); - } - strcpy(partition_name, table_name); - partition_name_start = partition_name + strlen(partition_name); - table_name_len = strlen(table_name); - table_name_end = table_name + table_name_len; - if (create_info->data_file_name != NULL) { - /* Strip the tablename from the path. */ - strncpy(table_data_file_name, create_info->data_file_name, - FN_REFLEN-1); - table_data_file_name[FN_REFLEN - 1] = '\0'; - char* ptr = strrchr(table_data_file_name, OS_PATH_SEPARATOR); - ut_ad(ptr != NULL); - if (ptr != NULL) { - ptr++; - *ptr = '\0'; - create_info->data_file_name = table_data_file_name; - } - } else { - table_data_file_name[0] = '\0'; - } - index_file_name = create_info->index_file_name; - - info.allocate_trx(); - - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during a table create operation. - Drop table etc. do this latching in row0mysql.cc. */ - - row_mysql_lock_data_dictionary(info.trx()); - - /* TODO: use the new DD tables instead to decrease duplicate info. */ - List_iterator_fast <partition_element> - part_it(form->part_info->partitions); - partition_element* part_elem; - while ((part_elem = part_it++)) { - /* Append the partition name to the table name. */ - len = Ha_innopart_share::append_sep_and_name( - partition_name_start, - part_elem->partition_name, - part_sep, - FN_REFLEN - table_name_len); - if ((table_name_len + len) >= FN_REFLEN) { - ut_ad(0); - goto cleanup; - } - - /* Override table level DATA/INDEX DIRECTORY. */ - set_create_info_dir(part_elem, create_info); - - if (!form->part_info->is_sub_partitioned()) { - error = info.prepare_create_table(partition_name); - if (error != 0) { - goto cleanup; - } - info.set_remote_path_flags(); - error = info.create_table(); - if (error != 0) { - goto cleanup; - } - } else { - size_t part_name_len = strlen(partition_name_start) - + table_name_len; - char* part_name_end = partition_name + part_name_len; - List_iterator_fast <partition_element> - sub_it(part_elem->subpartitions); - partition_element* sub_elem; - - while ((sub_elem = sub_it++)) { - ut_ad(sub_elem->partition_name != NULL); - - /* 'table' will be - <name>#P#<part_name>#SP#<subpart_name>. - Append the sub-partition name to - the partition name. */ - - len = Ha_innopart_share::append_sep_and_name( - part_name_end, - sub_elem->partition_name, - sub_sep, - FN_REFLEN - part_name_len); - if ((len + part_name_len) >= FN_REFLEN) { - ut_ad(0); - goto cleanup; - } - /* Override part level DATA/INDEX DIRECTORY. */ - set_create_info_dir(sub_elem, create_info); - - Ha_innopart_share::partition_name_casedn_str( - part_name_end + 4); - error = info.prepare_create_table(partition_name); - if (error != 0) { - goto cleanup; - } - info.set_remote_path_flags(); - error = info.create_table(); - if (error != 0) { - goto cleanup; - } - - /* Reset partition level - DATA/INDEX DIRECTORY. */ - - create_info->data_file_name = - table_data_file_name; - create_info->index_file_name = - index_file_name; - set_create_info_dir(part_elem, create_info); - } - } - /* Reset table level DATA/INDEX DIRECTORY. */ - create_info->data_file_name = table_data_file_name; - create_info->index_file_name = index_file_name; - } - - innobase_commit_low(info.trx()); - - row_mysql_unlock_data_dictionary(info.trx()); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0. */ - - log_buffer_flush_to_disk(); - - part_it.rewind(); - /* No need to use these now, only table_name will be used. */ - create_info->data_file_name = NULL; - create_info->index_file_name = NULL; - while ((part_elem = part_it++)) { - Ha_innopart_share::append_sep_and_name( - table_name_end, - part_elem->partition_name, - part_sep, - FN_REFLEN - table_name_len); - if (!form->part_info->is_sub_partitioned()) { - error = info.create_table_update_dict(); - if (error != 0) { - ut_ad(0); - goto end; - } - } else { - size_t part_name_len = strlen(table_name_end); - char* part_name_end = table_name_end + part_name_len; - List_iterator_fast <partition_element> - sub_it(part_elem->subpartitions); - partition_element* sub_elem; - while ((sub_elem = sub_it++)) { - Ha_innopart_share::append_sep_and_name( - part_name_end, - sub_elem->partition_name, - sub_sep, - FN_REFLEN - table_name_len - - part_name_len); - error = info.create_table_update_dict(); - if (error != 0) { - ut_ad(0); - goto end; - } - } - } - } - -end: - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_free_for_mysql(info.trx()); - - DBUG_RETURN(error); - -cleanup: - trx_rollback_for_mysql(info.trx()); - - row_mysql_unlock_data_dictionary(info.trx()); - - trx_free_for_mysql(info.trx()); - - DBUG_RETURN(error); -} - -/** Discards or imports an InnoDB tablespace. -@param[in] discard True if discard, else import. -@return 0 or error number. */ -int -ha_innopart::discard_or_import_tablespace( - my_bool discard) -{ - int error = 0; - uint i; - DBUG_ENTER("ha_innopart::discard_or_import_tablespace"); - - for (i= m_part_info->get_first_used_partition(); - i < m_tot_parts; - i= m_part_info->get_next_used_partition(i)) { - - m_prebuilt->table = m_part_share->get_table_part(i); - error= ha_innobase::discard_or_import_tablespace(discard); - if (error != 0) { - break; - } - } - m_prebuilt->table = m_part_share->get_table_part(0); - - /* IMPORT/DISCARD also means resetting auto_increment. Make sure - that auto_increment initialization is done after all partitions - are imported. */ - if (table->found_next_number_field != NULL) { - lock_auto_increment(); - m_part_share->next_auto_inc_val = 0; - m_part_share->auto_inc_initialized = false; - unlock_auto_increment(); - } - - DBUG_RETURN(error); -} - -/** Compare key and rowid. -Helper function for sorting records in the priority queue. -a/b points to table->record[0] rows which must have the -key fields set. The bytes before a and b store the rowid. -This is used for comparing/sorting rows first according to -KEY and if same KEY, by rowid (ref). -@param[in] key_info Null terminated array of index information. -@param[in] a Pointer to record+ref in first record. -@param[in] b Pointer to record+ref in second record. -@return Return value is SIGN(first_rec - second_rec) -@retval 0 Keys are equal. -@retval -1 second_rec is greater than first_rec. -@retval +1 first_rec is greater than second_rec. */ -int -ha_innopart::key_and_rowid_cmp( - KEY** key_info, - uchar *a, - uchar *b) -{ - int cmp = key_rec_cmp(key_info, a, b); - if (cmp != 0) { - return(cmp); - } - - /* We must compare by rowid, which is added before the record, - in the priority queue. */ - - return(memcmp(a - DATA_ROW_ID_LEN, b - DATA_ROW_ID_LEN, - DATA_ROW_ID_LEN)); -} - -/** Extra hints from MySQL. -@param[in] operation Operation hint. -@return 0 or error number. */ -int -ha_innopart::extra( - enum ha_extra_function operation) -{ - if (operation == HA_EXTRA_SECONDARY_SORT_ROWID) { - /* index_init(sorted=true) must have been called! */ - ut_ad(m_ordered); - ut_ad(m_ordered_rec_buffer != NULL); - /* No index_read call must have been done! */ - ut_ad(m_queue->empty()); - - /* If not PK is set as secondary sort, do secondary sort by - rowid/ref. */ - - ut_ad(m_curr_key_info[1] != NULL - || m_prebuilt->clust_index_was_generated != 0 - || m_curr_key_info[0] - == table->key_info + table->s->primary_key); - - if (m_curr_key_info[1] == NULL - && m_prebuilt->clust_index_was_generated) { - m_ref_usage = Partition_helper::REF_USED_FOR_SORT; - m_queue->m_fun = key_and_rowid_cmp; - } - return(0); - } - return(ha_innobase::extra(operation)); -} - -/** Delete all rows in a partition. -@return 0 or error number. */ -int -ha_innopart::truncate_partition_low() -{ - return(truncate()); -} - -/** Deletes all rows of a partitioned InnoDB table. -@return 0 or error number. */ -int -ha_innopart::truncate() -{ - dberr_t err = DB_SUCCESS; - int error; - - DBUG_ENTER("ha_innopart::truncate"); - - if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* TRUNCATE also means resetting auto_increment. Hence, reset - it so that it will be initialized again at the next use. */ - - if (table->found_next_number_field != NULL) { - lock_auto_increment(); - m_part_share->next_auto_inc_val= 0; - m_part_share->auto_inc_initialized= false; - unlock_auto_increment(); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created, and update m_prebuilt->trx. */ - - update_thd(ha_thd()); - - if (!trx_is_started(m_prebuilt->trx)) { - ++m_prebuilt->trx->will_lock; - } - /* Truncate the table in InnoDB. */ - - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - set_partition(i); - err = row_truncate_table_for_mysql(m_prebuilt->table, - m_prebuilt->trx); - update_partition(i); - if (err != DB_SUCCESS) { - break; - } - } - - switch (err) { - - case DB_TABLESPACE_DELETED: - case DB_TABLESPACE_NOT_FOUND: - ib_senderrf( - m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - (err == DB_TABLESPACE_DELETED ? - ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING), - table->s->table_name.str); - table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; - break; - - default: - error = convert_error_code_to_mysql( - err, m_prebuilt->table->flags, - m_prebuilt->trx->mysql_thd); - table->status = STATUS_NOT_FOUND; - break; - } - DBUG_RETURN(error); -} - -/** Estimates the number of index records in a range. -@param[in] keynr Index number. -@param[in] min_key Start key value (or NULL). -@param[in] max_key End key value (or NULL). -@return estimated number of rows. */ -ha_rows -ha_innopart::records_in_range( - uint keynr, - key_range* min_key, - key_range* max_key) -{ - KEY* key; - dict_index_t* index; - dtuple_t* range_start; - dtuple_t* range_end; - int64_t n_rows = 0; - page_cur_mode_t mode1; - page_cur_mode_t mode2; - mem_heap_t* heap; - uint part_id; - - DBUG_ENTER("ha_innopart::records_in_range"); - DBUG_PRINT("info", ("keynr %u min %p max %p", keynr, min_key, max_key)); - - ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); - - m_prebuilt->trx->op_info = (char*)"estimating records in index range"; - - active_index = keynr; - - key = table->key_info + active_index; - - part_id = m_part_info->get_first_used_partition(); - if (part_id == MY_BIT_NONE) { - DBUG_RETURN(0); - } - /* This also sets m_prebuilt->index! */ - set_partition(part_id); - index = m_prebuilt->index; - - /* Only validate the first partition, to avoid too much overhead. */ - - /* There exists possibility of not being able to find requested - index due to inconsistency between MySQL and InoDB dictionary info. - Necessary message should have been printed in innopart_get_index(). */ - if (index == NULL - || dict_table_is_discarded(m_prebuilt->table) - || dict_index_is_corrupted(index) - || !row_merge_is_index_usable(m_prebuilt->trx, index)) { - - n_rows = HA_POS_ERROR; - goto func_exit; - } - - heap = mem_heap_create(2 * (key->actual_key_parts * sizeof(dfield_t) - + sizeof(dtuple_t))); - - range_start = dtuple_create(heap, key->actual_key_parts); - dict_index_copy_types(range_start, index, key->actual_key_parts); - - range_end = dtuple_create(heap, key->actual_key_parts); - dict_index_copy_types(range_end, index, key->actual_key_parts); - - row_sel_convert_mysql_key_to_innobase( - range_start, - m_prebuilt->srch_key_val1, - m_prebuilt->srch_key_val_len, - index, - (byte*) (min_key ? min_key->key : (const uchar*) 0), - (ulint) (min_key ? min_key->length : 0), - m_prebuilt->trx); - - ut_ad(min_key != NULL - ? range_start->n_fields > 0 - : range_start->n_fields == 0); - - row_sel_convert_mysql_key_to_innobase( - range_end, - m_prebuilt->srch_key_val2, - m_prebuilt->srch_key_val_len, - index, - (byte*) (max_key != NULL ? max_key->key : (const uchar*) 0), - (ulint) (max_key != NULL ? max_key->length : 0), - m_prebuilt->trx); - - ut_ad(max_key != NULL - ? range_end->n_fields > 0 - : range_end->n_fields == 0); - - mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag : - HA_READ_KEY_EXACT); - mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag : - HA_READ_KEY_EXACT); - - if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { - - n_rows = btr_estimate_n_rows_in_range(index, range_start, - mode1, range_end, - mode2); - DBUG_PRINT("info", ("part_id %u rows %ld", part_id, - (long int) n_rows)); - for (part_id = m_part_info->get_next_used_partition(part_id); - part_id < m_tot_parts; - part_id = m_part_info->get_next_used_partition(part_id)) { - - index = m_part_share->get_index(part_id, keynr); - int64_t n = btr_estimate_n_rows_in_range(index, - range_start, - mode1, - range_end, - mode2); - n_rows += n; - DBUG_PRINT("info", ("part_id %u rows %ld (%ld)", - part_id, - (long int) n, - (long int) n_rows)); - } - } else { - - n_rows = HA_POS_ERROR; - } - - mem_heap_free(heap); - -func_exit: - - m_prebuilt->trx->op_info = (char*)""; - - /* The MySQL optimizer seems to believe an estimate of 0 rows is - always accurate and may return the result 'Empty set' based on that. - The accuracy is not guaranteed, and even if it were, for a locking - read we should anyway perform the search to set the next-key lock. - Add 1 to the value to make sure MySQL does not make the assumption! */ - - if (n_rows == 0) { - n_rows = 1; - } - - DBUG_RETURN((ha_rows) n_rows); -} - -/** Gives an UPPER BOUND to the number of rows in a table. -This is used in filesort.cc. -@return upper bound of rows. */ -ha_rows -ha_innopart::estimate_rows_upper_bound() -{ - const dict_index_t* index; - ulonglong estimate = 0; - ulonglong local_data_file_length; - ulint stat_n_leaf_pages; - - DBUG_ENTER("ha_innopart::estimate_rows_upper_bound"); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(ha_thd()); - - m_prebuilt->trx->op_info = "calculating upper bound for table rows"; - - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - m_prebuilt->table = m_part_share->get_table_part(i); - index = dict_table_get_first_index(m_prebuilt->table); - - stat_n_leaf_pages = index->stat_n_leaf_pages; - - ut_a(stat_n_leaf_pages > 0); - - local_data_file_length = - ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; - - /* Calculate a minimum length for a clustered index record - and from that an upper bound for the number of rows. - Since we only calculate new statistics in row0mysql.cc when a - table has grown by a threshold factor, - we must add a safety factor 2 in front of the formula below. */ - - estimate += 2 * local_data_file_length - / dict_index_calc_min_rec_len(index); - } - - m_prebuilt->trx->op_info = ""; - - DBUG_RETURN((ha_rows) estimate); -} - -/** Time estimate for full table scan. -How many seeks it will take to read through the table. This is to be -comparable to the number returned by records_in_range so that we can -decide if we should scan the table or use keys. -@return estimated time measured in disk seeks. */ -double -ha_innopart::scan_time() -{ - double scan_time = 0.0; - DBUG_ENTER("ha_innopart::scan_time"); - - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - m_prebuilt->table = m_part_share->get_table_part(i); - scan_time += ha_innobase::scan_time(); - } - DBUG_RETURN(scan_time); -} - -/** Updates the statistics for one partition (table). -@param[in] table Table to update the statistics for. -@param[in] is_analyze True if called from ::analyze(). -@return error code. */ -static -int -update_table_stats( - dict_table_t* table, - bool is_analyze) -{ - dict_stats_upd_option_t opt; - dberr_t ret; - - if (dict_stats_is_persistent_enabled(table)) { - if (is_analyze) { - opt = DICT_STATS_RECALC_PERSISTENT; - } else { - /* This is e.g. 'SHOW INDEXES', - fetch the persistent stats from disk. */ - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; - } - } else { - opt = DICT_STATS_RECALC_TRANSIENT; - } - - ut_ad(!mutex_own(&dict_sys->mutex)); - ret = dict_stats_update(table, opt); - - if (ret != DB_SUCCESS) { - return(HA_ERR_GENERIC); - } - return(0); -} - -/** Updates and return statistics. -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. -@param[in] flag Flags for what to update and return. -@param[in] is_analyze True if called from ::analyze(). -@return HA_ERR_* error code or 0. */ -int -ha_innopart::info_low( - uint flag, - bool is_analyze) -{ - dict_table_t* ib_table; - ib_uint64_t max_rows = 0; - uint biggest_partition = 0; - int error = 0; - - DBUG_ENTER("ha_innopart::info_low"); - - /* If we are forcing recovery at a high level, we will suppress - statistics calculation on tables, because that may crash the - server if an index is badly corrupted. */ - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(ha_thd()); - - m_prebuilt->trx->op_info = "returning various info to MySQL"; - - ut_ad(m_part_share->get_table_part(0)->n_ref_count > 0); - - if ((flag & HA_STATUS_TIME) != 0) { - stats.update_time = 0; - - if (is_analyze) { - /* Only analyze the given partitions. */ - int error = set_altered_partitions(); - if (error != 0) { - /* Already checked in mysql_admin_table! */ - ut_ad(0); - DBUG_RETURN(error); - } - } - if (is_analyze || innobase_stats_on_metadata) { - m_prebuilt->trx->op_info = "updating table statistics"; - } - - /* TODO: Only analyze the PK for all partitions, - then the secondary indexes only for the largest partition! */ - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - ib_table = m_part_share->get_table_part(i); - if (is_analyze || innobase_stats_on_metadata) { - error = update_table_stats(ib_table, is_analyze); - if (error != 0) { - m_prebuilt->trx->op_info = ""; - DBUG_RETURN(error); - } - } - set_if_bigger(stats.update_time, - (ulong) ib_table->update_time); - } - - if (is_analyze || innobase_stats_on_metadata) { - m_prebuilt->trx->op_info = - "returning various info to MySQL"; - } - } - - if ((flag & HA_STATUS_VARIABLE) != 0) { - - /* TODO: If this is called after pruning, then we could - also update the statistics according to the non-pruned - partitions, by allocating new rec_per_key on the TABLE, - instead of using the info from the TABLE_SHARE. */ - ulint stat_clustered_index_size = 0; - ulint stat_sum_of_other_index_sizes = 0; - ib_uint64_t n_rows = 0; - ulint avail_space = 0; - bool checked_sys_tablespace = false; - - if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0) { - stats.delete_length = 0; - } - - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - ib_table = m_part_share->get_table_part(i); - if ((flag & HA_STATUS_NO_LOCK) == 0) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - n_rows += ib_table->stat_n_rows; - if (ib_table->stat_n_rows > max_rows) { - max_rows = ib_table->stat_n_rows; - biggest_partition = i; - } - - stat_clustered_index_size += - ib_table->stat_clustered_index_size; - - stat_sum_of_other_index_sizes += - ib_table->stat_sum_of_other_index_sizes; - - if ((flag & HA_STATUS_NO_LOCK) == 0) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0 - && (flag & HA_STATUS_NO_LOCK) == 0 - && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE - && avail_space != ULINT_UNDEFINED) { - - /* Only count system tablespace once! */ - if (is_system_tablespace(ib_table->space)) { - if (checked_sys_tablespace) { - continue; - } - checked_sys_tablespace = true; - } - - uintmax_t space = - fsp_get_available_space_in_free_extents( - ib_table->space); - if (space == UINTMAX_MAX) { - THD* thd = ha_thd(); - const char* table_name - = ib_table->name.m_name; - - push_warning_printf( - thd, - Sql_condition::SL_WARNING, - ER_CANT_GET_STAT, - "InnoDB: Trying to get the" - " free space for partition %s" - " but its tablespace has been" - " discarded or the .ibd file" - " is missing. Setting the free" - " space of the partition to" - " zero.", - ut_get_name( - m_prebuilt->trx, - table_name).c_str()); - } else { - avail_space += - static_cast<ulint>(space); - } - } - } - - /* - The MySQL optimizer seems to assume in a left join that n_rows - is an accurate estimate if it is zero. Of course, it is not, - since we do not have any locks on the rows yet at this phase. - Since SHOW TABLE STATUS seems to call this function with the - HA_STATUS_TIME flag set, while the left join optimizer does not - set that flag, we add one to a zero value if the flag is not - set. That way SHOW TABLE STATUS will show the best estimate, - while the optimizer never sees the table empty. */ - - if (n_rows == 0 && (flag & HA_STATUS_TIME) == 0) { - n_rows++; - } - - /* Fix bug#40386: Not flushing query cache after truncate. - n_rows can not be 0 unless the table is empty, set to 1 - instead. The original problem of bug#29507 is actually - fixed in the server code. */ - if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) { - - n_rows = 1; - - /* We need to reset the m_prebuilt value too, otherwise - checks for values greater than the last value written - to the table will fail and the autoinc counter will - not be updated. This will force write_row() into - attempting an update of the table's AUTOINC counter. */ - - m_prebuilt->autoinc_last_value = 0; - } - - /* Take page_size from first partition. */ - ib_table = m_part_share->get_table_part(0); - const page_size_t& page_size = - dict_table_page_size(ib_table); - - stats.records = (ha_rows) n_rows; - stats.deleted = 0; - stats.data_file_length = - ((ulonglong) stat_clustered_index_size) - * page_size.physical(); - stats.index_file_length = - ((ulonglong) stat_sum_of_other_index_sizes) - * page_size.physical(); - - /* See ha_innobase::info_low() for comments! */ - if ((flag & HA_STATUS_NO_LOCK) == 0 - && (flag & HA_STATUS_VARIABLE_EXTRA) != 0 - && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { - stats.delete_length = avail_space * 1024; - } - - stats.check_time = 0; - stats.mrr_length_per_rec = ref_length + sizeof(void*) - - PARTITION_BYTES_IN_POS; - - if (stats.records == 0) { - stats.mean_rec_length = 0; - } else { - stats.mean_rec_length = (ulong) - (stats.data_file_length / stats.records); - } - } - - if ((flag & HA_STATUS_CONST) != 0) { - /* Find max rows and biggest partition. */ - for (uint i = 0; i < m_tot_parts; i++) { - /* Skip partitions from above. */ - if ((flag & HA_STATUS_VARIABLE) == 0 - || !bitmap_is_set(&(m_part_info->read_partitions), - i)) { - - ib_table = m_part_share->get_table_part(i); - if (ib_table->stat_n_rows > max_rows) { - max_rows = ib_table->stat_n_rows; - biggest_partition = i; - } - } - } - ib_table = m_part_share->get_table_part(biggest_partition); - /* Verify the number of index in InnoDB and MySQL - matches up. If m_prebuilt->clust_index_was_generated - holds, InnoDB defines GEN_CLUST_INDEX internally. */ - ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) - - m_prebuilt->clust_index_was_generated; - if (table->s->keys < num_innodb_index) { - /* If there are too many indexes defined - inside InnoDB, ignore those that are being - created, because MySQL will only consider - the fully built indexes here. */ - - for (const dict_index_t* index = - UT_LIST_GET_FIRST(ib_table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - - /* First, online index creation is - completed inside InnoDB, and then - MySQL attempts to upgrade the - meta-data lock so that it can rebuild - the .frm file. If we get here in that - time frame, dict_index_is_online_ddl() - would not hold and the index would - still not be included in TABLE_SHARE. */ - if (!index->is_committed()) { - num_innodb_index--; - } - } - - if (table->s->keys < num_innodb_index - && (innobase_fts_check_doc_id_index(ib_table, - NULL, NULL) - == FTS_EXIST_DOC_ID_INDEX)) { - num_innodb_index--; - } - } - - if (table->s->keys != num_innodb_index) { - ib::error() << "Table " - << ib_table->name << " contains " - << num_innodb_index - << " indexes inside InnoDB, which" - " is different from the number of" - " indexes " << table->s->keys - << " defined in the MySQL"; - } - - if ((flag & HA_STATUS_NO_LOCK) == 0) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - for (ulong i = 0; i < table->s->keys; i++) { - ulong j; - /* We could get index quickly through internal - index mapping with the index translation table. - The identity of index (match up index name with - that of table->key_info[i]) is already verified in - innopart_get_index(). */ - dict_index_t* index = innopart_get_index( - biggest_partition, i); - - if (index == NULL) { - ib::error() << "Table " - << ib_table->name << " contains fewer" - " indexes inside InnoDB than" - " are defined in the MySQL" - " .frm file. Have you mixed up" - " .frm files from different" - " installations? " - << TROUBLESHOOTING_MSG; - break; - } - - KEY* key = &table->key_info[i]; - for (j = 0; - j < key->actual_key_parts; - j++) { - - if ((key->flags & HA_FULLTEXT) != 0) { - /* The whole concept has no validity - for FTS indexes. */ - key->rec_per_key[j] = 1; - continue; - } - - if ((j + 1) > index->n_uniq) { - ib::error() << "Index " << index->name - << " of " << ib_table->name - << " has " << index->n_uniq - << " columns unique inside" - " InnoDB, but MySQL is" - " asking statistics for " - << j + 1 << " columns. Have" - " you mixed up .frm files" - " from different" - " installations? " - << TROUBLESHOOTING_MSG; - break; - } - - /* innodb_rec_per_key() will use - index->stat_n_diff_key_vals[] and the value we - pass index->table->stat_n_rows. Both are - calculated by ANALYZE and by the background - stats gathering thread (which kicks in when too - much of the table has been changed). In - addition table->stat_n_rows is adjusted with - each DML (e.g. ++ on row insert). Those - adjustments are not MVCC'ed and not even - reversed on rollback. So, - index->stat_n_diff_key_vals[] and - index->table->stat_n_rows could have been - calculated at different time. This is - acceptable. */ - const rec_per_key_t rec_per_key = - innodb_rec_per_key( - index, j, - max_rows); - - key->set_records_per_key(j, rec_per_key); - - /* The code below is legacy and should be - removed together with this comment once we - are sure the new floating point rec_per_key, - set via set_records_per_key(), works fine. */ - - ulong rec_per_key_int = static_cast<ulong>( - innodb_rec_per_key(index, j, - max_rows)); - - /* Since MySQL seems to favor table scans - too much over index searches, we pretend - index selectivity is 2 times better than - our estimate: */ - - rec_per_key_int = rec_per_key_int / 2; - - if (rec_per_key_int == 0) { - rec_per_key_int = 1; - } - - key->rec_per_key[j] = rec_per_key_int; - } - } - - if ((flag & HA_STATUS_NO_LOCK) == 0) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - char path[FN_REFLEN]; - os_file_stat_t stat_info; - /* Use the first partition for create time until new DD. */ - ib_table = m_part_share->get_table_part(0); - my_snprintf(path, sizeof(path), "%s/%s%s", - mysql_data_home, - table->s->normalized_path.str, - reg_ext); - - unpack_filename(path,path); - - if (os_file_get_status(path, &stat_info, false, true) == DB_SUCCESS) { - stats.create_time = (ulong) stat_info.ctime; - } - } - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - - goto func_exit; - } - - if ((flag & HA_STATUS_ERRKEY) != 0) { - const dict_index_t* err_index; - - ut_a(m_prebuilt->trx); - ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); - - err_index = trx_get_error_info(m_prebuilt->trx); - - if (err_index != NULL) { - errkey = m_part_share->get_mysql_key(m_last_part, - err_index); - } else { - errkey = (unsigned int) ( - (m_prebuilt->trx->error_key_num - == ULINT_UNDEFINED) - ? UINT_MAX - : m_prebuilt->trx->error_key_num); - } - } - - if ((flag & HA_STATUS_AUTO) != 0) { - /* auto_inc is only supported in first key for InnoDB! */ - ut_ad(table_share->next_number_keypart == 0); - DBUG_PRINT("info", ("HA_STATUS_AUTO")); - if (table->found_next_number_field == NULL) { - stats.auto_increment_value = 0; - } else { - /* Lock to avoid two concurrent initializations. */ - lock_auto_increment(); - if (m_part_share->auto_inc_initialized) { - stats.auto_increment_value = - m_part_share->next_auto_inc_val; - } else { - /* The auto-inc mutex in the table_share is - locked, so we do not need to have the handlers - locked. */ - - error = initialize_auto_increment( - (flag & HA_STATUS_NO_LOCK) != 0); - stats.auto_increment_value = - m_part_share->next_auto_inc_val; - } - unlock_auto_increment(); - } - } - -func_exit: - m_prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN(error); -} - -/** Optimize table. -This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds -the table in MySQL. -@param[in] thd Connection thread handle. -@param[in] check_opt Currently ignored. -@return 0 for success else error code. */ -int -ha_innopart::optimize( - THD* thd, - HA_CHECK_OPT* check_opt) -{ - return(HA_ADMIN_TRY_ALTER); -} - -/** Checks a partitioned table. -Tries to check that an InnoDB table is not corrupted. If corruption is -noticed, prints to stderr information about it. In case of corruption -may also assert a failure and crash the server. Also checks for records -in wrong partition. -@param[in] thd MySQL THD object/thread handle. -@param[in] check_opt Check options. -@return HA_ADMIN_CORRUPT or HA_ADMIN_OK. */ -int -ha_innopart::check( - THD* thd, - HA_CHECK_OPT* check_opt) -{ - uint error = HA_ADMIN_OK; - uint i; - - DBUG_ENTER("ha_innopart::check"); - /* TODO: Enhance this to: - - Every partition has the same structure. - - The names are correct (partition names checked in ::open()?) - Currently it only does normal InnoDB check of each partition. */ - - if (set_altered_partitions()) { - ut_ad(0); // Already checked by set_part_state()! - DBUG_RETURN(HA_ADMIN_INVALID); - } - for (i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - m_prebuilt->table = m_part_share->get_table_part(i); - error = ha_innobase::check(thd, check_opt); - if (error != 0) { - break; - } - if ((check_opt->flags & (T_MEDIUM | T_EXTEND)) != 0) { - error = Partition_helper::check_misplaced_rows(i, false); - if (error != 0) { - break; - } - } - } - if (error != 0) { - print_admin_msg( - thd, - 256, - "error", - table_share->db.str, - table->alias, - "check", - m_is_sub_partitioned ? - "Subpartition %s returned error" - : "Partition %s returned error", - m_part_share->get_partition_name(i)); - } - - DBUG_RETURN(error); -} - -/** Repair a partitioned table. -Only repairs records in wrong partitions (moves them to the correct -partition or deletes them if not in any partition). -@param[in] thd MySQL THD object/thread handle. -@param[in] repair_opt Repair options. -@return 0 or error code. */ -int -ha_innopart::repair( - THD* thd, - HA_CHECK_OPT* repair_opt) -{ - uint error = HA_ADMIN_OK; - - DBUG_ENTER("ha_innopart::repair"); - - /* TODO: enable this warning to be clear about what is repaired. - Currently disabled to generate smaller test diffs. */ -#ifdef ADD_WARNING_FOR_REPAIR_ONLY_PARTITION - push_warning_printf(thd, Sql_condition::SL_WARNING, - ER_ILLEGAL_HA, - "Only moving rows from wrong partition to correct" - " partition is supported," - " repairing InnoDB indexes is not yet supported!"); -#endif - - /* Only repair partitions for MEDIUM or EXTENDED options. */ - if ((repair_opt->flags & (T_MEDIUM | T_EXTEND)) == 0) { - DBUG_RETURN(HA_ADMIN_OK); - } - if (set_altered_partitions()) { - ut_ad(0); // Already checked by set_part_state()! - DBUG_RETURN(HA_ADMIN_INVALID); - } - for (uint i = m_part_info->get_first_used_partition(); - i < m_tot_parts; - i = m_part_info->get_next_used_partition(i)) { - - /* TODO: Implement and use ha_innobase::repair()! */ - error = Partition_helper::check_misplaced_rows(i, true); - if (error != 0) { - print_admin_msg( - thd, - 256, - "error", - table_share->db.str, - table->alias, - "repair", - m_is_sub_partitioned ? - "Subpartition %s returned error" - : "Partition %s returned error", - m_part_share->get_partition_name(i)); - break; - } - } - - DBUG_RETURN(error); -} - -/** Check if possible to switch engine (no foreign keys). -Checks if ALTER TABLE may change the storage engine of the table. -Changing storage engines is not allowed for tables for which there -are foreign key constraints (parent or child tables). -@return true if can switch engines. */ -bool -ha_innopart::can_switch_engines() -{ - bool can_switch; - - DBUG_ENTER("ha_innopart::can_switch_engines"); - can_switch = ha_innobase::can_switch_engines(); - ut_ad(can_switch); - - DBUG_RETURN(can_switch); -} - -/** Checks if a table is referenced by a foreign key. -The MySQL manual states that a REPLACE is either equivalent to an INSERT, -or DELETE(s) + INSERT. Only a delete is then allowed internally to resolve -a duplicate key conflict in REPLACE, not an update. -@return > 0 if referenced by a FOREIGN KEY. */ -uint -ha_innopart::referenced_by_foreign_key() -{ - if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) { - -#ifndef HA_INNOPART_SUPPORTS_FOREIGN_KEYS - ut_ad(0); -#endif /* HA_INNOPART_SUPPORTS_FOREIGN_KEYS */ - return(1); - } - - return(0); -} - -/** Start statement. -MySQL calls this function at the start of each SQL statement inside LOCK -TABLES. Inside LOCK TABLES the ::external_lock method does not work to -mark SQL statement borders. Note also a special case: if a temporary table -is created inside LOCK TABLES, MySQL has not called external_lock() at all -on that table. -MySQL-5.0 also calls this before each statement in an execution of a stored -procedure. To make the execution more deterministic for binlogging, MySQL-5.0 -locks all tables involved in a stored procedure with full explicit table -locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the -procedure. -@param[in] thd Handle to the user thread. -@param[in] lock_type Lock type. -@return 0 or error code. */ -int -ha_innopart::start_stmt( - THD* thd, - thr_lock_type lock_type) -{ - int error = 0; - - if (m_part_info->get_first_used_partition() == MY_BIT_NONE) { - /* All partitions pruned away, do nothing! */ - return(error); - } - - error = ha_innobase::start_stmt(thd, lock_type); - if (m_prebuilt->sql_stat_start) { - memset(m_sql_stat_start_parts, 0xff, - UT_BITS_IN_BYTES(m_tot_parts)); - } else { - memset(m_sql_stat_start_parts, 0, - UT_BITS_IN_BYTES(m_tot_parts)); - } - return(error); -} - -/** Function to store lock for all partitions in native partitioned table. Also -look at ha_innobase::store_lock for more details. -@param[in] thd user thread handle -@param[in] to pointer to the current element in an array of -pointers to lock structs -@param[in] lock_type lock type to store in 'lock'; this may also be -TL_IGNORE -@retval to pointer to the current element in the 'to' array */ -THR_LOCK_DATA** -ha_innopart::store_lock( - THD* thd, - THR_LOCK_DATA** to, - thr_lock_type lock_type) -{ - trx_t* trx = m_prebuilt->trx; - const uint sql_command = thd_sql_command(thd); - - ha_innobase::store_lock(thd, to, lock_type); - - if (sql_command == SQLCOM_FLUSH - && lock_type == TL_READ_NO_INSERT) { - for (uint i = 1; i < m_tot_parts; i++) { - dict_table_t* table = m_part_share->get_table_part(i); - - dberr_t err = row_quiesce_set_state( - table, QUIESCE_START, trx); - ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED); - } - } - - return to; -} - -/** Lock/prepare to lock table. -As MySQL will execute an external lock for every new table it uses when it -starts to process an SQL statement (an exception is when MySQL calls -start_stmt for the handle) we can use this function to store the pointer to -the THD in the handle. We will also use this function to communicate -to InnoDB that a new SQL statement has started and that we must store a -savepoint to our transaction handle, so that we are able to roll back -the SQL statement in case of an error. -@param[in] thd Handle to the user thread. -@param[in] lock_type Lock type. -@return 0 or error number. */ -int -ha_innopart::external_lock( - THD* thd, - int lock_type) -{ - int error = 0; - - if (m_part_info->get_first_used_partition() == MY_BIT_NONE - && !(m_mysql_has_locked - && lock_type == F_UNLCK)) { - - /* All partitions pruned away, do nothing! */ - ut_ad(!m_mysql_has_locked); - return(error); - } - ut_ad(m_mysql_has_locked || lock_type != F_UNLCK); - - m_prebuilt->table = m_part_share->get_table_part(0); - error = ha_innobase::external_lock(thd, lock_type); - - for (uint i = 0; i < m_tot_parts; i++) { - dict_table_t* table = m_part_share->get_table_part(i); - - switch (table->quiesce) { - case QUIESCE_START: - /* Check for FLUSH TABLE t WITH READ LOCK */ - if (!srv_read_only_mode - && thd_sql_command(thd) == SQLCOM_FLUSH - && lock_type == F_RDLCK) { - - ut_ad(table->quiesce == QUIESCE_START); - - row_quiesce_table_start(table, - m_prebuilt->trx); - - /* Use the transaction instance to track - UNLOCK TABLES. It can be done via START - TRANSACTION; too implicitly. */ - - ++m_prebuilt->trx->flush_tables; - } - break; - - case QUIESCE_COMPLETE: - /* Check for UNLOCK TABLES; implicit or explicit - or trx interruption. */ - if (m_prebuilt->trx->flush_tables > 0 - && (lock_type == F_UNLCK - || trx_is_interrupted(m_prebuilt->trx))) { - - ut_ad(table->quiesce == QUIESCE_COMPLETE); - row_quiesce_table_complete(table, - m_prebuilt->trx); - - ut_a(m_prebuilt->trx->flush_tables > 0); - --m_prebuilt->trx->flush_tables; - } - break; - - case QUIESCE_NONE: - break; - - default: - ut_ad(0); - } - } - - ut_ad(!m_auto_increment_lock); - ut_ad(!m_auto_increment_safe_stmt_log_lock); - - if (m_prebuilt->sql_stat_start) { - memset(m_sql_stat_start_parts, 0xff, - UT_BITS_IN_BYTES(m_tot_parts)); - } else { - memset(m_sql_stat_start_parts, 0, - UT_BITS_IN_BYTES(m_tot_parts)); - } - return(error); -} - -/** Get the current auto_increment value. -@param[in] offset Table auto-inc offset. -@param[in] increment Table auto-inc increment. -@param[in] nb_desired_values Number of required values. -@param[out] first_value The auto increment value. -@param[out] nb_reserved_values Number of reserved values. -@return Auto increment value, or ~0 on failure. */ -void -ha_innopart::get_auto_increment( - ulonglong offset, - ulonglong increment, - ulonglong nb_desired_values, - ulonglong* first_value, - ulonglong* nb_reserved_values) -{ - DBUG_ENTER("ha_innopart::get_auto_increment"); - if (table_share->next_number_keypart != 0) { - /* Only first key part allowed as autoinc for InnoDB tables! */ - ut_ad(0); - *first_value = ULLONG_MAX; - DBUG_VOID_RETURN; - } - get_auto_increment_first_field( - increment, - nb_desired_values, - first_value, - nb_reserved_values); - DBUG_VOID_RETURN; -} - -/** Compares two 'refs'. -A 'ref' is the (internal) primary key value of the row. -If there is no explicitly declared non-null unique key or a primary key, then -InnoDB internally uses the row id as the primary key. -It will use the partition id as secondary compare. -@param[in] ref1 An (internal) primary key value in the MySQL key value -format. -@param[in] ref2 Reference to compare with (same type as ref1). -@return < 0 if ref1 < ref2, 0 if equal, else > 0. */ -int -ha_innopart::cmp_ref( - const uchar* ref1, - const uchar* ref2) -{ - int cmp; - - cmp = ha_innobase::cmp_ref(ref1 + PARTITION_BYTES_IN_POS, - ref2 + PARTITION_BYTES_IN_POS); - - if (cmp != 0) { - return(cmp); - } - - cmp = static_cast<int>(uint2korr(ref1)) - - static_cast<int>(uint2korr(ref2)); - - return(cmp); -} - -/** Prepare for creating new partitions during ALTER TABLE ... PARTITION. -@param[in] num_partitions Number of new partitions to be created. -@param[in] only_create True if only creating the partition -(no open/lock is needed). -@return 0 for success else error code. */ -int -ha_innopart::prepare_for_new_partitions( - uint num_partitions, - bool only_create) -{ - m_new_partitions = UT_NEW(Altered_partitions(num_partitions, - only_create), - mem_key_partitioning); - if (m_new_partitions == NULL) { - return(HA_ERR_OUT_OF_MEM); - } - if (m_new_partitions->initialize()) { - UT_DELETE(m_new_partitions); - m_new_partitions = NULL; - return(HA_ERR_OUT_OF_MEM); - } - return(0); -} - -/** Create a new partition to be filled during ALTER TABLE ... PARTITION. -@param[in] table Table to create the partition in. -@param[in] create_info Table/partition specific create info. -@param[in] part_name Partition name. -@param[in] new_part_id Partition id in new table. -@param[in] part_elem Partition element. -@return 0 for success else error code. */ -int -ha_innopart::create_new_partition( - TABLE* table, - HA_CREATE_INFO* create_info, - const char* part_name, - uint new_part_id, - partition_element* part_elem) -{ - int error; - char norm_name[FN_REFLEN]; - const char* data_file_name_backup = create_info->data_file_name; - DBUG_ENTER("ha_innopart::create_new_partition"); - /* Delete by ddl_log on failure. */ - normalize_table_name(norm_name, part_name); - set_create_info_dir(part_elem, create_info); - - error = ha_innobase::create(norm_name, table, create_info); - create_info->data_file_name = data_file_name_backup; - if (error == HA_ERR_FOUND_DUPP_KEY) { - DBUG_RETURN(HA_ERR_TABLE_EXIST); - } - if (error != 0) { - DBUG_RETURN(error); - } - if (!m_new_partitions->only_create()) - { - dict_table_t* part; - part = dict_table_open_on_name(norm_name, - false, - true, - DICT_ERR_IGNORE_NONE); - if (part == NULL) { - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - m_new_partitions->set_part(new_part_id, part); - } - DBUG_RETURN(0); -} - -/** Close and finalize new partitions. */ -void -ha_innopart::close_new_partitions() -{ - if (m_new_partitions != NULL) { - UT_DELETE(m_new_partitions); - m_new_partitions = NULL; - } -} - -/** write row to new partition. -@param[in] new_part New partition to write to. -@return 0 for success else error code. */ -int -ha_innopart::write_row_in_new_part( - uint new_part) -{ - int result; - DBUG_ENTER("ha_innopart::write_row_in_new_part"); - - m_last_part = new_part; - if (m_new_partitions->part(new_part) == NULL) { - /* Altered partition contains misplaced row. */ - m_err_rec = table->record[0]; - DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION); - } - m_new_partitions->get_prebuilt(m_prebuilt, new_part); - result = ha_innobase::write_row(table->record[0]); - m_new_partitions->set_from_prebuilt(m_prebuilt, new_part); - DBUG_RETURN(result); -} - -/** Allocate the array to hold blob heaps for all partitions */ -mem_heap_t** -ha_innopart::alloc_blob_heap_array() -{ - DBUG_ENTER("ha_innopart::alloc_blob_heap_array"); - - const ulint len = sizeof(mem_heap_t*) * m_tot_parts; - m_blob_heap_parts = static_cast<mem_heap_t**>( - ut_zalloc(len, mem_key_partitioning)); - if (m_blob_heap_parts == NULL) { - DBUG_RETURN(NULL); - } - - DBUG_RETURN(m_blob_heap_parts); -} - -/** Free the array that holds blob heaps for all partitions */ -void -ha_innopart::free_blob_heap_array() -{ - DBUG_ENTER("ha_innopart::free_blob_heap_array"); - - if (m_blob_heap_parts != NULL) { - clear_blob_heaps(); - ut_free(m_blob_heap_parts); - m_blob_heap_parts = NULL; - } - - DBUG_VOID_RETURN; -} - -void -ha_innopart::clear_blob_heaps() -{ - DBUG_ENTER("ha_innopart::clear_blob_heaps"); - - if (m_blob_heap_parts == NULL) { - DBUG_VOID_RETURN; - } - - for (uint i = 0; i < m_tot_parts; i++) { - if (m_blob_heap_parts[i] != NULL) { - DBUG_PRINT("ha_innopart", ("freeing blob_heap: %p", - m_blob_heap_parts[i])); - mem_heap_free(m_blob_heap_parts[i]); - m_blob_heap_parts[i] = NULL; - } - } - - /* Reset blob_heap in m_prebuilt after freeing all heaps. It is set in - ha_innopart::set_partition to the blob heap of current partition. */ - m_prebuilt->blob_heap = NULL; - - DBUG_VOID_RETURN; -} - -/** Reset state of file to after 'open'. This function is called -after every statement for all tables used by that statement. */ -int -ha_innopart::reset() -{ - DBUG_ENTER("ha_innopart::reset"); - - clear_blob_heaps(); - - DBUG_RETURN(ha_innobase::reset()); -} - -/**************************************************************************** - * DS-MRR implementation - ***************************************************************************/ - -/* TODO: move the default implementations into the base handler class! */ -/* TODO: See if it could be optimized for partitioned tables? */ -/* Use default ha_innobase implementation for now... */ diff --git a/storage/innobase/handler/ha_innopart.h b/storage/innobase/handler/ha_innopart.h deleted file mode 100644 index 67db9e07150..00000000000 --- a/storage/innobase/handler/ha_innopart.h +++ /dev/null @@ -1,1315 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/* The InnoDB Partition handler: the interface between MySQL and InnoDB. */ - -#ifndef ha_innopart_h -#define ha_innopart_h - -#include "partitioning/partition_handler.h" - -/* Forward declarations */ -class Altered_partitions; -class partition_info; - -/** HA_DUPLICATE_POS and HA_READ_BEFORE_WRITE_REMOVAL is not -set from ha_innobase, but cannot yet be supported in ha_innopart. -Full text and geometry is not yet supported. */ -const handler::Table_flags HA_INNOPART_DISABLED_TABLE_FLAGS = - ( HA_CAN_FULLTEXT - | HA_CAN_FULLTEXT_EXT - | HA_CAN_GEOMETRY - | HA_DUPLICATE_POS - | HA_READ_BEFORE_WRITE_REMOVAL); - -/** InnoDB partition specific Handler_share. */ -class Ha_innopart_share : public Partition_share -{ -private: - /** Array of all included table definitions (one per partition). */ - dict_table_t** m_table_parts; - - /** Instead of INNOBASE_SHARE::idx_trans_tbl. Maps MySQL index number - to InnoDB index per partition. */ - dict_index_t** m_index_mapping; - - /** Total number of partitions. */ - uint m_tot_parts; - - /** Number of indexes. */ - uint m_index_count; - - /** Reference count. */ - uint m_ref_count; - - /** Pointer back to owning TABLE_SHARE. */ - TABLE_SHARE* m_table_share; - -public: - Ha_innopart_share( - TABLE_SHARE* table_share); - - ~Ha_innopart_share(); - - /** Set innodb table for given partition. - @param[in] part_id Partition number. - @param[in] table Table. */ - inline - void - set_table_part( - uint part_id, - dict_table_t* table) - { - ut_ad(m_table_parts != NULL); - ut_ad(part_id < m_tot_parts); - m_table_parts[part_id] = table; - } - - /** Return innodb table for given partition. - @param[in] part_id Partition number. - @return InnoDB table. */ - inline - dict_table_t* - get_table_part( - uint part_id) const - { - ut_ad(m_table_parts != NULL); - ut_ad(part_id < m_tot_parts); - return(m_table_parts[part_id]); - } - - /** Return innodb index for given partition and key number. - @param[in] part_id Partition number. - @param[in] keynr Key number. - @return InnoDB index. */ - dict_index_t* - get_index( - uint part_id, - uint keynr); - - /** Get MySQL key number corresponding to InnoDB index. - @param[in] part_id Partition number. - @param[in] index InnoDB index. - @return MySQL key number or MAX_KEY if non-existent. */ - uint - get_mysql_key( - uint part_id, - const dict_index_t* index); - - /** Initialize the share with table and indexes per partition. - @param[in] part_info Partition info (partition names to use) - @param[in] table_name Table name (db/table_name) - @return false on success else true. */ - bool - open_table_parts( - partition_info* part_info, - const char* table_name); - - /** Close the table partitions. - If all instances are closed, also release the resources. */ - void - close_table_parts(); - - /* Static helper functions. */ - /** Fold to lower case if windows or lower_case_table_names == 1. - @param[in,out] s String to fold.*/ - static - void - partition_name_casedn_str( - char* s); - - /** Translate and append partition name. - @param[out] to String to write in filesystem charset - @param[in] from Name in system charset - @param[in] sep Separator - @param[in] len Max length of to buffer - @return length of written string. */ - static - size_t - append_sep_and_name( - char* to, - const char* from, - const char* sep, - size_t len); - - /** Set up the virtual column template for partition table, and points - all m_table_parts[]->vc_templ to it. - @param[in] table MySQL TABLE object - @param[in] ib_table InnoDB dict_table_t - @param[in] table_name Table name (db/table_name) */ - void - set_v_templ( - TABLE* table, - dict_table_t* ib_table, - const char* name); - -private: - /** Disable default constructor. */ - Ha_innopart_share() {}; - - /** Open one partition (lower lever innodb table). - @param[in] part_id Partition to open. - @param[in] partition_name Name of partition. - @return false on success else true. */ - bool - open_one_table_part( - uint part_id, - const char* partition_name); -}; - -/** The class defining a partitioning aware handle to an InnoDB table. -Based on ha_innobase and extended with -- Partition_helper for re-using common partitioning functionality -- Partition_handler for providing partitioning specific api calls. -Generic partitioning functions are implemented in Partition_helper. -Lower level storage functions are implemented in ha_innobase. -Partition_handler is inherited for implementing the handler level interface -for partitioning specific functions, like change_partitions and -truncate_partition. -InnoDB specific functions related to partitioning is implemented here. */ -class ha_innopart: - public ha_innobase, - public Partition_helper, - public Partition_handler -{ -public: - ha_innopart( - handlerton* hton, - TABLE_SHARE* table_arg); - - ~ha_innopart(); - - /** Clone this handler, used when needing more than one cursor - to the same table. - @param[in] name Table name. - @param[in] mem_root mem_root to allocate from. - @retval Pointer to clone or NULL if error. */ - handler* - clone( - const char* name, - MEM_ROOT* mem_root); - - /** Check and register a table in the query cache. - Ask InnoDB if a query to a table can be cached. - @param[in] thd User thread handle. - @param[in] table_key Normalized path to the table. - @param[in] key_length Lenght of table_key. - @param[out] call_back Function pointer for checking if data - has changed. - @param[in,out] engine_data Data for call_back (not used). - @return TRUE if query caching of the table is permitted. */ - my_bool - register_query_cache_table( - THD* thd, - char* table_key, - size_t key_length, - qc_engine_callback* call_back, - ulonglong* engine_data) - { - /* Currently this would need to go through every - [sub] partition in the table to see if any of them has changed. - See row_search_check_if_query_cache_permitted(). - So disabled until we can avoid check all partitions. */ - return(FALSE); - } - - /** On-line ALTER TABLE interface @see handler0alter.cc @{ */ - - /** Check if InnoDB supports a particular alter table in-place. - @param[in] altered_table TABLE object for new version of table. - @param[in,out] ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported - @retval HA_ALTER_INPLACE_NO_LOCK Supported - @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but - requires lock during main phase and exclusive lock during prepare - phase. - @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare - phase requires exclusive lock. */ - enum_alter_inplace_result - check_if_supported_inplace_alter( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - - /** Prepare in-place ALTER for table. - Allows InnoDB to update internal structures with concurrent - writes blocked (provided that check_if_supported_inplace_alter() - did not return HA_ALTER_INPLACE_NO_LOCK). - This will be invoked before inplace_alter_table(). - @param[in] altered_table TABLE object for new version of table. - @param[in,out] ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - @retval true Failure. - @retval false Success. */ - bool - prepare_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - - /** Alter the table structure in-place. - Alter the table structure in-place with operations - specified using HA_ALTER_FLAGS and Alter_inplace_information. - The level of concurrency allowed during this operation depends - on the return value from check_if_supported_inplace_alter(). - @param[in] altered_table TABLE object for new version of table. - @param[in,out] ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - @retval true Failure. - @retval false Success. */ - bool - inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - - /** Commit or rollback. - Commit or rollback the changes made during - prepare_inplace_alter_table() and inplace_alter_table() inside - the storage engine. Note that the allowed level of concurrency - during this operation will be the same as for - inplace_alter_table() and thus might be higher than during - prepare_inplace_alter_table(). (E.g concurrent writes were - blocked during prepare, but might not be during commit). - @param[in] altered_table TABLE object for new version of table. - @param[in] ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - @param[in,out] commit true => Commit, false => Rollback. - @retval true Failure. - @retval false Success. */ - bool - commit_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info, - bool commit); - - /** Notify the storage engine that the table structure (.frm) has - been updated. - - ha_partition allows inplace operations that also upgrades the engine - if it supports partitioning natively. So if this is the case then - we will remove the .par file since it is not used with ha_innopart - (we use the internal data dictionary instead). */ - void - notify_table_changed(); - /** @} */ - - // TODO: should we implement init_table_handle_for_HANDLER() ? - // (or is sql_stat_start handled correctly anyway?) - int - optimize( - THD* thd, - HA_CHECK_OPT* check_opt); - - int - discard_or_import_tablespace( - my_bool discard); - - /** Compare key and rowid. - Helper function for sorting records in the priority queue. - a/b points to table->record[0] rows which must have the - key fields set. The bytes before a and b store the rowid. - This is used for comparing/sorting rows first according to - KEY and if same KEY, by rowid (ref). - - @param[in] key_info Null terminated array of index - information. - @param[in] a Pointer to record+ref in first record. - @param[in] b Pointer to record+ref in second record. - @return Return value is SIGN(first_rec - second_rec) - @retval 0 Keys are equal. - @retval -1 second_rec is greater than first_rec. - @retval +1 first_rec is greater than second_rec. */ - static - int - key_and_rowid_cmp( - KEY** key_info, - uchar *a, - uchar *b); - - int - extra( - enum ha_extra_function operation); - - void - print_error( - int error, - myf errflag); - - bool - is_ignorable_error( - int error); - - int - start_stmt( - THD* thd, - thr_lock_type lock_type); - - ha_rows - records_in_range( - uint inx, - key_range* min_key, - key_range* max_key); - - ha_rows - estimate_rows_upper_bound(); - - uint - alter_table_flags( - uint flags); - - void - update_create_info( - HA_CREATE_INFO* create_info); - - int - create( - const char* name, - TABLE* form, - HA_CREATE_INFO* create_info); - - int - truncate(); - - int - check( - THD* thd, - HA_CHECK_OPT* check_opt); - - /** Repair table. - Will only handle records in wrong partition, not repairing - corrupt innodb indexes. - @param[in] thd Thread context. - @param[in] repair_opt Repair options. - @return 0 or error code. */ - int - repair( - THD* thd, - HA_CHECK_OPT* repair_opt); - - bool - can_switch_engines(); - - uint - referenced_by_foreign_key(); - - void - get_auto_increment( - ulonglong offset, - ulonglong increment, - ulonglong nb_desired_values, - ulonglong* first_value, - ulonglong* nb_reserved_values); - - int - cmp_ref( - const uchar* ref1, - const uchar* ref2); - - int - read_range_first( - const key_range* start_key, - const key_range* end_key, - bool eq_range_arg, - bool sorted) - { - return(Partition_helper::ph_read_range_first( - start_key, - end_key, - eq_range_arg, - sorted)); - } - - void - position( - const uchar* record) - { - Partition_helper::ph_position(record); - } - - int - rnd_pos_by_record( - uchar* record) - { - return(Partition_helper::ph_rnd_pos_by_record(record)); - } - - /* TODO: Implement these! */ - bool - check_if_incompatible_data( - HA_CREATE_INFO* info, - uint table_changes) - { - ut_ad(0); - return(COMPATIBLE_DATA_NO); - } - - int - delete_all_rows() - { - return(handler::delete_all_rows()); - } - - int - disable_indexes( - uint mode) - { - return(HA_ERR_WRONG_COMMAND); - } - - int - enable_indexes( - uint mode) - { - return(HA_ERR_WRONG_COMMAND); - } - - void - free_foreign_key_create_info( - char* str) - { - ut_ad(0); - } - - int - ft_init() - { - ut_ad(0); - return(HA_ERR_WRONG_COMMAND); - } - - FT_INFO* - ft_init_ext( - uint flags, - uint inx, - String* key) - { - ut_ad(0); - return(NULL); - } - - FT_INFO* - ft_init_ext_with_hints( - uint inx, - String* key, - Ft_hints* hints) - { - ut_ad(0); - return(NULL); - } - - int - ft_read( - uchar* buf) - { - ut_ad(0); - return(HA_ERR_WRONG_COMMAND); - } - - bool - get_foreign_dup_key( - char* child_table_name, - uint child_table_name_len, - char* child_key_name, - uint child_key_name_len) - { - ut_ad(0); - return(false); - } - - // TODO: not yet supporting FK. - char* - get_foreign_key_create_info() - { - return(NULL); - } - - // TODO: not yet supporting FK. - int - get_foreign_key_list( - THD* thd, - List<FOREIGN_KEY_INFO>* f_key_list) - { - return(0); - } - - // TODO: not yet supporting FK. - int - get_parent_foreign_key_list( - THD* thd, - List<FOREIGN_KEY_INFO>* f_key_list) - { - return(0); - } - - // TODO: not yet supporting FK. - int - get_cascade_foreign_key_table_list( - THD* thd, - List<st_handler_tablename>* fk_table_list) - { - return(0); - } - - int - read_range_next() - { - return(Partition_helper::ph_read_range_next()); - } - - uint32 - calculate_key_hash_value( - Field** field_array) - { - return(Partition_helper::ph_calculate_key_hash_value(field_array)); - } - - Table_flags - table_flags() const - { - return(ha_innobase::table_flags() | HA_CAN_REPAIR); - } - - void - release_auto_increment() - { - Partition_helper::ph_release_auto_increment(); - } - - /** Implementing Partition_handler interface @see partition_handler.h - @{ */ - - /** See Partition_handler. */ - void - get_dynamic_partition_info( - ha_statistics* stat_info, - ha_checksum* check_sum, - uint part_id) - { - Partition_helper::get_dynamic_partition_info_low( - stat_info, - check_sum, - part_id); - } - - uint - alter_flags( - uint flags MY_ATTRIBUTE((unused))) const - { - return(HA_PARTITION_FUNCTION_SUPPORTED - | HA_FAST_CHANGE_PARTITION); - } - - Partition_handler* - get_partition_handler() - { - return(static_cast<Partition_handler*>(this)); - } - - void - set_part_info( - partition_info* part_info, - bool early) - { - Partition_helper::set_part_info_low(part_info, early); - } - - void - initialize_partitioning( - partition_info* part_info, - bool early) - { - Partition_helper::set_part_info_low(part_info, early); - } - - handler* - get_handler() - { - return(static_cast<handler*>(this)); - } - /** @} */ - -private: - /** Pointer to Ha_innopart_share on the TABLE_SHARE. */ - Ha_innopart_share* m_part_share; - - /** ins_node per partition. Synchronized with prebuilt->ins_node - when changing partitions. */ - ins_node_t** m_ins_node_parts; - - /** upd_node per partition. Synchronized with prebuilt->upd_node - when changing partitions. */ - upd_node_t** m_upd_node_parts; - - /** blob_heap per partition. Synchronized with prebuilt->blob_heap - when changing partitions. */ - mem_heap_t** m_blob_heap_parts; - - /** trx_id from the partitions table->def_trx_id. Keep in sync - with prebuilt->trx_id when changing partitions. - prebuilt only reflects the current partition! */ - trx_id_t* m_trx_id_parts; - - /** row_read_type per partition. */ - ulint* m_row_read_type_parts; - - /** sql_stat_start per partition. */ - uchar* m_sql_stat_start_parts; - - /** persistent cursors per partition. */ - btr_pcur_t* m_pcur_parts; - - /** persistent cluster cursors per partition. */ - btr_pcur_t* m_clust_pcur_parts; - - /** map from part_id to offset in above two arrays. */ - uint16_t* m_pcur_map; - - /** Original m_prebuilt->pcur. */ - btr_pcur_t* m_pcur; - - /** Original m_prebuilt->clust_pcur. */ - btr_pcur_t* m_clust_pcur; - - /** New partitions during ADD/REORG/... PARTITION. */ - Altered_partitions* m_new_partitions; - - /** Clear used ins_nodes and upd_nodes. */ - void - clear_ins_upd_nodes(); - - /** Clear the blob heaps for all partitions */ - void - clear_blob_heaps(); - - /** Reset state of file to after 'open'. This function is called - after every statement for all tables used by that statement. */ - int - reset(); - - /** Allocate the array to hold blob heaps for all partitions */ - mem_heap_t** - alloc_blob_heap_array(); - - /** Free the array that holds blob heaps for all partitions */ - void - free_blob_heap_array(); - - /** Changes the active index of a handle. - @param[in] part_id Use this partition. - @param[in] keynr Use this index; MAX_KEY means always - clustered index, even if it was internally generated by InnoDB. - @return 0 or error code. */ - int - change_active_index( - uint part_id, - uint keynr); - - /** Move to next partition and set its index. - @return 0 for success else error number. */ - int - next_partition_index(); - - /** Get the index for the current partition - @param[in] keynr MySQL index number. - @return InnoDB index or NULL. */ - dict_index_t* - innobase_get_index( - uint keynr); - - /** Get the index for a handle. - Does not change active index. - @param[in] keynr use this index; MAX_KEY means always clustered - index, even if it was internally generated by InnoDB. - @param[in] part_id From this partition. - @return NULL or index instance. */ - dict_index_t* - innopart_get_index( - uint part_id, - uint keynr); - - /** Change active partition. - Copies needed info into m_prebuilt from the partition specific memory. - @param[in] part_id Partition to set as active. */ - void - set_partition( - uint part_id); - - /** Update active partition. - Copies needed info from m_prebuilt into the partition specific memory. - @param[in] part_id Partition to set as active. */ - void - update_partition( - uint part_id); - - /** Helpers needed by Partition_helper, @see partition_handler.h @{ */ - - /** Set the autoinc column max value. - This should only be called once from ha_innobase::open(). - Therefore there's no need for a covering lock. - @param[in] no_lock If locking should be skipped. Not used! - @return 0 on success else error code. */ - int - initialize_auto_increment( - bool /* no_lock */); - - /** Setup the ordered record buffer and the priority queue. - @param[in] used_parts Number of used partitions in query. - @return false for success, else true. */ - int - init_record_priority_queue_for_parts( - uint used_parts); - - /** Destroy the ordered record buffer and the priority queue. */ - void - destroy_record_priority_queue_for_parts(); - - /** Prepare for creating new partitions during ALTER TABLE ... - PARTITION. - @param[in] num_partitions Number of new partitions to be created. - @param[in] only_create True if only creating the partition - (no open/lock is needed). - @return 0 for success else error code. */ - int - prepare_for_new_partitions( - uint num_partitions, - bool only_create); - - /** Create a new partition to be filled during ALTER TABLE ... - PARTITION. - @param[in] table Table to create the partition in. - @param[in] create_info Table/partition specific create info. - @param[in] part_name Partition name. - @param[in] new_part_id Partition id in new table. - @param[in] part_elem Partition element. - @return 0 for success else error code. */ - int - create_new_partition( - TABLE* table, - HA_CREATE_INFO* create_info, - const char* part_name, - uint new_part_id, - partition_element* part_elem); - - /** Close and finalize new partitions. */ - void - close_new_partitions(); - - /** write row to new partition. - @param[in] new_part New partition to write to. - @return 0 for success else error code. */ - int - write_row_in_new_part( - uint new_part); - - /** Write a row in specific partition. - Stores a row in an InnoDB database, to the table specified in this - handle. - @param[in] part_id Partition to write to. - @param[in] row A row in MySQL format. - @return error code. */ - int - write_row_in_part( - uint part_id, - uchar* row); - - /** Update a row in partition. - Updates a row given as a parameter to a new value. - @param[in] part_id Partition to update row in. - @param[in] old_row Old row in MySQL format. - @param[in] new_row New row in MySQL format. - @return error number or 0. */ - int - update_row_in_part( - uint part_id, - const uchar* old_row, - uchar* new_row); - - /** Deletes a row in partition. - @param[in] part_id Partition to delete from. - @param[in] row Row to delete in MySQL format. - @return error number or 0. */ - int - delete_row_in_part( - uint part_id, - const uchar* row); - - /** Return first record in index from a partition. - @param[in] part Partition to read from. - @param[out] record First record in index in the partition. - @return error number or 0. */ - int - index_first_in_part( - uint part, - uchar* record); - - /** Return last record in index from a partition. - @param[in] part Partition to read from. - @param[out] record Last record in index in the partition. - @return error number or 0. */ - int - index_last_in_part( - uint part, - uchar* record); - - /** Return previous record in index from a partition. - @param[in] part Partition to read from. - @param[out] record Last record in index in the partition. - @return error number or 0. */ - int - index_prev_in_part( - uint part, - uchar* record); - - /** Return next record in index from a partition. - @param[in] part Partition to read from. - @param[out] record Last record in index in the partition. - @return error number or 0. */ - int - index_next_in_part( - uint part, - uchar* record); - - /** Return next same record in index from a partition. - This routine is used to read the next record, but only if the key is - the same as supplied in the call. - @param[in] part Partition to read from. - @param[out] record Last record in index in the partition. - @param[in] key Key to match. - @param[in] length Length of key. - @return error number or 0. */ - int - index_next_same_in_part( - uint part, - uchar* record, - const uchar* key, - uint length); - - /** Start index scan and return first record from a partition. - This routine starts an index scan using a start key. The calling - function will check the end key on its own. - @param[in] part Partition to read from. - @param[out] record First matching record in index in the partition. - @param[in] key Key to match. - @param[in] keypart_map Which part of the key to use. - @param[in] find_flag Key condition/direction to use. - @return error number or 0. */ - int - index_read_map_in_part( - uint part, - uchar* record, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag); - - /** Return last matching record in index from a partition. - @param[in] part Partition to read from. - @param[out] record Last matching record in index in the partition. - @param[in] key Key to match. - @param[in] keypart_map Which part of the key to use. - @return error number or 0. */ - int - index_read_last_map_in_part( - uint part, - uchar* record, - const uchar* key, - key_part_map keypart_map); - - /** Start index scan and return first record from a partition. - This routine starts an index scan using a start and end key. - @param[in] part Partition to read from. - @param[out] record First matching record in index in the partition. - if NULL use table->record[0] as return buffer. - @param[in] start_key Start key to match. - @param[in] end_key End key to match. - @param[in] eq_range Is equal range, start_key == end_key. - @param[in] sorted Return rows in sorted order. - @return error number or 0. */ - int - read_range_first_in_part( - uint part, - uchar* record, - const key_range* start_key, - const key_range* end_key, - bool eq_range, - bool sorted); - - /** Return next record in index range scan from a partition. - @param[in] part Partition to read from. - @param[out] record First matching record in index in the partition. - if NULL use table->record[0] as return buffer. - @return error number or 0. */ - int - read_range_next_in_part( - uint part, - uchar* record); - - /** Start index scan and return first record from a partition. - This routine starts an index scan using a start key. The calling - function will check the end key on its own. - @param[in] part Partition to read from. - @param[out] record First matching record in index in the partition. - @param[in] index Index to read from. - @param[in] key Key to match. - @param[in] keypart_map Which part of the key to use. - @param[in] find_flag Key condition/direction to use. - @return error number or 0. */ - int - index_read_idx_map_in_part( - uint part, - uchar* record, - uint index, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag); - - /** Initialize random read/scan of a specific partition. - @param[in] part_id Partition to initialize. - @param[in] table_scan True for scan else random access. - @return error number or 0. */ - int - rnd_init_in_part( - uint part_id, - bool table_scan); - - /** Get next row during scan of a specific partition. - @param[in] part_id Partition to read from. - @param[out] record Next row. - @return error number or 0. */ - int - rnd_next_in_part( - uint part_id, - uchar* record); - - /** End random read/scan of a specific partition. - @param[in] part_id Partition to end random read/scan. - @param[in] table_scan True for scan else random access. - @return error number or 0. */ - int - rnd_end_in_part( - uint part_id, - bool table_scan); - - /** Get a reference to the current cursor position in the last used - partition. - @param[out] ref Reference (PK if exists else row_id). - @param[in] record Record to position. */ - void - position_in_last_part( - uchar* ref, - const uchar* record); - - /** Read record by given record (by its PK) from the last used partition. - see handler::rnd_pos_by_record(). - @param[in,out] record Record to position. - @return 0 or error number. */ - int - rnd_pos_by_record_in_last_part( - uchar* record) - { - /* Not much overhead to use default function. - This avoids out-of-sync code. */ - return(handler::rnd_pos_by_record(record)); - } - - /** Copy a cached MySQL record. - @param[out] to_record Where to copy the MySQL record. - @param[in] from_record Which record to copy. */ - void - copy_cached_row( - uchar* to_record, - const uchar* from_record); - /** @} */ - - /* Private handler:: functions specific for native InnoDB partitioning. - @see handler.h @{ */ - - int - open( - const char* name, - int mode, - uint test_if_locked); - - int - close(); - - double - scan_time(); - - /** Was the last returned row semi consistent read. - In an UPDATE or DELETE, if the row under the cursor was locked by - another transaction, and the engine used an optimistic read of the last - committed row value under the cursor, then the engine returns 1 from - this function. MySQL must NOT try to update this optimistic value. If - the optimistic value does not match the WHERE condition, MySQL can - decide to skip over this row. This can be used to avoid unnecessary - lock waits. - - If this method returns true, it will also signal the storage - engine that the next read will be a locking re-read of the row. - @see handler.h and row0mysql.h - @return true if last read was semi consistent else false. */ - bool was_semi_consistent_read(); - - /** Try semi consistent read. - Tell the engine whether it should avoid unnecessary lock waits. - If yes, in an UPDATE or DELETE, if the row under the cursor was locked - by another transaction, the engine may try an optimistic read of - the last committed row value under the cursor. - @see handler.h and row0mysql.h - @param[in] yes Should semi-consistent read be used. */ - void try_semi_consistent_read( - bool yes); - - /** Removes a lock on a row. - Removes a new lock set on a row, if it was not read optimistically. - This can be called after a row has been read in the processing of - an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */ - void unlock_row(); - - int - index_init( - uint index, - bool sorted); - - int - index_end(); - - int - rnd_init( - bool scan) - { - return(Partition_helper::ph_rnd_init(scan)); - } - - int - rnd_end() - { - return(Partition_helper::ph_rnd_end()); - } - - int - external_lock( - THD* thd, - int lock_type); - - THR_LOCK_DATA** - store_lock( - THD* thd, - THR_LOCK_DATA** to, - thr_lock_type lock_type); - - int - write_row( - uchar* record) - { - return(Partition_helper::ph_write_row(record)); - } - - int - update_row( - const uchar* old_record, - uchar* new_record) - { - return(Partition_helper::ph_update_row(old_record, new_record)); - } - - int - delete_row( - const uchar* record) - { - return(Partition_helper::ph_delete_row(record)); - } - /** @} */ - - /** Truncate partition. - Called from Partition_handler::trunctate_partition(). */ - int - truncate_partition_low(); - - /** Change partitions according to ALTER TABLE ... PARTITION ... - Called from Partition_handler::change_partitions(). - @param[in] create_info Table create info. - @param[in] path Path including db/table_name. - @param[out] copied Number of copied rows. - @param[out] deleted Number of deleted rows. - @return 0 for success or error code. */ - int - change_partitions_low( - HA_CREATE_INFO* create_info, - const char* path, - ulonglong* const copied, - ulonglong* const deleted) - { - return(Partition_helper::change_partitions( - create_info, - path, - copied, - deleted)); - } - - /** Access methods to protected areas in handler to avoid adding - friend class Partition_helper in class handler. - @see partition_handler.h @{ */ - - THD* - get_thd() const - { - return ha_thd(); - } - - TABLE* - get_table() const - { - return table; - } - - bool - get_eq_range() const - { - return eq_range; - } - - void - set_eq_range(bool eq_range_arg) - { - eq_range= eq_range_arg; - } - - void - set_range_key_part(KEY_PART_INFO *key_part) - { - range_key_part= key_part; - } - /** @} */ - - /** Fill in data_dir_path and tablespace name from internal data - dictionary. - @param part_elem Partition element to fill. - @param ib_table InnoDB table to copy from. */ - void - update_part_elem( - partition_element* part_elem, - dict_table_t* ib_table); -protected: - /* Protected handler:: functions specific for native InnoDB partitioning. - @see handler.h @{ */ - - int - rnd_next( - uchar* record) - { - return(Partition_helper::ph_rnd_next(record)); - } - - int - rnd_pos( - uchar* record, - uchar* pos); - - int - index_next( - uchar* record) - { - return(Partition_helper::ph_index_next(record)); - } - - int - index_next_same( - uchar* record, - const uchar* key, - uint keylen) - { - return(Partition_helper::ph_index_next_same(record, key, keylen)); - } - - int - index_prev( - uchar* record) - { - return(Partition_helper::ph_index_prev(record)); - } - - int - index_first( - uchar* record) - { - return(Partition_helper::ph_index_first(record)); - } - - int - index_last( - uchar* record) - { - return(Partition_helper::ph_index_last(record)); - } - - int - index_read_last_map( - uchar* record, - const uchar* key, - key_part_map keypart_map) - { - return(Partition_helper::ph_index_read_last_map( - record, - key, - keypart_map)); - } - - int - index_read_map( - uchar* buf, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag) - { - return(Partition_helper::ph_index_read_map( - buf, - key, - keypart_map, - find_flag)); - } - - int - index_read_idx_map( - uchar* buf, - uint index, - const uchar* key, - key_part_map keypart_map, - enum ha_rkey_function find_flag) - { - return(Partition_helper::ph_index_read_idx_map( - buf, - index, - key, - keypart_map, - find_flag)); - } - /** @} */ - - /** Updates and return statistics. - Returns statistics information of the table to the MySQL interpreter, - in various fields of the handle object. - @param[in] flag Flags for what to update and return. - @param[in] is_analyze True if called from ::analyze(). - @return HA_ERR_* error code or 0. */ - int - info_low( - uint flag, - bool is_analyze); -}; -#endif /* ha_innopart_h */ diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index ceaa07bbd40..e3bc5fd5799 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -320,14 +320,22 @@ my_error_innodb( case DB_CORRUPTION: my_error(ER_NOT_KEYFILE, MYF(0), table); break; - case DB_TOO_BIG_RECORD: - /* We limit max record size to 16k for 64k page size. */ - my_error(ER_TOO_BIG_ROWSIZE, MYF(0), - srv_page_size == UNIV_PAGE_SIZE_MAX - ? REC_MAX_DATA_SIZE - 1 - : page_get_free_space_of_empty( - flags & DICT_TF_COMPACT) / 2); + case DB_TOO_BIG_RECORD: { + /* Note that in page0zip.ic page_zip_rec_needs_ext() rec_size + is limited to COMPRESSED_REC_MAX_DATA_SIZE (16K) or + REDUNDANT_REC_MAX_DATA_SIZE (16K-1). */ + bool comp = !!(flags & DICT_TF_COMPACT); + ulint free_space = page_get_free_space_of_empty(comp) / 2; + + if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE : + REDUNDANT_REC_MAX_DATA_SIZE)) { + free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE : + REDUNDANT_REC_MAX_DATA_SIZE) - 1; + } + + my_error(ER_TOO_BIG_ROWSIZE, MYF(0), free_space); break; + } case DB_INVALID_NULL: /* TODO: report the row, as we do for DB_DUPLICATE_KEY */ my_error(ER_INVALID_USE_OF_NULL, MYF(0)); @@ -3293,8 +3301,8 @@ innobase_pk_col_prefix_compare( ulint new_prefix_len, ulint old_prefix_len) { - ut_ad(new_prefix_len < REC_MAX_DATA_SIZE); - ut_ad(old_prefix_len < REC_MAX_DATA_SIZE); + ut_ad(new_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE); + ut_ad(old_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE); if (new_prefix_len == old_prefix_len) { return(0); @@ -6372,6 +6380,7 @@ ha_innobase::inplace_alter_table( DBUG_ENTER("inplace_alter_table"); DBUG_ASSERT(!srv_read_only_mode); + ut_ad(!sync_check_iterate(sync_check())); ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_S)); diff --git a/storage/innobase/handler/handler0alter_innopart.cc b/storage/innobase/handler/handler0alter_innopart.cc deleted file mode 100644 index 0f2d5c7e576..00000000000 --- a/storage/innobase/handler/handler0alter_innopart.cc +++ /dev/null @@ -1,307 +0,0 @@ -/* JAN: TODO: MySQL 5.7 InnoDB partitioning. */ - -/** Prepare inplace alter table. -Allows InnoDB to update internal structures with concurrent -writes blocked (provided that check_if_supported_inplace_alter() -did not return HA_ALTER_INPLACE_NO_LOCK). -This will be invoked before inplace_alter_table(). -@param[in] altered_table TABLE object for new version of table. -@param[in] ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. -@retval true Failure. -@retval false Success. */ -bool -ha_innopart::prepare_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info) -{ - THD* thd; - ha_innopart_inplace_ctx* ctx_parts; - bool res = true; - DBUG_ENTER("ha_innopart::prepare_inplace_alter_table"); - DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); - - thd = ha_thd(); - - /* Clean up all ins/upd nodes. */ - clear_ins_upd_nodes(); - /* Based on Sql_alloc class, return NULL for new on failure. */ - ctx_parts = new ha_innopart_inplace_ctx(thd, m_tot_parts); - if (!ctx_parts) { - DBUG_RETURN(HA_ALTER_ERROR); - } - - uint ctx_array_size = sizeof(inplace_alter_handler_ctx*) - * (m_tot_parts + 1); - ctx_parts->ctx_array = - static_cast<inplace_alter_handler_ctx**>( - ut_malloc(ctx_array_size, - mem_key_partitioning)); - if (!ctx_parts->ctx_array) { - DBUG_RETURN(HA_ALTER_ERROR); - } - - /* Set all to NULL, including the terminating one. */ - memset(ctx_parts->ctx_array, 0, ctx_array_size); - - ctx_parts->prebuilt_array = static_cast<row_prebuilt_t**>( - ut_malloc(sizeof(row_prebuilt_t*) - * m_tot_parts, - mem_key_partitioning)); - if (!ctx_parts->prebuilt_array) { - DBUG_RETURN(HA_ALTER_ERROR); - } - /* For the first partition use the current prebuilt. */ - ctx_parts->prebuilt_array[0] = m_prebuilt; - /* Create new prebuilt for the rest of the partitions. - It is needed for the current implementation of - ha_innobase::commit_inplace_alter_table(). */ - for (uint i = 1; i < m_tot_parts; i++) { - row_prebuilt_t* tmp_prebuilt; - tmp_prebuilt = row_create_prebuilt( - m_part_share->get_table_part(i), - table_share->reclength); - /* Use same trx as original prebuilt. */ - tmp_prebuilt->trx = m_prebuilt->trx; - ctx_parts->prebuilt_array[i] = tmp_prebuilt; - } - - for (uint i = 0; i < m_tot_parts; i++) { - m_prebuilt = ctx_parts->prebuilt_array[i]; - m_prebuilt_ptr = ctx_parts->prebuilt_array + i; - ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; - set_partition(i); - res = ha_innobase::prepare_inplace_alter_table(altered_table, - ha_alter_info); - update_partition(i); - ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; - if (res) { - break; - } - } - m_prebuilt = ctx_parts->prebuilt_array[0]; - m_prebuilt_ptr = &m_prebuilt; - ha_alter_info->handler_ctx = ctx_parts; - ha_alter_info->group_commit_ctx = ctx_parts->ctx_array; - DBUG_RETURN(res); -} - -/** Inplace alter table. -Alter the table structure in-place with operations -specified using Alter_inplace_info. -The level of concurrency allowed during this operation depends -on the return value from check_if_supported_inplace_alter(). -@param[in] altered_table TABLE object for new version of table. -@param[in] ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. -@retval true Failure. -@retval false Success. */ -bool -ha_innopart::inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info) -{ - bool res = true; - ha_innopart_inplace_ctx* ctx_parts; - - ctx_parts = static_cast<ha_innopart_inplace_ctx*>( - ha_alter_info->handler_ctx); - for (uint i = 0; i < m_tot_parts; i++) { - m_prebuilt = ctx_parts->prebuilt_array[i]; - ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; - set_partition(i); - res = ha_innobase::inplace_alter_table(altered_table, - ha_alter_info); - ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); - ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; - if (res) { - break; - } - } - m_prebuilt = ctx_parts->prebuilt_array[0]; - ha_alter_info->handler_ctx = ctx_parts; - return(res); -} - -/** Commit or rollback inplace alter table. -Commit or rollback the changes made during -prepare_inplace_alter_table() and inplace_alter_table() inside -the storage engine. Note that the allowed level of concurrency -during this operation will be the same as for -inplace_alter_table() and thus might be higher than during -prepare_inplace_alter_table(). (E.g concurrent writes were -blocked during prepare, but might not be during commit). -@param[in] altered_table TABLE object for new version of table. -@param[in] ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. -@param[in] commit true => Commit, false => Rollback. -@retval true Failure. -@retval false Success. */ -bool -ha_innopart::commit_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info, - bool commit) -{ - bool res = false; - ha_innopart_inplace_ctx* ctx_parts; - - ctx_parts = static_cast<ha_innopart_inplace_ctx*>( - ha_alter_info->handler_ctx); - ut_ad(ctx_parts); - ut_ad(ctx_parts->prebuilt_array); - ut_ad(ctx_parts->prebuilt_array[0] == m_prebuilt); - if (commit) { - /* Commit is done through first partition (group commit). */ - ut_ad(ha_alter_info->group_commit_ctx == ctx_parts->ctx_array); - ha_alter_info->handler_ctx = ctx_parts->ctx_array[0]; - set_partition(0); - res = ha_innobase::commit_inplace_alter_table(altered_table, - ha_alter_info, - commit); - ut_ad(res || !ha_alter_info->group_commit_ctx); - goto end; - } - /* Rollback is done for each partition. */ - for (uint i = 0; i < m_tot_parts; i++) { - m_prebuilt = ctx_parts->prebuilt_array[i]; - ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; - set_partition(i); - if (ha_innobase::commit_inplace_alter_table(altered_table, - ha_alter_info, commit)) { - res = true; - } - ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); - ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; - } -end: - /* Move the ownership of the new tables back to - the m_part_share. */ - ha_innobase_inplace_ctx* ctx; - for (uint i = 0; i < m_tot_parts; i++) { - /* TODO: Fix to only use one prebuilt (i.e. make inplace - alter partition aware instead of using multiple prebuilt - copies... */ - ctx = static_cast<ha_innobase_inplace_ctx*>( - ctx_parts->ctx_array[i]); - if (ctx) { - m_part_share->set_table_part(i, ctx->prebuilt->table); - ctx->prebuilt->table = NULL; - ctx_parts->prebuilt_array[i] = ctx->prebuilt; - } - } - /* The above juggling of prebuilt must be reset here. */ - m_prebuilt = ctx_parts->prebuilt_array[0]; - m_prebuilt->table = m_part_share->get_table_part(0); - ha_alter_info->handler_ctx = ctx_parts; - return(res); -} - -/** Notify the storage engine that the table structure (.frm) has -been updated. - -ha_partition allows inplace operations that also upgrades the engine -if it supports partitioning natively. So if this is the case then -we will remove the .par file since it is not used with ha_innopart -(we use the internal data dictionary instead). */ -void -ha_innopart::notify_table_changed() -{ - char tmp_par_path[FN_REFLEN + 1]; - strxnmov(tmp_par_path, FN_REFLEN, table->s->normalized_path.str, - ".par", NullS); - - if (my_access(tmp_par_path, W_OK) == 0) - { - my_delete(tmp_par_path, MYF(0)); - } -} - -/** Check if supported inplace alter table. -@param[in] altered_table Altered MySQL table. -@param[in] ha_alter_info Information about inplace operations to do. -@return Lock level, not supported or error */ -enum_alter_inplace_result -ha_innopart::check_if_supported_inplace_alter( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info) -{ - DBUG_ENTER("ha_innopart::check_if_supported_inplace_alter"); - DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); - - /* Not supporting these for partitioned tables yet! */ - - /* FK not yet supported. */ - if (ha_alter_info->handler_flags - & (Alter_inplace_info::ADD_FOREIGN_KEY - | Alter_inplace_info::DROP_FOREIGN_KEY)) { - - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_FOREIGN_KEY_ON_PARTITIONED); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - /* FTS not yet supported either. */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_INDEX)) { - - for (uint i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY* key = - &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - if (key->flags & HA_FULLTEXT) { - DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK - & ~(HA_FULLTEXT - | HA_PACK_KEY - | HA_GENERATED_KEY - | HA_BINARY_PACK_KEY))); - ha_alter_info->unsupported_reason = - innobase_get_err_msg( - ER_FULLTEXT_NOT_SUPPORTED_WITH_PARTITIONING); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - } - /* We cannot allow INPLACE to change order of KEY partitioning fields! */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) - && !m_part_info->same_key_column_order( - &ha_alter_info->alter_info->create_list)) { - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* Cannot allow INPLACE for drop and create PRIMARY KEY if partition is - on Primary Key - PARTITION BY KEY() */ - if ((ha_alter_info->handler_flags - & (Alter_inplace_info::ADD_PK_INDEX - | Alter_inplace_info::DROP_PK_INDEX))) { - - /* Check partition by key(). */ - if ((m_part_info->part_type == HASH_PARTITION) - && m_part_info->list_of_part_fields - && m_part_info->part_field_list.is_empty()) { - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* Check sub-partition by key(). */ - if ((m_part_info->subpart_type == HASH_PARTITION) - && m_part_info->list_of_subpart_fields - && m_part_info->subpart_field_list.is_empty()) { - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - - /* Check for PK and UNIQUE should already be done when creating the - new table metadata. - (fix_partition_info/check_primary_key+check_unique_key) */ - - set_partition(0); - enum_alter_inplace_result res = - ha_innobase::check_if_supported_inplace_alter(altered_table, - ha_alter_info); - - DBEUG_RETURN(res); -} - diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index f80047f29a9..8836e858018 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1730,9 +1730,6 @@ struct dict_sys_t{ on name */ hash_table_t* table_id_hash; /*!< hash table of the tables, based on id */ - lint size; /*!< varying space in bytes occupied - by the data dictionary table and - index objects */ dict_table_t* sys_tables; /*!< SYS_TABLES table */ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ @@ -2032,6 +2029,13 @@ dict_table_decode_n_col( ulint* n_col, ulint* n_v_col); +/** Calculate the used memory occupied by the data dictionary +table and index objects. +@return number of bytes occupied. */ +UNIV_INTERN +ulint +dict_sys_get_size(); + /** Look for any dictionary objects that are found in the given tablespace. @param[in] space_id Tablespace ID to search for. @return true if tablespace is empty. */ diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index e5057b30501..6697c1f37ed 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -588,15 +588,12 @@ fseg_free_page_func( # define fseg_free_page(header, space_id, page, ahi, mtr) \ fseg_free_page_func(header, space_id, page, mtr) #endif /* BTR_CUR_HASH_ADAPT */ -/**********************************************************************//** -Checks if a single page of a segment is free. -@return true if free */ +/** Determine whether a page is free. +@param[in,out] space tablespace +@param[in] page page number +@return whether the page is marked as free */ bool -fseg_page_is_free( -/*==============*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space_id, /*!< in: space id */ - ulint page) /*!< in: page offset */ +fseg_page_is_free(fil_space_t* space, unsigned page) MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************************//** Frees part of a segment. This function can be used to free a segment @@ -834,22 +831,6 @@ xdes_calc_descriptor_page( const page_size_t& page_size, ulint offset); -/**********************************************************************//** -Checks if a single page is free. -@return true if free */ -UNIV_INTERN -bool -fsp_page_is_free_func( -/*==============*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page offset */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - const char *file, - unsigned line); - -#define fsp_page_is_free(space,page,mtr) \ - fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) - #endif /* UNIV_INNOCHECKSUM */ #include "fsp0fsp.ic" diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index f743985147c..58da7bacc6f 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -489,6 +489,7 @@ log_free_check(void) commit_try_rebuild() */ SYNC_DICT_OPERATION, /* dict_operation_lock X-latch during commit_try_rebuild() */ + SYNC_FTS_CACHE, /* fts_cache_t::lock */ SYNC_INDEX_TREE /* index->lock */ }; #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 0725a5405a4..e47e89ae4ba 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -129,14 +129,6 @@ enum mlog_id_t { MLOG_LSN = 28, #endif /* UNIV_LOG_LSN_DEBUG */ - /** this means that a file page is taken into use and the prior - contents of the page should be ignored: in recovery we must not - trust the lsn values stored to the file page. - Note: it's deprecated because it causes crash recovery problem - in bulk create index, and actually we don't need to reset page - lsn in recv_recover_page_func() now. */ - MLOG_INIT_FILE_PAGE = 29, - /** write a string to a page */ MLOG_WRITE_STRING = 30, @@ -224,8 +216,7 @@ enum mlog_id_t { /** create a R-tree compact page */ MLOG_COMP_PAGE_CREATE_RTREE = 58, - /** this means that a file page is taken into use. - We use it to replace MLOG_INIT_FILE_PAGE. */ + /** initialize a file page */ MLOG_INIT_FILE_PAGE2 = 59, /** Table is being truncated. (Marked only for file-per-table) */ diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic index fa03279f9bc..b471e2cf64e 100644 --- a/storage/innobase/include/page0zip.ic +++ b/storage/innobase/include/page0zip.ic @@ -168,8 +168,9 @@ page_zip_rec_needs_ext( > ulint(comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); ut_ad(comp || !page_size.is_compressed()); -#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE - if (rec_size >= REC_MAX_DATA_SIZE) { +#if UNIV_PAGE_SIZE_MAX > COMPRESSED_REC_MAX_DATA_SIZE + if (comp ? rec_size >= COMPRESSED_REC_MAX_DATA_SIZE : + rec_size >= REDUNDANT_REC_MAX_DATA_SIZE) { return(TRUE); } #endif diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 8d3f87450f8..a5e3268b7d7 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -1099,9 +1099,15 @@ are given in one byte (resp. two byte) format. */ #define REC_1BYTE_OFFS_LIMIT 0x7FUL #define REC_2BYTE_OFFS_LIMIT 0x7FFFUL -/* The data size of record must be smaller than this because we reserve -two upmost bits in a two byte offset for special purposes */ -#define REC_MAX_DATA_SIZE 16384 +/* The data size of record must not be larger than this on +REDUNDANT row format because we reserve two upmost bits in a +two byte offset for special purposes */ +#define REDUNDANT_REC_MAX_DATA_SIZE (16383) + +/* The data size of record must be smaller than this on +COMPRESSED row format because we reserve two upmost bits in a +two byte offset for special purposes */ +#define COMPRESSED_REC_MAX_DATA_SIZE (16384) #ifdef WITH_WSREP int wsrep_rec_get_foreign_key( diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index d4d55601bc2..d73c186b12e 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -244,6 +244,18 @@ struct sel_buf_t{ when data != NULL */ }; +/** Copy used fields from cached row. +Copy cache record field by field, don't touch fields that +are not covered by current key. +@param[out] buf Where to copy the MySQL row. +@param[in] cached_rec What to copy (in MySQL row format). +@param[in] prebuilt prebuilt struct. */ +void +row_sel_copy_cached_fields_for_mysql( + byte* buf, + const byte* cached_rec, + row_prebuilt_t* prebuilt); + /** Query plan */ struct plan_t{ dict_table_t* table; /*!< table struct in the dictionary diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 891f25f68f1..3eddd300acc 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -227,12 +227,6 @@ extern ib_mutex_t page_zip_stat_per_index_mutex; extern ib_mutex_t srv_monitor_file_mutex; /* Temporary file for innodb monitor output */ extern FILE* srv_monitor_file; -/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode. -This mutex has a very high rank; threads reserving it should not -be holding any InnoDB latches. */ -extern ib_mutex_t srv_dict_tmpfile_mutex; -/* Temporary file for output from the data dictionary */ -extern FILE* srv_dict_tmpfile; /* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode. This mutex has a very low rank; threads reserving it should not acquire any further latches or sleep before releasing this one. */ @@ -507,7 +501,9 @@ enum srv_operation_mode { /** Mariabackup taking a backup */ SRV_OPERATION_BACKUP, /** Mariabackup restoring a backup */ - SRV_OPERATION_RESTORE + SRV_OPERATION_RESTORE, + /** Mariabackup restoring the incremental part of a backup */ + SRV_OPERATION_RESTORE_DELTA }; /** Current mode of operation */ diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h index 410e46f9c68..1b86d2633bf 100644 --- a/storage/innobase/include/sync0policy.h +++ b/storage/innobase/include/sync0policy.h @@ -61,7 +61,7 @@ public: : latch_t(id) { - /* No op */ + ut_ad(id != LATCH_ID_NONE); } /** Set to locked state diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 7157b07e9d0..55aaf5032e8 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -91,7 +91,6 @@ extern mysql_pfs_key_t rw_lock_debug_mutex_key; # endif /* UNIV_DEBUG */ extern mysql_pfs_key_t rw_lock_list_mutex_key; extern mysql_pfs_key_t rw_lock_mutex_key; -extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key; extern mysql_pfs_key_t srv_innodb_monitor_mutex_key; extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key; extern mysql_pfs_key_t srv_monitor_file_mutex_key; diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h index bcbcf70bfc7..8d08416cccd 100644 --- a/storage/innobase/include/sync0types.h +++ b/storage/innobase/include/sync0types.h @@ -260,9 +260,9 @@ enum latch_level_t { SYNC_TREE_NODE, SYNC_TREE_NODE_FROM_HASH, SYNC_TREE_NODE_NEW, + SYNC_IBUF_PESS_INSERT_MUTEX, SYNC_INDEX_TREE, - SYNC_IBUF_PESS_INSERT_MUTEX, SYNC_IBUF_HEADER, SYNC_DICT_HEADER, SYNC_STATS_AUTO_RECALC, @@ -270,10 +270,10 @@ enum latch_level_t { SYNC_DICT, SYNC_FTS_CACHE, - SYNC_DICT_OPERATION, - SYNC_FILE_FORMAT_TAG, + SYNC_DICT_OPERATION, + SYNC_TRX_I_S_LAST_READ, SYNC_TRX_I_S_RWLOCK, @@ -335,7 +335,6 @@ enum latch_id_t { LATCH_ID_RTR_PATH_MUTEX, LATCH_ID_RW_LOCK_LIST, LATCH_ID_RW_LOCK_MUTEX, - LATCH_ID_SRV_DICT_TMPFILE, LATCH_ID_SRV_INNODB_MONITOR, LATCH_ID_SRV_MISC_TMPFILE, LATCH_ID_SRV_MONITOR_FILE, diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 3078aa8faf1..48c5133644c 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -203,9 +203,17 @@ struct trx_rseg_t { bool is_persistent() const { ut_ad(space == SRV_TMP_SPACE_ID - || space <= TRX_SYS_MAX_UNDO_SPACES); + || space == TRX_SYS_SPACE + || (srv_undo_space_id_start > 0 + && space >= srv_undo_space_id_start + && space <= srv_undo_space_id_start + + TRX_SYS_MAX_UNDO_SPACES)); ut_ad(space == SRV_TMP_SPACE_ID - || space <= srv_undo_tablespaces_active + || space == TRX_SYS_SPACE + || (srv_undo_space_id_start > 0 + && space >= srv_undo_space_id_start + && space <= srv_undo_space_id_start + + srv_undo_tablespaces_active) || !srv_was_started); return(space != SRV_TMP_SPACE_ID); } diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake index fe2d537c50e..d916d8b4160 100644 --- a/storage/innobase/innodb.cmake +++ b/storage/innobase/innodb.cmake @@ -156,9 +156,9 @@ IF(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) ENDIF() IF(NOT MSVC) - # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686 + # workaround for old gcc on x86, gcc atomic ops only work under -march=i686 IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND - CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3") + CMAKE_C_COMPILER_VERSION VERSION_LESS "4.4.0") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") ENDIF() diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index d892f22f967..0d0e84ab555 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -484,7 +484,6 @@ log_close(void) lsn_t checkpoint_age; ut_ad(log_mutex_own()); - ut_ad(!recv_no_log_write); lsn = log->lsn; @@ -1944,6 +1943,7 @@ loop: thread_name = "lock_wait_timeout_thread"; } else if (srv_buf_dump_thread_active) { thread_name = "buf_dump_thread"; + goto wait_suspend_loop; } else if (btr_defragment_thread_active) { thread_name = "btr_defragment_thread"; } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) { diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 0e0e0aeb357..6b3ef28a788 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1400,7 +1400,6 @@ parse_log: /* Allow anything in page_type when creating a page. */ ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr); break; - case MLOG_INIT_FILE_PAGE: case MLOG_INIT_FILE_PAGE2: /* Allow anything in page_type when creating a page. */ ptr = fsp_parse_init_file_page(ptr, end_ptr, block); @@ -1753,18 +1752,6 @@ recv_recover_page(bool just_read_in, buf_block_t* block) buf = ((byte*)(recv->data)) + sizeof(recv_data_t); } - if (recv->type == MLOG_INIT_FILE_PAGE) { - page_lsn = page_newest_lsn; - - memset(FIL_PAGE_LSN + page, 0, 8); - memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM - + page, 0, 8); - - if (page_zip) { - memset(FIL_PAGE_LSN + page_zip->data, 0, 8); - } - } - /* If per-table tablespace was truncated and there exist REDO records before truncate that are to be applied as part of recovery (checkpoint didn't happen since truncate was done) @@ -3619,9 +3606,6 @@ get_mlog_string(mlog_id_t type) return("MLOG_LSN"); #endif /* UNIV_LOG_LSN_DEBUG */ - case MLOG_INIT_FILE_PAGE: - return("MLOG_INIT_FILE_PAGE"); - case MLOG_WRITE_STRING: return("MLOG_WRITE_STRING"); diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 2acb190f7e4..837e60882e6 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -857,17 +857,29 @@ os_file_get_block_size( sizeof(disk_alignment), &tmp); - CloseHandle(volume_handle); - if (!result) { - os_file_handle_error_no_exit(volume, - "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)", FALSE); + if (GetLastError() == ERROR_INVALID_FUNCTION) { + // Don't report error, it is driver's fault, not ours or users. + // We handle this with fallback. Report wit info message, just once. + static bool write_info = true; + if (write_info) { + ib::info() << "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)" + << " unsupported on volume " << volume; + write_info = false; + } + } else { + os_file_handle_error_no_exit(volume, + "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)", FALSE); + } goto end; } fblock_size = disk_alignment.BytesPerPhysicalSector; end: + if (volume_handle != INVALID_HANDLE_VALUE) { + CloseHandle(volume_handle); + } #endif /* _WIN32 */ /* Currently we support file block size up to 4Kb */ diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index df91ecc7a9b..654f3ba286c 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -770,9 +770,7 @@ row_ins_foreign_trx_print( ulint n_trx_locks; ulint heap_size; - if (srv_read_only_mode) { - return; - } + ut_ad(!srv_read_only_mode); lock_mutex_enter(); n_rec_locks = lock_number_of_rows_locked(&trx->lock); @@ -1759,13 +1757,6 @@ row_ins_check_foreign_constraint( cmp = cmp_dtuple_rec(entry, rec, offsets); if (cmp == 0) { - - ulint lock_type; - - lock_type = skip_gap_lock - ? LOCK_REC_NOT_GAP - : LOCK_ORDINARY; - if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* In delete-marked records, DB_TRX_ID must @@ -1775,7 +1766,9 @@ row_ins_check_foreign_constraint( offsets)); err = row_ins_set_shared_rec_lock( - lock_type, block, + skip_gap_lock + ? LOCK_REC_NOT_GAP + : LOCK_ORDINARY, block, rec, check_index, offsets, thr); switch (err) { case DB_SUCCESS_LOCKED_REC: @@ -1857,23 +1850,21 @@ row_ins_check_foreign_constraint( } else { ut_a(cmp < 0); - err = DB_SUCCESS; - - if (!skip_gap_lock) { - err = row_ins_set_shared_rec_lock( + err = skip_gap_lock + ? DB_SUCCESS + : row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - } switch (err) { case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ case DB_SUCCESS: if (check_ref) { err = DB_NO_REFERENCED_ROW; row_ins_foreign_report_add_err( trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; } default: break; @@ -1921,19 +1912,11 @@ do_possible_lock_wait: thr->lock_state = QUE_THR_LOCK_NOLOCK; - DBUG_PRINT("to_be_dropped", - ("table: %s", check_table->name.m_name)); - if (check_table->to_be_dropped) { - /* The table is being dropped. We shall timeout - this operation */ - err = DB_LOCK_WAIT_TIMEOUT; - - goto exit_func; - } - + err = check_table->to_be_dropped + ? DB_LOCK_WAIT_TIMEOUT + : trx->error_state; } - exit_func: if (heap != NULL) { mem_heap_free(heap); diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index de1f35a876e..cba453ced24 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1989,6 +1989,8 @@ row_merge_read_clustered_index( row_ext_t* ext; page_cur_t* cur = btr_pcur_get_page_cur(&pcur); + mem_heap_empty(row_heap); + /* Do not continue if table pages are still encrypted */ if (!old_table->is_readable() || !new_table->is_readable()) { @@ -3616,7 +3618,16 @@ row_merge_insert_index_tuples( dtuple, tuple_heap); } +#ifdef UNIV_DEBUG + static const latch_level_t latches[] = { + SYNC_INDEX_TREE, /* index->lock */ + SYNC_LEVEL_VARYING /* btr_bulk->m_page_bulks */ + }; +#endif /* UNIV_DEBUG */ + ut_ad(dtuple_validate(dtuple)); + ut_ad(!sync_check_iterate(sync_allowed_latches(latches, + latches + 2))); error = btr_bulk->insert(dtuple); if (error != DB_SUCCESS) { diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index c205d818802..fb4cbe5731b 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3668,7 +3668,13 @@ row_drop_table_for_mysql( dict_stats_recalc_pool_del(table); dict_stats_defrag_pool_del(table, NULL); - btr_defragment_remove_table(table); + if (btr_defragment_thread_active) { + /* During fts_drop_orphaned_tables() in + recv_recovery_rollback_active() the + btr_defragment_mutex has not yet been + initialized by btr_defragment_init(). */ + btr_defragment_remove_table(table); + } /* Remove stats for this table and all of its indexes from the persistent storage if it exists and if there are stats for this diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index b9ee44873ec..585c72be30e 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2015, 2017, MariaDB Corporation. @@ -2779,28 +2779,14 @@ Stores a non-SQL-NULL field in the MySQL format. The counterpart of this function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. */ void row_sel_field_store_in_mysql_format_func( -/*=====================================*/ - byte* dest, /*!< in/out: buffer where to store; NOTE - that BLOBs are not in themselves - stored here: the caller must allocate - and copy the BLOB into buffer before, - and pass the pointer to the BLOB in - 'data' */ + byte* dest, const mysql_row_templ_t* templ, - /*!< in: MySQL column template. - Its following fields are referenced: - type, is_unsigned, mysql_col_len, - mbminlen, mbmaxlen */ #ifdef UNIV_DEBUG const dict_index_t* index, - /*!< in: InnoDB index */ ulint field_no, - /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ #endif /* UNIV_DEBUG */ - const byte* data, /*!< in: data to store */ - ulint len) /*!< in: length of the data */ + const byte* data, + ulint len) { byte* ptr; #ifdef UNIV_DEBUG diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 63f6c03187b..56aaff3c2aa 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -455,6 +455,25 @@ func_exit: return(err); } + +/** Determine if a FOREIGN KEY constraint needs to be processed. +@param[in] node query node +@param[in] trx transaction +@return whether the node cannot be ignored */ +static +bool +wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx) +{ + if (que_node_get_type(node->common.parent) != QUE_NODE_UPDATE + || !wsrep_on(trx->mysql_thd)) { + return false; + } + + const upd_cascade_t& nodes = *static_cast<const upd_node_t*>( + node->common.parent)->cascade_upd_nodes; + const upd_cascade_t::const_iterator end = nodes.end(); + return std::find(nodes.begin(), end, node) == end; +} #endif /* WITH_WSREP */ /*********************************************************************//** @@ -2414,29 +2433,18 @@ row_upd_sec_index_entry( row_ins_sec_index_entry() below */ if (!rec_get_deleted_flag( rec, dict_table_is_comp(index->table))) { - -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif err = btr_cur_del_mark_set_sec_rec( flags, btr_cur, TRUE, thr, &mtr); if (err != DB_SUCCESS) { break; } #ifdef WITH_WSREP - if (err == DB_SUCCESS && !referenced && - !(parent && que_node_get_type(parent) == - QUE_NODE_UPDATE && - (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(), - ((upd_node_t*)parent)->cascade_upd_nodes->end(), - node) == - ((upd_node_t*)parent)->cascade_upd_nodes->end())) && - foreign - ) { - ulint* offsets = - rec_get_offsets( - rec, index, NULL, ULINT_UNDEFINED, - &heap); + if (!referenced && foreign + && wsrep_must_process_fk(node, trx) + && !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { + ulint* offsets = rec_get_offsets( + rec, index, NULL, ULINT_UNDEFINED, + &heap); err = wsrep_row_upd_check_foreign_constraints( node, &pcur, index->table, @@ -2450,14 +2458,14 @@ row_upd_sec_index_entry( case DB_DEADLOCK: if (wsrep_debug) { ib::warn() << "WSREP: sec index FK check fail for deadlock" - << " index " << index->name() - << " table " << index->table->name.m_name; + << " index " << index->name + << " table " << index->table->name; } break; default: - ib::error() << "WSREP: referenced FK check fail: " << err - << " index " << index->name() - << " table " << index->table->name.m_name; + ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err) + << " index " << index->name + << " table " << index->table->name; break; } @@ -2651,9 +2659,6 @@ row_upd_clust_rec_by_insert( dberr_t err; rec_t* rec; ulint* offsets = NULL; -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif ut_ad(node); ut_ad(dict_index_is_clust(index)); @@ -2741,18 +2746,8 @@ check_fk: if (err != DB_SUCCESS) { goto err_exit; } - } #ifdef WITH_WSREP - if (!referenced && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(), - ((upd_node_t*)parent)->cascade_upd_nodes->end(), - node) == - ((upd_node_t*)parent)->cascade_upd_nodes->end())) && - foreign - ) { - err = wsrep_row_upd_check_foreign_constraints( - node, pcur, table, index, offsets, thr, mtr); + } else if (foreign && wsrep_must_process_fk(node, trx)) { switch (err) { case DB_SUCCESS: case DB_NO_REFERENCED_ROW: @@ -2761,14 +2756,14 @@ check_fk: case DB_DEADLOCK: if (wsrep_debug) { ib::warn() << "WSREP: sec index FK check fail for deadlock" - << " index " << index->name() - << " table " << index->table->name.m_name; + << " index " << index->name + << " table " << index->table->name; } break; default: - ib::error() << "WSREP: referenced FK check fail: " << err - << " index " << index->name() - << " table " << index->table->name.m_name; + ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err) + << " index " << index->name + << " table " << index->table->name; break; } @@ -2776,8 +2771,8 @@ check_fk: if (err != DB_SUCCESS) { goto err_exit; } - } #endif /* WITH_WSREP */ + } } mtr_commit(mtr); @@ -2959,9 +2954,7 @@ row_upd_del_mark_clust_rec( btr_cur_t* btr_cur; dberr_t err; rec_t* rec; -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif + trx_t* trx = thr_get_trx(thr); ut_ad(node); ut_ad(dict_index_is_clust(index)); ut_ad(node->is_delete); @@ -2972,7 +2965,7 @@ row_upd_del_mark_clust_rec( /* Store row because we have to build also the secondary index entries */ - row_upd_store_row(node, thr_get_trx(thr)->mysql_thd, + row_upd_store_row(node, trx->mysql_thd, thr->prebuilt ? thr->prebuilt->m_mysql_table : NULL); /* Mark the clustered index record deleted; we do not have to check @@ -2984,22 +2977,14 @@ row_upd_del_mark_clust_rec( btr_cur_get_block(btr_cur), rec, index, offsets, thr, node->row, mtr); - if (err == DB_SUCCESS && referenced) { + if (err != DB_SUCCESS) { + } else if (referenced) { /* NOTE that the following call loses the position of pcur ! */ err = row_upd_check_references_constraints( node, pcur, index->table, index, offsets, thr, mtr); - } #ifdef WITH_WSREP - if (err == DB_SUCCESS && !referenced && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(), - ((upd_node_t*)parent)->cascade_upd_nodes->end(), - node) == - ((upd_node_t*)parent)->cascade_upd_nodes->end())) && - thr_get_trx(thr) && - foreign - ) { + } else if (foreign && wsrep_must_process_fk(node, trx)) { err = wsrep_row_upd_check_foreign_constraints( node, pcur, index->table, index, offsets, thr, mtr); switch (err) { @@ -3010,19 +2995,19 @@ row_upd_del_mark_clust_rec( case DB_DEADLOCK: if (wsrep_debug) { ib::warn() << "WSREP: sec index FK check fail for deadlock" - << " index " << index->name() - << " table " << index->table->name.m_name; + << " index " << index->name + << " table " << index->table->name; } break; default: - ib::error() << "WSREP: referenced FK check fail: " << err - << " index " << index->name() - << " table " << index->table->name.m_name; + ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err) + << " index " << index->name + << " table " << index->table->name; break; } - } #endif /* WITH_WSREP */ + } mtr_commit(mtr); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 2894be6b12c..663487fc3a6 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -478,12 +478,6 @@ ib_mutex_t srv_monitor_file_mutex; /** Temporary file for innodb monitor output */ FILE* srv_monitor_file; -/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode. -This mutex has a very high rank; threads reserving it should not -be holding any InnoDB latches. */ -ib_mutex_t srv_dict_tmpfile_mutex; -/** Temporary file for output from the data dictionary */ -FILE* srv_dict_tmpfile; /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode. This mutex has a very low rank; threads reserving it should not acquire any further latches or sleep before releasing this one. */ @@ -1363,7 +1357,7 @@ srv_printf_innodb_monitor( "Total large memory allocated " ULINTPF "\n" "Dictionary memory allocated " ULINTPF "\n", os_total_large_mem_allocated, - dict_sys->size); + dict_sys_get_size()); buf_print_io(file); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 46a757be5be..d4922e33ef5 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -890,12 +890,30 @@ srv_undo_tablespaces_init(bool create_new_db) the system tablespace (0). If we are creating a new instance then we build the undo_tablespace_ids ourselves since they don't already exist. */ + n_undo_tablespaces = create_new_db + || srv_operation == SRV_OPERATION_BACKUP + || srv_operation == SRV_OPERATION_RESTORE_DELTA + ? srv_undo_tablespaces + : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids); + srv_undo_tablespaces_active = srv_undo_tablespaces; - if (!create_new_db && srv_operation == SRV_OPERATION_NORMAL) { - n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( - undo_tablespace_ids); - - srv_undo_tablespaces_active = n_undo_tablespaces; + switch (srv_operation) { + case SRV_OPERATION_RESTORE_DELTA: + case SRV_OPERATION_BACKUP: + /* MDEV-13561 FIXME: Determine srv_undo_space_id_start + from the undo001 file. */ + srv_undo_space_id_start = 1; + for (i = 0; i < n_undo_tablespaces; i++) { + undo_tablespace_ids[i] = i + srv_undo_space_id_start; + } + break; + case SRV_OPERATION_NORMAL: + if (create_new_db) { + break; + } + /* fall through */ + case SRV_OPERATION_RESTORE: + ut_ad(!create_new_db); /* Check if any of the UNDO tablespace needs fix-up because server crashed while truncate was active on UNDO tablespace.*/ @@ -929,14 +947,7 @@ srv_undo_tablespaces_init(bool create_new_db) undo_tablespace_ids[i]); } } - } else { - srv_undo_tablespaces_active = srv_undo_tablespaces; - n_undo_tablespaces = srv_undo_tablespaces; - - if (n_undo_tablespaces != 0) { - srv_undo_space_id_start = undo_tablespace_ids[0]; - prev_space_id = srv_undo_space_id_start - 1; - } + break; } /* Open all the undo tablespaces that are currently in use. If we @@ -1308,6 +1319,7 @@ srv_shutdown_all_bg_threads() switch (srv_operation) { case SRV_OPERATION_BACKUP: + case SRV_OPERATION_RESTORE_DELTA: break; case SRV_OPERATION_NORMAL: case SRV_OPERATION_RESTORE: @@ -1752,15 +1764,6 @@ innobase_start_or_create_for_mysql() } } - mutex_create(LATCH_ID_SRV_DICT_TMPFILE, - &srv_dict_tmpfile_mutex); - - srv_dict_tmpfile = os_file_create_tmpfile(NULL); - - if (!srv_dict_tmpfile && err == DB_SUCCESS) { - err = DB_ERROR; - } - mutex_create(LATCH_ID_SRV_MISC_TMPFILE, &srv_misc_tmpfile_mutex); @@ -2809,6 +2812,7 @@ innodb_shutdown() switch (srv_operation) { case SRV_OPERATION_BACKUP: case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_DELTA: fil_close_all_files(); break; case SRV_OPERATION_NORMAL: @@ -2834,11 +2838,6 @@ innodb_shutdown() } } - if (srv_dict_tmpfile) { - fclose(srv_dict_tmpfile); - srv_dict_tmpfile = 0; - } - if (srv_misc_tmpfile) { fclose(srv_misc_tmpfile); srv_misc_tmpfile = 0; @@ -2903,7 +2902,6 @@ innodb_shutdown() the temp files that the cover. */ if (!srv_read_only_mode) { mutex_free(&srv_monitor_file_mutex); - mutex_free(&srv_dict_tmpfile_mutex); mutex_free(&srv_misc_tmpfile_mutex); } diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc index d6f3ef6c986..c80ea6aef3e 100644 --- a/storage/innobase/sync/sync0debug.cc +++ b/storage/innobase/sync/sync0debug.cc @@ -1431,9 +1431,6 @@ sync_latch_meta_init() LATCH_ADD_MUTEX(RW_LOCK_MUTEX, SYNC_NO_ORDER_CHECK, rw_lock_mutex_key); - LATCH_ADD_MUTEX(SRV_DICT_TMPFILE, SYNC_DICT_OPERATION, - srv_dict_tmpfile_mutex_key); - LATCH_ADD_MUTEX(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK, srv_innodb_monitor_mutex_key); @@ -1518,11 +1515,12 @@ sync_latch_meta_init() buf_block_lock_key); #ifdef UNIV_DEBUG - LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_NO_ORDER_CHECK, + LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_LEVEL_VARYING, buf_block_debug_latch_key); #endif /* UNIV_DEBUG */ - LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT, dict_operation_lock_key); + LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT_OPERATION, + dict_operation_lock_key); LATCH_ADD_RWLOCK(CHECKPOINT, SYNC_NO_ORDER_CHECK, checkpoint_lock_key); diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index 099a56c5457..4be7162f631 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -78,7 +78,6 @@ mysql_pfs_key_t rtr_path_mutex_key; mysql_pfs_key_t rtr_ssn_mutex_key; mysql_pfs_key_t rw_lock_list_mutex_key; mysql_pfs_key_t rw_lock_mutex_key; -mysql_pfs_key_t srv_dict_tmpfile_mutex_key; mysql_pfs_key_t srv_innodb_monitor_mutex_key; mysql_pfs_key_t srv_misc_tmpfile_mutex_key; mysql_pfs_key_t srv_monitor_file_mutex_key; diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 2fe13ae7e9d..31e70a5aaa6 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -3091,7 +3091,7 @@ trx_set_rw_mode( ut_ad(!trx->in_rw_trx_list); ut_ad(!trx_is_autocommit_non_locking(trx)); - if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) { + if (high_level_read_only) { return; } |