summary | refs | log | tree | commit | diff
path: root/storage/innobase
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase')
-rw-r--r--storage/innobase/CMakeLists.txt1
-rw-r--r--storage/innobase/btr/btr0btr.cc17
-rw-r--r--storage/innobase/btr/btr0cur.cc6
-rw-r--r--storage/innobase/buf/buf0buf.cc10
-rw-r--r--storage/innobase/dict/dict0crea.cc1
-rw-r--r--storage/innobase/dict/dict0dict.cc70
-rw-r--r--storage/innobase/dict/dict0load.cc5
-rw-r--r--storage/innobase/dict/dict0stats.cc23
-rw-r--r--storage/innobase/fil/fil0crypt.cc123
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc143
-rw-r--r--storage/innobase/handler/ha_innodb.cc67
-rw-r--r--storage/innobase/handler/ha_innodb.h4
-rw-r--r--storage/innobase/handler/ha_innopart.cc4264
-rw-r--r--storage/innobase/handler/ha_innopart.h1315
-rw-r--r--storage/innobase/handler/handler0alter.cc27
-rw-r--r--storage/innobase/handler/handler0alter_innopart.cc307
-rw-r--r--storage/innobase/include/dict0dict.h10
-rw-r--r--storage/innobase/include/fsp0fsp.h31
-rw-r--r--storage/innobase/include/log0log.ic1
-rw-r--r--storage/innobase/include/mtr0types.h11
-rw-r--r--storage/innobase/include/page0zip.ic5
-rw-r--r--storage/innobase/include/rem0rec.h12
-rw-r--r--storage/innobase/include/row0sel.h14
-rw-r--r--storage/innobase/include/srv0srv.h10
-rw-r--r--storage/innobase/include/sync0policy.h2
-rw-r--r--storage/innobase/include/sync0sync.h1
-rw-r--r--storage/innobase/include/sync0types.h7
-rw-r--r--storage/innobase/include/trx0rseg.h12
-rw-r--r--storage/innobase/innodb.cmake4
-rw-r--r--storage/innobase/log/log0log.cc2
-rw-r--r--storage/innobase/log/log0recv.cc16
-rw-r--r--storage/innobase/os/os0file.cc20
-rw-r--r--storage/innobase/row/row0ins.cc41
-rw-r--r--storage/innobase/row/row0merge.cc11
-rw-r--r--storage/innobase/row/row0mysql.cc8
-rw-r--r--storage/innobase/row/row0sel.cc22
-rw-r--r--storage/innobase/row/row0upd.cc111
-rw-r--r--storage/innobase/srv/srv0srv.cc8
-rw-r--r--storage/innobase/srv/srv0start.cc54
-rw-r--r--storage/innobase/sync/sync0debug.cc8
-rw-r--r--storage/innobase/sync/sync0sync.cc1
-rw-r--r--storage/innobase/trx/trx0trx.cc2
42 files changed, 473 insertions, 6334 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index c4da4b179be..91c5adc4f07 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -74,7 +74,6 @@ SET(INNOBASE_SOURCES
gis/gis0sea.cc
fts/fts0plugin.cc
handler/ha_innodb.cc
-# handler/ha_innopart.cc
handler/handler0alter.cc
handler/i_s.cc
ibuf/ibuf0ibuf.cc
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 64a14d0e959..815324825bd 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -4846,7 +4846,6 @@ btr_validate_level(
bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
- fseg_header_t* seg;
ulint* offsets = NULL;
ulint* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
@@ -4870,7 +4869,6 @@ btr_validate_level(
block = btr_root_block_get(index, RW_SX_LATCH, &mtr);
page = buf_block_get_frame(block);
- seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
#ifdef UNIV_DEBUG
if (dict_index_is_spatial(index)) {
@@ -4879,7 +4877,7 @@ btr_validate_level(
}
#endif
- const fil_space_t* space = fil_space_get(index->space);
+ fil_space_t* space = fil_space_get(index->space);
const page_size_t table_page_size(
dict_table_page_size(index->table));
const page_size_t space_page_size(space->flags);
@@ -4897,9 +4895,7 @@ btr_validate_level(
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
- if (fseg_page_is_free(seg,
- block->page.id.space(),
- block->page.id.page_no())) {
+ if (fseg_page_is_free(space, block->page.id.page_no())) {
btr_validate_report1(index, level, block);
@@ -4959,11 +4955,6 @@ btr_validate_level(
/* Now we are on the desired level. Loop through the pages on that
level. */
- if (level == 0) {
- /* Leaf pages are managed in their own file segment. */
- seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
- }
-
loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
@@ -4982,9 +4973,7 @@ loop:
ut_a(block->page.id.space() == index->space);
- if (fseg_page_is_free(seg,
- block->page.id.space(),
- block->page.id.page_no())) {
+ if (fseg_page_is_free(space, block->page.id.page_no())) {
btr_validate_report1(index, level, block);
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 7bdd03c8a9e..e740370d2f0 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3901,8 +3901,10 @@ any_extern:
}
/* We limit max record size to 16k even for 64k page size. */
- if (new_rec_size >= REC_MAX_DATA_SIZE) {
- err = DB_OVERFLOW;
+ if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE ||
+ (!dict_table_is_comp(index->table)
+ && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) {
+ err = DB_OVERFLOW;
goto func_exit;
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index b57fba75869..ad93238410e 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1477,17 +1477,15 @@ buf_block_init(
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
- ut_d(rw_lock_create(
- PFS_NOT_INSTRUMENTED,
- &block->debug_latch, SYNC_NO_ORDER_CHECK));
+ ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch,
+ SYNC_LEVEL_VARYING));
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
- ut_d(rw_lock_create(
- buf_block_debug_latch_key,
- &block->debug_latch, SYNC_NO_ORDER_CHECK));
+ ut_d(rw_lock_create(buf_block_debug_latch_key,
+ &block->debug_latch, SYNC_LEVEL_VARYING));
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 8db1878b4ef..1c28a39a62e 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -104,6 +104,7 @@ dict_create_sys_tables_tuple(
| ((table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
+
/* 5: TYPE (table flags) -----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__TYPE);
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 943a37ef4e0..55429b2680f 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1375,9 +1375,6 @@ dict_table_add_to_cache(
}
ut_ad(dict_lru_validate());
-
- dict_sys->size += mem_heap_get_size(table->heap)
- + strlen(table->name.m_name) + 1;
}
/**********************************************************************//**
@@ -1756,9 +1753,6 @@ dict_table_rename_in_cache(
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
table);
- dict_sys->size += strlen(new_name) - strlen(old_name);
- ut_a(dict_sys->size > 0);
-
/* Update the table_name field in indexes */
for (index = dict_table_get_first_index(table);
index != NULL;
@@ -2049,7 +2043,6 @@ dict_table_remove_from_cache_low(
{
dict_foreign_t* foreign;
dict_index_t* index;
- lint size;
ut_ad(table);
ut_ad(dict_lru_validate());
@@ -2130,12 +2123,6 @@ dict_table_remove_from_cache_low(
UT_DELETE(table->vc_templ);
}
- size = mem_heap_get_size(table->heap) + strlen(table->name.m_name) + 1;
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
dict_mem_table_free(table);
}
@@ -2330,9 +2317,10 @@ dict_index_too_big_for_tree(
page(16k for 64k page size). No additional sparse
page directory entry will be generated for the first
few user records. */
- page_rec_max = srv_page_size == UNIV_PAGE_SIZE_MAX
- ? REC_MAX_DATA_SIZE - 1
- : page_get_free_space_of_empty(comp) / 2;
+ page_rec_max = (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
+ ? page_get_free_space_of_empty(comp) / 2
+ : REDUNDANT_REC_MAX_DATA_SIZE;
+
page_ptr_max = page_rec_max;
/* Each record has a header. */
rec_max_size = comp
@@ -2610,8 +2598,6 @@ dict_index_add_to_cache_w_vcol(
rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
SYNC_INDEX_TREE);
- dict_sys->size += mem_heap_get_size(new_index->heap);
-
dict_mem_index_free(index);
return(DB_SUCCESS);
@@ -2628,8 +2614,6 @@ dict_index_remove_from_cache_low(
ibool lru_evict) /*!< in: TRUE if index being evicted
to make room in the table LRU list */
{
- lint size;
-
ut_ad(table && index);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -2730,12 +2714,6 @@ dict_index_remove_from_cache_low(
}
}
- size = mem_heap_get_size(index->heap);
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
dict_mem_index_free(index);
}
@@ -4623,7 +4601,6 @@ dict_create_foreign_constraints_low(
if (!success) {
ib::error() << "Could not find the table " << create_name << " being" << operation << " near to "
<< orig;
- mutex_exit(&dict_foreign_err_mutex);
ib_push_warning(trx, DB_ERROR,
"%s table %s with foreign key constraint"
@@ -5301,6 +5278,7 @@ try_find_index:
" failed. You have more than one on delete or on update clause"
" in '%s' near '%s'.\n",
operation, create_name, start_of_latest_foreign, start_of_latest_set);
+ mutex_exit(&dict_foreign_err_mutex);
ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
"%s table %s with foreign key constraint"
@@ -5309,7 +5287,6 @@ try_find_index:
operation, create_name, start_of_latest_foreign, start_of_latest_set);
dict_foreign_free(foreign);
- mutex_exit(&dict_foreign_err_mutex);
return(DB_CANNOT_ADD_CONSTRAINT);
}
@@ -6872,8 +6849,6 @@ dict_close(void)
mutex_free(&dict_foreign_err_mutex);
- ut_ad(dict_sys->size == 0);
-
ut_free(dict_sys);
dict_sys = NULL;
@@ -7246,6 +7221,41 @@ dict_tf_to_row_format_string(
return(0);
}
+/** Calculate the used memory occupied by the data dictionary
+table and index objects.
+@return number of bytes occupied. */
+UNIV_INTERN
+ulint
+dict_sys_get_size()
+{
+ ulint size = 0;
+
+ ut_ad(dict_sys);
+
+ mutex_enter(&dict_sys->mutex);
+
+ for(ulint i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
+ dict_table_t* table;
+
+ for (table = static_cast<dict_table_t*>(HASH_GET_FIRST(dict_sys->table_hash,i));
+ table != NULL;
+ table = static_cast<dict_table_t*>(HASH_GET_NEXT(name_hash, table))) {
+ dict_index_t* index;
+ size += mem_heap_get_size(table->heap) + strlen(table->name.m_name) +1;
+
+ for(index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ size += mem_heap_get_size(index->heap);
+ }
+ }
+ }
+
+ mutex_exit(&dict_sys->mutex);
+
+ return (size);
+}
+
/** Look for any dictionary objects that are found in the given tablespace.
@param[in] space_id Tablespace ID to search for.
@return true if tablespace is empty. */
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 55d84bf17df..6193a8f66f5 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -3529,17 +3529,12 @@ dict_load_foreign(
here. The child table will be loaded later, along with its
foreign key constraint. */
- lint old_size = mem_heap_get_size(ref_table->heap);
-
ut_a(ref_table != NULL);
fk_tables.push_back(
mem_heap_strdupl(ref_table->heap,
foreign->foreign_table_name_lookup,
foreign_table_name_len));
- lint new_size = mem_heap_get_size(ref_table->heap);
- dict_sys->size += new_size - old_size;
-
dict_foreign_remove_from_cache(foreign);
DBUG_RETURN(DB_SUCCESS);
}
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 9350b5d400d..177a16a2b37 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1335,16 +1335,6 @@ dict_stats_analyze_index_level(
mem_heap_free(heap);
}
-/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
-enum page_scan_method_t {
- /** scan the records on the given page, counting the number
- of distinct ones; @see srv_stats_include_delete_marked */
- COUNT_ALL_NON_BORING,
- /** quit on the first record that differs from its right neighbor */
- QUIT_ON_FIRST_NON_BORING
-};
-/* @} */
-
/** Scan a page, reading records from left to right and counting the number
of distinct records (looking only at the first n_prefix
columns) and the number of external pages pointed by records from this page.
@@ -1361,7 +1351,7 @@ be big enough)
@param[in] index index of the page
@param[in] page the page to scan
@param[in] n_prefix look at the first n_prefix columns
-@param[in] scan_method scan to the end of the page or not
+@param[in] is_leaf whether this is the leaf page
@param[out] n_diff number of distinct records encountered
@param[out] n_external_pages if this is non-NULL then it will be set
to the number of externally stored pages which were encountered
@@ -1376,7 +1366,7 @@ dict_stats_scan_page(
const dict_index_t* index,
const page_t* page,
ulint n_prefix,
- page_scan_method_t scan_method,
+ bool is_leaf,
ib_uint64_t* n_diff,
ib_uint64_t* n_external_pages)
{
@@ -1388,8 +1378,9 @@ dict_stats_scan_page(
Because offsets1,offsets2 should be big enough,
this memory heap should never be used. */
mem_heap_t* heap = NULL;
+ ut_ad(is_leaf == page_is_leaf(page));
const rec_t* (*get_next)(const rec_t*)
- = srv_stats_include_delete_marked
+ = !is_leaf || srv_stats_include_delete_marked
? page_rec_get_next_const
: page_rec_get_next_non_del_marked;
@@ -1440,7 +1431,7 @@ dict_stats_scan_page(
(*n_diff)++;
- if (scan_method == QUIT_ON_FIRST_NON_BORING) {
+ if (!is_leaf) {
break;
}
}
@@ -1566,7 +1557,7 @@ dict_stats_analyze_index_below_cur(
/* search for the first non-boring record on the page */
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- QUIT_ON_FIRST_NON_BORING, n_diff, NULL);
+ false, n_diff, NULL);
/* pages on level > 0 are not allowed to be empty */
ut_a(offsets_rec != NULL);
@@ -1611,7 +1602,7 @@ dict_stats_analyze_index_below_cur(
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- COUNT_ALL_NON_BORING, n_diff,
+ true, n_diff,
n_external_pages);
#if 0
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index 6e431a6ee0f..e1b5bcbc325 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -93,13 +93,20 @@ static ib_mutex_t crypt_stat_mutex;
extern my_bool srv_background_scrub_data_uncompressed;
extern my_bool srv_background_scrub_data_compressed;
+/***********************************************************************
+Check if a key needs rotation given a key_state
+@param[in] encrypt_mode Encryption mode
+@param[in] key_version Current key version
+@param[in] latest_key_version Latest key version
+@param[in] rotate_key_age when to rotate
+@return true if key needs rotation, false if not */
static bool
fil_crypt_needs_rotation(
- fil_encryption_t encrypt_mode, /*!< in: Encryption
- mode */
- uint key_version, /*!< in: Key version */
- uint latest_key_version, /*!< in: Latest key version */
- uint rotate_key_age); /*!< in: When to rotate */
+ fil_encryption_t encrypt_mode,
+ uint key_version,
+ uint latest_key_version,
+ uint rotate_key_age)
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************
Init space crypt */
@@ -326,10 +333,17 @@ fil_space_destroy_crypt_data(
fil_space_crypt_t **crypt_data)
{
if (crypt_data != NULL && (*crypt_data) != NULL) {
- mutex_enter(&fil_crypt_threads_mutex);
- fil_space_crypt_t* c = *crypt_data;
- *crypt_data = NULL;
- mutex_exit(&fil_crypt_threads_mutex);
+ fil_space_crypt_t* c;
+ if (UNIV_LIKELY(fil_crypt_threads_inited)) {
+ mutex_enter(&fil_crypt_threads_mutex);
+ c = *crypt_data;
+ *crypt_data = NULL;
+ mutex_exit(&fil_crypt_threads_mutex);
+ } else {
+ ut_ad(srv_read_only_mode || !srv_was_started);
+ c = *crypt_data;
+ *crypt_data = NULL;
+ }
if (c) {
c->~fil_space_crypt_t();
ut_free(c);
@@ -1582,20 +1596,6 @@ fil_crypt_find_page_to_rotate(
return found;
}
-/***********************************************************************
-Check if a page is uninitialized (doesn't need to be rotated)
-@param[in] frame Page to check
-@param[in] page_size Page size
-@return true if page is uninitialized, false if not. */
-static inline
-bool
-fil_crypt_is_page_uninitialized(
- const byte *frame,
- const page_size_t& page_size)
-{
- return (buf_page_is_zeroes(frame, page_size));
-}
-
#define fil_crypt_get_page_throttle(state,offset,mtr,sleeptime_ms) \
fil_crypt_get_page_throttle_func(state, offset, mtr, \
sleeptime_ms, __FILE__, __LINE__)
@@ -1709,7 +1709,7 @@ btr_scrub_get_block_and_allocation_status(
mtr_start(&local_mtr);
- *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ?
+ *allocation_status = fseg_page_is_free(space, offset) ?
BTR_SCRUB_PAGE_FREE :
BTR_SCRUB_PAGE_ALLOCATED;
@@ -1756,9 +1756,9 @@ fil_crypt_rotate_page(
ulint offset = state->offset;
ulint sleeptime_ms = 0;
fil_space_crypt_t *crypt_data = space->crypt_data;
- const page_size_t page_size = page_size_t(space->flags);
ut_ad(space->n_pending_ops > 0);
+ ut_ad(offset > 0);
/* In fil_crypt_thread where key rotation is done we have
acquired space and checked that this space is not yet
@@ -1773,44 +1773,55 @@ fil_crypt_rotate_page(
return;
}
+ ut_d(const bool was_free = fseg_page_is_free(space, offset));
+
mtr_t mtr;
mtr.start();
if (buf_block_t* block = fil_crypt_get_page_throttle(state,
offset, &mtr,
&sleeptime_ms)) {
- mtr.set_named_space(space);
-
bool modified = false;
int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
lsn_t block_lsn = block->page.newest_modification;
byte* frame = buf_block_get_frame(block);
uint kv = mach_read_from_4(frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- /* check if tablespace is closing after reading page */
- if (!space->is_stopping()) {
-
- if (kv == 0 &&
- fil_crypt_is_page_uninitialized(frame, page_size)) {
- ;
- } else if (fil_crypt_needs_rotation(
- crypt_data->encryption,
- kv, key_state->key_version,
- key_state->rotate_key_age)) {
-
- modified = true;
-
- /* force rotation by dummy updating page */
- mlog_write_ulint(frame +
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- space_id, MLOG_4BYTES, &mtr);
-
- /* statistics */
- state->crypt_stat.pages_modified++;
- } else {
- if (crypt_data->is_encrypted()) {
- if (kv < state->min_key_version_found) {
- state->min_key_version_found = kv;
- }
+ if (space->is_stopping()) {
+ /* The tablespace is closing (in DROP TABLE or
+ TRUNCATE TABLE or similar): avoid further access */
+ } else if (!*reinterpret_cast<uint32_t*>(FIL_PAGE_OFFSET
+ + frame)) {
+ /* It looks like this page was never
+ allocated. Because key rotation is accessing
+ pages in a pattern that is unlike the normal
+ B-tree and undo log access pattern, we cannot
+ invoke fseg_page_is_free() here, because that
+ could result in a deadlock. If we invoked
+ fseg_page_is_free() and released the
+ tablespace latch before acquiring block->lock,
+ then the fseg_page_is_free() information
+ could be stale already. */
+ ut_ad(was_free);
+ ut_ad(kv == 0);
+ ut_ad(page_get_space_id(frame) == 0);
+ } else if (fil_crypt_needs_rotation(
+ crypt_data->encryption,
+ kv, key_state->key_version,
+ key_state->rotate_key_age)) {
+
+ mtr.set_named_space(space);
+ modified = true;
+
+ /* force rotation by dummy updating page */
+ mlog_write_ulint(frame + FIL_PAGE_SPACE_ID,
+ space_id, MLOG_4BYTES, &mtr);
+
+ /* statistics */
+ state->crypt_stat.pages_modified++;
+ } else {
+ if (crypt_data->is_encrypted()) {
+ if (kv < state->min_key_version_found) {
+ state->min_key_version_found = kv;
}
}
@@ -1920,7 +1931,8 @@ fil_crypt_rotate_pages(
rotate_thread_t* state)
{
ulint space = state->space->id;
- ulint end = state->offset + state->batch;
+ ulint end = std::min(state->offset + state->batch,
+ state->space->free_limit);
ut_ad(state->space->n_pending_ops > 0);
@@ -2375,7 +2387,10 @@ fil_space_crypt_close_tablespace(
ib::warn() << "Waited "
<< now - start
<< " seconds to drop space: "
- << space->name << ".";
+ << space->name << " ("
+ << space->id << ") active threads "
+ << cnt << "flushing="
+ << flushing << ".";
last = now;
}
}
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 33b237bd488..77b28d2c01b 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -436,7 +436,8 @@ xdes_get_descriptor_with_space_hdr(
&& (init_space
|| space->purpose == FIL_TYPE_TEMPORARY
|| (srv_startup_is_before_trx_rollback_phase
- && space->id <= srv_undo_tablespaces))));
+ && (space->id == TRX_SYS_SPACE
+ || srv_is_undo_tablespace(space->id))))));
ut_ad(size == space->size_in_header);
if ((offset >= size) || (offset >= limit)) {
@@ -506,6 +507,51 @@ xdes_get_descriptor(
sp_header, space, offset, mtr));
}
+/** Get the extent descriptor of a page.
+The page where the extent descriptor resides is x-locked. If the page
+offset is equal to the free limit of the space, we will add new
+extents from above the free limit to the space free list, if not free
+limit == space size. This adding is necessary to make the descriptor
+defined, as they are uninitialized above the free limit.
+@param[in] space tablespace
+@param[in] page descriptor page offset
+@param[in] offset page offset
+@param[in] page_size page size
+@param[in,out] mtr mini-transaction
+@return the extent descriptor
+@retval NULL if the descriptor is not available */
+MY_ATTRIBUTE((warn_unused_result))
+static
+const xdes_t*
+xdes_get_descriptor_const(
+ const fil_space_t* space,
+ page_no_t page,
+ page_no_t offset,
+ const page_size_t& page_size,
+ mtr_t* mtr)
+{
+ ut_ad(mtr_memo_contains(mtr, &space->latch, MTR_MEMO_S_LOCK));
+ ut_ad(offset < space->free_limit);
+ ut_ad(offset < space->size_in_header);
+
+ if (buf_block_t* block = buf_page_get(page_id_t(space->id, page),
+ page_size, RW_S_LATCH, mtr)) {
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ ut_ad(page != 0 || space->free_limit == mach_read_from_4(
+ FSP_FREE_LIMIT + FSP_HEADER_OFFSET
+ + block->frame));
+ ut_ad(page != 0 || space->size_in_header == mach_read_from_4(
+ FSP_SIZE + FSP_HEADER_OFFSET
+ + block->frame));
+
+ return(block->frame + XDES_ARR_OFFSET + XDES_SIZE
+ * xdes_calc_descriptor_index(page_size, offset));
+ }
+
+ return(NULL);
+}
+
/** Get a pointer to the extent descriptor. The page where the
extent descriptor resides is x-locked.
@param[in] space tablespace
@@ -611,25 +657,31 @@ fsp_space_modify_check(
#endif /* UNIV_DEBUG */
/** Initialize a file page.
-@param[in] space tablespace
@param[in,out] block file page
@param[in,out] mtr mini-transaction */
-MY_ATTRIBUTE((nonnull))
static
void
-fsp_init_file_page(
- const fil_space_t* space MY_ATTRIBUTE((unused)),
- buf_block_t* block,
- mtr_t* mtr)
+fsp_init_file_page(buf_block_t* block, mtr_t* mtr)
{
- ut_d(fsp_space_modify_check(space, mtr));
- ut_ad(space->id == block->page.id.space());
fsp_init_file_page_low(block);
mlog_write_initial_log_record(buf_block_get_frame(block),
MLOG_INIT_FILE_PAGE2, mtr);
}
+#ifdef UNIV_DEBUG
+static
+void
+fsp_init_file_page(const fil_space_t* space, buf_block_t* block, mtr_t* mtr)
+{
+ ut_d(fsp_space_modify_check(space, mtr));
+ ut_ad(space->id == block->page.id.space());
+ fsp_init_file_page(block, mtr);
+}
+#else /* UNIV_DEBUG */
+# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
+#endif
+
/***********************************************************//**
Parses a redo log record of a file page init.
@return end of log record or NULL */
@@ -3149,39 +3201,31 @@ fseg_free_page_func(
DBUG_VOID_RETURN;
}
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
+/** Determine whether a page is free.
+@param[in,out] space tablespace
+@param[in] page page number
+@return whether the page is marked as free */
bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space_id, /*!< in: space id */
- ulint page) /*!< in: page offset */
+fseg_page_is_free(fil_space_t* space, unsigned page)
{
+ bool is_free;
mtr_t mtr;
- ibool is_free;
- xdes_t* descr;
- fseg_inode_t* seg_inode;
-
- mtr_start(&mtr);
- const fil_space_t* space = mtr_x_lock_space(space_id, &mtr);
- const page_size_t page_size(space->flags);
-
- seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr);
-
- ut_a(seg_inode);
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- descr = xdes_get_descriptor(space, page, page_size, &mtr);
- ut_a(descr);
+ page_size_t page_size(space->flags);
+ page_no_t dpage = xdes_calc_descriptor_page(page_size, page);
- is_free = xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
+ mtr.start();
+ mtr_s_lock(&space->latch, &mtr);
- mtr_commit(&mtr);
+ if (page >= space->free_limit || page >= space->size_in_header) {
+ is_free = true;
+ } else if (const xdes_t* descr = xdes_get_descriptor_const(
+ space, dpage, page, page_size, &mtr)) {
+ is_free = xdes_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE);
+ } else {
+ is_free = true;
+ }
+ mtr.commit();
return(is_free);
}
@@ -3563,28 +3607,3 @@ fseg_header::to_stream(std::ostream& out) const
return(out);
}
#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space_id, /*!< in: space id */
- ulint page_no, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- unsigned line)
-{
- ut_ad(mtr);
-
- fil_space_t* space = mtr_x_lock_space(space_id, mtr);
- const page_size_t page_size(space->flags);
-
- xdes_t* descr = xdes_get_descriptor(space, page_no, page_size, mtr);
- ut_a(descr);
-
- return xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr);
-}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index faa00407397..02225d9f49f 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -351,10 +351,11 @@ thd_destructor_proxy(void *)
mysql_mutex_unlock(&thd_destructor_mutex);
srv_running = NULL;
- if (srv_fast_shutdown == 0) {
- while (trx_sys_any_active_transactions()) {
- os_thread_sleep(1000);
- }
+ while (srv_fast_shutdown == 0 &&
+ (trx_sys_any_active_transactions() ||
+ (uint)thread_count > srv_n_purge_threads + 1)) {
+ thd_proc_info(thd, "InnoDB slow shutdown wait");
+ os_thread_sleep(1000);
}
/* Some background threads might generate undo pages that will
@@ -631,7 +632,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_DEBUG */
PSI_KEY(rw_lock_list_mutex),
PSI_KEY(rw_lock_mutex),
- PSI_KEY(srv_dict_tmpfile_mutex),
PSI_KEY(srv_innodb_monitor_mutex),
PSI_KEY(srv_misc_tmpfile_mutex),
PSI_KEY(srv_monitor_file_mutex),
@@ -723,6 +723,7 @@ static PSI_file_info all_innodb_files[] = {
static void innodb_remember_check_sysvar_funcs();
mysql_var_check_func check_sysvar_enum;
+mysql_var_check_func check_sysvar_int;
// should page compression be used by default for new tables
static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
@@ -1745,8 +1746,9 @@ innobase_reset_background_thd(MYSQL_THD thd)
ut_ad(THDVAR(thd, background_thread));
/* background purge thread */
+ const char *proc_info= thd_proc_info(thd, "reset");
reset_thd(thd);
- thd_proc_info(thd, "");
+ thd_proc_info(thd, proc_info);
}
@@ -2164,15 +2166,21 @@ convert_error_code_to_mysql(
locally for BLOB fields. Refer to dict_table_get_format().
We limit max record size to 16k for 64k page size. */
bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A);
+ bool comp = !!(flags & DICT_TF_COMPACT);
+ ulint free_space = page_get_free_space_of_empty(comp) / 2;
+
+ if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ REDUNDANT_REC_MAX_DATA_SIZE)) {
+ free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ REDUNDANT_REC_MAX_DATA_SIZE) - 1;
+ }
+
my_printf_error(ER_TOO_BIG_ROWSIZE,
- "Row size too large (> %lu). Changing some columns"
- " to TEXT or BLOB %smay help. In current row"
- " format, BLOB prefix of %d bytes is stored inline.",
+ "Row size too large (> " ULINTPF "). Changing some columns "
+ "to TEXT or BLOB %smay help. In current row "
+ "format, BLOB prefix of %d bytes is stored inline.",
MYF(0),
- srv_page_size == UNIV_PAGE_SIZE_MAX
- ? REC_MAX_DATA_SIZE - 1
- : page_get_free_space_of_empty(flags &
- DICT_TF_COMPACT) / 2,
+ free_space,
prefix
? "or using ROW_FORMAT=DYNAMIC or"
" ROW_FORMAT=COMPRESSED "
@@ -18302,6 +18310,34 @@ innodb_file_format_name_validate(
return(1);
}
+/*************************************************************//**
+Don't allow to set innodb_fast_shutdown=0 if purge threads are
+already down.
+@return 0 if innodb_fast_shutdown can be set */
+static
+int
+fast_shutdown_validate(
+/*=============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ if (check_sysvar_int(thd, var, save, value)) {
+ return(1);
+ }
+
+ uint new_val = *reinterpret_cast<uint*>(save);
+
+ if (srv_fast_shutdown && !new_val && !srv_running) {
+ return(1);
+ }
+
+ return(0);
+}
+
/****************************************************************//**
Update the system variable innodb_file_format using the "saved"
value. This function is registered as a callback with MySQL. */
@@ -20644,7 +20680,7 @@ static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
"Speeds up the shutdown process of the InnoDB storage engine. Possible"
" values are 0, 1 (faster) or 2 (fastest - crash-like).",
- NULL, NULL, 1, 0, 2, 0);
+ fast_shutdown_validate, NULL, 1, 0, 2, 0);
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
PLUGIN_VAR_NOCMDARG,
@@ -22940,6 +22976,9 @@ static void innodb_remember_check_sysvar_funcs()
/* remember build-in sysvar check functions */
ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
+
+ ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
+ check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
}
/********************************************************************//**
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 82ba8bab6e6..d7f5d36a680 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -170,6 +170,10 @@ public:
int index_last(uchar * buf);
+ /* Copy a cached MySQL row. If requested, also avoids
+ overwriting non-read columns. */
+ void copy_cached_row(uchar *to_rec, const uchar *from_rec,
+ uint rec_length);
int rnd_init(bool scan);
int rnd_end();
diff --git a/storage/innobase/handler/ha_innopart.cc b/storage/innobase/handler/ha_innopart.cc
deleted file mode 100644
index fb6f4b89a41..00000000000
--- a/storage/innobase/handler/ha_innopart.cc
+++ /dev/null
@@ -1,4264 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/** @file ha_innopart.cc
-Code for native partitioning in InnoDB.
-
-Created Nov 22, 2013 Mattias Jonsson */
-
-#include "univ.i"
-
-/* Include necessary SQL headers */
-#include <debug_sync.h>
-#include <log.h>
-#include <strfunc.h>
-#include <sql_acl.h>
-#include <sql_class.h>
-#include <sql_show.h>
-#include <sql_table.h>
-#include <my_check_opt.h>
-
-/* Include necessary InnoDB headers */
-#include "btr0sea.h"
-#include "dict0dict.h"
-#include "dict0stats.h"
-#include "lock0lock.h"
-#include "row0import.h"
-#include "row0merge.h"
-#include "row0mysql.h"
-#include "row0quiesce.h"
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "fsp0sysspace.h"
-#include "ut0ut.h"
-
-#include "ha_innodb.h"
-#include "ha_innopart.h"
-#include "partition_info.h"
-#include "key.h"
-
-#define INSIDE_HA_INNOPART_CC
-
-/* To be backwards compatible we also fold partition separator on windows. */
-#ifdef _WIN32
-static const char* part_sep = "#p#";
-static const char* sub_sep = "#sp#";
-#else
-static const char* part_sep = "#P#";
-static const char* sub_sep = "#SP#";
-#endif /* _WIN32 */
-
-/* Partition separator for *nix platforms */
-const char* part_sep_nix = "#P#";
-const char* sub_sep_nix = "#SP#";
-
-extern char* innobase_file_format_max;
-
-Ha_innopart_share::Ha_innopart_share(
- TABLE_SHARE* table_share)
- :
- Partition_share(),
- m_table_parts(),
- m_index_mapping(),
- m_tot_parts(),
- m_index_count(),
- m_ref_count(),
- m_table_share(table_share)
-{}
-
-Ha_innopart_share::~Ha_innopart_share()
-{
- ut_ad(m_ref_count == 0);
- if (m_table_parts != NULL) {
- ut_free(m_table_parts);
- m_table_parts = NULL;
- }
- if (m_index_mapping != NULL) {
- ut_free(m_index_mapping);
- m_index_mapping = NULL;
- }
-}
-
-/** Fold to lower case if windows or lower_case_table_names == 1.
-@param[in,out] s String to fold.*/
-void
-Ha_innopart_share::partition_name_casedn_str(
- char* s)
-{
-#ifdef _WIN32
- innobase_casedn_str(s);
-#endif
-}
-
-/** Translate and append partition name.
-@param[out] to String to write in filesystem charset
-@param[in] from Name in system charset
-@param[in] sep Separator
-@param[in] len Max length of to buffer
-@return length of written string. */
-size_t
-Ha_innopart_share::append_sep_and_name(
- char* to,
- const char* from,
- const char* sep,
- size_t len)
-{
- size_t ret;
- size_t sep_len = strlen(sep);
-
- ut_ad(len > sep_len + strlen(from));
- ut_ad(to != NULL);
- ut_ad(from != NULL);
- ut_ad(from[0] != '\0');
- memcpy(to, sep, sep_len);
-
- ret = tablename_to_filename(from, to + sep_len,
- len - sep_len);
-
- /* Don't convert to lower case for nix style name. */
- if (strcmp(sep, part_sep_nix) != 0
- && strcmp(sep, sub_sep_nix) != 0) {
-
- partition_name_casedn_str(to);
- }
-
- return(ret + sep_len);
-}
-
-/** Copy a cached MySQL row.
-If requested, also avoids overwriting non-read columns.
-@param[out] buf Row in MySQL format.
-@param[in] cached_row Which row to copy. */
-inline
-void
-ha_innopart::copy_cached_row(
- uchar* buf,
- const uchar* cached_row)
-{
- if (m_prebuilt->keep_other_fields_on_keyread) {
- row_sel_copy_cached_fields_for_mysql(buf, cached_row,
- m_prebuilt);
- } else {
- memcpy(buf, cached_row, m_rec_length);
- }
-}
-
-/** Open one partition.
-@param[in] part_id Partition id to open.
-@param[in] partition_name Name of internal innodb table to open.
-@return false on success else true. */
-bool
-Ha_innopart_share::open_one_table_part(
- uint part_id,
- const char* partition_name)
-{
- char norm_name[FN_REFLEN];
-
- normalize_table_name(norm_name, partition_name);
- m_table_parts[part_id] =
- ha_innobase::open_dict_table(partition_name, norm_name,
- TRUE, DICT_ERR_IGNORE_NONE);
-
- if (m_table_parts[part_id] == NULL) {
- return(true);
- }
-
- dict_table_t *ib_table = m_table_parts[part_id];
- if ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && m_table_share->fields
- != (dict_table_get_n_user_cols(ib_table)
- + dict_table_get_n_v_cols(ib_table)))
- || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && (m_table_share->fields
- != dict_table_get_n_user_cols(ib_table)
- + dict_table_get_n_v_cols(ib_table) - 1))) {
- ib::warn() << "Partition `" << get_partition_name(part_id)
- << "` contains " << dict_table_get_n_user_cols(ib_table)
- << " user defined columns in InnoDB, but "
- << m_table_share->fields
- << " columns in MySQL. Please check"
- " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN
- "innodb-troubleshooting.html for how to resolve the"
- " issue.";
-
- /* Mark this partition as corrupted, so the drop table
- or force recovery can still use it, but not others.
- TODO: persist table->corrupted so it will be retained on
- restart and out-of-bounds operations will see it. */
-
- ib_table->corrupted = true;
- dict_table_close(ib_table, FALSE, FALSE);
- }
-
- /* TODO: To save memory, compare with first partition and reuse
- the column names etc. in the internal InnoDB meta-data cache. */
-
- return(false);
-}
-
-/** Set up the virtual column template for partition table, and points
-all m_table_parts[]->vc_templ to it.
-@param[in] table MySQL TABLE object
-@param[in] ib_table InnoDB dict_table_t
-@param[in] table_name Table name (db/table_name) */
-void
-Ha_innopart_share::set_v_templ(
- TABLE* table,
- dict_table_t* ib_table,
- const char* name)
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (ib_table->n_v_cols > 0) {
- for (ulint i = 0; i < m_tot_parts; i++) {
- if (m_table_parts[i]->vc_templ == NULL) {
- m_table_parts[i]->vc_templ
- = UT_NEW_NOKEY(dict_vcol_templ_t());
- m_table_parts[i]->vc_templ->vtempl = NULL;
- } else if (m_table_parts[i]->get_ref_count() == 1) {
- /* Clean and refresh the template */
- dict_free_vc_templ(m_table_parts[i]->vc_templ);
- m_table_parts[i]->vc_templ->vtempl = NULL;
- }
-
- if (m_table_parts[i]->vc_templ->vtempl == NULL) {
- innobase_build_v_templ(
- table, ib_table,
- m_table_parts[i]->vc_templ,
- NULL, true, name);
- }
- }
- }
-}
-
-/** Initialize the share with table and indexes per partition.
-@param[in] part_info Partition info (partition names to use).
-@param[in] table_name Table name (db/table_name).
-@return false on success else true. */
-bool
-Ha_innopart_share::open_table_parts(
- partition_info* part_info,
- const char* table_name)
-{
- size_t table_name_len;
- size_t len;
- uint ib_num_index;
- uint mysql_num_index;
- char partition_name[FN_REFLEN];
- bool index_loaded = true;
-
-#ifndef DBUG_OFF
- if (m_table_share->tmp_table == NO_TMP_TABLE) {
- mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data);
- }
-#endif /* DBUG_OFF */
- m_ref_count++;
- if (m_table_parts != NULL) {
- ut_ad(m_ref_count > 1);
- ut_ad(m_tot_parts > 0);
-
- /* Increment dict_table_t reference count for all partitions */
- mutex_enter(&dict_sys->mutex);
- for (uint i = 0; i < m_tot_parts; i++) {
- dict_table_t* table = m_table_parts[i];
- table->acquire();
- ut_ad(table->get_ref_count() >= m_ref_count);
- }
- mutex_exit(&dict_sys->mutex);
-
- return(false);
- }
- ut_ad(m_ref_count == 1);
- m_tot_parts = part_info->get_tot_partitions();
- size_t table_parts_size = sizeof(dict_table_t*) * m_tot_parts;
- m_table_parts = static_cast<dict_table_t**>(
- ut_zalloc(table_parts_size, mem_key_partitioning));
- if (m_table_parts == NULL) {
- m_ref_count--;
- return(true);
- }
-
- /* Set up the array over all table partitions. */
- table_name_len = strlen(table_name);
- memcpy(partition_name, table_name, table_name_len);
- List_iterator<partition_element>
- part_it(part_info->partitions);
- partition_element* part_elem;
- uint i = 0;
-
- while ((part_elem = part_it++)) {
- len = append_sep_and_name(
- partition_name + table_name_len,
- part_elem->partition_name,
- part_sep_nix,
- FN_REFLEN - table_name_len);
- if (part_info->is_sub_partitioned()) {
- List_iterator<partition_element>
- sub_it(part_elem->subpartitions);
- partition_element* sub_elem;
- while ((sub_elem = sub_it++)) {
- append_sep_and_name(
- partition_name
- + table_name_len + len,
- sub_elem->partition_name,
- sub_sep_nix,
- FN_REFLEN - table_name_len - len);
- if (open_one_table_part(i, partition_name)) {
- goto err;
- }
- i++;
- }
- } else {
- if (open_one_table_part(i, partition_name)) {
- goto err;
- }
- i++;
- }
- }
- ut_ad(i == m_tot_parts);
-
- /* Create the mapping of mysql index number to innodb indexes. */
-
- ib_num_index = (uint) UT_LIST_GET_LEN(m_table_parts[0]->indexes);
- mysql_num_index = part_info->table->s->keys;
-
- /* If there exists inconsistency between MySQL and InnoDB dictionary
- (metadata) information, the number of index defined in MySQL
- could exceed that in InnoDB, do not build index translation
- table in such case. */
-
- if (ib_num_index < mysql_num_index) {
- ut_ad(0);
- goto err;
- }
-
- if (mysql_num_index != 0) {
- size_t alloc_size = mysql_num_index * m_tot_parts
- * sizeof(*m_index_mapping);
- m_index_mapping = static_cast<dict_index_t**>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_index_mapping == NULL) {
-
- /* Report an error if index_mapping continues to be
- NULL and mysql_num_index is a non-zero value. */
-
- ib::error() << "Failed to allocate memory for"
- " index translation table. Number of"
- " Index:" << mysql_num_index;
- goto err;
- }
- }
-
- /* For each index in the mysql key_info array, fetch its
- corresponding InnoDB index pointer into index_mapping
- array. */
-
- for (ulint idx = 0; idx < mysql_num_index; idx++) {
- for (ulint part = 0; part < m_tot_parts; part++) {
- ulint count = part * mysql_num_index + idx;
-
- /* Fetch index pointers into index_mapping according
- to mysql index sequence. */
-
- m_index_mapping[count] = dict_table_get_index_on_name(
- m_table_parts[part],
- part_info->table->key_info[idx].name);
-
- if (m_index_mapping[count] == NULL) {
- ib::error() << "Cannot find index `"
- << part_info->table->key_info[idx].name
- << "` in InnoDB index dictionary"
- " partition `"
- << get_partition_name(part) << "`.";
- index_loaded = false;
- break;
- }
-
- /* Double check fetched index has the same
- column info as those in mysql key_info. */
-
- if (!innobase_match_index_columns(
- &part_info->table->key_info[idx],
- m_index_mapping[count])) {
- ib::error() << "Found index `"
- << part_info->table->key_info[idx].name
- << "` whose column info does not match"
- " that of MySQL.";
- index_loaded = false;
- break;
- }
- }
- }
- if (!index_loaded && m_index_mapping != NULL) {
- ut_free(m_index_mapping);
- m_index_mapping = NULL;
- }
-
- /* Successfully built the translation table. */
- m_index_count = mysql_num_index;
-
- return(false);
-err:
- close_table_parts();
-
- return(true);
-}
-
-/** Close all partitions. */
-void
-Ha_innopart_share::close_table_parts()
-{
-#ifndef DBUG_OFF
- if (m_table_share->tmp_table == NO_TMP_TABLE) {
- mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data);
- }
-#endif /* DBUG_OFF */
- m_ref_count--;
- if (m_ref_count != 0) {
-
- /* Decrement dict_table_t reference count for all partitions */
- mutex_enter(&dict_sys->mutex);
- for (uint i = 0; i < m_tot_parts; i++) {
- dict_table_t* table = m_table_parts[i];
- table->release();
- ut_ad(table->get_ref_count() >= m_ref_count);
- }
- mutex_exit(&dict_sys->mutex);
-
- return;
- }
-
- /* Last instance closed, close all table partitions and
- free the memory. */
-
- mutex_enter(&dict_sys->mutex);
- if (m_table_parts != NULL) {
- for (uint i = 0; i < m_tot_parts; i++) {
- if (m_table_parts[i] != NULL) {
- dict_table_close(m_table_parts[i], TRUE, TRUE);
- }
- }
- ut_free(m_table_parts);
- m_table_parts = NULL;
- }
- mutex_exit(&dict_sys->mutex);
- if (m_index_mapping != NULL) {
- ut_free(m_index_mapping);
- m_index_mapping = NULL;
- }
-
- m_tot_parts = 0;
- m_index_count = 0;
-}
-
-/** Get index.
-Find the index of the specified partition and key number.
-@param[in] part_id Partition number.
-@param[in] keynr Key number.
-@return Index pointer or NULL. */
-inline
-dict_index_t*
-Ha_innopart_share::get_index(
- uint part_id,
- uint keynr)
-{
- ut_a(part_id < m_tot_parts);
- ut_ad(keynr < m_index_count || keynr == MAX_KEY);
- if (m_index_mapping == NULL
- || keynr >= m_index_count) {
-
- if (keynr == MAX_KEY) {
- return(dict_table_get_first_index(
- get_table_part(part_id)));
- }
- return(NULL);
- }
- return(m_index_mapping[m_index_count * part_id + keynr]);
-}
-
-/** Get MySQL key number corresponding to InnoDB index.
-Calculates the key number used inside MySQL for an Innobase index. We will
-first check the "index translation table" for a match of the index to get
-the index number. If there does not exist an "index translation table",
-or not able to find the index in the translation table, then we will fall back
-to the traditional way of looping through dict_index_t list to find a
-match. In this case, we have to take into account if we generated a
-default clustered index for the table
-@param[in] part_id Partition the index belongs to.
-@param[in] index Index to return MySQL key number for.
-@return the key number used inside MySQL or UINT_MAX if key is not found. */
-inline
-uint
-Ha_innopart_share::get_mysql_key(
- uint part_id,
- const dict_index_t* index)
-{
- ut_ad(index != NULL);
- ut_ad(m_index_mapping != NULL);
- ut_ad(m_tot_parts);
-
- if (index != NULL && m_index_mapping != NULL) {
- uint start;
- uint end;
-
- if (part_id < m_tot_parts) {
- start = part_id * m_index_count;
- end = start + m_index_count;
- } else {
- start = 0;
- end = m_tot_parts * m_index_count;
- }
- for (uint i = start; i < end; i++) {
- if (m_index_mapping[i] == index) {
- return(i % m_index_count);
- }
- }
-
- /* Print an error message if we cannot find the index
- in the "index translation table". */
-
- if (index->is_committed()) {
- ib::error() << "Cannot find index "
- << index->name
- << " in InnoDB index translation table.";
- }
- }
-
- return(UINT_MAX);
-}
-
-/** Helper function for set bit in bitmap.
-@param[in,out] buf Bitmap buffer to update bit in.
-@param[in] bit_pos Bit number (index starts at 0). */
-static
-inline
-void
-set_bit(
- byte* buf,
- size_t pos)
-{
- buf[pos/8] |= (0x1 << (pos & 0x7));
-}
-
-/** Helper function for clear bit in bitmap.
-@param[in,out] buf Bitmap buffer to update bit in.
-@param[in] bit_pos Bit number (index starts at 0). */
-static
-inline
-void
-clear_bit(
- byte* buf,
- size_t pos)
-{
- buf[pos/8] &= ~(0x1 << (pos & 0x7));
-}
-
-/** Helper function for get bit in bitmap.
-@param[in,out] buf Bitmap buffer.
-@param[in] bit_pos Bit number (index starts at 0).
-@return byte set to 0x0 or 0x1.
-@retval 0x0 bit not set.
-@retval 0x1 bet set. */
-static
-inline
-byte
-get_bit(
- byte* buf,
- size_t pos)
-{
- return((buf[pos/8] >> (pos & 0x7)) & 0x1);
-}
-
-/** Helper class for encapsulating new/altered partitions during
-ADD/REORG/... PARTITION. */
-class Altered_partitions
-{
-private:
- /** New partitions during ADD/REORG/... PARTITION. */
- dict_table_t** m_new_table_parts;
-
- /** Insert nodes per partition. */
- ins_node_t** m_ins_nodes;
-
- /** sql_stat_start per partition. */
- byte* m_sql_stat_start;
-
- /** Trx id per partition. */
- trx_id_t* m_trx_ids;
-
- /** Number of new partitions. */
- size_t m_num_new_parts;
-
- /** Only need to create the partitions (no open/lock). */
- bool m_only_create;
-
-public:
- Altered_partitions(
- uint n_partitions,
- bool only_create);
-
- ~Altered_partitions();
-
- bool
- initialize();
-
- bool
- only_create() const
- {
- return(m_only_create);
- }
-
- /** Set currently used partition.
- @param[in] new_part_id Partition id to set.
- @param[in] part InnoDB table to use. */
- inline
- void
- set_part(
- ulint new_part_id,
- dict_table_t* part)
- {
- ut_ad(m_new_table_parts[new_part_id] == NULL);
- m_new_table_parts[new_part_id] = part;
- set_bit(m_sql_stat_start, new_part_id);
- }
-
- /** Get lower level InnoDB table for partition.
- @param[in] part_id Partition id.
- @return Lower level InnoDB table for the partition id. */
- inline
- dict_table_t*
- part(
- uint part_id) const
- {
- ut_ad(part_id < m_num_new_parts);
- return(m_new_table_parts[part_id]);
- }
-
- /** Set up prebuilt for using a specified partition.
- @param[in] prebuilt Prebuilt to update.
- @param[in] new_part_id Partition to use. */
- inline
- void
- get_prebuilt(
- row_prebuilt_t* prebuilt,
- uint new_part_id) const
- {
- ut_ad(m_new_table_parts[new_part_id]);
- prebuilt->table = m_new_table_parts[new_part_id];
- prebuilt->ins_node = m_ins_nodes[new_part_id];
- prebuilt->trx_id = m_trx_ids[new_part_id];
- prebuilt->sql_stat_start = get_bit(m_sql_stat_start,
- new_part_id);
- }
-
- /** Update cached values for a partition from prebuilt.
- @param[in] prebuilt Prebuilt to copy from.
- @param[in] new_part_id Partition id to copy. */
- inline
- void
- set_from_prebuilt(
- row_prebuilt_t* prebuilt,
- uint new_part_id)
- {
- ut_ad(m_new_table_parts[new_part_id] == prebuilt->table);
- m_ins_nodes[new_part_id] = prebuilt->ins_node;
- m_trx_ids[new_part_id] = prebuilt->trx_id;
- if (prebuilt->sql_stat_start == 0) {
- clear_bit(m_sql_stat_start, new_part_id);
- }
- }
-};
-
-Altered_partitions::Altered_partitions(
- uint n_partitions,
- bool only_create)
- :
- m_new_table_parts(),
- m_ins_nodes(),
- m_sql_stat_start(),
- m_trx_ids(),
- m_num_new_parts(n_partitions),
- m_only_create(only_create)
- {}
-
-Altered_partitions::~Altered_partitions()
-{
- if (m_new_table_parts != NULL) {
- for (ulint i = 0; i < m_num_new_parts; i++) {
- if (m_new_table_parts[i] != NULL) {
- dict_table_close(m_new_table_parts[i],
- false, true);
- }
- }
- ut_free(m_new_table_parts);
- m_new_table_parts = NULL;
- }
- if (m_ins_nodes != NULL) {
- for (ulint i = 0; i < m_num_new_parts; i++) {
- if (m_ins_nodes[i] != NULL) {
- ins_node_t* ins = m_ins_nodes[i];
- ut_ad(ins->select == NULL);
- que_graph_free_recursive(ins->select);
- ins->select = NULL;
- if (ins->entry_sys_heap != NULL) {
- mem_heap_free(ins->entry_sys_heap);
- ins->entry_sys_heap = NULL;
- }
- }
- }
- ut_free(m_ins_nodes);
- m_ins_nodes = NULL;
- }
- if (m_sql_stat_start != NULL) {
- ut_free(m_sql_stat_start);
- m_sql_stat_start = NULL;
- }
- if (m_trx_ids != NULL) {
- ut_free(m_trx_ids);
- m_trx_ids = NULL;
- }
-}
-
-/** Initialize the object.
-@return false on success else true. */
-bool
-Altered_partitions::initialize()
-{
- size_t alloc_size = sizeof(*m_new_table_parts) * m_num_new_parts;
- m_new_table_parts = static_cast<dict_table_t**>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_new_table_parts == NULL) {
- return(true);
- }
-
- alloc_size = sizeof(*m_ins_nodes) * m_num_new_parts;
- m_ins_nodes = static_cast<ins_node_t**>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_ins_nodes == NULL) {
- ut_free(m_new_table_parts);
- m_new_table_parts = NULL;
- return(true);
- }
-
- alloc_size = sizeof(*m_sql_stat_start)
- * UT_BITS_IN_BYTES(m_num_new_parts);
- m_sql_stat_start = static_cast<byte*>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_sql_stat_start == NULL) {
- ut_free(m_new_table_parts);
- m_new_table_parts = NULL;
- ut_free(m_ins_nodes);
- m_ins_nodes = NULL;
- return(true);
- }
-
- alloc_size = sizeof(*m_trx_ids) * m_num_new_parts;
- m_trx_ids = static_cast<trx_id_t*>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_trx_ids == NULL) {
- ut_free(m_new_table_parts);
- m_new_table_parts = NULL;
- ut_free(m_ins_nodes);
- m_ins_nodes = NULL;
- ut_free(m_sql_stat_start);
- m_sql_stat_start = NULL;
- return(true);
- }
-
- return(false);
-}
-
-/** Construct ha_innopart handler.
-@param[in] hton Handlerton.
-@param[in] table_arg MySQL Table.
-@return a new ha_innopart handler. */
-ha_innopart::ha_innopart(
- handlerton* hton,
- TABLE_SHARE* table_arg)
- :
- ha_innobase(hton, table_arg),
- Partition_helper(this),
- m_ins_node_parts(),
- m_upd_node_parts(),
- m_blob_heap_parts(),
- m_trx_id_parts(),
- m_row_read_type_parts(),
- m_sql_stat_start_parts(),
- m_pcur(),
- m_clust_pcur(),
- m_new_partitions()
-{
- m_int_table_flags &= ~(HA_INNOPART_DISABLED_TABLE_FLAGS);
-
- /* INNOBASE_SHARE is not used in ha_innopart.
- This also flags for ha_innobase that it is a partitioned table.
- And make it impossible to use legacy share functionality. */
-
- m_share = NULL;
-}
-
-/** Destruct ha_innopart handler. */
-ha_innopart::~ha_innopart()
-{}
-
-/** Returned supported alter table flags.
-@param[in] flags Flags to support.
-@return Supported flags. */
-uint
-ha_innopart::alter_table_flags(
- uint flags)
-{
- return(HA_PARTITION_FUNCTION_SUPPORTED | HA_FAST_CHANGE_PARTITION);
-}
-
-/** Set the autoinc column max value.
-This should only be called once from ha_innobase::open().
-Therefore there's no need for a covering lock.
-@param[in] no_lock Ignored!
-@return 0 for success or error code. */
-inline
-int
-ha_innopart::initialize_auto_increment(
- bool /* no_lock */)
-{
- int error = 0;
- ulonglong auto_inc = 0;
- const Field* field = table->found_next_number_field;
-
-#ifndef DBUG_OFF
- if (table_share->tmp_table == NO_TMP_TABLE)
- {
- mysql_mutex_assert_owner(m_part_share->auto_inc_mutex);
- }
-#endif
-
- /* Since a table can already be "open" in InnoDB's internal
- data dictionary, we only init the autoinc counter once, the
- first time the table is loaded. We can safely reuse the
- autoinc value from a previous MySQL open. */
-
- if (m_part_share->auto_inc_initialized) {
- /* Already initialized, nothing to do. */
- return(0);
- }
-
- if (field == NULL) {
- ib::info() << "Unable to determine the AUTOINC column name";
- }
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
- /* If the recovery level is set so high that writes
- are disabled we force the AUTOINC counter to 0
- value effectively disabling writes to the table.
- Secondly, we avoid reading the table in case the read
- results in failure due to a corrupted table/index.
-
- We will not return an error to the client, so that the
- tables can be dumped with minimal hassle. If an error
- were returned in this case, the first attempt to read
- the table would fail and subsequent SELECTs would succeed. */
-
- } else if (field == NULL) {
- /* This is a far more serious error, best to avoid
- opening the table and return failure. */
-
- my_error(ER_AUTOINC_READ_FAILED, MYF(0));
- error = HA_ERR_AUTOINC_READ_FAILED;
- } else {
- ib_uint64_t col_max_value = field->get_max_int_value();
-
- update_thd(ha_thd());
-
- for (uint part = 0; part < m_tot_parts; part++) {
- dict_table_t* ib_table
- = m_part_share->get_table_part(part);
- dict_table_autoinc_lock(ib_table);
- ut_ad(ib_table->persistent_autoinc);
- ib_uint64_t read_auto_inc
- = dict_table_autoinc_read(ib_table);
- if (read_auto_inc == 0) {
- read_auto_inc = btr_read_autoinc(
- dict_table_get_first_index(ib_table));
-
- /* At the this stage we do not know the
- increment nor the offset,
- so use a default increment of 1. */
-
- read_auto_inc = innobase_next_autoinc(
- read_auto_inc, 1, 1, 0, col_max_value);
- dict_table_autoinc_initialize(ib_table,
- read_auto_inc);
- }
- set_if_bigger(auto_inc, read_auto_inc);
- dict_table_autoinc_unlock(ib_table);
- }
- }
-
-done:
- m_part_share->next_auto_inc_val = auto_inc;
- m_part_share->auto_inc_initialized = true;
- return(error);
-}
-
-/** Opens a partitioned InnoDB table.
-Initializes needed data and opens the table which already exists
-in an InnoDB database.
-@param[in] name Table name (db/tablename)
-@param[in] mode Not used
-@param[in] test_if_locked Not used
-@return 0 or error number. */
-int
-ha_innopart::open(
- const char* name,
- int /*mode*/,
- uint /*test_if_locked*/)
-{
- dict_table_t* ib_table;
- char norm_name[FN_REFLEN];
-
- DBUG_ENTER("ha_innopart::open");
-
- ut_ad(table);
- if (m_part_info == NULL) {
- /* Must be during ::clone()! */
- ut_ad(table->part_info != NULL);
- m_part_info = table->part_info;
- }
-
- /* Under some cases MySQL seems to call this function while
- holding search latch(es). This breaks the latching order as
- we acquire dict_sys->mutex below and leads to a deadlock. */
-
- normalize_table_name(norm_name, name);
-
- m_user_thd = NULL;
-
- /* Get the Ha_innopart_share from the TABLE_SHARE. */
- lock_shared_ha_data();
- m_part_share = static_cast<Ha_innopart_share*>(get_ha_share_ptr());
- if (m_part_share == NULL) {
- m_part_share = new (std::nothrow)
- Ha_innopart_share(table_share);
- if (m_part_share == NULL) {
-share_error:
- unlock_shared_ha_data();
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
- set_ha_share_ptr(static_cast<Handler_share*>(m_part_share));
- }
- if (m_part_share->open_table_parts(m_part_info, name)
- || m_part_share->populate_partition_name_hash(m_part_info)) {
- goto share_error;
- }
- if (m_part_share->auto_inc_mutex == NULL
- && table->found_next_number_field != NULL) {
- if (m_part_share->init_auto_inc_mutex(table_share)) {
- goto share_error;
- }
- }
- unlock_shared_ha_data();
-
- /* Will be allocated if it is needed in ::update_row(). */
- m_upd_buf = NULL;
- m_upd_buf_size = 0;
-
- /* Get pointer to a table object in InnoDB dictionary cache. */
- ib_table = m_part_share->get_table_part(0);
-
- m_pcur_parts = NULL;
- m_clust_pcur_parts = NULL;
- m_pcur_map = NULL;
-
- /* TODO: Handle mismatching #P# vs #p# in upgrading to new DD instead!
- See bug#58406, The problem exists when moving partitioned tables
- between Windows and Unix-like platforms. InnoDB always folds the name
- on windows, partitioning never folds partition (and #P# separator).
- I.e. non of it follows lower_case_table_names correctly :( */
-
- if (open_partitioning(m_part_share))
- {
- close();
- DBUG_RETURN(HA_ERR_INITIALIZATION);
- }
-
- /* Currently we track statistics for all partitions, but for
- the secondary indexes we only use the biggest partition. */
-
- for (uint part_id = 0; part_id < m_tot_parts; part_id++) {
- innobase_copy_frm_flags_from_table_share(
- m_part_share->get_table_part(part_id),
- table->s);
- dict_stats_init(m_part_share->get_table_part(part_id));
- }
-
- MONITOR_INC(MONITOR_TABLE_OPEN);
-
- bool no_tablespace;
- THD* thd = ha_thd();
-
- /* TODO: Should we do this check for every partition during ::open()? */
- /* TODO: refactor this in ha_innobase so it can increase code reuse. */
- if (dict_table_is_discarded(ib_table)) {
-
- ib_senderrf(thd,
- IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
-
- /* Allow an open because a proper DISCARD should have set
- all the flags and index root page numbers to FIL_NULL that
- should prevent any DML from running but it should allow DDL
- operations. */
-
- no_tablespace = false;
-
- } else if (ib_table->ibd_file_missing) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN,
- ER_TABLESPACE_MISSING, norm_name);
-
- /* This means we have no idea what happened to the tablespace
- file, best to play it safe. */
-
- no_tablespace = true;
- } else {
- no_tablespace = false;
- }
-
- if (!thd_tablespace_op(thd) && no_tablespace) {
- set_my_errno(ENOENT);
-
- lock_shared_ha_data();
- m_part_share->close_table_parts();
- unlock_shared_ha_data();
- m_part_share = NULL;
-
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
-
- m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
-
- m_prebuilt->default_rec = table->s->default_values;
- ut_ad(m_prebuilt->default_rec);
-
- DBUG_ASSERT(table != NULL);
- m_prebuilt->m_mysql_table = table;
-
- if (ib_table->n_v_cols > 0) {
- mutex_enter(&dict_sys->mutex);
- m_part_share->set_v_templ(table, ib_table, name);
- mutex_exit(&dict_sys->mutex);
- }
-
- /* Looks like MySQL-3.23 sometimes has primary key number != 0. */
- m_primary_key = table->s->primary_key;
- key_used_on_scan = m_primary_key;
-
- /* Allocate a buffer for a 'row reference'. A row reference is
- a string of bytes of length ref_length which uniquely specifies
- a row in our table. Note that MySQL may also compare two row
- references for equality by doing a simple memcmp on the strings
- of length ref_length! */
-
- if (!row_table_got_default_clust_index(ib_table)) {
-
- m_prebuilt->clust_index_was_generated = FALSE;
-
- if (UNIV_UNLIKELY(m_primary_key >= MAX_KEY)) {
- table_name_t table_name;
- table_name.m_name = const_cast<char*>(name);
- ib::error() << "Table " << table_name
- << " has a primary key in InnoDB data"
- " dictionary, but not in MySQL!";
-
- /* This mismatch could cause further problems
- if not attended, bring this to the user's attention
- by printing a warning in addition to log a message
- in the errorlog. */
-
- push_warning_printf(thd, Sql_condition::SL_WARNING,
- ER_NO_SUCH_INDEX,
- "Table %s has a"
- " primary key in InnoDB data"
- " dictionary, but not in"
- " MySQL!", name);
-
- /* If m_primary_key >= MAX_KEY, its (m_primary_key)
- value could be out of bound if continue to index
- into key_info[] array. Find InnoDB primary index,
- and assign its key_length to ref_length.
- In addition, since MySQL indexes are sorted starting
- with primary index, unique index etc., initialize
- ref_length to the first index key length in
- case we fail to find InnoDB cluster index.
-
- Please note, this will not resolve the primary
- index mismatch problem, other side effects are
- possible if users continue to use the table.
- However, we allow this table to be opened so
- that user can adopt necessary measures for the
- mismatch while still being accessible to the table
- date. */
-
- if (table->key_info == NULL) {
- ut_ad(table->s->keys == 0);
- ref_length = 0;
- } else {
- ref_length = table->key_info[0].key_length;
- }
-
- /* Find corresponding cluster index
- key length in MySQL's key_info[] array. */
-
- for (uint i = 0; i < table->s->keys; i++) {
- dict_index_t* index;
- index = innopart_get_index(0, i);
- if (dict_index_is_clust(index)) {
- ref_length =
- table->key_info[i].key_length;
- }
- }
- ut_a(ref_length);
- ref_length += PARTITION_BYTES_IN_POS;
- } else {
- /* MySQL allocates the buffer for ref.
- key_info->key_length includes space for all key
- columns + one byte for each column that may be
- NULL. ref_length must be as exact as possible to
- save space, because all row reference buffers are
- allocated based on ref_length. */
-
- ref_length = table->key_info[m_primary_key].key_length;
- ref_length += PARTITION_BYTES_IN_POS;
- }
- } else {
- if (m_primary_key != MAX_KEY) {
- table_name_t table_name;
- table_name.m_name = const_cast<char*>(name);
- ib::error() << "Table " << table_name
- << " has no primary key in InnoDB data"
- " dictionary, but has one in MySQL! If you"
- " created the table with a MySQL version <"
- " 3.23.54 and did not define a primary key,"
- " but defined a unique key with all non-NULL"
- " columns, then MySQL internally treats that"
- " key as the primary key. You can fix this"
- " error by dump + DROP + CREATE + reimport"
- " of the table.";
-
- /* This mismatch could cause further problems
- if not attended, bring this to the user attention
- by printing a warning in addition to log a message
- in the errorlog. */
-
- push_warning_printf(thd, Sql_condition::SL_WARNING,
- ER_NO_SUCH_INDEX,
- "InnoDB: Table %s has no"
- " primary key in InnoDB data"
- " dictionary, but has one in"
- " MySQL!", name);
- }
-
- m_prebuilt->clust_index_was_generated = TRUE;
-
- ref_length = DATA_ROW_ID_LEN;
- ref_length += PARTITION_BYTES_IN_POS;
-
- /* If we automatically created the clustered index, then
- MySQL does not know about it, and MySQL must NOT be aware
- of the index used on scan, to make it avoid checking if we
- update the column of the index. That is why we assert below
- that key_used_on_scan is the undefined value MAX_KEY.
- The column is the row id in the automatical generation case,
- and it will never be updated anyway. */
-
- if (key_used_on_scan != MAX_KEY) {
- table_name_t table_name;
- table_name.m_name = const_cast<char*>(name);
- ib::warn() << "Table " << table_name
- << " key_used_on_scan is "
- << key_used_on_scan << " even though there is"
- " no primary key inside InnoDB.";
- }
- }
-
- /* Index block size in InnoDB: used by MySQL in query optimization. */
- stats.block_size = UNIV_PAGE_SIZE;
-
- if (m_prebuilt->table != NULL) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_max,
- dict_table_get_format(m_prebuilt->table));
- }
-
- /* Only if the table has an AUTOINC column. */
- if (m_prebuilt->table != NULL
- && !m_prebuilt->table->ibd_file_missing
- && table->found_next_number_field != NULL) {
- int error;
-
- /* Since a table can already be "open" in InnoDB's internal
- data dictionary, we only init the autoinc counter once, the
- first time the table is loaded,
- see ha_innopart::initialize_auto_increment.
- We can safely reuse the autoinc value from a previous MySQL
- open. */
-
- lock_auto_increment();
- error = initialize_auto_increment(false);
- unlock_auto_increment();
- if (error != 0) {
- close();
- DBUG_RETURN(error);
- }
- }
-
-#ifdef HA_INNOPART_SUPPORTS_FULLTEXT
- /* Set plugin parser for fulltext index. */
- for (uint i = 0; i < table->s->keys; i++) {
- if (table->key_info[i].flags & HA_USES_PARSER) {
- dict_index_t* index = innobase_get_index(i);
- plugin_ref parser = table->key_info[i].parser;
-
- ut_ad(index->type & DICT_FTS);
- index->parser =
- static_cast<st_mysql_ftparser *>(
- plugin_decl(parser)->info);
-
- DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
- index->parser = &fts_default_parser;);
- }
- }
-#endif /* HA_INNOPART_SUPPORTS_FULLTEXT */
-
- size_t alloc_size = sizeof(*m_ins_node_parts) * m_tot_parts;
- m_ins_node_parts = static_cast<ins_node_t**>(
- ut_zalloc(alloc_size, mem_key_partitioning));
-
- alloc_size = sizeof(*m_upd_node_parts) * m_tot_parts;
- m_upd_node_parts = static_cast<upd_node_t**>(
- ut_zalloc(alloc_size, mem_key_partitioning));
-
- alloc_blob_heap_array();
-
- alloc_size = sizeof(*m_trx_id_parts) * m_tot_parts;
- m_trx_id_parts = static_cast<trx_id_t*>(
- ut_zalloc(alloc_size, mem_key_partitioning));
-
- alloc_size = sizeof(*m_row_read_type_parts) * m_tot_parts;
- m_row_read_type_parts = static_cast<ulint*>(
- ut_zalloc(alloc_size, mem_key_partitioning));
-
- alloc_size = UT_BITS_IN_BYTES(m_tot_parts);
- m_sql_stat_start_parts = static_cast<uchar*>(
- ut_zalloc(alloc_size, mem_key_partitioning));
- if (m_ins_node_parts == NULL
- || m_upd_node_parts == NULL
- || m_blob_heap_parts == NULL
- || m_trx_id_parts == NULL
- || m_row_read_type_parts == NULL
- || m_sql_stat_start_parts == NULL) {
- close(); // Frees all the above.
- DBUG_RETURN(HA_ERR_OUT_OF_MEM);
- }
- info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
- DBUG_RETURN(0);
-}
-
-/** Get a cloned ha_innopart handler.
-@param[in] name Table name.
-@param[in] mem_root MySQL mem_root to use.
-@return new ha_innopart handler. */
-handler*
-ha_innopart::clone(
- const char* name,
- MEM_ROOT* mem_root)
-{
- ha_innopart* new_handler;
-
- DBUG_ENTER("ha_innopart::clone");
-
- new_handler = dynamic_cast<ha_innopart*>(handler::clone(name,
- mem_root));
- if (new_handler != NULL) {
- ut_ad(new_handler->m_prebuilt != NULL);
-
- new_handler->m_prebuilt->select_lock_type =
- m_prebuilt->select_lock_type;
- }
-
- DBUG_RETURN(new_handler);
-}
-
-/** Clear used ins_nodes and upd_nodes. */
-void ha_innopart::clear_ins_upd_nodes()
-{
- /* Free memory from insert nodes. */
- if (m_ins_node_parts != NULL) {
- for (uint i = 0; i < m_tot_parts; i++) {
- if (m_ins_node_parts[i] != NULL) {
- ins_node_t* ins = m_ins_node_parts[i];
- if (ins->select != NULL) {
- que_graph_free_recursive(ins->select);
- ins->select = NULL;
- }
-
- if (ins->entry_sys_heap != NULL) {
- mem_heap_free(ins->entry_sys_heap);
- ins->entry_sys_heap = NULL;
- }
- m_ins_node_parts[i] = NULL;
- }
- }
- }
-
- /* Free memory from update nodes. */
- if (m_upd_node_parts != NULL) {
- for (uint i = 0; i < m_tot_parts; i++) {
- if (m_upd_node_parts[i] != NULL) {
- upd_node_t* upd = m_upd_node_parts[i];
- if (upd->cascade_top) {
- mem_heap_free(upd->cascade_heap);
- upd->cascade_top = false;
- upd->cascade_heap = NULL;
- }
- if (upd->in_mysql_interface) {
- btr_pcur_free_for_mysql(upd->pcur);
- upd->in_mysql_interface = FALSE;
- }
-
- if (upd->select != NULL) {
- que_graph_free_recursive(upd->select);
- upd->select = NULL;
- }
- if (upd->heap != NULL) {
- mem_heap_free(upd->heap);
- upd->heap = NULL;
- }
- m_upd_node_parts[i] = NULL;
- }
- }
- }
-}
-
-/** Closes a handle to an InnoDB table.
-@return 0 */
-int
-ha_innopart::close()
-{
- DBUG_ENTER("ha_innopart::close");
-
- ut_ad(m_pcur_parts == NULL);
- ut_ad(m_clust_pcur_parts == NULL);
- close_partitioning();
-
- ut_ad(m_part_share != NULL);
- if (m_part_share != NULL) {
- lock_shared_ha_data();
- m_part_share->close_table_parts();
- unlock_shared_ha_data();
- m_part_share = NULL;
- }
- clear_ins_upd_nodes();
- free_blob_heap_array();
-
- /* Prevent double close of m_prebuilt->table. The real one was done
- done in m_part_share->close_table_parts(). */
- m_prebuilt->table = NULL;
- row_prebuilt_free(m_prebuilt, FALSE);
-
- if (m_upd_buf != NULL) {
- ut_ad(m_upd_buf_size != 0);
- /* Allocated with my_malloc! */
- my_free(m_upd_buf);
- m_upd_buf = NULL;
- m_upd_buf_size = 0;
- }
-
- if (m_ins_node_parts != NULL) {
- ut_free(m_ins_node_parts);
- m_ins_node_parts = NULL;
- }
- if (m_upd_node_parts != NULL) {
- ut_free(m_upd_node_parts);
- m_upd_node_parts = NULL;
- }
- if (m_trx_id_parts != NULL) {
- ut_free(m_trx_id_parts);
- m_trx_id_parts = NULL;
- }
- if (m_row_read_type_parts != NULL) {
- ut_free(m_row_read_type_parts);
- m_row_read_type_parts = NULL;
- }
- if (m_sql_stat_start_parts != NULL) {
- ut_free(m_sql_stat_start_parts);
- m_sql_stat_start_parts = NULL;
- }
-
- MONITOR_INC(MONITOR_TABLE_CLOSE);
-
- /* Tell InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- DBUG_RETURN(0);
-}
-
-/** Change active partition.
-Copies needed info into m_prebuilt from the partition specific memory.
-@param[in] part_id Partition to set as active. */
-void
-ha_innopart::set_partition(
- uint part_id)
-{
- DBUG_ENTER("ha_innopart::set_partition");
-
- DBUG_PRINT("ha_innopart", ("partition id: %u", part_id));
-
- if (part_id >= m_tot_parts) {
- ut_ad(0);
- DBUG_VOID_RETURN;
- }
- if (m_pcur_parts != NULL) {
- m_prebuilt->pcur = &m_pcur_parts[m_pcur_map[part_id]];
- }
- if (m_clust_pcur_parts != NULL) {
- m_prebuilt->clust_pcur =
- &m_clust_pcur_parts[m_pcur_map[part_id]];
- }
- m_prebuilt->ins_node = m_ins_node_parts[part_id];
- m_prebuilt->upd_node = m_upd_node_parts[part_id];
-
- /* For unordered scan and table scan, use blob_heap from first
- partition as we need exactly one blob. */
- m_prebuilt->blob_heap = m_blob_heap_parts[m_ordered ? part_id : 0];
-
-#ifdef UNIV_DEBUG
- if (m_prebuilt->blob_heap != NULL) {
- DBUG_PRINT("ha_innopart", ("validating blob_heap: %p",
- m_prebuilt->blob_heap));
- mem_heap_validate(m_prebuilt->blob_heap);
- }
-#endif
-
- m_prebuilt->trx_id = m_trx_id_parts[part_id];
- m_prebuilt->row_read_type = m_row_read_type_parts[part_id];
- m_prebuilt->sql_stat_start = get_bit(m_sql_stat_start_parts, part_id);
- m_prebuilt->table = m_part_share->get_table_part(part_id);
- m_prebuilt->index = innopart_get_index(part_id, active_index);
-
- DBUG_VOID_RETURN;
-}
-
-/** Update active partition.
-Copies needed info from m_prebuilt into the partition specific memory.
-@param[in] part_id Partition to set as active. */
-void
-ha_innopart::update_partition(
- uint part_id)
-{
- DBUG_ENTER("ha_innopart::update_partition");
- DBUG_PRINT("ha_innopart", ("partition id: %u", part_id));
-
- if (part_id >= m_tot_parts) {
- ut_ad(0);
- DBUG_VOID_RETURN;
- }
- m_ins_node_parts[part_id] = m_prebuilt->ins_node;
- m_upd_node_parts[part_id] = m_prebuilt->upd_node;
-
-#ifdef UNIV_DEBUG
- if (m_prebuilt->blob_heap != NULL) {
- DBUG_PRINT("ha_innopart", ("validating blob_heap: %p",
- m_prebuilt->blob_heap));
- mem_heap_validate(m_prebuilt->blob_heap);
- }
-#endif
-
- /* For unordered scan and table scan, use blob_heap from first
- partition as we need exactly one blob anytime. */
- m_blob_heap_parts[m_ordered ? part_id : 0] = m_prebuilt->blob_heap;
-
- m_trx_id_parts[part_id] = m_prebuilt->trx_id;
- m_row_read_type_parts[part_id] = m_prebuilt->row_read_type;
- if (m_prebuilt->sql_stat_start == 0) {
- clear_bit(m_sql_stat_start_parts, part_id);
- }
- m_last_part = part_id;
- DBUG_VOID_RETURN;
-}
-
-/** Was the last returned row semi consistent read.
-In an UPDATE or DELETE, if the row under the cursor was locked by
-another transaction, and the engine used an optimistic read of the last
-committed row value under the cursor, then the engine returns 1 from
-this function. MySQL must NOT try to update this optimistic value. If
-the optimistic value does not match the WHERE condition, MySQL can
-decide to skip over this row. This can be used to avoid unnecessary
-lock waits.
-
-If this method returns true, it will also signal the storage
-engine that the next read will be a locking re-read of the row.
-@see handler.h and row0mysql.h
-@return true if last read was semi consistent else false. */
-bool
-ha_innopart::was_semi_consistent_read()
-{
- return(m_row_read_type_parts[m_last_part]
- == ROW_READ_DID_SEMI_CONSISTENT);
-}
-
-/** Try semi consistent read.
-Tell the engine whether it should avoid unnecessary lock waits.
-If yes, in an UPDATE or DELETE, if the row under the cursor was locked
-by another transaction, the engine may try an optimistic read of
-the last committed row value under the cursor.
-@see handler.h and row0mysql.h
-@param[in] yes Should semi-consistent read be used. */
-void
-ha_innopart::try_semi_consistent_read(
- bool yes)
-{
- ha_innobase::try_semi_consistent_read(yes);
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- m_row_read_type_parts[i] = m_prebuilt->row_read_type;
- }
-}
-
-/** Removes a lock on a row.
-Removes a new lock set on a row, if it was not read optimistically.
-This can be called after a row has been read in the processing of
-an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */
-void
-ha_innopart::unlock_row()
-{
- ut_ad(m_last_part < m_tot_parts);
- set_partition(m_last_part);
- ha_innobase::unlock_row();
- update_partition(m_last_part);
-}
-
-/** Write a row in partition.
-Stores a row in an InnoDB database, to the table specified in this
-handle.
-@param[in] part_id Partition to write to.
-@param[in] record A row in MySQL format.
-@return 0 or error code. */
-int
-ha_innopart::write_row_in_part(
- uint part_id,
- uchar* record)
-{
- int error;
- Field* saved_next_number_field = table->next_number_field;
- DBUG_ENTER("ha_innopart::write_row_in_part");
- set_partition(part_id);
-
- /* Prevent update_auto_increment to be called
- again in ha_innobase::write_row(). */
-
- table->next_number_field = NULL;
-
- /* TODO: try to avoid creating a new dtuple
- (in row_get_prebuilt_insert_row()) for each partition).
- Might be needed due to ins_node implementation. */
-
- error = ha_innobase::write_row(record);
- update_partition(part_id);
- table->next_number_field = saved_next_number_field;
- DBUG_RETURN(error);
-}
-
-/** Update a row in partition.
-Updates a row given as a parameter to a new value.
-@param[in] part_id Partition to update row in.
-@param[in] old_row Old row in MySQL format.
-@param[in] new_row New row in MySQL format.
-@return 0 or error number. */
-int
-ha_innopart::update_row_in_part(
- uint part_id,
- const uchar* old_row,
- uchar* new_row)
-{
- int error;
- DBUG_ENTER("ha_innopart::update_row_in_part");
-
- set_partition(part_id);
- error = ha_innobase::update_row(old_row, new_row);
- update_partition(part_id);
- DBUG_RETURN(error);
-}
-
-/** Deletes a row in partition.
-@param[in] part_id Partition to delete from.
-@param[in] record Row to delete in MySQL format.
-@return 0 or error number. */
-int
-ha_innopart::delete_row_in_part(
- uint part_id,
- const uchar* record)
-{
- int error;
- DBUG_ENTER("ha_innopart::delete_row_in_part");
- m_err_rec = NULL;
-
- m_last_part = part_id;
- set_partition(part_id);
- error = ha_innobase::delete_row(record);
- update_partition(part_id);
- DBUG_RETURN(error);
-}
-
-/** Initializes a handle to use an index.
-@param[in] keynr Key (index) number.
-@param[in] sorted True if result MUST be sorted according to index.
-@return 0 or error number. */
-int
-ha_innopart::index_init(
- uint keynr,
- bool sorted)
-{
- int error;
- uint part_id = m_part_info->get_first_used_partition();
- DBUG_ENTER("ha_innopart::index_init");
-
- active_index = keynr;
- if (part_id == MY_BIT_NONE) {
- DBUG_RETURN(0);
- }
-
- error = ph_index_init_setup(keynr, sorted);
- if (error != 0) {
- DBUG_RETURN(error);
- }
-
- if (sorted) {
- error = init_record_priority_queue();
- if (error != 0) {
- /* Needs cleanup in case it returns error. */
- destroy_record_priority_queue();
- DBUG_RETURN(error);
- }
- /* Disable prefetch.
- The prefetch buffer is not partitioning aware, so it may return
- rows from a different partition if either the prefetch buffer is
- full, or it is non-empty and the partition is exhausted. */
- m_prebuilt->m_no_prefetch = true;
- }
-
- /* For scan across partitions, the keys needs to be materialized */
- m_prebuilt->m_read_virtual_key = true;
-
- error = change_active_index(part_id, keynr);
- if (error != 0) {
- destroy_record_priority_queue();
- DBUG_RETURN(error);
- }
-
- DBUG_EXECUTE_IF("partition_fail_index_init", {
- destroy_record_priority_queue();
- DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
- });
-
- DBUG_RETURN(0);
-}
-
-/** End index cursor.
-@return 0 or error code. */
-int
-ha_innopart::index_end()
-{
- uint part_id = m_part_info->get_first_used_partition();
- DBUG_ENTER("ha_innopart::index_end");
-
- if (part_id == MY_BIT_NONE) {
- /* Never initialized any index. */
- active_index = MAX_KEY;
- DBUG_RETURN(0);
- }
- if (m_ordered) {
- destroy_record_priority_queue();
- m_prebuilt->m_no_prefetch = false;
- }
- m_prebuilt->m_read_virtual_key = false;
-
- DBUG_RETURN(ha_innobase::index_end());
-}
-
-/* Partitioning support functions. */
-
-/** Setup the ordered record buffer and the priority queue.
-@param[in] used_parts Number of used partitions in query.
-@return false for success else true. */
-int
-ha_innopart::init_record_priority_queue_for_parts(
- uint used_parts)
-{
- size_t alloc_size;
- void* buf;
-
- DBUG_ENTER("ha_innopart::init_record_priority_queue_for_parts");
- ut_ad(used_parts >= 1);
- /* TODO: Don't use this if only one partition is used! */
- //ut_ad(used_parts > 1);
-
- /* We could reuse current m_prebuilt->pcur/clust_pcur for the first
- used partition, but it would complicate and affect performance,
- so we trade some extra memory instead. */
-
- m_pcur = m_prebuilt->pcur;
- m_clust_pcur = m_prebuilt->clust_pcur;
-
- /* If we searching for secondary key or doing a write/update
- we will need two pcur, one for the active (secondary) index and
- one for the clustered index. */
-
- bool need_clust_index =
- m_curr_key_info[1] != NULL
- || get_lock_type() != F_RDLCK;
-
- /* pcur and clust_pcur per partition.
- By using zalloc, we do not need to initialize the pcur's! */
-
- alloc_size = used_parts * sizeof(btr_pcur_t);
- if (need_clust_index) {
- alloc_size *= 2;
- }
- buf = ut_zalloc(alloc_size, mem_key_partitioning);
- if (buf == NULL) {
- DBUG_RETURN(true);
- }
- m_pcur_parts = static_cast<btr_pcur_t*>(buf);
- if (need_clust_index) {
- m_clust_pcur_parts = &m_pcur_parts[used_parts];
- }
- /* mapping from part_id to pcur. */
- alloc_size = m_tot_parts * sizeof(*m_pcur_map);
- buf = ut_zalloc(alloc_size, mem_key_partitioning);
- if (buf == NULL) {
- DBUG_RETURN(true);
- }
- m_pcur_map = static_cast<uint16_t*>(buf);
- {
- uint16_t pcur_count = 0;
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
- m_pcur_map[i] = pcur_count++;
- }
- }
-
- DBUG_RETURN(false);
-}
-
-/** Destroy the ordered record buffer and the priority queue. */
-inline
-void
-ha_innopart::destroy_record_priority_queue_for_parts()
-{
- DBUG_ENTER("ha_innopart::destroy_record_priority_queue");
- if (m_pcur_parts != NULL) {
- uint used_parts;
- used_parts = bitmap_bits_set(&m_part_info->read_partitions);
- for (uint i = 0; i < used_parts; i++) {
- btr_pcur_free(&m_pcur_parts[i]);
- if (m_clust_pcur_parts != NULL) {
- btr_pcur_free(&m_clust_pcur_parts[i]);
- }
- }
- ut_free(m_pcur_parts);
- m_clust_pcur_parts = NULL;
- m_pcur_parts = NULL;
- /* Reset the original m_prebuilt->pcur. */
- m_prebuilt->pcur = m_pcur;
- m_prebuilt->clust_pcur = m_clust_pcur;
- }
- if (m_pcur_map != NULL) {
- ut_free(m_pcur_map);
- m_pcur_map = NULL;
- }
- DBUG_VOID_RETURN;
-}
-
-/** Print error information.
-@param[in] error Error code (MySQL).
-@param[in] errflag Flags. */
-void
-ha_innopart::print_error(
- int error,
- myf errflag)
-{
- DBUG_ENTER("ha_innopart::print_error");
- if (print_partition_error(error, errflag)) {
- ha_innobase::print_error(error, errflag);
- }
-
- DBUG_VOID_RETURN;
-}
-
-/** Can error be ignored.
-@param[in] error Error code to check.
-@return true if ignorable else false. */
-bool
-ha_innopart::is_ignorable_error(
- int error)
-{
- if (ha_innobase::is_ignorable_error(error)
- || error == HA_ERR_NO_PARTITION_FOUND
- || error == HA_ERR_NOT_IN_LOCK_PARTITIONS) {
-
- return(true);
- }
- return(false);
-}
-
-/** Get the index for the current partition
-@param[in] keynr MySQL index number.
-@return InnoDB index or NULL. */
-inline
-dict_index_t*
-ha_innopart::innobase_get_index(
- uint keynr)
-{
- uint part_id = m_last_part;
- if (part_id >= m_tot_parts) {
- ut_ad(0);
- part_id = 0;
- }
- return(innopart_get_index(part_id, keynr));
-}
-
-/** Get the index for a handle.
-Does not change active index.
-@param[in] keynr Use this index; MAX_KEY means always clustered index,
-even if it was internally generated by InnoDB.
-@param[in] part_id From this partition.
-@return NULL or index instance. */
-inline
-dict_index_t*
-ha_innopart::innopart_get_index(
- uint part_id,
- uint keynr)
-{
- KEY* key = NULL;
- dict_index_t* index = NULL;
-
- DBUG_ENTER("innopart_get_index");
-
- if (keynr != MAX_KEY && table->s->keys > 0) {
- key = table->key_info + keynr;
-
- index = m_part_share->get_index(part_id, keynr);
-
- if (index != NULL) {
- ut_a(ut_strcmp(index->name, key->name) == 0);
- } else {
- /* Can't find index with keynr in the translation
- table. Only print message if the index translation
- table exists. */
-
- ib::warn() << "InnoDB could not find index "
- << (key ? key->name : "NULL")
- << " key no " << keynr << " for table "
- << m_prebuilt->table->name
- << " through its index translation table";
-
- index = dict_table_get_index_on_name(m_prebuilt->table,
- key->name);
- }
- } else {
- /* Get the generated index. */
- ut_ad(keynr == MAX_KEY);
- index = dict_table_get_first_index(
- m_part_share->get_table_part(part_id));
- }
-
- if (index == NULL) {
- ib::error() << "InnoDB could not find key n:o "
- << keynr << " with name " << (key ? key->name : "NULL")
- << " from dict cache for table "
- << m_prebuilt->table->name << " partition n:o "
- << part_id;
- }
-
- DBUG_RETURN(index);
-}
-
-/** Changes the active index of a handle.
-@param[in] part_id Use this partition.
-@param[in] keynr Use this index; MAX_KEY means always clustered index,
-even if it was internally generated by InnoDB.
-@return 0 or error number. */
-int
-ha_innopart::change_active_index(
- uint part_id,
- uint keynr)
-{
- DBUG_ENTER("ha_innopart::change_active_index");
-
- ut_ad(m_user_thd == ha_thd());
- ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
-
- active_index = keynr;
- set_partition(part_id);
-
- if (UNIV_UNLIKELY(m_prebuilt->index == NULL)) {
- ib::warn() << "change_active_index(" << part_id
- << "," << keynr << ") failed";
- m_prebuilt->index_usable = FALSE;
- DBUG_RETURN(1);
- }
-
- m_prebuilt->index_usable = row_merge_is_index_usable(m_prebuilt->trx,
- m_prebuilt->index);
-
- if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
- if (dict_index_is_corrupted(m_prebuilt->index)) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof table_name,
- m_prebuilt->index->table->name.m_name);
-
- push_warning_printf(
- m_user_thd, Sql_condition::SL_WARNING,
- HA_ERR_INDEX_CORRUPT,
- "InnoDB: Index %s for table %s is"
- " marked as corrupted"
- " (partition %u)",
- m_prebuilt->index->name(), table_name, part_id);
- DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
- } else {
- push_warning_printf(
- m_user_thd, Sql_condition::SL_WARNING,
- HA_ERR_TABLE_DEF_CHANGED,
- "InnoDB: insufficient history for index %u",
- keynr);
- }
-
- /* The caller seems to ignore this. Thus, we must check
- this again in row_search_for_mysql(). */
-
- DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED);
- }
-
- ut_a(m_prebuilt->search_tuple != NULL);
-
- /* If too expensive, cache the keynr and only update search_tuple when
- keynr changes. Remember that the clustered index is also used for
- MAX_KEY. */
- dtuple_set_n_fields(m_prebuilt->search_tuple,
- m_prebuilt->index->n_fields);
-
- dict_index_copy_types(m_prebuilt->search_tuple, m_prebuilt->index,
- m_prebuilt->index->n_fields);
-
- /* MySQL changes the active index for a handle also during some
- queries, for example SELECT MAX(a), SUM(a) first retrieves the
- MAX() and then calculates the sum. Previously we played safe
- and used the flag ROW_MYSQL_WHOLE_ROW below, but that caused
- unnecessary copying. Starting from MySQL-4.1 we use a more
- efficient flag here. */
-
- /* TODO: Is this really needed?
- Will it not be built in index_read? */
-
- build_template(false);
-
- DBUG_RETURN(0);
-}
-
-/** Return first record in index from a partition.
-@param[in] part Partition to read from.
-@param[out] record First record in index in the partition.
-@return error number or 0. */
-int
-ha_innopart::index_first_in_part(
- uint part,
- uchar* record)
-{
- int error;
- DBUG_ENTER("ha_innopart::index_first_in_part");
-
- set_partition(part);
- error = ha_innobase::index_first(record);
- update_partition(part);
-
- DBUG_RETURN(error);
-}
-
-/** Return next record in index from a partition.
-@param[in] part Partition to read from.
-@param[out] record Last record in index in the partition.
-@return error number or 0. */
-int
-ha_innopart::index_next_in_part(
- uint part,
- uchar* record)
-{
- DBUG_ENTER("ha_innopart::index_next_in_part");
-
- int error;
-
- set_partition(part);
- error = ha_innobase::index_next(record);
- update_partition(part);
-
- ut_ad(m_ordered_scan_ongoing
- || m_ordered_rec_buffer == NULL
- || m_prebuilt->used_in_HANDLER
- || m_part_spec.start_part >= m_part_spec.end_part);
-
- DBUG_RETURN(error);
-}
-
-/** Return next same record in index from a partition.
-This routine is used to read the next record, but only if the key is
-the same as supplied in the call.
-@param[in] part Partition to read from.
-@param[out] record Last record in index in the partition.
-@param[in] key Key to match.
-@param[in] length Length of key.
-@return error number or 0. */
-int
-ha_innopart::index_next_same_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- uint length)
-{
- int error;
-
- set_partition(part);
- error = ha_innobase::index_next_same(record, key, length);
- update_partition(part);
- return(error);
-}
-
-/** Return last record in index from a partition.
-@param[in] part Partition to read from.
-@param[out] record Last record in index in the partition.
-@return error number or 0. */
-int
-ha_innopart::index_last_in_part(
- uint part,
- uchar* record)
-{
- int error;
-
- set_partition(part);
- error = ha_innobase::index_last(record);
- update_partition(part);
- return(error);
-}
-
-/** Return previous record in index from a partition.
-@param[in] part Partition to read from.
-@param[out] record Last record in index in the partition.
-@return error number or 0. */
-int
-ha_innopart::index_prev_in_part(
- uint part,
- uchar* record)
-{
- int error;
-
- set_partition(part);
- error = ha_innobase::index_prev(record);
- update_partition(part);
-
- ut_ad(m_ordered_scan_ongoing
- || m_ordered_rec_buffer == NULL
- || m_prebuilt->used_in_HANDLER
- || m_part_spec.start_part >= m_part_spec.end_part);
-
- return(error);
-}
-
-/** Start index scan and return first record from a partition.
-This routine starts an index scan using a start key. The calling
-function will check the end key on its own.
-@param[in] part Partition to read from.
-@param[out] record First matching record in index in the partition.
-@param[in] key Key to match.
-@param[in] keypart_map Which part of the key to use.
-@param[in] find_flag Key condition/direction to use.
-@return error number or 0. */
-int
-ha_innopart::index_read_map_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
-{
- int error;
-
- set_partition(part);
- error = ha_innobase::index_read_map(
- record,
- key,
- keypart_map,
- find_flag);
- update_partition(part);
- return(error);
-}
-
-/** Start index scan and return first record from a partition.
-This routine starts an index scan using a start key. The calling
-function will check the end key on its own.
-@param[in] part Partition to read from.
-@param[out] record First matching record in index in the partition.
-@param[in] index Index to read from.
-@param[in] key Key to match.
-@param[in] keypart_map Which part of the key to use.
-@param[in] find_flag Key condition/direction to use.
-@return error number or 0. */
-int
-ha_innopart::index_read_idx_map_in_part(
- uint part,
- uchar* record,
- uint index,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
-{
- int error;
-
- set_partition(part);
- error = ha_innobase::index_read_idx_map(
- record,
- index,
- key,
- keypart_map,
- find_flag);
- update_partition(part);
- return(error);
-}
-
-/** Return last matching record in index from a partition.
-@param[in] part Partition to read from.
-@param[out] record Last matching record in index in the partition.
-@param[in] key Key to match.
-@param[in] keypart_map Which part of the key to use.
-@return error number or 0. */
-int
-ha_innopart::index_read_last_map_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- key_part_map keypart_map)
-{
- int error;
- set_partition(part);
- error = ha_innobase::index_read_last_map(record, key, keypart_map);
- update_partition(part);
- return(error);
-}
-
-/** Start index scan and return first record from a partition.
-This routine starts an index scan using a start and end key.
-@param[in] part Partition to read from.
-@param[in,out] record First matching record in index in the partition,
-if NULL use table->record[0] as return buffer.
-@param[in] start_key Start key to match.
-@param[in] end_key End key to match.
-@param[in] eq_range Is equal range, start_key == end_key.
-@param[in] sorted Return rows in sorted order.
-@return error number or 0. */
-int
-ha_innopart::read_range_first_in_part(
- uint part,
- uchar* record,
- const key_range* start_key,
- const key_range* end_key,
- bool eq_range,
- bool sorted)
-{
- int error;
- uchar* read_record = record;
- set_partition(part);
- if (read_record == NULL) {
- read_record = table->record[0];
- }
- if (m_start_key.key != NULL) {
- error = ha_innobase::index_read(
- read_record,
- m_start_key.key,
- m_start_key.length,
- m_start_key.flag);
- } else {
- error = ha_innobase::index_first(read_record);
- }
- if (error == HA_ERR_KEY_NOT_FOUND) {
- error = HA_ERR_END_OF_FILE;
- } else if (error == 0 && !in_range_check_pushed_down) {
- /* compare_key uses table->record[0], so we
- need to copy the data if not already there. */
-
- if (record != NULL) {
- copy_cached_row(table->record[0], read_record);
- }
- if (compare_key(end_range) > 0) {
- /* must use ha_innobase:: due to set/update_partition
- could overwrite states if ha_innopart::unlock_row()
- was used. */
- ha_innobase::unlock_row();
- error = HA_ERR_END_OF_FILE;
- }
- }
- update_partition(part);
- return(error);
-}
-
-/** Return next record in index range scan from a partition.
-@param[in] part Partition to read from.
-@param[in,out] record First matching record in index in the partition,
-if NULL use table->record[0] as return buffer.
-@return error number or 0. */
-int
-ha_innopart::read_range_next_in_part(
- uint part,
- uchar* record)
-{
- int error;
- uchar* read_record = record;
-
- set_partition(part);
- if (read_record == NULL) {
- read_record = table->record[0];
- }
-
- /* TODO: Implement ha_innobase::read_range*?
- So it will return HA_ERR_END_OF_FILE or
- HA_ERR_KEY_NOT_FOUND when passing end_range. */
-
- error = ha_innobase::index_next(read_record);
- if (error == 0 && !in_range_check_pushed_down) {
- /* compare_key uses table->record[0], so we
- need to copy the data if not already there. */
-
- if (record != NULL) {
- copy_cached_row(table->record[0], read_record);
- }
- if (compare_key(end_range) > 0) {
- /* must use ha_innobase:: due to set/update_partition
- could overwrite states if ha_innopart::unlock_row()
- was used. */
- ha_innobase::unlock_row();
- error = HA_ERR_END_OF_FILE;
- }
- }
- update_partition(part);
-
- return(error);
-}
-
-/** Initialize a table scan in a specific partition.
-@param[in] part_id Partition to initialize.
-@param[in] scan True if table/index scan false otherwise (for rnd_pos)
-@return 0 or error number. */
-int
-ha_innopart::rnd_init_in_part(
- uint part_id,
- bool scan)
-{
- int err;
-
- if (m_prebuilt->clust_index_was_generated) {
- err = change_active_index(part_id, MAX_KEY);
- } else {
- err = change_active_index(part_id, m_primary_key);
- }
-
- m_start_of_scan = 1;
-
- /* Don't use semi-consistent read in random row reads (by position).
- This means we must disable semi_consistent_read if scan is false. */
-
- if (!scan) {
- try_semi_consistent_read(false);
- }
-
- return(err);
-}
-
-/** Ends a table scan.
-@param[in] part_id Partition to end table scan in.
-@param[in] scan True for scan else random access.
-@return 0 or error number. */
-int
-ha_innopart::rnd_end_in_part(
- uint part_id,
- bool scan)
-{
- return(index_end());
-}
-
-/** Read next row in partition.
-Reads the next row in a table scan (also used to read the FIRST row
-in a table scan).
-@param[in] part_id Partition to end table scan in.
-@param[out] buf Returns the row in this buffer, in MySQL format.
-@return 0, HA_ERR_END_OF_FILE or error number. */
-int
-ha_innopart::rnd_next_in_part(
- uint part_id,
- uchar* buf)
-{
- int error;
-
- DBUG_ENTER("ha_innopart::rnd_next_in_part");
-
- set_partition(part_id);
- if (m_start_of_scan) {
- error = ha_innobase::index_first(buf);
-
- if (error == HA_ERR_KEY_NOT_FOUND) {
- error = HA_ERR_END_OF_FILE;
- }
- m_start_of_scan = 0;
- } else {
- ha_statistic_increment(&SSV::ha_read_rnd_next_count);
- error = ha_innobase::general_fetch(buf, ROW_SEL_NEXT, 0);
- }
-
- update_partition(part_id);
- DBUG_RETURN(error);
-}
-
-/** Get a row from a position.
-Fetches a row from the table based on a row reference.
-@param[out] buf Returns the row in this buffer, in MySQL format.
-@param[in] pos Position, given as primary key value or DB_ROW_ID
-(if no primary key) of the row in MySQL format. The length of data in pos has
-to be ref_length.
-@return 0, HA_ERR_KEY_NOT_FOUND or error code. */
-int
-ha_innopart::rnd_pos(
- uchar* buf,
- uchar* pos)
-{
- int error;
- uint part_id;
- DBUG_ENTER("ha_innopart::rnd_pos");
- ut_ad(PARTITION_BYTES_IN_POS == 2);
- DBUG_DUMP("pos", pos, ref_length);
-
- ha_statistic_increment(&SSV::ha_read_rnd_count);
-
- ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
-
- /* Restore used partition. */
- part_id = uint2korr(pos);
-
- set_partition(part_id);
-
- /* Note that we assume the length of the row reference is fixed
- for the table, and it is == ref_length. */
-
- error = ha_innobase::index_read(buf, pos + PARTITION_BYTES_IN_POS,
- ref_length - PARTITION_BYTES_IN_POS,
- HA_READ_KEY_EXACT);
- DBUG_PRINT("info", ("part %u index_read returned %d", part_id, error));
- DBUG_DUMP("buf", buf, table_share->reclength);
-
- update_partition(part_id);
-
- DBUG_RETURN(error);
-}
-
-/** Return position for cursor in last used partition.
-Stores a reference to the current row to 'ref' field of the handle. Note
-that in the case where we have generated the clustered index for the
-table, the function parameter is illogical: we MUST ASSUME that 'record'
-is the current 'position' of the handle, because if row ref is actually
-the row id internally generated in InnoDB, then 'record' does not contain
-it. We just guess that the row id must be for the record where the handle
-was positioned the last time.
-@param[out] ref_arg Pointer to buffer where to write the position.
-@param[in] record Record to position for. */
-void
-ha_innopart::position_in_last_part(
- uchar* ref_arg,
- const uchar* record)
-{
- if (m_prebuilt->clust_index_was_generated) {
- /* No primary key was defined for the table and we
- generated the clustered index from row id: the
- row reference will be the row id, not any key value
- that MySQL knows of. */
-
- memcpy(ref_arg, m_prebuilt->row_id, DATA_ROW_ID_LEN);
- } else {
-
- /* Copy primary key as the row reference */
- KEY* key_info = table->key_info + m_primary_key;
- key_copy(ref_arg, (uchar*)record, key_info,
- key_info->key_length);
- }
-}
-
-/** Fill in data_dir_path and tablespace name from internal data
-dictionary.
-@param part_elem Partition element to fill.
-@param ib_table InnoDB table to copy from. */
-void
-ha_innopart::update_part_elem(
- partition_element* part_elem,
- dict_table_t* ib_table)
-{
- dict_get_and_save_data_dir_path(ib_table, false);
- if (ib_table->data_dir_path != NULL) {
- if (part_elem->data_file_name == NULL
- || strcmp(ib_table->data_dir_path,
- part_elem->data_file_name) != 0) {
-
- /* Play safe and allocate memory from TABLE and copy
- instead of expose the internal data dictionary. */
- part_elem->data_file_name =
- strdup_root(&table->mem_root,
- ib_table->data_dir_path);
- }
- } else {
- part_elem->data_file_name = NULL;
- }
-
- part_elem->index_file_name = NULL;
-}
-
-/** Update create_info.
-Used in SHOW CREATE TABLE et al.
-@param[in,out] create_info Create info to update. */
-void
-ha_innopart::update_create_info(
- HA_CREATE_INFO* create_info)
-{
- uint num_subparts = m_part_info->num_subparts;
- uint num_parts;
- uint part;
- dict_table_t* table;
- List_iterator<partition_element>
- part_it(m_part_info->partitions);
- partition_element* part_elem;
- partition_element* sub_elem;
- DBUG_ENTER("ha_innopart::update_create_info");
- if ((create_info->used_fields & HA_CREATE_USED_AUTO) == 0) {
- info(HA_STATUS_AUTO);
- create_info->auto_increment_value = stats.auto_increment_value;
- }
-
- num_parts = (num_subparts != 0) ? m_tot_parts / num_subparts : m_tot_parts;
-
- /* DATA/INDEX DIRECTORY are never applied to the whole partitioned
- table, only to its parts. */
-
- create_info->data_file_name = NULL;
- create_info->index_file_name = NULL;
-
- /* Since update_create_info() can be called from
- mysql_prepare_alter_table() when not all partitions are set up,
- we look for that condition first.
- If all partitions are not available then simply return,
- since it does not need any updated partitioning info. */
-
- if (!m_part_info->temp_partitions.is_empty()) {
- DBUG_VOID_RETURN;
- }
- part = 0;
- while ((part_elem = part_it++)) {
- if (part >= num_parts) {
- DBUG_VOID_RETURN;
- }
- if (m_part_info->is_sub_partitioned()) {
- List_iterator<partition_element>
- subpart_it(part_elem->subpartitions);
- uint subpart = 0;
- while ((sub_elem = subpart_it++)) {
- if (subpart >= num_subparts) {
- DBUG_VOID_RETURN;
- }
- subpart++;
- }
- if (subpart != num_subparts) {
- DBUG_VOID_RETURN;
- }
- }
- part++;
- }
- if (part != num_parts) {
- DBUG_VOID_RETURN;
- }
-
- /* part_elem->data_file_name should be correct from
- the .frm, but may have been changed, so update from SYS_DATAFILES.
- index_file_name is ignored, so remove it. */
-
- part = 0;
- part_it.rewind();
- while ((part_elem = part_it++)) {
- if (m_part_info->is_sub_partitioned()) {
- List_iterator<partition_element>
- subpart_it(part_elem->subpartitions);
- while ((sub_elem = subpart_it++)) {
- table = m_part_share->get_table_part(part++);
- update_part_elem(sub_elem, table);
- }
- } else {
- table = m_part_share->get_table_part(part++);
- update_part_elem(part_elem, table);
- }
- }
- DBUG_VOID_RETURN;
-}
-
-/** Set create_info->data_file_name.
-@param[in] part_elem Partition to copy from.
-@param[in,out] info Create info to set. */
-static
-void
-set_create_info_dir(
- partition_element* part_elem,
- HA_CREATE_INFO* info)
-{
- if (part_elem->data_file_name != NULL
- && part_elem->data_file_name[0] != '\0') {
- info->data_file_name = part_elem->data_file_name;
- }
- if (part_elem->index_file_name != NULL
- && part_elem->index_file_name[0] != '\0') {
- info->index_file_name = part_elem->index_file_name;
- }
-}
-
-/** Set flags and append '/' to remote path if necessary. */
-void
-create_table_info_t::set_remote_path_flags()
-{
- if (m_remote_path[0] != '\0') {
- ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) != 0);
-
- /* os_file_make_remote_pathname will truncate
- everything after the last '/', so append '/'
- if it is not the last character. */
-
- size_t len = strlen(m_remote_path);
- if (m_remote_path[len - 1] != OS_PATH_SEPARATOR) {
- m_remote_path[len] = OS_PATH_SEPARATOR;
- m_remote_path[len + 1] = '\0';
- }
- } else {
- ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) == 0);
- }
-}
-
-/** Creates a new table to an InnoDB database.
-@param[in] name Table name (in filesystem charset).
-@param[in] form MySQL Table containing information of
-partitions, columns and indexes etc.
-@param[in] create_info Additional create information, like
-create statement string.
-@return 0 or error number. */
-int
-ha_innopart::create(
- const char* name,
- TABLE* form,
- HA_CREATE_INFO* create_info)
-{
- int error;
- /** {database}/{tablename} */
- char table_name[FN_REFLEN];
- /** absolute path of table */
- char remote_path[FN_REFLEN];
- char partition_name[FN_REFLEN];
- char* table_name_end;
- size_t table_name_len;
- char* partition_name_start;
- char table_data_file_name[FN_REFLEN];
- const char* index_file_name;
- size_t len;
-
- create_table_info_t info(ha_thd(),
- form,
- create_info,
- table_name,
- remote_path);
-
- DBUG_ENTER("ha_innopart::create");
- ut_ad(create_info != NULL);
- ut_ad(m_part_info == form->part_info);
- ut_ad(table_share != NULL);
-
- /* Not allowed to create temporary partitioned tables. */
- if (create_info != NULL
- && (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0) {
- my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
- ut_ad(0); // Can we support partitioned temporary tables?
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
-
- error = info.initialize();
- if (error != 0) {
- DBUG_RETURN(error);
- }
-
- /* Setup and check table level options. */
- error = info.prepare_create_table(name);
- if (error != 0) {
- DBUG_RETURN(error);
- }
- strcpy(partition_name, table_name);
- partition_name_start = partition_name + strlen(partition_name);
- table_name_len = strlen(table_name);
- table_name_end = table_name + table_name_len;
- if (create_info->data_file_name != NULL) {
- /* Strip the tablename from the path. */
- strncpy(table_data_file_name, create_info->data_file_name,
- FN_REFLEN-1);
- table_data_file_name[FN_REFLEN - 1] = '\0';
- char* ptr = strrchr(table_data_file_name, OS_PATH_SEPARATOR);
- ut_ad(ptr != NULL);
- if (ptr != NULL) {
- ptr++;
- *ptr = '\0';
- create_info->data_file_name = table_data_file_name;
- }
- } else {
- table_data_file_name[0] = '\0';
- }
- index_file_name = create_info->index_file_name;
-
- info.allocate_trx();
-
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during a table create operation.
- Drop table etc. do this latching in row0mysql.cc. */
-
- row_mysql_lock_data_dictionary(info.trx());
-
- /* TODO: use the new DD tables instead to decrease duplicate info. */
- List_iterator_fast <partition_element>
- part_it(form->part_info->partitions);
- partition_element* part_elem;
- while ((part_elem = part_it++)) {
- /* Append the partition name to the table name. */
- len = Ha_innopart_share::append_sep_and_name(
- partition_name_start,
- part_elem->partition_name,
- part_sep,
- FN_REFLEN - table_name_len);
- if ((table_name_len + len) >= FN_REFLEN) {
- ut_ad(0);
- goto cleanup;
- }
-
- /* Override table level DATA/INDEX DIRECTORY. */
- set_create_info_dir(part_elem, create_info);
-
- if (!form->part_info->is_sub_partitioned()) {
- error = info.prepare_create_table(partition_name);
- if (error != 0) {
- goto cleanup;
- }
- info.set_remote_path_flags();
- error = info.create_table();
- if (error != 0) {
- goto cleanup;
- }
- } else {
- size_t part_name_len = strlen(partition_name_start)
- + table_name_len;
- char* part_name_end = partition_name + part_name_len;
- List_iterator_fast <partition_element>
- sub_it(part_elem->subpartitions);
- partition_element* sub_elem;
-
- while ((sub_elem = sub_it++)) {
- ut_ad(sub_elem->partition_name != NULL);
-
- /* 'table' will be
- <name>#P#<part_name>#SP#<subpart_name>.
- Append the sub-partition name to
- the partition name. */
-
- len = Ha_innopart_share::append_sep_and_name(
- part_name_end,
- sub_elem->partition_name,
- sub_sep,
- FN_REFLEN - part_name_len);
- if ((len + part_name_len) >= FN_REFLEN) {
- ut_ad(0);
- goto cleanup;
- }
- /* Override part level DATA/INDEX DIRECTORY. */
- set_create_info_dir(sub_elem, create_info);
-
- Ha_innopart_share::partition_name_casedn_str(
- part_name_end + 4);
- error = info.prepare_create_table(partition_name);
- if (error != 0) {
- goto cleanup;
- }
- info.set_remote_path_flags();
- error = info.create_table();
- if (error != 0) {
- goto cleanup;
- }
-
- /* Reset partition level
- DATA/INDEX DIRECTORY. */
-
- create_info->data_file_name =
- table_data_file_name;
- create_info->index_file_name =
- index_file_name;
- set_create_info_dir(part_elem, create_info);
- }
- }
- /* Reset table level DATA/INDEX DIRECTORY. */
- create_info->data_file_name = table_data_file_name;
- create_info->index_file_name = index_file_name;
- }
-
- innobase_commit_low(info.trx());
-
- row_mysql_unlock_data_dictionary(info.trx());
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0. */
-
- log_buffer_flush_to_disk();
-
- part_it.rewind();
- /* No need to use these now, only table_name will be used. */
- create_info->data_file_name = NULL;
- create_info->index_file_name = NULL;
- while ((part_elem = part_it++)) {
- Ha_innopart_share::append_sep_and_name(
- table_name_end,
- part_elem->partition_name,
- part_sep,
- FN_REFLEN - table_name_len);
- if (!form->part_info->is_sub_partitioned()) {
- error = info.create_table_update_dict();
- if (error != 0) {
- ut_ad(0);
- goto end;
- }
- } else {
- size_t part_name_len = strlen(table_name_end);
- char* part_name_end = table_name_end + part_name_len;
- List_iterator_fast <partition_element>
- sub_it(part_elem->subpartitions);
- partition_element* sub_elem;
- while ((sub_elem = sub_it++)) {
- Ha_innopart_share::append_sep_and_name(
- part_name_end,
- sub_elem->partition_name,
- sub_sep,
- FN_REFLEN - table_name_len
- - part_name_len);
- error = info.create_table_update_dict();
- if (error != 0) {
- ut_ad(0);
- goto end;
- }
- }
- }
- }
-
-end:
- /* Tell the InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_free_for_mysql(info.trx());
-
- DBUG_RETURN(error);
-
-cleanup:
- trx_rollback_for_mysql(info.trx());
-
- row_mysql_unlock_data_dictionary(info.trx());
-
- trx_free_for_mysql(info.trx());
-
- DBUG_RETURN(error);
-}
-
-/** Discards or imports an InnoDB tablespace.
-@param[in] discard True if discard, else import.
-@return 0 or error number. */
-int
-ha_innopart::discard_or_import_tablespace(
- my_bool discard)
-{
- int error = 0;
- uint i;
- DBUG_ENTER("ha_innopart::discard_or_import_tablespace");
-
- for (i= m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i= m_part_info->get_next_used_partition(i)) {
-
- m_prebuilt->table = m_part_share->get_table_part(i);
- error= ha_innobase::discard_or_import_tablespace(discard);
- if (error != 0) {
- break;
- }
- }
- m_prebuilt->table = m_part_share->get_table_part(0);
-
- /* IMPORT/DISCARD also means resetting auto_increment. Make sure
- that auto_increment initialization is done after all partitions
- are imported. */
- if (table->found_next_number_field != NULL) {
- lock_auto_increment();
- m_part_share->next_auto_inc_val = 0;
- m_part_share->auto_inc_initialized = false;
- unlock_auto_increment();
- }
-
- DBUG_RETURN(error);
-}
-
-/** Compare key and rowid.
-Helper function for sorting records in the priority queue.
-a/b points to table->record[0] rows which must have the
-key fields set. The bytes before a and b store the rowid.
-This is used for comparing/sorting rows first according to
-KEY and if same KEY, by rowid (ref).
-@param[in] key_info Null terminated array of index information.
-@param[in] a Pointer to record+ref in first record.
-@param[in] b Pointer to record+ref in second record.
-@return Return value is SIGN(first_rec - second_rec)
-@retval 0 Keys are equal.
-@retval -1 second_rec is greater than first_rec.
-@retval +1 first_rec is greater than second_rec. */
-int
-ha_innopart::key_and_rowid_cmp(
- KEY** key_info,
- uchar *a,
- uchar *b)
-{
- int cmp = key_rec_cmp(key_info, a, b);
- if (cmp != 0) {
- return(cmp);
- }
-
- /* We must compare by rowid, which is added before the record,
- in the priority queue. */
-
- return(memcmp(a - DATA_ROW_ID_LEN, b - DATA_ROW_ID_LEN,
- DATA_ROW_ID_LEN));
-}
-
-/** Extra hints from MySQL.
-@param[in] operation Operation hint.
-@return 0 or error number. */
-int
-ha_innopart::extra(
- enum ha_extra_function operation)
-{
- if (operation == HA_EXTRA_SECONDARY_SORT_ROWID) {
- /* index_init(sorted=true) must have been called! */
- ut_ad(m_ordered);
- ut_ad(m_ordered_rec_buffer != NULL);
- /* No index_read call must have been done! */
- ut_ad(m_queue->empty());
-
- /* If not PK is set as secondary sort, do secondary sort by
- rowid/ref. */
-
- ut_ad(m_curr_key_info[1] != NULL
- || m_prebuilt->clust_index_was_generated != 0
- || m_curr_key_info[0]
- == table->key_info + table->s->primary_key);
-
- if (m_curr_key_info[1] == NULL
- && m_prebuilt->clust_index_was_generated) {
- m_ref_usage = Partition_helper::REF_USED_FOR_SORT;
- m_queue->m_fun = key_and_rowid_cmp;
- }
- return(0);
- }
- return(ha_innobase::extra(operation));
-}
-
-/** Delete all rows in a partition.
-@return 0 or error number. */
-int
-ha_innopart::truncate_partition_low()
-{
- return(truncate());
-}
-
-/** Deletes all rows of a partitioned InnoDB table.
-@return 0 or error number. */
-int
-ha_innopart::truncate()
-{
- dberr_t err = DB_SUCCESS;
- int error;
-
- DBUG_ENTER("ha_innopart::truncate");
-
- if (high_level_read_only) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- /* TRUNCATE also means resetting auto_increment. Hence, reset
- it so that it will be initialized again at the next use. */
-
- if (table->found_next_number_field != NULL) {
- lock_auto_increment();
- m_part_share->next_auto_inc_val= 0;
- m_part_share->auto_inc_initialized= false;
- unlock_auto_increment();
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created, and update m_prebuilt->trx. */
-
- update_thd(ha_thd());
-
- if (!trx_is_started(m_prebuilt->trx)) {
- ++m_prebuilt->trx->will_lock;
- }
- /* Truncate the table in InnoDB. */
-
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- set_partition(i);
- err = row_truncate_table_for_mysql(m_prebuilt->table,
- m_prebuilt->trx);
- update_partition(i);
- if (err != DB_SUCCESS) {
- break;
- }
- }
-
- switch (err) {
-
- case DB_TABLESPACE_DELETED:
- case DB_TABLESPACE_NOT_FOUND:
- ib_senderrf(
- m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- (err == DB_TABLESPACE_DELETED ?
- ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
- table->s->table_name.str);
- table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
- break;
-
- default:
- error = convert_error_code_to_mysql(
- err, m_prebuilt->table->flags,
- m_prebuilt->trx->mysql_thd);
- table->status = STATUS_NOT_FOUND;
- break;
- }
- DBUG_RETURN(error);
-}
-
-/** Estimates the number of index records in a range.
-@param[in] keynr Index number.
-@param[in] min_key Start key value (or NULL).
-@param[in] max_key End key value (or NULL).
-@return estimated number of rows. */
-ha_rows
-ha_innopart::records_in_range(
- uint keynr,
- key_range* min_key,
- key_range* max_key)
-{
- KEY* key;
- dict_index_t* index;
- dtuple_t* range_start;
- dtuple_t* range_end;
- int64_t n_rows = 0;
- page_cur_mode_t mode1;
- page_cur_mode_t mode2;
- mem_heap_t* heap;
- uint part_id;
-
- DBUG_ENTER("ha_innopart::records_in_range");
- DBUG_PRINT("info", ("keynr %u min %p max %p", keynr, min_key, max_key));
-
- ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
-
- m_prebuilt->trx->op_info = (char*)"estimating records in index range";
-
- active_index = keynr;
-
- key = table->key_info + active_index;
-
- part_id = m_part_info->get_first_used_partition();
- if (part_id == MY_BIT_NONE) {
- DBUG_RETURN(0);
- }
- /* This also sets m_prebuilt->index! */
- set_partition(part_id);
- index = m_prebuilt->index;
-
- /* Only validate the first partition, to avoid too much overhead. */
-
- /* There exists possibility of not being able to find requested
- index due to inconsistency between MySQL and InoDB dictionary info.
- Necessary message should have been printed in innopart_get_index(). */
- if (index == NULL
- || dict_table_is_discarded(m_prebuilt->table)
- || dict_index_is_corrupted(index)
- || !row_merge_is_index_usable(m_prebuilt->trx, index)) {
-
- n_rows = HA_POS_ERROR;
- goto func_exit;
- }
-
- heap = mem_heap_create(2 * (key->actual_key_parts * sizeof(dfield_t)
- + sizeof(dtuple_t)));
-
- range_start = dtuple_create(heap, key->actual_key_parts);
- dict_index_copy_types(range_start, index, key->actual_key_parts);
-
- range_end = dtuple_create(heap, key->actual_key_parts);
- dict_index_copy_types(range_end, index, key->actual_key_parts);
-
- row_sel_convert_mysql_key_to_innobase(
- range_start,
- m_prebuilt->srch_key_val1,
- m_prebuilt->srch_key_val_len,
- index,
- (byte*) (min_key ? min_key->key : (const uchar*) 0),
- (ulint) (min_key ? min_key->length : 0),
- m_prebuilt->trx);
-
- ut_ad(min_key != NULL
- ? range_start->n_fields > 0
- : range_start->n_fields == 0);
-
- row_sel_convert_mysql_key_to_innobase(
- range_end,
- m_prebuilt->srch_key_val2,
- m_prebuilt->srch_key_val_len,
- index,
- (byte*) (max_key != NULL ? max_key->key : (const uchar*) 0),
- (ulint) (max_key != NULL ? max_key->length : 0),
- m_prebuilt->trx);
-
- ut_ad(max_key != NULL
- ? range_end->n_fields > 0
- : range_end->n_fields == 0);
-
- mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
- HA_READ_KEY_EXACT);
- mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
- HA_READ_KEY_EXACT);
-
- if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
-
- n_rows = btr_estimate_n_rows_in_range(index, range_start,
- mode1, range_end,
- mode2);
- DBUG_PRINT("info", ("part_id %u rows %ld", part_id,
- (long int) n_rows));
- for (part_id = m_part_info->get_next_used_partition(part_id);
- part_id < m_tot_parts;
- part_id = m_part_info->get_next_used_partition(part_id)) {
-
- index = m_part_share->get_index(part_id, keynr);
- int64_t n = btr_estimate_n_rows_in_range(index,
- range_start,
- mode1,
- range_end,
- mode2);
- n_rows += n;
- DBUG_PRINT("info", ("part_id %u rows %ld (%ld)",
- part_id,
- (long int) n,
- (long int) n_rows));
- }
- } else {
-
- n_rows = HA_POS_ERROR;
- }
-
- mem_heap_free(heap);
-
-func_exit:
-
- m_prebuilt->trx->op_info = (char*)"";
-
- /* The MySQL optimizer seems to believe an estimate of 0 rows is
- always accurate and may return the result 'Empty set' based on that.
- The accuracy is not guaranteed, and even if it were, for a locking
- read we should anyway perform the search to set the next-key lock.
- Add 1 to the value to make sure MySQL does not make the assumption! */
-
- if (n_rows == 0) {
- n_rows = 1;
- }
-
- DBUG_RETURN((ha_rows) n_rows);
-}
-
-/** Gives an UPPER BOUND to the number of rows in a table.
-This is used in filesort.cc.
-@return upper bound of rows. */
-ha_rows
-ha_innopart::estimate_rows_upper_bound()
-{
- const dict_index_t* index;
- ulonglong estimate = 0;
- ulonglong local_data_file_length;
- ulint stat_n_leaf_pages;
-
- DBUG_ENTER("ha_innopart::estimate_rows_upper_bound");
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- m_prebuilt->trx->op_info = "calculating upper bound for table rows";
-
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- m_prebuilt->table = m_part_share->get_table_part(i);
- index = dict_table_get_first_index(m_prebuilt->table);
-
- stat_n_leaf_pages = index->stat_n_leaf_pages;
-
- ut_a(stat_n_leaf_pages > 0);
-
- local_data_file_length =
- ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;
-
- /* Calculate a minimum length for a clustered index record
- and from that an upper bound for the number of rows.
- Since we only calculate new statistics in row0mysql.cc when a
- table has grown by a threshold factor,
- we must add a safety factor 2 in front of the formula below. */
-
- estimate += 2 * local_data_file_length
- / dict_index_calc_min_rec_len(index);
- }
-
- m_prebuilt->trx->op_info = "";
-
- DBUG_RETURN((ha_rows) estimate);
-}
-
-/** Time estimate for full table scan.
-How many seeks it will take to read through the table. This is to be
-comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys.
-@return estimated time measured in disk seeks. */
-double
-ha_innopart::scan_time()
-{
- double scan_time = 0.0;
- DBUG_ENTER("ha_innopart::scan_time");
-
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
- m_prebuilt->table = m_part_share->get_table_part(i);
- scan_time += ha_innobase::scan_time();
- }
- DBUG_RETURN(scan_time);
-}
-
-/** Updates the statistics for one partition (table).
-@param[in] table Table to update the statistics for.
-@param[in] is_analyze True if called from ::analyze().
-@return error code. */
-static
-int
-update_table_stats(
- dict_table_t* table,
- bool is_analyze)
-{
- dict_stats_upd_option_t opt;
- dberr_t ret;
-
- if (dict_stats_is_persistent_enabled(table)) {
- if (is_analyze) {
- opt = DICT_STATS_RECALC_PERSISTENT;
- } else {
- /* This is e.g. 'SHOW INDEXES',
- fetch the persistent stats from disk. */
- opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
- }
- } else {
- opt = DICT_STATS_RECALC_TRANSIENT;
- }
-
- ut_ad(!mutex_own(&dict_sys->mutex));
- ret = dict_stats_update(table, opt);
-
- if (ret != DB_SUCCESS) {
- return(HA_ERR_GENERIC);
- }
- return(0);
-}
-
-/** Updates and return statistics.
-Returns statistics information of the table to the MySQL interpreter,
-in various fields of the handle object.
-@param[in] flag Flags for what to update and return.
-@param[in] is_analyze True if called from ::analyze().
-@return HA_ERR_* error code or 0. */
-int
-ha_innopart::info_low(
- uint flag,
- bool is_analyze)
-{
- dict_table_t* ib_table;
- ib_uint64_t max_rows = 0;
- uint biggest_partition = 0;
- int error = 0;
-
- DBUG_ENTER("ha_innopart::info_low");
-
- /* If we are forcing recovery at a high level, we will suppress
- statistics calculation on tables, because that may crash the
- server if an index is badly corrupted. */
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- m_prebuilt->trx->op_info = "returning various info to MySQL";
-
- ut_ad(m_part_share->get_table_part(0)->n_ref_count > 0);
-
- if ((flag & HA_STATUS_TIME) != 0) {
- stats.update_time = 0;
-
- if (is_analyze) {
- /* Only analyze the given partitions. */
- int error = set_altered_partitions();
- if (error != 0) {
- /* Already checked in mysql_admin_table! */
- ut_ad(0);
- DBUG_RETURN(error);
- }
- }
- if (is_analyze || innobase_stats_on_metadata) {
- m_prebuilt->trx->op_info = "updating table statistics";
- }
-
- /* TODO: Only analyze the PK for all partitions,
- then the secondary indexes only for the largest partition! */
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- ib_table = m_part_share->get_table_part(i);
- if (is_analyze || innobase_stats_on_metadata) {
- error = update_table_stats(ib_table, is_analyze);
- if (error != 0) {
- m_prebuilt->trx->op_info = "";
- DBUG_RETURN(error);
- }
- }
- set_if_bigger(stats.update_time,
- (ulong) ib_table->update_time);
- }
-
- if (is_analyze || innobase_stats_on_metadata) {
- m_prebuilt->trx->op_info =
- "returning various info to MySQL";
- }
- }
-
- if ((flag & HA_STATUS_VARIABLE) != 0) {
-
- /* TODO: If this is called after pruning, then we could
- also update the statistics according to the non-pruned
- partitions, by allocating new rec_per_key on the TABLE,
- instead of using the info from the TABLE_SHARE. */
- ulint stat_clustered_index_size = 0;
- ulint stat_sum_of_other_index_sizes = 0;
- ib_uint64_t n_rows = 0;
- ulint avail_space = 0;
- bool checked_sys_tablespace = false;
-
- if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0) {
- stats.delete_length = 0;
- }
-
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- ib_table = m_part_share->get_table_part(i);
- if ((flag & HA_STATUS_NO_LOCK) == 0) {
- dict_table_stats_lock(ib_table, RW_S_LATCH);
- }
-
- ut_a(ib_table->stat_initialized);
-
- n_rows += ib_table->stat_n_rows;
- if (ib_table->stat_n_rows > max_rows) {
- max_rows = ib_table->stat_n_rows;
- biggest_partition = i;
- }
-
- stat_clustered_index_size +=
- ib_table->stat_clustered_index_size;
-
- stat_sum_of_other_index_sizes +=
- ib_table->stat_sum_of_other_index_sizes;
-
- if ((flag & HA_STATUS_NO_LOCK) == 0) {
- dict_table_stats_unlock(ib_table, RW_S_LATCH);
- }
-
- if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0
- && (flag & HA_STATUS_NO_LOCK) == 0
- && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
- && avail_space != ULINT_UNDEFINED) {
-
- /* Only count system tablespace once! */
- if (is_system_tablespace(ib_table->space)) {
- if (checked_sys_tablespace) {
- continue;
- }
- checked_sys_tablespace = true;
- }
-
- uintmax_t space =
- fsp_get_available_space_in_free_extents(
- ib_table->space);
- if (space == UINTMAX_MAX) {
- THD* thd = ha_thd();
- const char* table_name
- = ib_table->name.m_name;
-
- push_warning_printf(
- thd,
- Sql_condition::SL_WARNING,
- ER_CANT_GET_STAT,
- "InnoDB: Trying to get the"
- " free space for partition %s"
- " but its tablespace has been"
- " discarded or the .ibd file"
- " is missing. Setting the free"
- " space of the partition to"
- " zero.",
- ut_get_name(
- m_prebuilt->trx,
- table_name).c_str());
- } else {
- avail_space +=
- static_cast<ulint>(space);
- }
- }
- }
-
- /*
- The MySQL optimizer seems to assume in a left join that n_rows
- is an accurate estimate if it is zero. Of course, it is not,
- since we do not have any locks on the rows yet at this phase.
- Since SHOW TABLE STATUS seems to call this function with the
- HA_STATUS_TIME flag set, while the left join optimizer does not
- set that flag, we add one to a zero value if the flag is not
- set. That way SHOW TABLE STATUS will show the best estimate,
- while the optimizer never sees the table empty. */
-
- if (n_rows == 0 && (flag & HA_STATUS_TIME) == 0) {
- n_rows++;
- }
-
- /* Fix bug#40386: Not flushing query cache after truncate.
- n_rows can not be 0 unless the table is empty, set to 1
- instead. The original problem of bug#29507 is actually
- fixed in the server code. */
- if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {
-
- n_rows = 1;
-
- /* We need to reset the m_prebuilt value too, otherwise
- checks for values greater than the last value written
- to the table will fail and the autoinc counter will
- not be updated. This will force write_row() into
- attempting an update of the table's AUTOINC counter. */
-
- m_prebuilt->autoinc_last_value = 0;
- }
-
- /* Take page_size from first partition. */
- ib_table = m_part_share->get_table_part(0);
- const page_size_t& page_size =
- dict_table_page_size(ib_table);
-
- stats.records = (ha_rows) n_rows;
- stats.deleted = 0;
- stats.data_file_length =
- ((ulonglong) stat_clustered_index_size)
- * page_size.physical();
- stats.index_file_length =
- ((ulonglong) stat_sum_of_other_index_sizes)
- * page_size.physical();
-
- /* See ha_innobase::info_low() for comments! */
- if ((flag & HA_STATUS_NO_LOCK) == 0
- && (flag & HA_STATUS_VARIABLE_EXTRA) != 0
- && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- stats.delete_length = avail_space * 1024;
- }
-
- stats.check_time = 0;
- stats.mrr_length_per_rec = ref_length + sizeof(void*)
- - PARTITION_BYTES_IN_POS;
-
- if (stats.records == 0) {
- stats.mean_rec_length = 0;
- } else {
- stats.mean_rec_length = (ulong)
- (stats.data_file_length / stats.records);
- }
- }
-
- if ((flag & HA_STATUS_CONST) != 0) {
- /* Find max rows and biggest partition. */
- for (uint i = 0; i < m_tot_parts; i++) {
- /* Skip partitions from above. */
- if ((flag & HA_STATUS_VARIABLE) == 0
- || !bitmap_is_set(&(m_part_info->read_partitions),
- i)) {
-
- ib_table = m_part_share->get_table_part(i);
- if (ib_table->stat_n_rows > max_rows) {
- max_rows = ib_table->stat_n_rows;
- biggest_partition = i;
- }
- }
- }
- ib_table = m_part_share->get_table_part(biggest_partition);
- /* Verify the number of index in InnoDB and MySQL
- matches up. If m_prebuilt->clust_index_was_generated
- holds, InnoDB defines GEN_CLUST_INDEX internally. */
- ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
- - m_prebuilt->clust_index_was_generated;
- if (table->s->keys < num_innodb_index) {
- /* If there are too many indexes defined
- inside InnoDB, ignore those that are being
- created, because MySQL will only consider
- the fully built indexes here. */
-
- for (const dict_index_t* index =
- UT_LIST_GET_FIRST(ib_table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- /* First, online index creation is
- completed inside InnoDB, and then
- MySQL attempts to upgrade the
- meta-data lock so that it can rebuild
- the .frm file. If we get here in that
- time frame, dict_index_is_online_ddl()
- would not hold and the index would
- still not be included in TABLE_SHARE. */
- if (!index->is_committed()) {
- num_innodb_index--;
- }
- }
-
- if (table->s->keys < num_innodb_index
- && (innobase_fts_check_doc_id_index(ib_table,
- NULL, NULL)
- == FTS_EXIST_DOC_ID_INDEX)) {
- num_innodb_index--;
- }
- }
-
- if (table->s->keys != num_innodb_index) {
- ib::error() << "Table "
- << ib_table->name << " contains "
- << num_innodb_index
- << " indexes inside InnoDB, which"
- " is different from the number of"
- " indexes " << table->s->keys
- << " defined in the MySQL";
- }
-
- if ((flag & HA_STATUS_NO_LOCK) == 0) {
- dict_table_stats_lock(ib_table, RW_S_LATCH);
- }
-
- ut_a(ib_table->stat_initialized);
-
- for (ulong i = 0; i < table->s->keys; i++) {
- ulong j;
- /* We could get index quickly through internal
- index mapping with the index translation table.
- The identity of index (match up index name with
- that of table->key_info[i]) is already verified in
- innopart_get_index(). */
- dict_index_t* index = innopart_get_index(
- biggest_partition, i);
-
- if (index == NULL) {
- ib::error() << "Table "
- << ib_table->name << " contains fewer"
- " indexes inside InnoDB than"
- " are defined in the MySQL"
- " .frm file. Have you mixed up"
- " .frm files from different"
- " installations? "
- << TROUBLESHOOTING_MSG;
- break;
- }
-
- KEY* key = &table->key_info[i];
- for (j = 0;
- j < key->actual_key_parts;
- j++) {
-
- if ((key->flags & HA_FULLTEXT) != 0) {
- /* The whole concept has no validity
- for FTS indexes. */
- key->rec_per_key[j] = 1;
- continue;
- }
-
- if ((j + 1) > index->n_uniq) {
- ib::error() << "Index " << index->name
- << " of " << ib_table->name
- << " has " << index->n_uniq
- << " columns unique inside"
- " InnoDB, but MySQL is"
- " asking statistics for "
- << j + 1 << " columns. Have"
- " you mixed up .frm files"
- " from different"
- " installations? "
- << TROUBLESHOOTING_MSG;
- break;
- }
-
- /* innodb_rec_per_key() will use
- index->stat_n_diff_key_vals[] and the value we
- pass index->table->stat_n_rows. Both are
- calculated by ANALYZE and by the background
- stats gathering thread (which kicks in when too
- much of the table has been changed). In
- addition table->stat_n_rows is adjusted with
- each DML (e.g. ++ on row insert). Those
- adjustments are not MVCC'ed and not even
- reversed on rollback. So,
- index->stat_n_diff_key_vals[] and
- index->table->stat_n_rows could have been
- calculated at different time. This is
- acceptable. */
- const rec_per_key_t rec_per_key =
- innodb_rec_per_key(
- index, j,
- max_rows);
-
- key->set_records_per_key(j, rec_per_key);
-
- /* The code below is legacy and should be
- removed together with this comment once we
- are sure the new floating point rec_per_key,
- set via set_records_per_key(), works fine. */
-
- ulong rec_per_key_int = static_cast<ulong>(
- innodb_rec_per_key(index, j,
- max_rows));
-
- /* Since MySQL seems to favor table scans
- too much over index searches, we pretend
- index selectivity is 2 times better than
- our estimate: */
-
- rec_per_key_int = rec_per_key_int / 2;
-
- if (rec_per_key_int == 0) {
- rec_per_key_int = 1;
- }
-
- key->rec_per_key[j] = rec_per_key_int;
- }
- }
-
- if ((flag & HA_STATUS_NO_LOCK) == 0) {
- dict_table_stats_unlock(ib_table, RW_S_LATCH);
- }
-
- char path[FN_REFLEN];
- os_file_stat_t stat_info;
- /* Use the first partition for create time until new DD. */
- ib_table = m_part_share->get_table_part(0);
- my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home,
- table->s->normalized_path.str,
- reg_ext);
-
- unpack_filename(path,path);
-
- if (os_file_get_status(path, &stat_info, false, true) == DB_SUCCESS) {
- stats.create_time = (ulong) stat_info.ctime;
- }
- }
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
- goto func_exit;
- }
-
- if ((flag & HA_STATUS_ERRKEY) != 0) {
- const dict_index_t* err_index;
-
- ut_a(m_prebuilt->trx);
- ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
-
- err_index = trx_get_error_info(m_prebuilt->trx);
-
- if (err_index != NULL) {
- errkey = m_part_share->get_mysql_key(m_last_part,
- err_index);
- } else {
- errkey = (unsigned int) (
- (m_prebuilt->trx->error_key_num
- == ULINT_UNDEFINED)
- ? UINT_MAX
- : m_prebuilt->trx->error_key_num);
- }
- }
-
- if ((flag & HA_STATUS_AUTO) != 0) {
- /* auto_inc is only supported in first key for InnoDB! */
- ut_ad(table_share->next_number_keypart == 0);
- DBUG_PRINT("info", ("HA_STATUS_AUTO"));
- if (table->found_next_number_field == NULL) {
- stats.auto_increment_value = 0;
- } else {
- /* Lock to avoid two concurrent initializations. */
- lock_auto_increment();
- if (m_part_share->auto_inc_initialized) {
- stats.auto_increment_value =
- m_part_share->next_auto_inc_val;
- } else {
- /* The auto-inc mutex in the table_share is
- locked, so we do not need to have the handlers
- locked. */
-
- error = initialize_auto_increment(
- (flag & HA_STATUS_NO_LOCK) != 0);
- stats.auto_increment_value =
- m_part_share->next_auto_inc_val;
- }
- unlock_auto_increment();
- }
- }
-
-func_exit:
- m_prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(error);
-}
-
-/** Optimize table.
-This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
-the table in MySQL.
-@param[in] thd Connection thread handle.
-@param[in] check_opt Currently ignored.
-@return 0 for success else error code. */
-int
-ha_innopart::optimize(
- THD* thd,
- HA_CHECK_OPT* check_opt)
-{
- return(HA_ADMIN_TRY_ALTER);
-}
-
-/** Checks a partitioned table.
-Tries to check that an InnoDB table is not corrupted. If corruption is
-noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server. Also checks for records
-in wrong partition.
-@param[in] thd MySQL THD object/thread handle.
-@param[in] check_opt Check options.
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK. */
-int
-ha_innopart::check(
- THD* thd,
- HA_CHECK_OPT* check_opt)
-{
- uint error = HA_ADMIN_OK;
- uint i;
-
- DBUG_ENTER("ha_innopart::check");
- /* TODO: Enhance this to:
- - Every partition has the same structure.
- - The names are correct (partition names checked in ::open()?)
- Currently it only does normal InnoDB check of each partition. */
-
- if (set_altered_partitions()) {
- ut_ad(0); // Already checked by set_part_state()!
- DBUG_RETURN(HA_ADMIN_INVALID);
- }
- for (i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- m_prebuilt->table = m_part_share->get_table_part(i);
- error = ha_innobase::check(thd, check_opt);
- if (error != 0) {
- break;
- }
- if ((check_opt->flags & (T_MEDIUM | T_EXTEND)) != 0) {
- error = Partition_helper::check_misplaced_rows(i, false);
- if (error != 0) {
- break;
- }
- }
- }
- if (error != 0) {
- print_admin_msg(
- thd,
- 256,
- "error",
- table_share->db.str,
- table->alias,
- "check",
- m_is_sub_partitioned ?
- "Subpartition %s returned error"
- : "Partition %s returned error",
- m_part_share->get_partition_name(i));
- }
-
- DBUG_RETURN(error);
-}
-
-/** Repair a partitioned table.
-Only repairs records in wrong partitions (moves them to the correct
-partition or deletes them if not in any partition).
-@param[in] thd MySQL THD object/thread handle.
-@param[in] repair_opt Repair options.
-@return 0 or error code. */
-int
-ha_innopart::repair(
- THD* thd,
- HA_CHECK_OPT* repair_opt)
-{
- uint error = HA_ADMIN_OK;
-
- DBUG_ENTER("ha_innopart::repair");
-
- /* TODO: enable this warning to be clear about what is repaired.
- Currently disabled to generate smaller test diffs. */
-#ifdef ADD_WARNING_FOR_REPAIR_ONLY_PARTITION
- push_warning_printf(thd, Sql_condition::SL_WARNING,
- ER_ILLEGAL_HA,
- "Only moving rows from wrong partition to correct"
- " partition is supported,"
- " repairing InnoDB indexes is not yet supported!");
-#endif
-
- /* Only repair partitions for MEDIUM or EXTENDED options. */
- if ((repair_opt->flags & (T_MEDIUM | T_EXTEND)) == 0) {
- DBUG_RETURN(HA_ADMIN_OK);
- }
- if (set_altered_partitions()) {
- ut_ad(0); // Already checked by set_part_state()!
- DBUG_RETURN(HA_ADMIN_INVALID);
- }
- for (uint i = m_part_info->get_first_used_partition();
- i < m_tot_parts;
- i = m_part_info->get_next_used_partition(i)) {
-
- /* TODO: Implement and use ha_innobase::repair()! */
- error = Partition_helper::check_misplaced_rows(i, true);
- if (error != 0) {
- print_admin_msg(
- thd,
- 256,
- "error",
- table_share->db.str,
- table->alias,
- "repair",
- m_is_sub_partitioned ?
- "Subpartition %s returned error"
- : "Partition %s returned error",
- m_part_share->get_partition_name(i));
- break;
- }
- }
-
- DBUG_RETURN(error);
-}
-
-/** Check if possible to switch engine (no foreign keys).
-Checks if ALTER TABLE may change the storage engine of the table.
-Changing storage engines is not allowed for tables for which there
-are foreign key constraints (parent or child tables).
-@return true if can switch engines. */
-bool
-ha_innopart::can_switch_engines()
-{
- bool can_switch;
-
- DBUG_ENTER("ha_innopart::can_switch_engines");
- can_switch = ha_innobase::can_switch_engines();
- ut_ad(can_switch);
-
- DBUG_RETURN(can_switch);
-}
-
-/** Checks if a table is referenced by a foreign key.
-The MySQL manual states that a REPLACE is either equivalent to an INSERT,
-or DELETE(s) + INSERT. Only a delete is then allowed internally to resolve
-a duplicate key conflict in REPLACE, not an update.
-@return > 0 if referenced by a FOREIGN KEY. */
-uint
-ha_innopart::referenced_by_foreign_key()
-{
- if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
-
-#ifndef HA_INNOPART_SUPPORTS_FOREIGN_KEYS
- ut_ad(0);
-#endif /* HA_INNOPART_SUPPORTS_FOREIGN_KEYS */
- return(1);
- }
-
- return(0);
-}
-
-/** Start statement.
-MySQL calls this function at the start of each SQL statement inside LOCK
-TABLES. Inside LOCK TABLES the ::external_lock method does not work to
-mark SQL statement borders. Note also a special case: if a temporary table
-is created inside LOCK TABLES, MySQL has not called external_lock() at all
-on that table.
-MySQL-5.0 also calls this before each statement in an execution of a stored
-procedure. To make the execution more deterministic for binlogging, MySQL-5.0
-locks all tables involved in a stored procedure with full explicit table
-locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure.
-@param[in] thd Handle to the user thread.
-@param[in] lock_type Lock type.
-@return 0 or error code. */
-int
-ha_innopart::start_stmt(
- THD* thd,
- thr_lock_type lock_type)
-{
- int error = 0;
-
- if (m_part_info->get_first_used_partition() == MY_BIT_NONE) {
- /* All partitions pruned away, do nothing! */
- return(error);
- }
-
- error = ha_innobase::start_stmt(thd, lock_type);
- if (m_prebuilt->sql_stat_start) {
- memset(m_sql_stat_start_parts, 0xff,
- UT_BITS_IN_BYTES(m_tot_parts));
- } else {
- memset(m_sql_stat_start_parts, 0,
- UT_BITS_IN_BYTES(m_tot_parts));
- }
- return(error);
-}
-
-/** Function to store lock for all partitions in native partitioned table. Also
-look at ha_innobase::store_lock for more details.
-@param[in] thd user thread handle
-@param[in] to pointer to the current element in an array of
-pointers to lock structs
-@param[in] lock_type lock type to store in 'lock'; this may also be
-TL_IGNORE
-@retval to pointer to the current element in the 'to' array */
-THR_LOCK_DATA**
-ha_innopart::store_lock(
- THD* thd,
- THR_LOCK_DATA** to,
- thr_lock_type lock_type)
-{
- trx_t* trx = m_prebuilt->trx;
- const uint sql_command = thd_sql_command(thd);
-
- ha_innobase::store_lock(thd, to, lock_type);
-
- if (sql_command == SQLCOM_FLUSH
- && lock_type == TL_READ_NO_INSERT) {
- for (uint i = 1; i < m_tot_parts; i++) {
- dict_table_t* table = m_part_share->get_table_part(i);
-
- dberr_t err = row_quiesce_set_state(
- table, QUIESCE_START, trx);
- ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
- }
- }
-
- return to;
-}
-
-/** Lock/prepare to lock table.
-As MySQL will execute an external lock for every new table it uses when it
-starts to process an SQL statement (an exception is when MySQL calls
-start_stmt for the handle) we can use this function to store the pointer to
-the THD in the handle. We will also use this function to communicate
-to InnoDB that a new SQL statement has started and that we must store a
-savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error.
-@param[in] thd Handle to the user thread.
-@param[in] lock_type Lock type.
-@return 0 or error number. */
-int
-ha_innopart::external_lock(
- THD* thd,
- int lock_type)
-{
- int error = 0;
-
- if (m_part_info->get_first_used_partition() == MY_BIT_NONE
- && !(m_mysql_has_locked
- && lock_type == F_UNLCK)) {
-
- /* All partitions pruned away, do nothing! */
- ut_ad(!m_mysql_has_locked);
- return(error);
- }
- ut_ad(m_mysql_has_locked || lock_type != F_UNLCK);
-
- m_prebuilt->table = m_part_share->get_table_part(0);
- error = ha_innobase::external_lock(thd, lock_type);
-
- for (uint i = 0; i < m_tot_parts; i++) {
- dict_table_t* table = m_part_share->get_table_part(i);
-
- switch (table->quiesce) {
- case QUIESCE_START:
- /* Check for FLUSH TABLE t WITH READ LOCK */
- if (!srv_read_only_mode
- && thd_sql_command(thd) == SQLCOM_FLUSH
- && lock_type == F_RDLCK) {
-
- ut_ad(table->quiesce == QUIESCE_START);
-
- row_quiesce_table_start(table,
- m_prebuilt->trx);
-
- /* Use the transaction instance to track
- UNLOCK TABLES. It can be done via START
- TRANSACTION; too implicitly. */
-
- ++m_prebuilt->trx->flush_tables;
- }
- break;
-
- case QUIESCE_COMPLETE:
- /* Check for UNLOCK TABLES; implicit or explicit
- or trx interruption. */
- if (m_prebuilt->trx->flush_tables > 0
- && (lock_type == F_UNLCK
- || trx_is_interrupted(m_prebuilt->trx))) {
-
- ut_ad(table->quiesce == QUIESCE_COMPLETE);
- row_quiesce_table_complete(table,
- m_prebuilt->trx);
-
- ut_a(m_prebuilt->trx->flush_tables > 0);
- --m_prebuilt->trx->flush_tables;
- }
- break;
-
- case QUIESCE_NONE:
- break;
-
- default:
- ut_ad(0);
- }
- }
-
- ut_ad(!m_auto_increment_lock);
- ut_ad(!m_auto_increment_safe_stmt_log_lock);
-
- if (m_prebuilt->sql_stat_start) {
- memset(m_sql_stat_start_parts, 0xff,
- UT_BITS_IN_BYTES(m_tot_parts));
- } else {
- memset(m_sql_stat_start_parts, 0,
- UT_BITS_IN_BYTES(m_tot_parts));
- }
- return(error);
-}
-
-/** Get the current auto_increment value.
-@param[in] offset Table auto-inc offset.
-@param[in] increment Table auto-inc increment.
-@param[in] nb_desired_values Number of required values.
-@param[out] first_value The auto increment value.
-@param[out] nb_reserved_values Number of reserved values.
-@return Auto increment value, or ~0 on failure. */
-void
-ha_innopart::get_auto_increment(
- ulonglong offset,
- ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong* first_value,
- ulonglong* nb_reserved_values)
-{
- DBUG_ENTER("ha_innopart::get_auto_increment");
- if (table_share->next_number_keypart != 0) {
- /* Only first key part allowed as autoinc for InnoDB tables! */
- ut_ad(0);
- *first_value = ULLONG_MAX;
- DBUG_VOID_RETURN;
- }
- get_auto_increment_first_field(
- increment,
- nb_desired_values,
- first_value,
- nb_reserved_values);
- DBUG_VOID_RETURN;
-}
-
-/** Compares two 'refs'.
-A 'ref' is the (internal) primary key value of the row.
-If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key.
-It will use the partition id as secondary compare.
-@param[in] ref1 An (internal) primary key value in the MySQL key value
-format.
-@param[in] ref2 Reference to compare with (same type as ref1).
-@return < 0 if ref1 < ref2, 0 if equal, else > 0. */
-int
-ha_innopart::cmp_ref(
- const uchar* ref1,
- const uchar* ref2)
-{
- int cmp;
-
- cmp = ha_innobase::cmp_ref(ref1 + PARTITION_BYTES_IN_POS,
- ref2 + PARTITION_BYTES_IN_POS);
-
- if (cmp != 0) {
- return(cmp);
- }
-
- cmp = static_cast<int>(uint2korr(ref1))
- - static_cast<int>(uint2korr(ref2));
-
- return(cmp);
-}
-
-/** Prepare for creating new partitions during ALTER TABLE ... PARTITION.
-@param[in] num_partitions Number of new partitions to be created.
-@param[in] only_create True if only creating the partition
-(no open/lock is needed).
-@return 0 for success else error code. */
-int
-ha_innopart::prepare_for_new_partitions(
- uint num_partitions,
- bool only_create)
-{
- m_new_partitions = UT_NEW(Altered_partitions(num_partitions,
- only_create),
- mem_key_partitioning);
- if (m_new_partitions == NULL) {
- return(HA_ERR_OUT_OF_MEM);
- }
- if (m_new_partitions->initialize()) {
- UT_DELETE(m_new_partitions);
- m_new_partitions = NULL;
- return(HA_ERR_OUT_OF_MEM);
- }
- return(0);
-}
-
-/** Create a new partition to be filled during ALTER TABLE ... PARTITION.
-@param[in] table Table to create the partition in.
-@param[in] create_info Table/partition specific create info.
-@param[in] part_name Partition name.
-@param[in] new_part_id Partition id in new table.
-@param[in] part_elem Partition element.
-@return 0 for success else error code. */
-int
-ha_innopart::create_new_partition(
- TABLE* table,
- HA_CREATE_INFO* create_info,
- const char* part_name,
- uint new_part_id,
- partition_element* part_elem)
-{
- int error;
- char norm_name[FN_REFLEN];
- const char* data_file_name_backup = create_info->data_file_name;
- DBUG_ENTER("ha_innopart::create_new_partition");
- /* Delete by ddl_log on failure. */
- normalize_table_name(norm_name, part_name);
- set_create_info_dir(part_elem, create_info);
-
- error = ha_innobase::create(norm_name, table, create_info);
- create_info->data_file_name = data_file_name_backup;
- if (error == HA_ERR_FOUND_DUPP_KEY) {
- DBUG_RETURN(HA_ERR_TABLE_EXIST);
- }
- if (error != 0) {
- DBUG_RETURN(error);
- }
- if (!m_new_partitions->only_create())
- {
- dict_table_t* part;
- part = dict_table_open_on_name(norm_name,
- false,
- true,
- DICT_ERR_IGNORE_NONE);
- if (part == NULL) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
- m_new_partitions->set_part(new_part_id, part);
- }
- DBUG_RETURN(0);
-}
-
-/** Close and finalize new partitions. */
-void
-ha_innopart::close_new_partitions()
-{
- if (m_new_partitions != NULL) {
- UT_DELETE(m_new_partitions);
- m_new_partitions = NULL;
- }
-}
-
-/** write row to new partition.
-@param[in] new_part New partition to write to.
-@return 0 for success else error code. */
-int
-ha_innopart::write_row_in_new_part(
- uint new_part)
-{
- int result;
- DBUG_ENTER("ha_innopart::write_row_in_new_part");
-
- m_last_part = new_part;
- if (m_new_partitions->part(new_part) == NULL) {
- /* Altered partition contains misplaced row. */
- m_err_rec = table->record[0];
- DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
- }
- m_new_partitions->get_prebuilt(m_prebuilt, new_part);
- result = ha_innobase::write_row(table->record[0]);
- m_new_partitions->set_from_prebuilt(m_prebuilt, new_part);
- DBUG_RETURN(result);
-}
-
-/** Allocate the array to hold blob heaps for all partitions */
-mem_heap_t**
-ha_innopart::alloc_blob_heap_array()
-{
- DBUG_ENTER("ha_innopart::alloc_blob_heap_array");
-
- const ulint len = sizeof(mem_heap_t*) * m_tot_parts;
- m_blob_heap_parts = static_cast<mem_heap_t**>(
- ut_zalloc(len, mem_key_partitioning));
- if (m_blob_heap_parts == NULL) {
- DBUG_RETURN(NULL);
- }
-
- DBUG_RETURN(m_blob_heap_parts);
-}
-
-/** Free the array that holds blob heaps for all partitions */
-void
-ha_innopart::free_blob_heap_array()
-{
- DBUG_ENTER("ha_innopart::free_blob_heap_array");
-
- if (m_blob_heap_parts != NULL) {
- clear_blob_heaps();
- ut_free(m_blob_heap_parts);
- m_blob_heap_parts = NULL;
- }
-
- DBUG_VOID_RETURN;
-}
-
-void
-ha_innopart::clear_blob_heaps()
-{
- DBUG_ENTER("ha_innopart::clear_blob_heaps");
-
- if (m_blob_heap_parts == NULL) {
- DBUG_VOID_RETURN;
- }
-
- for (uint i = 0; i < m_tot_parts; i++) {
- if (m_blob_heap_parts[i] != NULL) {
- DBUG_PRINT("ha_innopart", ("freeing blob_heap: %p",
- m_blob_heap_parts[i]));
- mem_heap_free(m_blob_heap_parts[i]);
- m_blob_heap_parts[i] = NULL;
- }
- }
-
- /* Reset blob_heap in m_prebuilt after freeing all heaps. It is set in
- ha_innopart::set_partition to the blob heap of current partition. */
- m_prebuilt->blob_heap = NULL;
-
- DBUG_VOID_RETURN;
-}
-
-/** Reset state of file to after 'open'. This function is called
-after every statement for all tables used by that statement. */
-int
-ha_innopart::reset()
-{
- DBUG_ENTER("ha_innopart::reset");
-
- clear_blob_heaps();
-
- DBUG_RETURN(ha_innobase::reset());
-}
-
-/****************************************************************************
- * DS-MRR implementation
- ***************************************************************************/
-
-/* TODO: move the default implementations into the base handler class! */
-/* TODO: See if it could be optimized for partitioned tables? */
-/* Use default ha_innobase implementation for now... */
diff --git a/storage/innobase/handler/ha_innopart.h b/storage/innobase/handler/ha_innopart.h
deleted file mode 100644
index 67db9e07150..00000000000
--- a/storage/innobase/handler/ha_innopart.h
+++ /dev/null
@@ -1,1315 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/* The InnoDB Partition handler: the interface between MySQL and InnoDB. */
-
-#ifndef ha_innopart_h
-#define ha_innopart_h
-
-#include "partitioning/partition_handler.h"
-
-/* Forward declarations */
-class Altered_partitions;
-class partition_info;
-
-/** HA_DUPLICATE_POS and HA_READ_BEFORE_WRITE_REMOVAL is not
-set from ha_innobase, but cannot yet be supported in ha_innopart.
-Full text and geometry is not yet supported. */
-const handler::Table_flags HA_INNOPART_DISABLED_TABLE_FLAGS =
- ( HA_CAN_FULLTEXT
- | HA_CAN_FULLTEXT_EXT
- | HA_CAN_GEOMETRY
- | HA_DUPLICATE_POS
- | HA_READ_BEFORE_WRITE_REMOVAL);
-
-/** InnoDB partition specific Handler_share. */
-class Ha_innopart_share : public Partition_share
-{
-private:
- /** Array of all included table definitions (one per partition). */
- dict_table_t** m_table_parts;
-
- /** Instead of INNOBASE_SHARE::idx_trans_tbl. Maps MySQL index number
- to InnoDB index per partition. */
- dict_index_t** m_index_mapping;
-
- /** Total number of partitions. */
- uint m_tot_parts;
-
- /** Number of indexes. */
- uint m_index_count;
-
- /** Reference count. */
- uint m_ref_count;
-
- /** Pointer back to owning TABLE_SHARE. */
- TABLE_SHARE* m_table_share;
-
-public:
- Ha_innopart_share(
- TABLE_SHARE* table_share);
-
- ~Ha_innopart_share();
-
- /** Set innodb table for given partition.
- @param[in] part_id Partition number.
- @param[in] table Table. */
- inline
- void
- set_table_part(
- uint part_id,
- dict_table_t* table)
- {
- ut_ad(m_table_parts != NULL);
- ut_ad(part_id < m_tot_parts);
- m_table_parts[part_id] = table;
- }
-
- /** Return innodb table for given partition.
- @param[in] part_id Partition number.
- @return InnoDB table. */
- inline
- dict_table_t*
- get_table_part(
- uint part_id) const
- {
- ut_ad(m_table_parts != NULL);
- ut_ad(part_id < m_tot_parts);
- return(m_table_parts[part_id]);
- }
-
- /** Return innodb index for given partition and key number.
- @param[in] part_id Partition number.
- @param[in] keynr Key number.
- @return InnoDB index. */
- dict_index_t*
- get_index(
- uint part_id,
- uint keynr);
-
- /** Get MySQL key number corresponding to InnoDB index.
- @param[in] part_id Partition number.
- @param[in] index InnoDB index.
- @return MySQL key number or MAX_KEY if non-existent. */
- uint
- get_mysql_key(
- uint part_id,
- const dict_index_t* index);
-
- /** Initialize the share with table and indexes per partition.
- @param[in] part_info Partition info (partition names to use)
- @param[in] table_name Table name (db/table_name)
- @return false on success else true. */
- bool
- open_table_parts(
- partition_info* part_info,
- const char* table_name);
-
- /** Close the table partitions.
- If all instances are closed, also release the resources. */
- void
- close_table_parts();
-
- /* Static helper functions. */
- /** Fold to lower case if windows or lower_case_table_names == 1.
- @param[in,out] s String to fold.*/
- static
- void
- partition_name_casedn_str(
- char* s);
-
- /** Translate and append partition name.
- @param[out] to String to write in filesystem charset
- @param[in] from Name in system charset
- @param[in] sep Separator
- @param[in] len Max length of to buffer
- @return length of written string. */
- static
- size_t
- append_sep_and_name(
- char* to,
- const char* from,
- const char* sep,
- size_t len);
-
- /** Set up the virtual column template for partition table, and points
- all m_table_parts[]->vc_templ to it.
- @param[in] table MySQL TABLE object
- @param[in] ib_table InnoDB dict_table_t
- @param[in] table_name Table name (db/table_name) */
- void
- set_v_templ(
- TABLE* table,
- dict_table_t* ib_table,
- const char* name);
-
-private:
- /** Disable default constructor. */
- Ha_innopart_share() {};
-
- /** Open one partition (lower lever innodb table).
- @param[in] part_id Partition to open.
- @param[in] partition_name Name of partition.
- @return false on success else true. */
- bool
- open_one_table_part(
- uint part_id,
- const char* partition_name);
-};
-
-/** The class defining a partitioning aware handle to an InnoDB table.
-Based on ha_innobase and extended with
-- Partition_helper for re-using common partitioning functionality
-- Partition_handler for providing partitioning specific api calls.
-Generic partitioning functions are implemented in Partition_helper.
-Lower level storage functions are implemented in ha_innobase.
-Partition_handler is inherited for implementing the handler level interface
-for partitioning specific functions, like change_partitions and
-truncate_partition.
-InnoDB specific functions related to partitioning is implemented here. */
-class ha_innopart:
- public ha_innobase,
- public Partition_helper,
- public Partition_handler
-{
-public:
- ha_innopart(
- handlerton* hton,
- TABLE_SHARE* table_arg);
-
- ~ha_innopart();
-
- /** Clone this handler, used when needing more than one cursor
- to the same table.
- @param[in] name Table name.
- @param[in] mem_root mem_root to allocate from.
- @retval Pointer to clone or NULL if error. */
- handler*
- clone(
- const char* name,
- MEM_ROOT* mem_root);
-
- /** Check and register a table in the query cache.
- Ask InnoDB if a query to a table can be cached.
- @param[in] thd User thread handle.
- @param[in] table_key Normalized path to the table.
- @param[in] key_length Lenght of table_key.
- @param[out] call_back Function pointer for checking if data
- has changed.
- @param[in,out] engine_data Data for call_back (not used).
- @return TRUE if query caching of the table is permitted. */
- my_bool
- register_query_cache_table(
- THD* thd,
- char* table_key,
- size_t key_length,
- qc_engine_callback* call_back,
- ulonglong* engine_data)
- {
- /* Currently this would need to go through every
- [sub] partition in the table to see if any of them has changed.
- See row_search_check_if_query_cache_permitted().
- So disabled until we can avoid check all partitions. */
- return(FALSE);
- }
-
- /** On-line ALTER TABLE interface @see handler0alter.cc @{ */
-
- /** Check if InnoDB supports a particular alter table in-place.
- @param[in] altered_table TABLE object for new version of table.
- @param[in,out] ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
- @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
- @retval HA_ALTER_INPLACE_NO_LOCK Supported
- @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but
- requires lock during main phase and exclusive lock during prepare
- phase.
- @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare
- phase requires exclusive lock. */
- enum_alter_inplace_result
- check_if_supported_inplace_alter(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
-
- /** Prepare in-place ALTER for table.
- Allows InnoDB to update internal structures with concurrent
- writes blocked (provided that check_if_supported_inplace_alter()
- did not return HA_ALTER_INPLACE_NO_LOCK).
- This will be invoked before inplace_alter_table().
- @param[in] altered_table TABLE object for new version of table.
- @param[in,out] ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
- @retval true Failure.
- @retval false Success. */
- bool
- prepare_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
-
- /** Alter the table structure in-place.
- Alter the table structure in-place with operations
- specified using HA_ALTER_FLAGS and Alter_inplace_information.
- The level of concurrency allowed during this operation depends
- on the return value from check_if_supported_inplace_alter().
- @param[in] altered_table TABLE object for new version of table.
- @param[in,out] ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
- @retval true Failure.
- @retval false Success. */
- bool
- inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
-
- /** Commit or rollback.
- Commit or rollback the changes made during
- prepare_inplace_alter_table() and inplace_alter_table() inside
- the storage engine. Note that the allowed level of concurrency
- during this operation will be the same as for
- inplace_alter_table() and thus might be higher than during
- prepare_inplace_alter_table(). (E.g concurrent writes were
- blocked during prepare, but might not be during commit).
- @param[in] altered_table TABLE object for new version of table.
- @param[in] ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
- @param[in,out] commit true => Commit, false => Rollback.
- @retval true Failure.
- @retval false Success. */
- bool
- commit_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info,
- bool commit);
-
- /** Notify the storage engine that the table structure (.frm) has
- been updated.
-
- ha_partition allows inplace operations that also upgrades the engine
- if it supports partitioning natively. So if this is the case then
- we will remove the .par file since it is not used with ha_innopart
- (we use the internal data dictionary instead). */
- void
- notify_table_changed();
- /** @} */
-
- // TODO: should we implement init_table_handle_for_HANDLER() ?
- // (or is sql_stat_start handled correctly anyway?)
- int
- optimize(
- THD* thd,
- HA_CHECK_OPT* check_opt);
-
- int
- discard_or_import_tablespace(
- my_bool discard);
-
- /** Compare key and rowid.
- Helper function for sorting records in the priority queue.
- a/b points to table->record[0] rows which must have the
- key fields set. The bytes before a and b store the rowid.
- This is used for comparing/sorting rows first according to
- KEY and if same KEY, by rowid (ref).
-
- @param[in] key_info Null terminated array of index
- information.
- @param[in] a Pointer to record+ref in first record.
- @param[in] b Pointer to record+ref in second record.
- @return Return value is SIGN(first_rec - second_rec)
- @retval 0 Keys are equal.
- @retval -1 second_rec is greater than first_rec.
- @retval +1 first_rec is greater than second_rec. */
- static
- int
- key_and_rowid_cmp(
- KEY** key_info,
- uchar *a,
- uchar *b);
-
- int
- extra(
- enum ha_extra_function operation);
-
- void
- print_error(
- int error,
- myf errflag);
-
- bool
- is_ignorable_error(
- int error);
-
- int
- start_stmt(
- THD* thd,
- thr_lock_type lock_type);
-
- ha_rows
- records_in_range(
- uint inx,
- key_range* min_key,
- key_range* max_key);
-
- ha_rows
- estimate_rows_upper_bound();
-
- uint
- alter_table_flags(
- uint flags);
-
- void
- update_create_info(
- HA_CREATE_INFO* create_info);
-
- int
- create(
- const char* name,
- TABLE* form,
- HA_CREATE_INFO* create_info);
-
- int
- truncate();
-
- int
- check(
- THD* thd,
- HA_CHECK_OPT* check_opt);
-
- /** Repair table.
- Will only handle records in wrong partition, not repairing
- corrupt innodb indexes.
- @param[in] thd Thread context.
- @param[in] repair_opt Repair options.
- @return 0 or error code. */
- int
- repair(
- THD* thd,
- HA_CHECK_OPT* repair_opt);
-
- bool
- can_switch_engines();
-
- uint
- referenced_by_foreign_key();
-
- void
- get_auto_increment(
- ulonglong offset,
- ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong* first_value,
- ulonglong* nb_reserved_values);
-
- int
- cmp_ref(
- const uchar* ref1,
- const uchar* ref2);
-
- int
- read_range_first(
- const key_range* start_key,
- const key_range* end_key,
- bool eq_range_arg,
- bool sorted)
- {
- return(Partition_helper::ph_read_range_first(
- start_key,
- end_key,
- eq_range_arg,
- sorted));
- }
-
- void
- position(
- const uchar* record)
- {
- Partition_helper::ph_position(record);
- }
-
- int
- rnd_pos_by_record(
- uchar* record)
- {
- return(Partition_helper::ph_rnd_pos_by_record(record));
- }
-
- /* TODO: Implement these! */
- bool
- check_if_incompatible_data(
- HA_CREATE_INFO* info,
- uint table_changes)
- {
- ut_ad(0);
- return(COMPATIBLE_DATA_NO);
- }
-
- int
- delete_all_rows()
- {
- return(handler::delete_all_rows());
- }
-
- int
- disable_indexes(
- uint mode)
- {
- return(HA_ERR_WRONG_COMMAND);
- }
-
- int
- enable_indexes(
- uint mode)
- {
- return(HA_ERR_WRONG_COMMAND);
- }
-
- void
- free_foreign_key_create_info(
- char* str)
- {
- ut_ad(0);
- }
-
- int
- ft_init()
- {
- ut_ad(0);
- return(HA_ERR_WRONG_COMMAND);
- }
-
- FT_INFO*
- ft_init_ext(
- uint flags,
- uint inx,
- String* key)
- {
- ut_ad(0);
- return(NULL);
- }
-
- FT_INFO*
- ft_init_ext_with_hints(
- uint inx,
- String* key,
- Ft_hints* hints)
- {
- ut_ad(0);
- return(NULL);
- }
-
- int
- ft_read(
- uchar* buf)
- {
- ut_ad(0);
- return(HA_ERR_WRONG_COMMAND);
- }
-
- bool
- get_foreign_dup_key(
- char* child_table_name,
- uint child_table_name_len,
- char* child_key_name,
- uint child_key_name_len)
- {
- ut_ad(0);
- return(false);
- }
-
- // TODO: not yet supporting FK.
- char*
- get_foreign_key_create_info()
- {
- return(NULL);
- }
-
- // TODO: not yet supporting FK.
- int
- get_foreign_key_list(
- THD* thd,
- List<FOREIGN_KEY_INFO>* f_key_list)
- {
- return(0);
- }
-
- // TODO: not yet supporting FK.
- int
- get_parent_foreign_key_list(
- THD* thd,
- List<FOREIGN_KEY_INFO>* f_key_list)
- {
- return(0);
- }
-
- // TODO: not yet supporting FK.
- int
- get_cascade_foreign_key_table_list(
- THD* thd,
- List<st_handler_tablename>* fk_table_list)
- {
- return(0);
- }
-
- int
- read_range_next()
- {
- return(Partition_helper::ph_read_range_next());
- }
-
- uint32
- calculate_key_hash_value(
- Field** field_array)
- {
- return(Partition_helper::ph_calculate_key_hash_value(field_array));
- }
-
- Table_flags
- table_flags() const
- {
- return(ha_innobase::table_flags() | HA_CAN_REPAIR);
- }
-
- void
- release_auto_increment()
- {
- Partition_helper::ph_release_auto_increment();
- }
-
- /** Implementing Partition_handler interface @see partition_handler.h
- @{ */
-
- /** See Partition_handler. */
- void
- get_dynamic_partition_info(
- ha_statistics* stat_info,
- ha_checksum* check_sum,
- uint part_id)
- {
- Partition_helper::get_dynamic_partition_info_low(
- stat_info,
- check_sum,
- part_id);
- }
-
- uint
- alter_flags(
- uint flags MY_ATTRIBUTE((unused))) const
- {
- return(HA_PARTITION_FUNCTION_SUPPORTED
- | HA_FAST_CHANGE_PARTITION);
- }
-
- Partition_handler*
- get_partition_handler()
- {
- return(static_cast<Partition_handler*>(this));
- }
-
- void
- set_part_info(
- partition_info* part_info,
- bool early)
- {
- Partition_helper::set_part_info_low(part_info, early);
- }
-
- void
- initialize_partitioning(
- partition_info* part_info,
- bool early)
- {
- Partition_helper::set_part_info_low(part_info, early);
- }
-
- handler*
- get_handler()
- {
- return(static_cast<handler*>(this));
- }
- /** @} */
-
-private:
- /** Pointer to Ha_innopart_share on the TABLE_SHARE. */
- Ha_innopart_share* m_part_share;
-
- /** ins_node per partition. Synchronized with prebuilt->ins_node
- when changing partitions. */
- ins_node_t** m_ins_node_parts;
-
- /** upd_node per partition. Synchronized with prebuilt->upd_node
- when changing partitions. */
- upd_node_t** m_upd_node_parts;
-
- /** blob_heap per partition. Synchronized with prebuilt->blob_heap
- when changing partitions. */
- mem_heap_t** m_blob_heap_parts;
-
- /** trx_id from the partitions table->def_trx_id. Keep in sync
- with prebuilt->trx_id when changing partitions.
- prebuilt only reflects the current partition! */
- trx_id_t* m_trx_id_parts;
-
- /** row_read_type per partition. */
- ulint* m_row_read_type_parts;
-
- /** sql_stat_start per partition. */
- uchar* m_sql_stat_start_parts;
-
- /** persistent cursors per partition. */
- btr_pcur_t* m_pcur_parts;
-
- /** persistent cluster cursors per partition. */
- btr_pcur_t* m_clust_pcur_parts;
-
- /** map from part_id to offset in above two arrays. */
- uint16_t* m_pcur_map;
-
- /** Original m_prebuilt->pcur. */
- btr_pcur_t* m_pcur;
-
- /** Original m_prebuilt->clust_pcur. */
- btr_pcur_t* m_clust_pcur;
-
- /** New partitions during ADD/REORG/... PARTITION. */
- Altered_partitions* m_new_partitions;
-
- /** Clear used ins_nodes and upd_nodes. */
- void
- clear_ins_upd_nodes();
-
- /** Clear the blob heaps for all partitions */
- void
- clear_blob_heaps();
-
- /** Reset state of file to after 'open'. This function is called
- after every statement for all tables used by that statement. */
- int
- reset();
-
- /** Allocate the array to hold blob heaps for all partitions */
- mem_heap_t**
- alloc_blob_heap_array();
-
- /** Free the array that holds blob heaps for all partitions */
- void
- free_blob_heap_array();
-
- /** Changes the active index of a handle.
- @param[in] part_id Use this partition.
- @param[in] keynr Use this index; MAX_KEY means always
- clustered index, even if it was internally generated by InnoDB.
- @return 0 or error code. */
- int
- change_active_index(
- uint part_id,
- uint keynr);
-
- /** Move to next partition and set its index.
- @return 0 for success else error number. */
- int
- next_partition_index();
-
- /** Get the index for the current partition
- @param[in] keynr MySQL index number.
- @return InnoDB index or NULL. */
- dict_index_t*
- innobase_get_index(
- uint keynr);
-
- /** Get the index for a handle.
- Does not change active index.
- @param[in] keynr use this index; MAX_KEY means always clustered
- index, even if it was internally generated by InnoDB.
- @param[in] part_id From this partition.
- @return NULL or index instance. */
- dict_index_t*
- innopart_get_index(
- uint part_id,
- uint keynr);
-
- /** Change active partition.
- Copies needed info into m_prebuilt from the partition specific memory.
- @param[in] part_id Partition to set as active. */
- void
- set_partition(
- uint part_id);
-
- /** Update active partition.
- Copies needed info from m_prebuilt into the partition specific memory.
- @param[in] part_id Partition to set as active. */
- void
- update_partition(
- uint part_id);
-
- /** Helpers needed by Partition_helper, @see partition_handler.h @{ */
-
- /** Set the autoinc column max value.
- This should only be called once from ha_innobase::open().
- Therefore there's no need for a covering lock.
- @param[in] no_lock If locking should be skipped. Not used!
- @return 0 on success else error code. */
- int
- initialize_auto_increment(
- bool /* no_lock */);
-
- /** Setup the ordered record buffer and the priority queue.
- @param[in] used_parts Number of used partitions in query.
- @return false for success, else true. */
- int
- init_record_priority_queue_for_parts(
- uint used_parts);
-
- /** Destroy the ordered record buffer and the priority queue. */
- void
- destroy_record_priority_queue_for_parts();
-
- /** Prepare for creating new partitions during ALTER TABLE ...
- PARTITION.
- @param[in] num_partitions Number of new partitions to be created.
- @param[in] only_create True if only creating the partition
- (no open/lock is needed).
- @return 0 for success else error code. */
- int
- prepare_for_new_partitions(
- uint num_partitions,
- bool only_create);
-
- /** Create a new partition to be filled during ALTER TABLE ...
- PARTITION.
- @param[in] table Table to create the partition in.
- @param[in] create_info Table/partition specific create info.
- @param[in] part_name Partition name.
- @param[in] new_part_id Partition id in new table.
- @param[in] part_elem Partition element.
- @return 0 for success else error code. */
- int
- create_new_partition(
- TABLE* table,
- HA_CREATE_INFO* create_info,
- const char* part_name,
- uint new_part_id,
- partition_element* part_elem);
-
- /** Close and finalize new partitions. */
- void
- close_new_partitions();
-
- /** write row to new partition.
- @param[in] new_part New partition to write to.
- @return 0 for success else error code. */
- int
- write_row_in_new_part(
- uint new_part);
-
- /** Write a row in specific partition.
- Stores a row in an InnoDB database, to the table specified in this
- handle.
- @param[in] part_id Partition to write to.
- @param[in] row A row in MySQL format.
- @return error code. */
- int
- write_row_in_part(
- uint part_id,
- uchar* row);
-
- /** Update a row in partition.
- Updates a row given as a parameter to a new value.
- @param[in] part_id Partition to update row in.
- @param[in] old_row Old row in MySQL format.
- @param[in] new_row New row in MySQL format.
- @return error number or 0. */
- int
- update_row_in_part(
- uint part_id,
- const uchar* old_row,
- uchar* new_row);
-
- /** Deletes a row in partition.
- @param[in] part_id Partition to delete from.
- @param[in] row Row to delete in MySQL format.
- @return error number or 0. */
- int
- delete_row_in_part(
- uint part_id,
- const uchar* row);
-
- /** Return first record in index from a partition.
- @param[in] part Partition to read from.
- @param[out] record First record in index in the partition.
- @return error number or 0. */
- int
- index_first_in_part(
- uint part,
- uchar* record);
-
- /** Return last record in index from a partition.
- @param[in] part Partition to read from.
- @param[out] record Last record in index in the partition.
- @return error number or 0. */
- int
- index_last_in_part(
- uint part,
- uchar* record);
-
- /** Return previous record in index from a partition.
- @param[in] part Partition to read from.
- @param[out] record Last record in index in the partition.
- @return error number or 0. */
- int
- index_prev_in_part(
- uint part,
- uchar* record);
-
- /** Return next record in index from a partition.
- @param[in] part Partition to read from.
- @param[out] record Last record in index in the partition.
- @return error number or 0. */
- int
- index_next_in_part(
- uint part,
- uchar* record);
-
- /** Return next same record in index from a partition.
- This routine is used to read the next record, but only if the key is
- the same as supplied in the call.
- @param[in] part Partition to read from.
- @param[out] record Last record in index in the partition.
- @param[in] key Key to match.
- @param[in] length Length of key.
- @return error number or 0. */
- int
- index_next_same_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- uint length);
-
- /** Start index scan and return first record from a partition.
- This routine starts an index scan using a start key. The calling
- function will check the end key on its own.
- @param[in] part Partition to read from.
- @param[out] record First matching record in index in the partition.
- @param[in] key Key to match.
- @param[in] keypart_map Which part of the key to use.
- @param[in] find_flag Key condition/direction to use.
- @return error number or 0. */
- int
- index_read_map_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag);
-
- /** Return last matching record in index from a partition.
- @param[in] part Partition to read from.
- @param[out] record Last matching record in index in the partition.
- @param[in] key Key to match.
- @param[in] keypart_map Which part of the key to use.
- @return error number or 0. */
- int
- index_read_last_map_in_part(
- uint part,
- uchar* record,
- const uchar* key,
- key_part_map keypart_map);
-
- /** Start index scan and return first record from a partition.
- This routine starts an index scan using a start and end key.
- @param[in] part Partition to read from.
- @param[out] record First matching record in index in the partition.
- if NULL use table->record[0] as return buffer.
- @param[in] start_key Start key to match.
- @param[in] end_key End key to match.
- @param[in] eq_range Is equal range, start_key == end_key.
- @param[in] sorted Return rows in sorted order.
- @return error number or 0. */
- int
- read_range_first_in_part(
- uint part,
- uchar* record,
- const key_range* start_key,
- const key_range* end_key,
- bool eq_range,
- bool sorted);
-
- /** Return next record in index range scan from a partition.
- @param[in] part Partition to read from.
- @param[out] record First matching record in index in the partition.
- if NULL use table->record[0] as return buffer.
- @return error number or 0. */
- int
- read_range_next_in_part(
- uint part,
- uchar* record);
-
- /** Start index scan and return first record from a partition.
- This routine starts an index scan using a start key. The calling
- function will check the end key on its own.
- @param[in] part Partition to read from.
- @param[out] record First matching record in index in the partition.
- @param[in] index Index to read from.
- @param[in] key Key to match.
- @param[in] keypart_map Which part of the key to use.
- @param[in] find_flag Key condition/direction to use.
- @return error number or 0. */
- int
- index_read_idx_map_in_part(
- uint part,
- uchar* record,
- uint index,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag);
-
- /** Initialize random read/scan of a specific partition.
- @param[in] part_id Partition to initialize.
- @param[in] table_scan True for scan else random access.
- @return error number or 0. */
- int
- rnd_init_in_part(
- uint part_id,
- bool table_scan);
-
- /** Get next row during scan of a specific partition.
- @param[in] part_id Partition to read from.
- @param[out] record Next row.
- @return error number or 0. */
- int
- rnd_next_in_part(
- uint part_id,
- uchar* record);
-
- /** End random read/scan of a specific partition.
- @param[in] part_id Partition to end random read/scan.
- @param[in] table_scan True for scan else random access.
- @return error number or 0. */
- int
- rnd_end_in_part(
- uint part_id,
- bool table_scan);
-
- /** Get a reference to the current cursor position in the last used
- partition.
- @param[out] ref Reference (PK if exists else row_id).
- @param[in] record Record to position. */
- void
- position_in_last_part(
- uchar* ref,
- const uchar* record);
-
- /** Read record by given record (by its PK) from the last used partition.
- see handler::rnd_pos_by_record().
- @param[in,out] record Record to position.
- @return 0 or error number. */
- int
- rnd_pos_by_record_in_last_part(
- uchar* record)
- {
- /* Not much overhead to use default function.
- This avoids out-of-sync code. */
- return(handler::rnd_pos_by_record(record));
- }
-
- /** Copy a cached MySQL record.
- @param[out] to_record Where to copy the MySQL record.
- @param[in] from_record Which record to copy. */
- void
- copy_cached_row(
- uchar* to_record,
- const uchar* from_record);
- /** @} */
-
- /* Private handler:: functions specific for native InnoDB partitioning.
- @see handler.h @{ */
-
- int
- open(
- const char* name,
- int mode,
- uint test_if_locked);
-
- int
- close();
-
- double
- scan_time();
-
- /** Was the last returned row semi consistent read.
- In an UPDATE or DELETE, if the row under the cursor was locked by
- another transaction, and the engine used an optimistic read of the last
- committed row value under the cursor, then the engine returns 1 from
- this function. MySQL must NOT try to update this optimistic value. If
- the optimistic value does not match the WHERE condition, MySQL can
- decide to skip over this row. This can be used to avoid unnecessary
- lock waits.
-
- If this method returns true, it will also signal the storage
- engine that the next read will be a locking re-read of the row.
- @see handler.h and row0mysql.h
- @return true if last read was semi consistent else false. */
- bool was_semi_consistent_read();
-
- /** Try semi consistent read.
- Tell the engine whether it should avoid unnecessary lock waits.
- If yes, in an UPDATE or DELETE, if the row under the cursor was locked
- by another transaction, the engine may try an optimistic read of
- the last committed row value under the cursor.
- @see handler.h and row0mysql.h
- @param[in] yes Should semi-consistent read be used. */
- void try_semi_consistent_read(
- bool yes);
-
- /** Removes a lock on a row.
- Removes a new lock set on a row, if it was not read optimistically.
- This can be called after a row has been read in the processing of
- an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */
- void unlock_row();
-
- int
- index_init(
- uint index,
- bool sorted);
-
- int
- index_end();
-
- int
- rnd_init(
- bool scan)
- {
- return(Partition_helper::ph_rnd_init(scan));
- }
-
- int
- rnd_end()
- {
- return(Partition_helper::ph_rnd_end());
- }
-
- int
- external_lock(
- THD* thd,
- int lock_type);
-
- THR_LOCK_DATA**
- store_lock(
- THD* thd,
- THR_LOCK_DATA** to,
- thr_lock_type lock_type);
-
- int
- write_row(
- uchar* record)
- {
- return(Partition_helper::ph_write_row(record));
- }
-
- int
- update_row(
- const uchar* old_record,
- uchar* new_record)
- {
- return(Partition_helper::ph_update_row(old_record, new_record));
- }
-
- int
- delete_row(
- const uchar* record)
- {
- return(Partition_helper::ph_delete_row(record));
- }
- /** @} */
-
- /** Truncate partition.
- Called from Partition_handler::trunctate_partition(). */
- int
- truncate_partition_low();
-
- /** Change partitions according to ALTER TABLE ... PARTITION ...
- Called from Partition_handler::change_partitions().
- @param[in] create_info Table create info.
- @param[in] path Path including db/table_name.
- @param[out] copied Number of copied rows.
- @param[out] deleted Number of deleted rows.
- @return 0 for success or error code. */
- int
- change_partitions_low(
- HA_CREATE_INFO* create_info,
- const char* path,
- ulonglong* const copied,
- ulonglong* const deleted)
- {
- return(Partition_helper::change_partitions(
- create_info,
- path,
- copied,
- deleted));
- }
-
- /** Access methods to protected areas in handler to avoid adding
- friend class Partition_helper in class handler.
- @see partition_handler.h @{ */
-
- THD*
- get_thd() const
- {
- return ha_thd();
- }
-
- TABLE*
- get_table() const
- {
- return table;
- }
-
- bool
- get_eq_range() const
- {
- return eq_range;
- }
-
- void
- set_eq_range(bool eq_range_arg)
- {
- eq_range= eq_range_arg;
- }
-
- void
- set_range_key_part(KEY_PART_INFO *key_part)
- {
- range_key_part= key_part;
- }
- /** @} */
-
- /** Fill in data_dir_path and tablespace name from internal data
- dictionary.
- @param part_elem Partition element to fill.
- @param ib_table InnoDB table to copy from. */
- void
- update_part_elem(
- partition_element* part_elem,
- dict_table_t* ib_table);
-protected:
- /* Protected handler:: functions specific for native InnoDB partitioning.
- @see handler.h @{ */
-
- int
- rnd_next(
- uchar* record)
- {
- return(Partition_helper::ph_rnd_next(record));
- }
-
- int
- rnd_pos(
- uchar* record,
- uchar* pos);
-
- int
- index_next(
- uchar* record)
- {
- return(Partition_helper::ph_index_next(record));
- }
-
- int
- index_next_same(
- uchar* record,
- const uchar* key,
- uint keylen)
- {
- return(Partition_helper::ph_index_next_same(record, key, keylen));
- }
-
- int
- index_prev(
- uchar* record)
- {
- return(Partition_helper::ph_index_prev(record));
- }
-
- int
- index_first(
- uchar* record)
- {
- return(Partition_helper::ph_index_first(record));
- }
-
- int
- index_last(
- uchar* record)
- {
- return(Partition_helper::ph_index_last(record));
- }
-
- int
- index_read_last_map(
- uchar* record,
- const uchar* key,
- key_part_map keypart_map)
- {
- return(Partition_helper::ph_index_read_last_map(
- record,
- key,
- keypart_map));
- }
-
- int
- index_read_map(
- uchar* buf,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
- {
- return(Partition_helper::ph_index_read_map(
- buf,
- key,
- keypart_map,
- find_flag));
- }
-
- int
- index_read_idx_map(
- uchar* buf,
- uint index,
- const uchar* key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
- {
- return(Partition_helper::ph_index_read_idx_map(
- buf,
- index,
- key,
- keypart_map,
- find_flag));
- }
- /** @} */
-
- /** Updates and return statistics.
- Returns statistics information of the table to the MySQL interpreter,
- in various fields of the handle object.
- @param[in] flag Flags for what to update and return.
- @param[in] is_analyze True if called from ::analyze().
- @return HA_ERR_* error code or 0. */
- int
- info_low(
- uint flag,
- bool is_analyze);
-};
-#endif /* ha_innopart_h */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index ceaa07bbd40..e3bc5fd5799 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -320,14 +320,22 @@ my_error_innodb(
case DB_CORRUPTION:
my_error(ER_NOT_KEYFILE, MYF(0), table);
break;
- case DB_TOO_BIG_RECORD:
- /* We limit max record size to 16k for 64k page size. */
- my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
- srv_page_size == UNIV_PAGE_SIZE_MAX
- ? REC_MAX_DATA_SIZE - 1
- : page_get_free_space_of_empty(
- flags & DICT_TF_COMPACT) / 2);
+ case DB_TOO_BIG_RECORD: {
+ /* Note that in page0zip.ic page_zip_rec_needs_ext() rec_size
+ is limited to COMPRESSED_REC_MAX_DATA_SIZE (16K) or
+ REDUNDANT_REC_MAX_DATA_SIZE (16K-1). */
+ bool comp = !!(flags & DICT_TF_COMPACT);
+ ulint free_space = page_get_free_space_of_empty(comp) / 2;
+
+ if (free_space >= (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ REDUNDANT_REC_MAX_DATA_SIZE)) {
+ free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
+ REDUNDANT_REC_MAX_DATA_SIZE) - 1;
+ }
+
+ my_error(ER_TOO_BIG_ROWSIZE, MYF(0), free_space);
break;
+ }
case DB_INVALID_NULL:
/* TODO: report the row, as we do for DB_DUPLICATE_KEY */
my_error(ER_INVALID_USE_OF_NULL, MYF(0));
@@ -3293,8 +3301,8 @@ innobase_pk_col_prefix_compare(
ulint new_prefix_len,
ulint old_prefix_len)
{
- ut_ad(new_prefix_len < REC_MAX_DATA_SIZE);
- ut_ad(old_prefix_len < REC_MAX_DATA_SIZE);
+ ut_ad(new_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE);
+ ut_ad(old_prefix_len < COMPRESSED_REC_MAX_DATA_SIZE);
if (new_prefix_len == old_prefix_len) {
return(0);
@@ -6372,6 +6380,7 @@ ha_innobase::inplace_alter_table(
DBUG_ENTER("inplace_alter_table");
DBUG_ASSERT(!srv_read_only_mode);
+ ut_ad(!sync_check_iterate(sync_check()));
ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X));
ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_S));
diff --git a/storage/innobase/handler/handler0alter_innopart.cc b/storage/innobase/handler/handler0alter_innopart.cc
deleted file mode 100644
index 0f2d5c7e576..00000000000
--- a/storage/innobase/handler/handler0alter_innopart.cc
+++ /dev/null
@@ -1,307 +0,0 @@
-/* JAN: TODO: MySQL 5.7 InnoDB partitioning. */
-
-/** Prepare inplace alter table.
-Allows InnoDB to update internal structures with concurrent
-writes blocked (provided that check_if_supported_inplace_alter()
-did not return HA_ALTER_INPLACE_NO_LOCK).
-This will be invoked before inplace_alter_table().
-@param[in] altered_table TABLE object for new version of table.
-@param[in] ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-@retval true Failure.
-@retval false Success. */
-bool
-ha_innopart::prepare_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info)
-{
- THD* thd;
- ha_innopart_inplace_ctx* ctx_parts;
- bool res = true;
- DBUG_ENTER("ha_innopart::prepare_inplace_alter_table");
- DBUG_ASSERT(ha_alter_info->handler_ctx == NULL);
-
- thd = ha_thd();
-
- /* Clean up all ins/upd nodes. */
- clear_ins_upd_nodes();
- /* Based on Sql_alloc class, return NULL for new on failure. */
- ctx_parts = new ha_innopart_inplace_ctx(thd, m_tot_parts);
- if (!ctx_parts) {
- DBUG_RETURN(HA_ALTER_ERROR);
- }
-
- uint ctx_array_size = sizeof(inplace_alter_handler_ctx*)
- * (m_tot_parts + 1);
- ctx_parts->ctx_array =
- static_cast<inplace_alter_handler_ctx**>(
- ut_malloc(ctx_array_size,
- mem_key_partitioning));
- if (!ctx_parts->ctx_array) {
- DBUG_RETURN(HA_ALTER_ERROR);
- }
-
- /* Set all to NULL, including the terminating one. */
- memset(ctx_parts->ctx_array, 0, ctx_array_size);
-
- ctx_parts->prebuilt_array = static_cast<row_prebuilt_t**>(
- ut_malloc(sizeof(row_prebuilt_t*)
- * m_tot_parts,
- mem_key_partitioning));
- if (!ctx_parts->prebuilt_array) {
- DBUG_RETURN(HA_ALTER_ERROR);
- }
- /* For the first partition use the current prebuilt. */
- ctx_parts->prebuilt_array[0] = m_prebuilt;
- /* Create new prebuilt for the rest of the partitions.
- It is needed for the current implementation of
- ha_innobase::commit_inplace_alter_table(). */
- for (uint i = 1; i < m_tot_parts; i++) {
- row_prebuilt_t* tmp_prebuilt;
- tmp_prebuilt = row_create_prebuilt(
- m_part_share->get_table_part(i),
- table_share->reclength);
- /* Use same trx as original prebuilt. */
- tmp_prebuilt->trx = m_prebuilt->trx;
- ctx_parts->prebuilt_array[i] = tmp_prebuilt;
- }
-
- for (uint i = 0; i < m_tot_parts; i++) {
- m_prebuilt = ctx_parts->prebuilt_array[i];
- m_prebuilt_ptr = ctx_parts->prebuilt_array + i;
- ha_alter_info->handler_ctx = ctx_parts->ctx_array[i];
- set_partition(i);
- res = ha_innobase::prepare_inplace_alter_table(altered_table,
- ha_alter_info);
- update_partition(i);
- ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx;
- if (res) {
- break;
- }
- }
- m_prebuilt = ctx_parts->prebuilt_array[0];
- m_prebuilt_ptr = &m_prebuilt;
- ha_alter_info->handler_ctx = ctx_parts;
- ha_alter_info->group_commit_ctx = ctx_parts->ctx_array;
- DBUG_RETURN(res);
-}
-
-/** Inplace alter table.
-Alter the table structure in-place with operations
-specified using Alter_inplace_info.
-The level of concurrency allowed during this operation depends
-on the return value from check_if_supported_inplace_alter().
-@param[in] altered_table TABLE object for new version of table.
-@param[in] ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-@retval true Failure.
-@retval false Success. */
-bool
-ha_innopart::inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info)
-{
- bool res = true;
- ha_innopart_inplace_ctx* ctx_parts;
-
- ctx_parts = static_cast<ha_innopart_inplace_ctx*>(
- ha_alter_info->handler_ctx);
- for (uint i = 0; i < m_tot_parts; i++) {
- m_prebuilt = ctx_parts->prebuilt_array[i];
- ha_alter_info->handler_ctx = ctx_parts->ctx_array[i];
- set_partition(i);
- res = ha_innobase::inplace_alter_table(altered_table,
- ha_alter_info);
- ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx);
- ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx;
- if (res) {
- break;
- }
- }
- m_prebuilt = ctx_parts->prebuilt_array[0];
- ha_alter_info->handler_ctx = ctx_parts;
- return(res);
-}
-
-/** Commit or rollback inplace alter table.
-Commit or rollback the changes made during
-prepare_inplace_alter_table() and inplace_alter_table() inside
-the storage engine. Note that the allowed level of concurrency
-during this operation will be the same as for
-inplace_alter_table() and thus might be higher than during
-prepare_inplace_alter_table(). (E.g concurrent writes were
-blocked during prepare, but might not be during commit).
-@param[in] altered_table TABLE object for new version of table.
-@param[in] ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-@param[in] commit true => Commit, false => Rollback.
-@retval true Failure.
-@retval false Success. */
-bool
-ha_innopart::commit_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info,
- bool commit)
-{
- bool res = false;
- ha_innopart_inplace_ctx* ctx_parts;
-
- ctx_parts = static_cast<ha_innopart_inplace_ctx*>(
- ha_alter_info->handler_ctx);
- ut_ad(ctx_parts);
- ut_ad(ctx_parts->prebuilt_array);
- ut_ad(ctx_parts->prebuilt_array[0] == m_prebuilt);
- if (commit) {
- /* Commit is done through first partition (group commit). */
- ut_ad(ha_alter_info->group_commit_ctx == ctx_parts->ctx_array);
- ha_alter_info->handler_ctx = ctx_parts->ctx_array[0];
- set_partition(0);
- res = ha_innobase::commit_inplace_alter_table(altered_table,
- ha_alter_info,
- commit);
- ut_ad(res || !ha_alter_info->group_commit_ctx);
- goto end;
- }
- /* Rollback is done for each partition. */
- for (uint i = 0; i < m_tot_parts; i++) {
- m_prebuilt = ctx_parts->prebuilt_array[i];
- ha_alter_info->handler_ctx = ctx_parts->ctx_array[i];
- set_partition(i);
- if (ha_innobase::commit_inplace_alter_table(altered_table,
- ha_alter_info, commit)) {
- res = true;
- }
- ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx);
- ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx;
- }
-end:
- /* Move the ownership of the new tables back to
- the m_part_share. */
- ha_innobase_inplace_ctx* ctx;
- for (uint i = 0; i < m_tot_parts; i++) {
- /* TODO: Fix to only use one prebuilt (i.e. make inplace
- alter partition aware instead of using multiple prebuilt
- copies... */
- ctx = static_cast<ha_innobase_inplace_ctx*>(
- ctx_parts->ctx_array[i]);
- if (ctx) {
- m_part_share->set_table_part(i, ctx->prebuilt->table);
- ctx->prebuilt->table = NULL;
- ctx_parts->prebuilt_array[i] = ctx->prebuilt;
- }
- }
- /* The above juggling of prebuilt must be reset here. */
- m_prebuilt = ctx_parts->prebuilt_array[0];
- m_prebuilt->table = m_part_share->get_table_part(0);
- ha_alter_info->handler_ctx = ctx_parts;
- return(res);
-}
-
-/** Notify the storage engine that the table structure (.frm) has
-been updated.
-
-ha_partition allows inplace operations that also upgrades the engine
-if it supports partitioning natively. So if this is the case then
-we will remove the .par file since it is not used with ha_innopart
-(we use the internal data dictionary instead). */
-void
-ha_innopart::notify_table_changed()
-{
- char tmp_par_path[FN_REFLEN + 1];
- strxnmov(tmp_par_path, FN_REFLEN, table->s->normalized_path.str,
- ".par", NullS);
-
- if (my_access(tmp_par_path, W_OK) == 0)
- {
- my_delete(tmp_par_path, MYF(0));
- }
-}
-
-/** Check if supported inplace alter table.
-@param[in] altered_table Altered MySQL table.
-@param[in] ha_alter_info Information about inplace operations to do.
-@return Lock level, not supported or error */
-enum_alter_inplace_result
-ha_innopart::check_if_supported_inplace_alter(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info)
-{
- DBUG_ENTER("ha_innopart::check_if_supported_inplace_alter");
- DBUG_ASSERT(ha_alter_info->handler_ctx == NULL);
-
- /* Not supporting these for partitioned tables yet! */
-
- /* FK not yet supported. */
- if (ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_FOREIGN_KEY
- | Alter_inplace_info::DROP_FOREIGN_KEY)) {
-
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_FOREIGN_KEY_ON_PARTITIONED);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- /* FTS not yet supported either. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_INDEX)) {
-
- for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
- const KEY* key =
- &ha_alter_info->key_info_buffer[
- ha_alter_info->index_add_buffer[i]];
- if (key->flags & HA_FULLTEXT) {
- DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
- & ~(HA_FULLTEXT
- | HA_PACK_KEY
- | HA_GENERATED_KEY
- | HA_BINARY_PACK_KEY)));
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(
- ER_FULLTEXT_NOT_SUPPORTED_WITH_PARTITIONING);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
- }
- /* We cannot allow INPLACE to change order of KEY partitioning fields! */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_STORED_COLUMN_ORDER)
- && !m_part_info->same_key_column_order(
- &ha_alter_info->alter_info->create_list)) {
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* Cannot allow INPLACE for drop and create PRIMARY KEY if partition is
- on Primary Key - PARTITION BY KEY() */
- if ((ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::DROP_PK_INDEX))) {
-
- /* Check partition by key(). */
- if ((m_part_info->part_type == HASH_PARTITION)
- && m_part_info->list_of_part_fields
- && m_part_info->part_field_list.is_empty()) {
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* Check sub-partition by key(). */
- if ((m_part_info->subpart_type == HASH_PARTITION)
- && m_part_info->list_of_subpart_fields
- && m_part_info->subpart_field_list.is_empty()) {
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
-
- /* Check for PK and UNIQUE should already be done when creating the
- new table metadata.
- (fix_partition_info/check_primary_key+check_unique_key) */
-
- set_partition(0);
- enum_alter_inplace_result res =
- ha_innobase::check_if_supported_inplace_alter(altered_table,
- ha_alter_info);
-
- DBEUG_RETURN(res);
-}
-
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index f80047f29a9..8836e858018 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1730,9 +1730,6 @@ struct dict_sys_t{
on name */
hash_table_t* table_id_hash; /*!< hash table of the tables, based
on id */
- lint size; /*!< varying space in bytes occupied
- by the data dictionary table and
- index objects */
dict_table_t* sys_tables; /*!< SYS_TABLES table */
dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
@@ -2032,6 +2029,13 @@ dict_table_decode_n_col(
ulint* n_col,
ulint* n_v_col);
+/** Calculate the used memory occupied by the data dictionary
+table and index objects.
+@return number of bytes occupied. */
+UNIV_INTERN
+ulint
+dict_sys_get_size();
+
/** Look for any dictionary objects that are found in the given tablespace.
@param[in] space_id Tablespace ID to search for.
@return true if tablespace is empty. */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index e5057b30501..6697c1f37ed 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2013, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -588,15 +588,12 @@ fseg_free_page_func(
# define fseg_free_page(header, space_id, page, ahi, mtr) \
fseg_free_page_func(header, space_id, page, mtr)
#endif /* BTR_CUR_HASH_ADAPT */
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
+/** Determine whether a page is free.
+@param[in,out] space tablespace
+@param[in] page page number
+@return whether the page is marked as free */
bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space_id, /*!< in: space id */
- ulint page) /*!< in: page offset */
+fseg_page_is_free(fil_space_t* space, unsigned page)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
@@ -834,22 +831,6 @@ xdes_calc_descriptor_page(
const page_size_t& page_size,
ulint offset);
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- unsigned line);
-
-#define fsp_page_is_free(space,page,mtr) \
- fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
-
#endif /* UNIV_INNOCHECKSUM */
#include "fsp0fsp.ic"
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index f743985147c..58da7bacc6f 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -489,6 +489,7 @@ log_free_check(void)
commit_try_rebuild() */
SYNC_DICT_OPERATION, /* dict_operation_lock X-latch during
commit_try_rebuild() */
+ SYNC_FTS_CACHE, /* fts_cache_t::lock */
SYNC_INDEX_TREE /* index->lock */
};
#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 0725a5405a4..e47e89ae4ba 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -129,14 +129,6 @@ enum mlog_id_t {
MLOG_LSN = 28,
#endif /* UNIV_LOG_LSN_DEBUG */
- /** this means that a file page is taken into use and the prior
- contents of the page should be ignored: in recovery we must not
- trust the lsn values stored to the file page.
- Note: it's deprecated because it causes crash recovery problem
- in bulk create index, and actually we don't need to reset page
- lsn in recv_recover_page_func() now. */
- MLOG_INIT_FILE_PAGE = 29,
-
/** write a string to a page */
MLOG_WRITE_STRING = 30,
@@ -224,8 +216,7 @@ enum mlog_id_t {
/** create a R-tree compact page */
MLOG_COMP_PAGE_CREATE_RTREE = 58,
- /** this means that a file page is taken into use.
- We use it to replace MLOG_INIT_FILE_PAGE. */
+ /** initialize a file page */
MLOG_INIT_FILE_PAGE2 = 59,
/** Table is being truncated. (Marked only for file-per-table) */
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index fa03279f9bc..b471e2cf64e 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -168,8 +168,9 @@ page_zip_rec_needs_ext(
> ulint(comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
ut_ad(comp || !page_size.is_compressed());
-#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
- if (rec_size >= REC_MAX_DATA_SIZE) {
+#if UNIV_PAGE_SIZE_MAX > COMPRESSED_REC_MAX_DATA_SIZE
+ if (comp ? rec_size >= COMPRESSED_REC_MAX_DATA_SIZE :
+ rec_size >= REDUNDANT_REC_MAX_DATA_SIZE) {
return(TRUE);
}
#endif
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 8d3f87450f8..a5e3268b7d7 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1099,9 +1099,15 @@ are given in one byte (resp. two byte) format. */
#define REC_1BYTE_OFFS_LIMIT 0x7FUL
#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL
-/* The data size of record must be smaller than this because we reserve
-two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE 16384
+/* The data size of record must not be larger than this on
+REDUNDANT row format because we reserve two upmost bits in a
+two byte offset for special purposes */
+#define REDUNDANT_REC_MAX_DATA_SIZE (16383)
+
+/* The data size of record must be smaller than this on
+COMPRESSED row format because we reserve two upmost bits in a
+two byte offset for special purposes */
+#define COMPRESSED_REC_MAX_DATA_SIZE (16384)
#ifdef WITH_WSREP
int wsrep_rec_get_foreign_key(
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index d4d55601bc2..d73c186b12e 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -244,6 +244,18 @@ struct sel_buf_t{
when data != NULL */
};
+/** Copy used fields from cached row.
+Copy cache record field by field, don't touch fields that
+are not covered by current key.
+@param[out] buf Where to copy the MySQL row.
+@param[in] cached_rec What to copy (in MySQL row format).
+@param[in] prebuilt prebuilt struct. */
+void
+row_sel_copy_cached_fields_for_mysql(
+ byte* buf,
+ const byte* cached_rec,
+ row_prebuilt_t* prebuilt);
+
/** Query plan */
struct plan_t{
dict_table_t* table; /*!< table struct in the dictionary
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 891f25f68f1..3eddd300acc 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -227,12 +227,6 @@ extern ib_mutex_t page_zip_stat_per_index_mutex;
extern ib_mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern ib_mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
@@ -507,7 +501,9 @@ enum srv_operation_mode {
/** Mariabackup taking a backup */
SRV_OPERATION_BACKUP,
/** Mariabackup restoring a backup */
- SRV_OPERATION_RESTORE
+ SRV_OPERATION_RESTORE,
+ /** Mariabackup restoring the incremental part of a backup */
+ SRV_OPERATION_RESTORE_DELTA
};
/** Current mode of operation */
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
index 410e46f9c68..1b86d2633bf 100644
--- a/storage/innobase/include/sync0policy.h
+++ b/storage/innobase/include/sync0policy.h
@@ -61,7 +61,7 @@ public:
:
latch_t(id)
{
- /* No op */
+ ut_ad(id != LATCH_ID_NONE);
}
/** Set to locked state
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 7157b07e9d0..55aaf5032e8 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -91,7 +91,6 @@ extern mysql_pfs_key_t rw_lock_debug_mutex_key;
# endif /* UNIV_DEBUG */
extern mysql_pfs_key_t rw_lock_list_mutex_key;
extern mysql_pfs_key_t rw_lock_mutex_key;
-extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index bcbcf70bfc7..8d08416cccd 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -260,9 +260,9 @@ enum latch_level_t {
SYNC_TREE_NODE,
SYNC_TREE_NODE_FROM_HASH,
SYNC_TREE_NODE_NEW,
+ SYNC_IBUF_PESS_INSERT_MUTEX,
SYNC_INDEX_TREE,
- SYNC_IBUF_PESS_INSERT_MUTEX,
SYNC_IBUF_HEADER,
SYNC_DICT_HEADER,
SYNC_STATS_AUTO_RECALC,
@@ -270,10 +270,10 @@ enum latch_level_t {
SYNC_DICT,
SYNC_FTS_CACHE,
- SYNC_DICT_OPERATION,
-
SYNC_FILE_FORMAT_TAG,
+ SYNC_DICT_OPERATION,
+
SYNC_TRX_I_S_LAST_READ,
SYNC_TRX_I_S_RWLOCK,
@@ -335,7 +335,6 @@ enum latch_id_t {
LATCH_ID_RTR_PATH_MUTEX,
LATCH_ID_RW_LOCK_LIST,
LATCH_ID_RW_LOCK_MUTEX,
- LATCH_ID_SRV_DICT_TMPFILE,
LATCH_ID_SRV_INNODB_MONITOR,
LATCH_ID_SRV_MISC_TMPFILE,
LATCH_ID_SRV_MONITOR_FILE,
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 3078aa8faf1..48c5133644c 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -203,9 +203,17 @@ struct trx_rseg_t {
bool is_persistent() const
{
ut_ad(space == SRV_TMP_SPACE_ID
- || space <= TRX_SYS_MAX_UNDO_SPACES);
+ || space == TRX_SYS_SPACE
+ || (srv_undo_space_id_start > 0
+ && space >= srv_undo_space_id_start
+ && space <= srv_undo_space_id_start
+ + TRX_SYS_MAX_UNDO_SPACES));
ut_ad(space == SRV_TMP_SPACE_ID
- || space <= srv_undo_tablespaces_active
+ || space == TRX_SYS_SPACE
+ || (srv_undo_space_id_start > 0
+ && space >= srv_undo_space_id_start
+ && space <= srv_undo_space_id_start
+ + srv_undo_tablespaces_active)
|| !srv_was_started);
return(space != SRV_TMP_SPACE_ID);
}
diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake
index fe2d537c50e..d916d8b4160 100644
--- a/storage/innobase/innodb.cmake
+++ b/storage/innobase/innodb.cmake
@@ -156,9 +156,9 @@ IF(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
ENDIF()
IF(NOT MSVC)
- # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686
+ # workaround for old gcc on x86, gcc atomic ops only work under -march=i686
IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND
- CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3")
+ CMAKE_C_COMPILER_VERSION VERSION_LESS "4.4.0")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686")
ENDIF()
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index d892f22f967..0d0e84ab555 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -484,7 +484,6 @@ log_close(void)
lsn_t checkpoint_age;
ut_ad(log_mutex_own());
- ut_ad(!recv_no_log_write);
lsn = log->lsn;
@@ -1944,6 +1943,7 @@ loop:
thread_name = "lock_wait_timeout_thread";
} else if (srv_buf_dump_thread_active) {
thread_name = "buf_dump_thread";
+ goto wait_suspend_loop;
} else if (btr_defragment_thread_active) {
thread_name = "btr_defragment_thread";
} else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) {
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 0e0e0aeb357..6b3ef28a788 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1400,7 +1400,6 @@ parse_log:
/* Allow anything in page_type when creating a page. */
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
break;
- case MLOG_INIT_FILE_PAGE:
case MLOG_INIT_FILE_PAGE2:
/* Allow anything in page_type when creating a page. */
ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
@@ -1753,18 +1752,6 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
}
- if (recv->type == MLOG_INIT_FILE_PAGE) {
- page_lsn = page_newest_lsn;
-
- memset(FIL_PAGE_LSN + page, 0, 8);
- memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, 0, 8);
-
- if (page_zip) {
- memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
- }
- }
-
/* If per-table tablespace was truncated and there exist REDO
records before truncate that are to be applied as part of
recovery (checkpoint didn't happen since truncate was done)
@@ -3619,9 +3606,6 @@ get_mlog_string(mlog_id_t type)
return("MLOG_LSN");
#endif /* UNIV_LOG_LSN_DEBUG */
- case MLOG_INIT_FILE_PAGE:
- return("MLOG_INIT_FILE_PAGE");
-
case MLOG_WRITE_STRING:
return("MLOG_WRITE_STRING");
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 2acb190f7e4..837e60882e6 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -857,17 +857,29 @@ os_file_get_block_size(
sizeof(disk_alignment),
&tmp);
- CloseHandle(volume_handle);
-
if (!result) {
- os_file_handle_error_no_exit(volume,
- "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)", FALSE);
+ if (GetLastError() == ERROR_INVALID_FUNCTION) {
+ // Don't report error, it is driver's fault, not ours or users.
+ // We handle this with fallback. Report with info message, just once.
+ static bool write_info = true;
+ if (write_info) {
+ ib::info() << "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)"
+ << " unsupported on volume " << volume;
+ write_info = false;
+ }
+ } else {
+ os_file_handle_error_no_exit(volume,
+ "DeviceIoControl(IOCTL_STORAGE_QUERY_PROPERTY)", FALSE);
+ }
goto end;
}
fblock_size = disk_alignment.BytesPerPhysicalSector;
end:
+ if (volume_handle != INVALID_HANDLE_VALUE) {
+ CloseHandle(volume_handle);
+ }
#endif /* _WIN32 */
/* Currently we support file block size up to 4Kb */
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index df91ecc7a9b..654f3ba286c 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -770,9 +770,7 @@ row_ins_foreign_trx_print(
ulint n_trx_locks;
ulint heap_size;
- if (srv_read_only_mode) {
- return;
- }
+ ut_ad(!srv_read_only_mode);
lock_mutex_enter();
n_rec_locks = lock_number_of_rows_locked(&trx->lock);
@@ -1759,13 +1757,6 @@ row_ins_check_foreign_constraint(
cmp = cmp_dtuple_rec(entry, rec, offsets);
if (cmp == 0) {
-
- ulint lock_type;
-
- lock_type = skip_gap_lock
- ? LOCK_REC_NOT_GAP
- : LOCK_ORDINARY;
-
if (rec_get_deleted_flag(rec,
rec_offs_comp(offsets))) {
/* In delete-marked records, DB_TRX_ID must
@@ -1775,7 +1766,9 @@ row_ins_check_foreign_constraint(
offsets));
err = row_ins_set_shared_rec_lock(
- lock_type, block,
+ skip_gap_lock
+ ? LOCK_REC_NOT_GAP
+ : LOCK_ORDINARY, block,
rec, check_index, offsets, thr);
switch (err) {
case DB_SUCCESS_LOCKED_REC:
@@ -1857,23 +1850,21 @@ row_ins_check_foreign_constraint(
} else {
ut_a(cmp < 0);
- err = DB_SUCCESS;
-
- if (!skip_gap_lock) {
- err = row_ins_set_shared_rec_lock(
+ err = skip_gap_lock
+ ? DB_SUCCESS
+ : row_ins_set_shared_rec_lock(
LOCK_GAP, block,
rec, check_index, offsets, thr);
- }
switch (err) {
case DB_SUCCESS_LOCKED_REC:
+ err = DB_SUCCESS;
+ /* fall through */
case DB_SUCCESS:
if (check_ref) {
err = DB_NO_REFERENCED_ROW;
row_ins_foreign_report_add_err(
trx, foreign, rec, entry);
- } else {
- err = DB_SUCCESS;
}
default:
break;
@@ -1921,19 +1912,11 @@ do_possible_lock_wait:
thr->lock_state = QUE_THR_LOCK_NOLOCK;
- DBUG_PRINT("to_be_dropped",
- ("table: %s", check_table->name.m_name));
- if (check_table->to_be_dropped) {
- /* The table is being dropped. We shall timeout
- this operation */
- err = DB_LOCK_WAIT_TIMEOUT;
-
- goto exit_func;
- }
-
+ err = check_table->to_be_dropped
+ ? DB_LOCK_WAIT_TIMEOUT
+ : trx->error_state;
}
-
exit_func:
if (heap != NULL) {
mem_heap_free(heap);
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index de1f35a876e..cba453ced24 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1989,6 +1989,8 @@ row_merge_read_clustered_index(
row_ext_t* ext;
page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
+ mem_heap_empty(row_heap);
+
/* Do not continue if table pages are still encrypted */
if (!old_table->is_readable() ||
!new_table->is_readable()) {
@@ -3616,7 +3618,16 @@ row_merge_insert_index_tuples(
dtuple, tuple_heap);
}
+#ifdef UNIV_DEBUG
+ static const latch_level_t latches[] = {
+ SYNC_INDEX_TREE, /* index->lock */
+ SYNC_LEVEL_VARYING /* btr_bulk->m_page_bulks */
+ };
+#endif /* UNIV_DEBUG */
+
ut_ad(dtuple_validate(dtuple));
+ ut_ad(!sync_check_iterate(sync_allowed_latches(latches,
+ latches + 2)));
error = btr_bulk->insert(dtuple);
if (error != DB_SUCCESS) {
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index c205d818802..fb4cbe5731b 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -3668,7 +3668,13 @@ row_drop_table_for_mysql(
dict_stats_recalc_pool_del(table);
dict_stats_defrag_pool_del(table, NULL);
- btr_defragment_remove_table(table);
+ if (btr_defragment_thread_active) {
+ /* During fts_drop_orphaned_tables() in
+ recv_recovery_rollback_active() the
+ btr_defragment_mutex has not yet been
+ initialized by btr_defragment_init(). */
+ btr_defragment_remove_table(table);
+ }
/* Remove stats for this table and all of its indexes from the
persistent storage if it exists and if there are stats for this
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index b9ee44873ec..585c72be30e 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2015, 2017, MariaDB Corporation.
@@ -2779,28 +2779,14 @@ Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. */
void
row_sel_field_store_in_mysql_format_func(
-/*=====================================*/
- byte* dest, /*!< in/out: buffer where to store; NOTE
- that BLOBs are not in themselves
- stored here: the caller must allocate
- and copy the BLOB into buffer before,
- and pass the pointer to the BLOB in
- 'data' */
+ byte* dest,
const mysql_row_templ_t* templ,
- /*!< in: MySQL column template.
- Its following fields are referenced:
- type, is_unsigned, mysql_col_len,
- mbminlen, mbmaxlen */
#ifdef UNIV_DEBUG
const dict_index_t* index,
- /*!< in: InnoDB index */
ulint field_no,
- /*!< in: templ->rec_field_no or
- templ->clust_rec_field_no or
- templ->icp_rec_field_no */
#endif /* UNIV_DEBUG */
- const byte* data, /*!< in: data to store */
- ulint len) /*!< in: length of the data */
+ const byte* data,
+ ulint len)
{
byte* ptr;
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index 63f6c03187b..56aaff3c2aa 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -455,6 +455,25 @@ func_exit:
return(err);
}
+
+/** Determine if a FOREIGN KEY constraint needs to be processed.
+@param[in] node query node
+@param[in] trx transaction
+@return whether the node cannot be ignored */
+static
+bool
+wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx)
+{
+ if (que_node_get_type(node->common.parent) != QUE_NODE_UPDATE
+ || !wsrep_on(trx->mysql_thd)) {
+ return false;
+ }
+
+ const upd_cascade_t& nodes = *static_cast<const upd_node_t*>(
+ node->common.parent)->cascade_upd_nodes;
+ const upd_cascade_t::const_iterator end = nodes.end();
+ return std::find(nodes.begin(), end, node) == end;
+}
#endif /* WITH_WSREP */
/*********************************************************************//**
@@ -2414,29 +2433,18 @@ row_upd_sec_index_entry(
row_ins_sec_index_entry() below */
if (!rec_get_deleted_flag(
rec, dict_table_is_comp(index->table))) {
-
-#ifdef WITH_WSREP
- que_node_t *parent = que_node_get_parent(node);
-#endif
err = btr_cur_del_mark_set_sec_rec(
flags, btr_cur, TRUE, thr, &mtr);
if (err != DB_SUCCESS) {
break;
}
#ifdef WITH_WSREP
- if (err == DB_SUCCESS && !referenced &&
- !(parent && que_node_get_type(parent) ==
- QUE_NODE_UPDATE &&
- (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(),
- ((upd_node_t*)parent)->cascade_upd_nodes->end(),
- node) ==
- ((upd_node_t*)parent)->cascade_upd_nodes->end())) &&
- foreign
- ) {
- ulint* offsets =
- rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED,
- &heap);
+ if (!referenced && foreign
+ && wsrep_must_process_fk(node, trx)
+ && !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
+ ulint* offsets = rec_get_offsets(
+ rec, index, NULL, ULINT_UNDEFINED,
+ &heap);
err = wsrep_row_upd_check_foreign_constraints(
node, &pcur, index->table,
@@ -2450,14 +2458,14 @@ row_upd_sec_index_entry(
case DB_DEADLOCK:
if (wsrep_debug) {
ib::warn() << "WSREP: sec index FK check fail for deadlock"
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ << " index " << index->name
+ << " table " << index->table->name;
}
break;
default:
- ib::error() << "WSREP: referenced FK check fail: " << err
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name;
break;
}
@@ -2651,9 +2659,6 @@ row_upd_clust_rec_by_insert(
dberr_t err;
rec_t* rec;
ulint* offsets = NULL;
-#ifdef WITH_WSREP
- que_node_t *parent = que_node_get_parent(node);
-#endif
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2741,18 +2746,8 @@ check_fk:
if (err != DB_SUCCESS) {
goto err_exit;
}
- }
#ifdef WITH_WSREP
- if (!referenced &&
- !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE &&
- (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(),
- ((upd_node_t*)parent)->cascade_upd_nodes->end(),
- node) ==
- ((upd_node_t*)parent)->cascade_upd_nodes->end())) &&
- foreign
- ) {
- err = wsrep_row_upd_check_foreign_constraints(
- node, pcur, table, index, offsets, thr, mtr);
+ } else if (foreign && wsrep_must_process_fk(node, trx)) {
switch (err) {
case DB_SUCCESS:
case DB_NO_REFERENCED_ROW:
@@ -2761,14 +2756,14 @@ check_fk:
case DB_DEADLOCK:
if (wsrep_debug) {
ib::warn() << "WSREP: sec index FK check fail for deadlock"
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ << " index " << index->name
+ << " table " << index->table->name;
}
break;
default:
- ib::error() << "WSREP: referenced FK check fail: " << err
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name;
break;
}
@@ -2776,8 +2771,8 @@ check_fk:
if (err != DB_SUCCESS) {
goto err_exit;
}
- }
#endif /* WITH_WSREP */
+ }
}
mtr_commit(mtr);
@@ -2959,9 +2954,7 @@ row_upd_del_mark_clust_rec(
btr_cur_t* btr_cur;
dberr_t err;
rec_t* rec;
-#ifdef WITH_WSREP
- que_node_t *parent = que_node_get_parent(node);
-#endif
+ trx_t* trx = thr_get_trx(thr);
ut_ad(node);
ut_ad(dict_index_is_clust(index));
ut_ad(node->is_delete);
@@ -2972,7 +2965,7 @@ row_upd_del_mark_clust_rec(
/* Store row because we have to build also the secondary index
entries */
- row_upd_store_row(node, thr_get_trx(thr)->mysql_thd,
+ row_upd_store_row(node, trx->mysql_thd,
thr->prebuilt ? thr->prebuilt->m_mysql_table : NULL);
/* Mark the clustered index record deleted; we do not have to check
@@ -2984,22 +2977,14 @@ row_upd_del_mark_clust_rec(
btr_cur_get_block(btr_cur), rec,
index, offsets, thr, node->row, mtr);
- if (err == DB_SUCCESS && referenced) {
+ if (err != DB_SUCCESS) {
+ } else if (referenced) {
/* NOTE that the following call loses the position of pcur ! */
err = row_upd_check_references_constraints(
node, pcur, index->table, index, offsets, thr, mtr);
- }
#ifdef WITH_WSREP
- if (err == DB_SUCCESS && !referenced &&
- !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE &&
- (std::find(((upd_node_t*)parent)->cascade_upd_nodes->begin(),
- ((upd_node_t*)parent)->cascade_upd_nodes->end(),
- node) ==
- ((upd_node_t*)parent)->cascade_upd_nodes->end())) &&
- thr_get_trx(thr) &&
- foreign
- ) {
+ } else if (foreign && wsrep_must_process_fk(node, trx)) {
err = wsrep_row_upd_check_foreign_constraints(
node, pcur, index->table, index, offsets, thr, mtr);
switch (err) {
@@ -3010,19 +2995,19 @@ row_upd_del_mark_clust_rec(
case DB_DEADLOCK:
if (wsrep_debug) {
ib::warn() << "WSREP: sec index FK check fail for deadlock"
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ << " index " << index->name
+ << " table " << index->table->name;
}
break;
default:
- ib::error() << "WSREP: referenced FK check fail: " << err
- << " index " << index->name()
- << " table " << index->table->name.m_name;
+ ib::error() << "WSREP: referenced FK check fail: " << ut_strerr(err)
+ << " index " << index->name
+ << " table " << index->table->name;
break;
}
- }
#endif /* WITH_WSREP */
+ }
mtr_commit(mtr);
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 2894be6b12c..663487fc3a6 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -478,12 +478,6 @@ ib_mutex_t srv_monitor_file_mutex;
/** Temporary file for innodb monitor output */
FILE* srv_monitor_file;
-/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-ib_mutex_t srv_dict_tmpfile_mutex;
-/** Temporary file for output from the data dictionary */
-FILE* srv_dict_tmpfile;
/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
@@ -1363,7 +1357,7 @@ srv_printf_innodb_monitor(
"Total large memory allocated " ULINTPF "\n"
"Dictionary memory allocated " ULINTPF "\n",
os_total_large_mem_allocated,
- dict_sys->size);
+ dict_sys_get_size());
buf_print_io(file);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 46a757be5be..d4922e33ef5 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -890,12 +890,30 @@ srv_undo_tablespaces_init(bool create_new_db)
the system tablespace (0). If we are creating a new instance then
we build the undo_tablespace_ids ourselves since they don't
already exist. */
+ n_undo_tablespaces = create_new_db
+ || srv_operation == SRV_OPERATION_BACKUP
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA
+ ? srv_undo_tablespaces
+ : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids);
+ srv_undo_tablespaces_active = srv_undo_tablespaces;
- if (!create_new_db && srv_operation == SRV_OPERATION_NORMAL) {
- n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
- undo_tablespace_ids);
-
- srv_undo_tablespaces_active = n_undo_tablespaces;
+ switch (srv_operation) {
+ case SRV_OPERATION_RESTORE_DELTA:
+ case SRV_OPERATION_BACKUP:
+ /* MDEV-13561 FIXME: Determine srv_undo_space_id_start
+ from the undo001 file. */
+ srv_undo_space_id_start = 1;
+ for (i = 0; i < n_undo_tablespaces; i++) {
+ undo_tablespace_ids[i] = i + srv_undo_space_id_start;
+ }
+ break;
+ case SRV_OPERATION_NORMAL:
+ if (create_new_db) {
+ break;
+ }
+ /* fall through */
+ case SRV_OPERATION_RESTORE:
+ ut_ad(!create_new_db);
/* Check if any of the UNDO tablespace needs fix-up because
server crashed while truncate was active on UNDO tablespace.*/
@@ -929,14 +947,7 @@ srv_undo_tablespaces_init(bool create_new_db)
undo_tablespace_ids[i]);
}
}
- } else {
- srv_undo_tablespaces_active = srv_undo_tablespaces;
- n_undo_tablespaces = srv_undo_tablespaces;
-
- if (n_undo_tablespaces != 0) {
- srv_undo_space_id_start = undo_tablespace_ids[0];
- prev_space_id = srv_undo_space_id_start - 1;
- }
+ break;
}
/* Open all the undo tablespaces that are currently in use. If we
@@ -1308,6 +1319,7 @@ srv_shutdown_all_bg_threads()
switch (srv_operation) {
case SRV_OPERATION_BACKUP:
+ case SRV_OPERATION_RESTORE_DELTA:
break;
case SRV_OPERATION_NORMAL:
case SRV_OPERATION_RESTORE:
@@ -1752,15 +1764,6 @@ innobase_start_or_create_for_mysql()
}
}
- mutex_create(LATCH_ID_SRV_DICT_TMPFILE,
- &srv_dict_tmpfile_mutex);
-
- srv_dict_tmpfile = os_file_create_tmpfile(NULL);
-
- if (!srv_dict_tmpfile && err == DB_SUCCESS) {
- err = DB_ERROR;
- }
-
mutex_create(LATCH_ID_SRV_MISC_TMPFILE,
&srv_misc_tmpfile_mutex);
@@ -2809,6 +2812,7 @@ innodb_shutdown()
switch (srv_operation) {
case SRV_OPERATION_BACKUP:
case SRV_OPERATION_RESTORE:
+ case SRV_OPERATION_RESTORE_DELTA:
fil_close_all_files();
break;
case SRV_OPERATION_NORMAL:
@@ -2834,11 +2838,6 @@ innodb_shutdown()
}
}
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
if (srv_misc_tmpfile) {
fclose(srv_misc_tmpfile);
srv_misc_tmpfile = 0;
@@ -2903,7 +2902,6 @@ innodb_shutdown()
the temp files that the cover. */
if (!srv_read_only_mode) {
mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
mutex_free(&srv_misc_tmpfile_mutex);
}
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
index d6f3ef6c986..c80ea6aef3e 100644
--- a/storage/innobase/sync/sync0debug.cc
+++ b/storage/innobase/sync/sync0debug.cc
@@ -1431,9 +1431,6 @@ sync_latch_meta_init()
LATCH_ADD_MUTEX(RW_LOCK_MUTEX, SYNC_NO_ORDER_CHECK, rw_lock_mutex_key);
- LATCH_ADD_MUTEX(SRV_DICT_TMPFILE, SYNC_DICT_OPERATION,
- srv_dict_tmpfile_mutex_key);
-
LATCH_ADD_MUTEX(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK,
srv_innodb_monitor_mutex_key);
@@ -1518,11 +1515,12 @@ sync_latch_meta_init()
buf_block_lock_key);
#ifdef UNIV_DEBUG
- LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_NO_ORDER_CHECK,
+ LATCH_ADD_RWLOCK(BUF_BLOCK_DEBUG, SYNC_LEVEL_VARYING,
buf_block_debug_latch_key);
#endif /* UNIV_DEBUG */
- LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT, dict_operation_lock_key);
+ LATCH_ADD_RWLOCK(DICT_OPERATION, SYNC_DICT_OPERATION,
+ dict_operation_lock_key);
LATCH_ADD_RWLOCK(CHECKPOINT, SYNC_NO_ORDER_CHECK, checkpoint_lock_key);
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 099a56c5457..4be7162f631 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -78,7 +78,6 @@ mysql_pfs_key_t rtr_path_mutex_key;
mysql_pfs_key_t rtr_ssn_mutex_key;
mysql_pfs_key_t rw_lock_list_mutex_key;
mysql_pfs_key_t rw_lock_mutex_key;
-mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
mysql_pfs_key_t srv_innodb_monitor_mutex_key;
mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
mysql_pfs_key_t srv_monitor_file_mutex_key;
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 2fe13ae7e9d..31e70a5aaa6 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -3091,7 +3091,7 @@ trx_set_rw_mode(
ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx_is_autocommit_non_locking(trx));
- if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ if (high_level_read_only) {
return;
}