| field | value | date |
|---|---|---|
| author | Alexander Barkov <bar@mariadb.org> | 2017-10-30 20:47:39 +0400 |
| committer | Alexander Barkov <bar@mariadb.org> | 2017-10-30 20:47:39 +0400 |
| commit | 835cbbcc7b797188a89671019f2b2844e1a14e0c (patch) | |
| tree | 010dd112f16b88bb655c32abb6b93987fe5c6c99 /storage/innobase | |
| parent | fe8cf8fdf1c4c0a9ec60690a8d2738fd255c8dd5 (diff) | |
| parent | 003cb2f42477772ae43228c0bc0f8492246b9340 (diff) | |
| download | mariadb-git-835cbbcc7b797188a89671019f2b2844e1a14e0c.tar.gz | |
Merge remote-tracking branch 'origin/bb-10.2-ext' into 10.3
TODO: enable MDEV-13049 optimization for 10.3
Diffstat (limited to 'storage/innobase')
50 files changed, 757 insertions, 797 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index cf2aebe80f5..0fe807ceb00 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -77,22 +77,85 @@ btr_corruption_report(
 /*
 Latching strategy of the InnoDB B-tree
 --------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
+
+Acquisition of node pointer page latches is protected by the index->lock latch.
+
+Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
+either in S (shared) or X (exclusive) mode and block->lock was not acquired on
+node pointer pages.
+
+After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect
+node pointer pages, and acquisition of node pointer page latches is protected
+by index->lock.
+
+(0) Definition: B-tree level.
+
+(0.1) The leaf pages of the B-tree are at level 0.
+
+(0.2) The parent of a page at level L has level L+1. (The level of the
+root page is equal to the tree height.)
+
+(0.3) The B-tree lock (index->lock) is the parent of the root page and
+has level = tree height + 1.
+
+index->lock has 3 possible locking modes:
+
+(1) S-latch:
+
+(1.1) All latches for pages must be obtained in descending order of tree level.
+
+(1.2) Before obtaining the first node pointer page latch at a given B-tree
+level, the parent latch (at level + 1) must be held.
+
+(1.3) If a node pointer page is already latched at the same level,
+we can only obtain a latch on its right sibling page at the same level.
+
+(1.4) Release of the node pointer page latches must be done in
+child-to-parent order. (This prevents deadlocks when index->lock is
+obtained in SX mode.)
+
+(1.4.1) A level L node pointer page latch can be released only when
+no latches are held at the child levels, i.e. at levels < L.
+
+(1.4.2) All node pointer page latches must be released together, so
+that no other latches are obtained in between.
+
+(1.5) [implied by (1.1), (1.2)] The root page latch must be the first node
+pointer latch obtained.
+
+(2) SX-latch:
+
+In this case rules (1.2) and (1.3) from the S-latch case are relaxed and
+merged into (2.2), and rule (1.4) is removed. Thus, latch acquisition
+can be skipped at some tree levels and latches can be obtained in
+a less restricted order.
+
+(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
+order of tree level.
+
+(2.2) When a node pointer latch at level L is obtained,
+the left sibling page latch at the same level or some ancestor
+page latch (at level > L) must be held.
+
+(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can
+be on any node pointer page.
+
+(3) X-latch:
+
+Node pointer latches can be obtained in any order.
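The S-latch rules above boil down to "latch top-down, release bottom-up". The sketch below is a toy model of that ordering, not InnoDB code: it stands in for index->lock and the per-page block->lock latches with plain std::shared_mutex objects and assumes a single node pointer page per level.

```cpp
// Toy model of the S-latch descent described above -- NOT InnoDB code.
// index_lock plays the role of index->lock; latches[level] plays the role of
// block->lock on the one node pointer page we touch at each level.
#include <shared_mutex>
#include <vector>
#include <iostream>

int main()
{
	std::shared_mutex index_lock;           // parent of the root, rule (0.3)
	const int height = 3;                   // leaf level is 0, rule (0.1)
	std::vector<std::shared_mutex> latches(height + 1);

	index_lock.lock_shared();               // taken before any page latch

	// Rules (1.1), (1.2), (1.5): descend strictly by level; the latch at
	// level L+1 is still held when the first latch at level L is taken.
	for (int level = height; level >= 0; --level) {
		latches[level].lock_shared();
		std::cout << "S-latched level " << level << '\n';
	}

	// Rules (1.4), (1.4.1), (1.4.2): release child-to-parent, so the latch
	// at level L goes only after every latch below level L is gone, with
	// no other latch acquired in between.
	for (int level = 0; level <= height; ++level) {
		latches[level].unlock_shared();
	}
	index_lock.unlock_shared();             // index->lock released last
	return 0;
}
```

In this model, mode (3) corresponds to taking index_lock exclusively with lock(); once that is held, the order in which the per-level latches are taken no longer matters.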
+ +NOTE: New rules after MariaDB 10.2.2 does not affect the latching rules of leaf pages: + +index->lock S-latch is needed in read for the node pointer traversal. When the leaf +level is reached, index-lock can be released (and with the MariaDB 10.2.2 changes, all +node pointer latches). Left to right index travelsal in leaf page level can be safely done +by obtaining right sibling leaf page latch and then releasing the old page latch. + +Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock +S-latch. + +B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page +allocations are protected by index->lock X-latch. Node pointers ------------- @@ -1041,7 +1104,8 @@ btr_free_root( { fseg_header_t* header; - ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); ut_ad(mtr->is_named_space(block->page.id.space())); btr_search_drop_page_hash_index(block); @@ -1069,8 +1133,6 @@ btr_free_root_invalidate( buf_block_t* block, mtr_t* mtr) { - ut_ad(page_is_root(block->frame)); - btr_page_set_index_id( buf_block_get_frame(block), buf_block_get_page_zip(block), @@ -1371,6 +1433,7 @@ btr_free_if_exists( return; } + ut_ad(page_is_root(root->frame)); btr_free_but_not_root(root, mtr->get_log_mode()); mtr->set_named_space(page_id.space()); btr_free_root(root, mtr); diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index a2bd25b4a04..e6b1b94f828 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -565,8 +565,7 @@ PageBulk::storeExt( page_cur->block = m_block; dberr_t err = btr_store_big_rec_extern_fields( - &btr_pcur, NULL, offsets, big_rec, m_mtr, - BTR_STORE_INSERT_BULK); + &btr_pcur, offsets, big_rec, m_mtr, BTR_STORE_INSERT_BULK); ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec)); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index e96aceb5f5d..83e64c854d6 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -7096,7 +7096,6 @@ btr_store_big_rec_extern_fields( btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if btr_mtr is restarted, then this can be repositioned. */ - const upd_t* upd, /*!< in: update vector */ ulint* offsets, /*!< in/out: rec_get_offsets() on pcur. the "external storage" flags in offsets will correctly correspond diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 335b4fc220d..70444ca1830 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -564,7 +564,7 @@ btr_defragment_merge_pages( page_get_infimum_rec(from_page)); node_ptr = dict_index_build_node_ptr( index, rec, page_get_page_no(from_page), - heap, level + 1); + heap, level); btr_insert_on_non_leaf_level(0, index, level+1, node_ptr, mtr); } @@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*) now = ut_timer_now(); mtr_start(&mtr); - btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); - first_block = btr_cur_get_block(cursor); mtr.set_named_space(index->space); + /* To follow the latching order defined in WL#6326, acquire index->lock X-latch. + This entitles us to acquire page latches in any order for the index. */ + mtr_x_lock(&index->lock, &mtr); + /* This will acquire index->lock SX-latch, which per WL#6363 is allowed + when we are already holding the X-latch. 
*/ + btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); + first_block = btr_cur_get_block(cursor); last_block = btr_defragment_n_pages(first_block, index, srv_defragment_n_pages, diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 0d6917489f0..44a3c19235b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -825,7 +825,6 @@ buf_flush_update_zip_checksum( static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm)); mach_write_to_8(page + FIL_PAGE_LSN, lsn); - memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } @@ -1079,7 +1078,6 @@ buf_flush_write_block_low( bpage->newest_modification); ut_a(page_zip_verify_checksum(frame, bpage->size.physical())); - memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); break; case BUF_BLOCK_FILE_PAGE: frame = bpage->zip.data; diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index f7ea768f5c1..598da3ff706 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -176,6 +176,17 @@ buf_read_page_low( dst = ((buf_block_t*) bpage)->frame; } + DBUG_EXECUTE_IF( + "innodb_invalid_read_after_truncate", + if (fil_space_t* space = fil_space_acquire(page_id.space())) { + if (!strcmp(space->name, "test/t1") + && page_id.page_no() == space->size - 1) { + type = 0; + sync = true; + } + fil_space_release(space); + }); + IORequest request(type | IORequest::READ); *err = fil_io( @@ -321,6 +332,19 @@ buf_read_ahead_random( that is, reside near the start of the LRU list. */ for (i = low; i < high; i++) { + DBUG_EXECUTE_IF( + "innodb_invalid_read_after_truncate", + if (fil_space_t* space = fil_space_acquire( + page_id.space())) { + bool skip = !strcmp(space->name, "test/t1"); + fil_space_release(space); + if (skip) { + high = space->size; + buf_pool_mutex_exit(buf_pool); + goto read_ahead; + } + }); + const buf_page_t* bpage = buf_page_hash_get( buf_pool, page_id_t(page_id.space(), i)); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 4313fa16370..071218c4060 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -501,15 +501,14 @@ dict_table_close( ut_ad(mutex_own(&dict_sys->mutex)); ut_a(table->get_ref_count() > 0); - table->release(); + const bool last_handle = table->release(); /* Force persistent stats re-read upon next open of the table so that FLUSH TABLE can be used to forcibly fetch stats from disk if they have been manually modified. We reset table->stat_initialized only if table reference count is 0 because we do not want too frequent stats re-reads (e.g. in other cases than FLUSH TABLE). 
*/ - if (strchr(table->name.m_name, '/') != NULL - && table->get_ref_count() == 0 + if (last_handle && strchr(table->name.m_name, '/') != NULL && dict_stats_is_persistent_enabled(table)) { dict_stats_deinit(table); @@ -529,11 +528,8 @@ dict_table_close( if (!dict_locked) { table_id_t table_id = table->id; - ibool drop_aborted; - - drop_aborted = try_drop + const bool drop_aborted = last_handle && try_drop && table->drop_aborted - && table->get_ref_count() == 1 && dict_table_get_first_index(table); mutex_exit(&dict_sys->mutex); @@ -914,8 +910,7 @@ dict_index_contains_col_or_prefix( ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); if (dict_index_is_clust(index)) { - - return(TRUE); + return(!is_virtual); } if (is_virtual) { @@ -2057,8 +2052,9 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); if (lru_evict && table->drop_aborted) { - /* Do as dict_table_try_drop_aborted() does. */ - + /* When evicting the table definition, + drop the orphan indexes from the data dictionary + and free the index pages. */ trx_t* trx = trx_allocate_for_background(); ut_ad(mutex_own(&dict_sys->mutex)); @@ -2068,12 +2064,7 @@ dict_table_remove_from_cache_low( trx->dict_operation_lock_mode = RW_X_LATCH; trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - /* Silence a debug assertion in row_merge_drop_indexes(). */ - ut_d(table->acquire()); - row_merge_drop_indexes(trx, table, TRUE); - ut_d(table->release()); - ut_ad(table->get_ref_count() == 0); + row_merge_drop_indexes_dict(trx, table->id); trx_commit_for_mysql(trx); trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index e7e0e997fa7..aaed29dc58c 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -438,6 +438,9 @@ dict_mem_table_col_rename_low( ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); + char from[NAME_LEN]; + strncpy(from, s, NAME_LEN); + if (from_len == to_len) { /* The easy case: simply replace the column name in table->col_names. */ @@ -522,14 +525,54 @@ dict_mem_table_col_rename_low( foreign = *it; - for (unsigned f = 0; f < foreign->n_fields; f++) { - /* These can point straight to - table->col_names, because the foreign key - constraints will be freed at the same time - when the table object is freed. */ - foreign->foreign_col_names[f] - = dict_index_get_nth_field( - foreign->foreign_index, f)->name; + if (foreign->foreign_index == NULL) { + /* We may go here when we set foreign_key_checks to 0, + and then try to rename a column and modify the + corresponding foreign key constraint. The index + would have been dropped, we have to find an equivalent + one */ + for (unsigned f = 0; f < foreign->n_fields; f++) { + if (strcmp(foreign->foreign_col_names[f], from) + == 0) { + + char** rc = const_cast<char**>( + foreign->foreign_col_names + + f); + + if (to_len <= strlen(*rc)) { + memcpy(*rc, to, to_len + 1); + } else { + *rc = static_cast<char*>( + mem_heap_dup( + foreign->heap, + to, + to_len + 1)); + } + } + } + + dict_index_t* new_index = dict_foreign_find_index( + foreign->foreign_table, NULL, + foreign->foreign_col_names, + foreign->n_fields, NULL, true, false, + NULL, NULL, NULL); + /* There must be an equivalent index in this case. 
*/ + ut_ad(new_index != NULL); + + foreign->foreign_index = new_index; + + } else { + + for (unsigned f = 0; f < foreign->n_fields; f++) { + /* These can point straight to + table->col_names, because the foreign key + constraints will be freed at the same time + when the table object is freed. */ + foreign->foreign_col_names[f] + = dict_index_get_nth_field( + foreign->foreign_index, + f)->name; + } } } @@ -539,6 +582,8 @@ dict_mem_table_col_rename_low( foreign = *it; + ut_ad(foreign->referenced_index != NULL); + for (unsigned f = 0; f < foreign->n_fields; f++) { /* foreign->referenced_col_names[] need to be copies, because the constraint may become diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index f2ef18b116d..133e7904c94 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -259,7 +259,7 @@ dict_stats_wait_bg_to_stop_using_table( unlocking/locking the data dict */ { while (!dict_stats_stop_bg(table)) { - DICT_STATS_BG_YIELD(trx); + DICT_BG_YIELD(trx); } } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 2897d5f9be8..73132754fdf 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1061,139 +1061,28 @@ fil_space_extend_must_retry( const page_size_t pageSize(space->flags); const ulint page_size = pageSize.physical(); -#ifdef _WIN32 - os_offset_t new_file_size = - std::max( - os_offset_t(size - file_start_page_no) * page_size, - os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE)); - - /* os_file_change_size_win32() handles both compressed(sparse) - and normal files correctly. - It allocates physical storage for normal files and "virtual" - storage for sparse ones.*/ - *success = os_file_change_size_win32(node->name, - node->handle, new_file_size); + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/ + os_offset_t new_size = std::max( + os_offset_t(size - file_start_page_no) * page_size, + os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE)); + + *success = os_file_set_size(node->name, node->handle, new_size, + FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); + os_has_said_disk_full = *success; if (*success) { last_page_no = size; } else { - ib::error() << "extending file '" << node->name - << " to size " << new_file_size << " failed"; - } -#else - /* We will logically extend the file with ftruncate() if - page_compression is enabled, because the file is expected to - be sparse in that case. Make sure that ftruncate() can deal - with large files. */ - const bool is_sparse = sizeof(off_t) >= 8 - && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); - - if (is_sparse) { - /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. - fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. - Do not shrink short ROW_FORMAT=COMPRESSED files. 
*/ - off_t s = std::max(off_t(size - file_start_page_no) - * off_t(page_size), - off_t(FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - *success = !ftruncate(node->handle, s); - if (!*success) { - ib::error() << "ftruncate of file '" << node->name - << "' from " - << os_offset_t(last_page_no - - file_start_page_no) - * page_size << " to " << os_offset_t(s) - << " bytes failed with " << errno; - } else { - last_page_no = size; - } - } else { - const os_offset_t start_offset - = os_offset_t(last_page_no - file_start_page_no) - * page_size; - const ulint n_pages = size - last_page_no; - const os_offset_t len = os_offset_t(n_pages) * page_size; -# ifdef HAVE_POSIX_FALLOCATE - int err; - do { - err = posix_fallocate(node->handle, start_offset, len); - } while (err == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - if (err != EINVAL) { - - *success = !err; - if (!*success) { - ib::error() << "extending file '" << node->name - << "' from " - << start_offset - << " to " << len + start_offset - << " bytes failed with: " << err; - } - } else -# endif /* HAVE_POSIX_FALLOCATE */ - { - /* Extend at most 1 megabyte pages at a time */ - ulint n_bytes = std::min(ulint(1) << 20, n_pages) - * page_size; - byte* buf2 = static_cast<byte*>( - calloc(1, n_bytes + page_size)); - *success = buf2 != NULL; - if (!buf2) { - ib::error() << "Cannot allocate " - << n_bytes + page_size - << " bytes to extend file"; - } - byte* const buf = static_cast<byte*>( - ut_align(buf2, page_size)); - IORequest request(IORequest::WRITE); - - - os_offset_t offset = start_offset; - const os_offset_t end = start_offset + len; - const bool read_only_mode = space->purpose - == FIL_TYPE_TEMPORARY && srv_read_only_mode; - - while (*success && offset < end) { - dberr_t err = os_aio( - request, OS_AIO_SYNC, node->name, - node->handle, buf, offset, n_bytes, - read_only_mode, NULL, NULL); - - if (err != DB_SUCCESS) { - *success = false; - ib::error() << "writing zeroes to file '" - << node->name << "' from " - << offset << " to " << offset + n_bytes - << " bytes failed with: " - << ut_strerr(err); - break; - } - - offset += n_bytes; - - n_bytes = std::min(n_bytes, - static_cast<ulint>(end - offset)); - } - - free(buf2); - } + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t fsize = os_file_get_size(node->handle); + ut_a(fsize != os_offset_t(-1)); - os_has_said_disk_full = *success; - if (*success) { - last_page_no = size; - } else { - /* Let us measure the size of the file - to determine how much we were able to - extend it */ - os_offset_t fsize = os_file_get_size(node->handle); - ut_a(fsize != os_offset_t(-1)); - - last_page_no = ulint(fsize / page_size) - + file_start_page_no; - } + last_page_no = ulint(fsize / page_size) + + file_start_page_no; } -#endif mutex_enter(&fil_system->mutex); ut_a(node->being_extended); @@ -1206,11 +1095,7 @@ fil_space_extend_must_retry( const ulint pages_in_MiB = node->size & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1); - fil_node_complete_io(node, -#ifndef _WIN32 - !is_sparse ? 
IORequestWrite : -#endif /* _WIN32 */ - IORequestRead); + fil_node_complete_io(node,IORequestRead); /* Keep the last data file size info up to date, rounded to full megabytes */ @@ -1333,6 +1218,7 @@ fil_mutex_enter_and_prepare_for_io( fil_flush_file_spaces(FIL_TYPE_TABLESPACE); count++; + mutex_enter(&fil_system->mutex); continue; } } @@ -3237,10 +3123,11 @@ fil_truncate_tablespace( bool success = os_file_truncate(node->name, node->handle, 0); if (success) { - os_offset_t size = size_in_pages * UNIV_PAGE_SIZE; + os_offset_t size = os_offset_t(size_in_pages) * UNIV_PAGE_SIZE; success = os_file_set_size( - node->name, node->handle, size, srv_read_only_mode); + node->name, node->handle, size, + FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); if (success) { space->stop_new_ops = false; @@ -3835,72 +3722,17 @@ fil_ibd_create( return(DB_ERROR); } - bool punch_hole = false; + const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags); #ifdef _WIN32 - - if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) { - punch_hole = os_file_set_sparse_win32(file); - } - - success = os_file_change_size_win32(path, file, size * UNIV_PAGE_SIZE); - -#else - - success= false; -#ifdef HAVE_POSIX_FALLOCATE - /* - Extend the file using posix_fallocate(). This is required by - FusionIO HW/Firmware but should also be the prefered way to extend - a file. - */ - int ret; - do { - ret = posix_fallocate(file, 0, size * UNIV_PAGE_SIZE); - } while (ret == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - if (ret == 0) { - success = true; - } else if (ret != EINVAL) { - ib::error() << - "posix_fallocate(): Failed to preallocate" - " data for file " << path - << ", desired size " - << size * UNIV_PAGE_SIZE - << " Operating system error number " << ret - << ". Check" - " that the disk is not full or a disk quota" - " exceeded. Some operating system error" - " numbers are described at " REFMAN - "operating-system-error-codes.html"; - } -#endif /* HAVE_POSIX_FALLOCATE */ - - if (!success) { - success = os_file_set_size( - path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode); - } - - /* Note: We are actually punching a hole, previous contents will - be lost after this call, if it succeeds. In this case the file - should be full of NULs. */ - - punch_hole = os_is_sparse_file_supported(file); - - if (punch_hole) { - - dberr_t punch_err; - - punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE); - - if (punch_err != DB_SUCCESS) { - punch_hole = false; - } + if (is_compressed) { + os_file_set_sparse_win32(file); } #endif - ulint block_size = os_file_get_block_size(file, path); + success = os_file_set_size( + path, file, + os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT, is_compressed); if (!success) { os_file_close(file); @@ -3908,6 +3740,10 @@ fil_ibd_create( return(DB_OUT_OF_FILE_SPACE); } + bool punch_hole = os_is_sparse_file_supported(file); + + ulint block_size = os_file_get_block_size(file, path); + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index cc156a5353a..c459c8296e0 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -410,8 +410,7 @@ SysTablespace::set_size( bool success = os_file_set_size( file.m_filepath, file.m_handle, - static_cast<os_offset_t>(file.m_size << UNIV_PAGE_SIZE_SHIFT), - m_ignore_read_only ? 
false : srv_read_only_mode); + static_cast<os_offset_t>(file.m_size) << UNIV_PAGE_SIZE_SHIFT); if (success) { ib::info() << "File '" << file.filepath() << "' size is now " diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 0174e51ec07..8e9c897274b 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -28,6 +28,7 @@ Full Text Search interface #include "row0mysql.h" #include "row0upd.h" #include "dict0types.h" +#include "dict0stats_bg.h" #include "row0sel.h" #include "fts0fts.h" #include "fts0priv.h" @@ -860,18 +861,37 @@ fts_drop_index( err = fts_drop_index_tables(trx, index); - fts_free(table); - + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry){ + fts_free(table); + break; + } + DICT_BG_YIELD(trx); + } return(err); } - current_doc_id = table->fts->cache->next_doc_id; - first_doc_id = table->fts->cache->first_doc_id; - fts_cache_clear(table->fts->cache); - fts_cache_destroy(table->fts->cache); - table->fts->cache = fts_cache_create(table); - table->fts->cache->next_doc_id = current_doc_id; - table->fts->cache->first_doc_id = first_doc_id; + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry){ + current_doc_id = table->fts->cache->next_doc_id; + first_doc_id = table->fts->cache->first_doc_id; + fts_cache_clear(table->fts->cache); + fts_cache_destroy(table->fts->cache); + table->fts->cache = fts_cache_create(table); + table->fts->cache->next_doc_id = current_doc_id; + table->fts->cache->first_doc_id = first_doc_id; + break; + } + DICT_BG_YIELD(trx); + } } else { fts_cache_t* cache = table->fts->cache; fts_index_cache_t* index_cache; @@ -881,9 +901,17 @@ fts_drop_index( index_cache = fts_find_index_cache(cache, index); if (index_cache != NULL) { - if (index_cache->words) { - fts_words_free(index_cache->words); - rbt_free(index_cache->words); + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry && index_cache->words) { + fts_words_free(index_cache->words); + rbt_free(index_cache->words); + break; + } + DICT_BG_YIELD(trx); } ib_vector_remove(cache->indexes, *(void**) index_cache); @@ -2635,8 +2663,9 @@ fts_get_next_doc_id( return(DB_SUCCESS); } + DEBUG_SYNC_C("get_next_FTS_DOC_ID"); mutex_enter(&cache->doc_id_lock); - *doc_id = ++cache->next_doc_id; + *doc_id = cache->next_doc_id++; mutex_exit(&cache->doc_id_lock); return(DB_SUCCESS); @@ -3019,52 +3048,6 @@ fts_modify( } /*********************************************************************//** -Create a new document id. -@return DB_SUCCESS if all went well else error */ -dberr_t -fts_create_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: row is of this table. */ - dtuple_t* row, /* in/out: add doc id value to this - row. This is the current row that is - being inserted. 
*/ - mem_heap_t* heap) /*!< in: heap */ -{ - doc_id_t doc_id; - dberr_t error = DB_SUCCESS; - - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) { - error = fts_get_next_doc_id(table, &doc_id); - } - return(error); - } - - error = fts_get_next_doc_id(table, &doc_id); - - if (error == DB_SUCCESS) { - dfield_t* dfield; - doc_id_t* write_doc_id; - - ut_a(doc_id > 0); - - dfield = dtuple_get_nth_field(row, table->fts->doc_col); - write_doc_id = static_cast<doc_id_t*>( - mem_heap_alloc(heap, sizeof(*write_doc_id))); - - ut_a(doc_id != FTS_NULL_DOC_ID); - ut_a(sizeof(doc_id) == dfield->type.len); - fts_write_doc_id((byte*) write_doc_id, doc_id); - - dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id)); - } - - return(error); -} - -/*********************************************************************//** The given transaction is about to be committed; do whatever is necessary from the FTS system's POV. @return DB_SUCCESS or error code */ @@ -4291,6 +4274,16 @@ begin_sync: index_cache = static_cast<fts_index_cache_t*>( ib_vector_get(cache->indexes, i)); + if (index_cache->index->to_be_dropped + || index_cache->index->table->to_be_dropped) { + continue; + } + + index_cache->index->index_fts_syncing = true; + DBUG_EXECUTE_IF("fts_instrument_sync_sleep_drop_waits", + os_thread_sleep(10000000); + ); + error = fts_sync_index(sync, index_cache); if (error != DB_SUCCESS && !sync->interrupted) { @@ -4328,6 +4321,15 @@ end_sync: } rw_lock_x_lock(&cache->lock); + /* Clear fts syncing flags of any indexes incase sync is + interrupeted */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + index_cache->index->index_fts_syncing = false; + } + sync->interrupted = false; sync->in_progress = false; os_event_set(sync->event); diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index 820e82dc600..d7e434320a3 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, MariaDB Corporation. All Rights reserved. This program is free software; you can redistribute it and/or modify it under @@ -2953,13 +2953,6 @@ fts_optimize_sync_table( { dict_table_t* table = NULL; - /* Prevent DROP INDEX etc. from running when we are syncing - cache in background. */ - if (!rw_lock_s_lock_nowait(dict_operation_lock, __FILE__, __LINE__)) { - /* Exit when fail to get dict operation lock. */ - return; - } - table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL); if (table) { @@ -2969,8 +2962,6 @@ fts_optimize_sync_table( dict_table_close(table, FALSE, FALSE); } - - rw_lock_s_unlock(dict_operation_lock); } /**********************************************************************//** diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index 594f337c978..1b03e041871 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. 
+Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -149,13 +149,6 @@ struct fts_query_t { bool multi_exist; /*!< multiple FTS_EXIST oper */ st_mysql_ftparser* parser; /*!< fts plugin parser */ - - /** limit value for the fts query */ - ulonglong limit; - - /** number of docs fetched by query. This is to restrict the - result with limit value */ - ulonglong n_docs; }; /** For phrase matching, first we collect the documents and the positions @@ -3228,11 +3221,6 @@ fts_query_filter_doc_ids( ulint decoded = 0; ib_rbt_t* doc_freqs = word_freq->doc_freqs; - if (query->limit != ULONG_UNDEFINED - && query->n_docs >= query->limit) { - return(DB_SUCCESS); - } - /* Decode the ilist and add the doc ids to the query doc_id set. */ while (decoded < len) { ulint freq = 0; @@ -3320,17 +3308,11 @@ fts_query_filter_doc_ids( /* Add the word to the document's matched RB tree. */ fts_query_add_word_to_document(query, doc_id, word); } - - if (query->limit != ULONG_UNDEFINED - && query->limit <= ++query->n_docs) { - goto func_exit; - } } /* Some sanity checks. */ ut_a(doc_id == node->last_doc_id); -func_exit: if (query->total_size > fts_result_cache_limit) { return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); } else { @@ -3813,6 +3795,10 @@ fts_query_free( fts_doc_ids_free(query->deleted); } + if (query->intersection) { + fts_query_free_doc_ids(query, query->intersection); + } + if (query->doc_ids) { fts_query_free_doc_ids(query, query->doc_ids); } @@ -3941,7 +3927,6 @@ fts_query_can_optimize( @param[in] query_str FTS query @param[in] query_len FTS query string len in bytes @param[in,out] result result doc ids -@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ dberr_t fts_query( @@ -3950,8 +3935,7 @@ fts_query( uint flags, const byte* query_str, ulint query_len, - fts_result_t** result, - ulonglong limit) + fts_result_t** result) { fts_query_t query; dberr_t error = DB_SUCCESS; @@ -4013,10 +3997,6 @@ fts_query( query.total_docs = dict_table_get_n_rows(index->table); - query.limit = limit; - - query.n_docs = 0; - query.fts_common_table.suffix = "DELETED"; /* Read the deleted doc_ids, we need these for filtering. */ @@ -4078,19 +4058,6 @@ fts_query( fts_result_cache_limit = 2048; ); - /* Optimisation is allowed for limit value - when - i) No ranking involved - ii) Only FTS Union operations involved. */ - if (query.limit != ULONG_UNDEFINED - && !fts_ast_node_check_union(ast)) { - query.limit = ULONG_UNDEFINED; - } - - DBUG_EXECUTE_IF("fts_union_limit_off", - query.limit = ULONG_UNDEFINED; - ); - /* Traverse the Abstract Syntax Tree (AST) and execute the query. 
*/ query.error = fts_ast_visit( diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 0c8cdab4d15..10d54498feb 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6209,16 +6209,27 @@ ha_innobase::open(const char* name, int, uint) ib_table = open_dict_table(name, norm_name, is_part, ignore_err); - uint n_fields = mysql_fields(table); + if (NULL == ib_table) { - if (ib_table != NULL - && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && n_fields != dict_table_get_n_tot_u_cols(ib_table)) - || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && (n_fields != dict_table_get_n_tot_u_cols(ib_table) - 1)))) { + if (is_part) { + sql_print_error("Failed to open table %s.\n", + norm_name); + } +no_such_table: + free_share(m_share); + set_my_errno(ENOENT); + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + uint n_fields = mysql_fields(table); + uint n_cols = dict_table_get_n_user_cols(ib_table) + + dict_table_get_n_v_cols(ib_table) + - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID); + + if (n_cols != n_fields) { ib::warn() << "Table " << norm_name << " contains " - << dict_table_get_n_tot_u_cols(ib_table) << " user" + << n_cols << " user" " defined columns in InnoDB, but " << n_fields << " columns in MariaDB. Please check" " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN @@ -6230,21 +6241,7 @@ ha_innobase::open(const char* name, int, uint) ib_table->file_unreadable = true; ib_table->corrupted = true; dict_table_close(ib_table, FALSE, FALSE); - ib_table = NULL; - is_part = NULL; - } - - if (NULL == ib_table) { - - if (is_part) { - sql_print_error("Failed to open table %s.\n", - norm_name); - } - - free_share(m_share); - set_my_errno(ENOENT); - - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + goto no_such_table; } innobase_copy_frm_flags_from_table_share(ib_table, table->s); @@ -7983,13 +7980,16 @@ no_icp: } else { ibool contain; - if (innobase_is_v_fld(table->field[i])) { - contain = dict_index_contains_col_or_prefix( - index, num_v, true); - } else { + if (!innobase_is_v_fld(table->field[i])) { contain = dict_index_contains_col_or_prefix( index, i - num_v, false); + } else if (dict_index_is_clust(index)) { + num_v++; + continue; + } else { + contain = dict_index_contains_col_or_prefix( + index, num_v, true); } field = build_template_needs_field( @@ -10313,10 +10313,8 @@ ha_innobase::ft_init_ext( const byte* q = reinterpret_cast<const byte*>( const_cast<char*>(query)); - // JAN: TODO: support for ft_init_ext_with_hints(), remove the line below - m_prebuilt->m_fts_limit= ULONG_UNDEFINED; - dberr_t error = fts_query(trx, index, flags, q, query_len, &result, - m_prebuilt->m_fts_limit); + // FIXME: support ft_init_ext_with_hints(), pass LIMIT + dberr_t error = fts_query(trx, index, flags, q, query_len, &result); if (error != DB_SUCCESS) { my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0)); @@ -14777,7 +14775,7 @@ ha_innobase::optimize( calls to OPTIMIZE, which is undesirable. 
*/ /* TODO: Defragment is disabled for now */ - if (0) { + if (srv_defragment) { int err; err = defragment_table(m_prebuilt->table->name.m_name, NULL, false); @@ -19279,7 +19277,7 @@ buffer_pool_load_now( const void* save) /*!< in: immediate result from check function */ { - if (*(my_bool*) save) { + if (*(my_bool*) save && !srv_read_only_mode) { buf_load_start(); } } @@ -19302,7 +19300,7 @@ buffer_pool_load_abort( const void* save) /*!< in: immediate result from check function */ { - if (*(my_bool*) save) { + if (*(my_bool*) save && !srv_read_only_mode) { buf_load_abort(); } } @@ -21858,7 +21856,7 @@ const char* TROUBLESHOOT_DATADICT_MSG = " for how to resolve the issue."; const char* BUG_REPORT_MSG = - "Submit a detailed bug report to http://bugs.mysql.com"; + "Submit a detailed bug report to https://jira.mariadb.org/"; const char* FORCE_RECOVERY_MSG = "Please refer to " REFMAN "forcing-innodb-recovery.html" diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 552350ef805..a8769820563 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -471,34 +471,6 @@ innobase_need_rebuild( return(false); } - /* If alter table changes column name and adds a new - index, we need to check is this new index created - to new column name. This is because column name - changes are done normally after creating indexes. */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME) && - ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_INDEX) || - (ha_alter_info->handler_flags - & Alter_inplace_info::ADD_FOREIGN_KEY))) { - for (ulint i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY* key = &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - - for (ulint j = 0; j < key->user_defined_key_parts; j++) { - const KEY_PART_INFO* key_part = &(key->key_part[j]); - const Field* field = altered_table->field[key_part->fieldnr]; - - /* Field used on added index is renamed on - this same alter table. We need table - rebuild. */ - if (field && field->flags & FIELD_IS_RENAMED) { - return (true); - } - } - } - } - return(!!(ha_alter_info->handler_flags & INNOBASE_ALTER_REBUILD)); } @@ -625,6 +597,13 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); + /* Before 10.2.2 information about virtual columns was not stored in + system tables. We need to do a full alter to rebuild proper 10.2.2+ + metadata with the information about virtual columns */ + if (table->s->mysql_version < 100202 && table->s->virtual_fields) { + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + if (high_level_read_only || srv_sys_space.created_new_raw() || srv_force_recovery) { @@ -1129,8 +1108,15 @@ ha_innobase::check_if_supported_inplace_alter( /* Compute the DEFAULT values of non-constant columns (VCOL_SESSION_FUNC | VCOL_TIME_FUNC). 
*/ - (*af)->set_default(); - goto next_column; + switch ((*af)->set_default()) { + case 0: /* OK */ + case 3: /* DATETIME to TIME or DATE conversion */ + goto next_column; + case -1: /* OOM, or GEOMETRY type mismatch */ + case 1: /* A number adjusted to the min/max value */ + case 2: /* String truncation, or conversion problem */ + break; + } } DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); @@ -2163,19 +2149,18 @@ name_ok: } /** Create index field definition for key part -@param[in] altered_table MySQL table that is being altered, - or NULL if a new clustered index - is not being created -@param[in] key_part MySQL key definition -@param[in,out] index_field index field -@param[in] new_clustered new cluster */ -static +@param[in] new_clustered true if alter is generating a new clustered +index +@param[in] altered_table MySQL table that is being altered +@param[in] key_part MySQL key definition +@param[out] index_field index field defition for key_part */ +static MY_ATTRIBUTE((nonnull(2,3))) void innobase_create_index_field_def( + bool new_clustered, const TABLE* altered_table, const KEY_PART_INFO* key_part, - index_field_t* index_field, - bool new_clustered) + index_field_t* index_field) { const Field* field; ibool is_unsigned; @@ -2186,11 +2171,11 @@ innobase_create_index_field_def( ut_ad(key_part); ut_ad(index_field); + ut_ad(altered_table); field = new_clustered ? altered_table->field[key_part->fieldnr] : key_part->field; - ut_a(field); for (ulint i = 0; i < key_part->fieldnr; i++) { if (innobase_is_v_fld(altered_table->field[i])) { @@ -2251,9 +2236,10 @@ innobase_create_index_def( DBUG_ENTER("innobase_create_index_def"); DBUG_ASSERT(!key_clustered || new_clustered); + ut_ad(altered_table); + index->fields = static_cast<index_field_t*>( mem_heap_alloc(heap, n_fields * sizeof *index->fields)); - memset(index->fields, 0, n_fields * sizeof *index->fields); index->parser = NULL; index->key_number = key_number; @@ -2331,8 +2317,8 @@ innobase_create_index_def( if (!(key->flags & HA_SPATIAL)) { for (i = 0; i < n_fields; i++) { innobase_create_index_field_def( - altered_table, &key->key_part[i], - &index->fields[i], new_clustered); + new_clustered, altered_table, + &key->key_part[i], &index->fields[i]); if (index->fields[i].is_v_col) { index->ind_type |= DICT_VIRTUAL; @@ -2745,7 +2731,6 @@ created_clustered: index->fields = static_cast<index_field_t*>( mem_heap_alloc(heap, sizeof *index->fields)); - memset(index->fields, 0, sizeof *index->fields); index->n_fields = 1; index->fields->col_no = fts_doc_id_col; index->fields->prefix_len = 0; @@ -4213,7 +4198,7 @@ innobase_add_instant_try( if (big_rec) { if (error == DB_SUCCESS) { error = btr_store_big_rec_extern_fields( - &pcur, update, offsets, big_rec, &mtr, + &pcur, offsets, big_rec, &mtr, BTR_STORE_UPDATE); } @@ -5078,8 +5063,7 @@ new_clustered_failed: } ctx->add_index[a] = row_merge_create_index( - ctx->new_table, - &index_defs[a], add_v, ctx->col_names); + ctx->new_table, &index_defs[a], add_v); add_key_nums[a] = index_defs[a].key_number; @@ -9112,7 +9096,47 @@ ha_innobase::commit_inplace_alter_table( break; } - DICT_STATS_BG_YIELD(trx); + DICT_BG_YIELD(trx); + } + + /* Make a concurrent Drop fts Index to wait until sync of that + fts index is happening in the background */ + for (;;) { + bool retry = false; + + for (inplace_alter_handler_ctx** pctx = ctx_array; + *pctx; pctx++) { + int count =0; + ha_innobase_inplace_ctx* ctx + = static_cast<ha_innobase_inplace_ctx*>(*pctx); + DBUG_ASSERT(new_clustered == ctx->need_rebuild()); + + if 
(dict_fts_index_syncing(ctx->old_table)) { + count++; + if (count == 100) { + fprintf(stderr, + "Drop index waiting for background sync" + "to finish\n"); + } + retry = true; + } + + if (new_clustered && dict_fts_index_syncing(ctx->new_table)) { + count++; + if (count == 100) { + fprintf(stderr, + "Drop index waiting for background sync" + "to finish\n"); + } + retry = true; + } + } + + if (!retry) { + break; + } + + DICT_BG_YIELD(trx); } /* Apply the changes to the data dictionary tables, for all @@ -9482,8 +9506,13 @@ foreign_fail: ut_d(dict_table_check_for_dup_indexes( ctx->new_table, CHECK_ABORTED_OK)); - ut_a(fts_check_cached_index(ctx->new_table)); +#ifdef UNIV_DEBUG + if (!(ctx->new_table->fts != NULL + && ctx->new_table->fts->cache->sync->in_progress)) { + ut_a(fts_check_cached_index(ctx->new_table)); + } +#endif if (new_clustered) { /* Since the table has been rebuilt, we remove all persistent statistics corresponding to the diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 632fcebc2f6..d1728db68e9 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -6397,6 +6397,7 @@ i_s_sys_tables_fill_table_stats( } heap = mem_heap_create(1000); + rw_lock_s_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); mtr_start(&mtr); @@ -6405,7 +6406,6 @@ i_s_sys_tables_fill_table_stats( while (rec) { const char* err_msg; dict_table_t* table_rec; - ulint ref_count; /* Fetch the dict_table_t structure corresponding to this SYS_TABLES record */ @@ -6413,16 +6413,7 @@ i_s_sys_tables_fill_table_stats( heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_CACHE, &mtr); - if (table_rec != NULL) { - ut_ad(err_msg == NULL); - - ref_count = table_rec->get_ref_count(); - - /* Protect the dict_table_t object by incrementing - the reference count. */ - table_rec->acquire(); - } - + ulint ref_count = table_rec ? 
table_rec->get_ref_count() : 0; mutex_exit(&dict_sys->mutex); DBUG_EXECUTE_IF("test_sys_tablestats", { @@ -6431,29 +6422,30 @@ i_s_sys_tables_fill_table_stats( }}); if (table_rec != NULL) { + ut_ad(err_msg == NULL); i_s_dict_fill_sys_tablestats(thd, table_rec, ref_count, tables->table); } else { + ut_ad(err_msg != NULL); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); } + rw_lock_s_unlock(dict_operation_lock); mem_heap_empty(heap); /* Get the next record */ + rw_lock_s_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); - if (table_rec != NULL) { - table_rec->release(); - } - mtr_start(&mtr); rec = dict_getnext_system(&pcur, &mtr); } mtr_commit(&mtr); mutex_exit(&dict_sys->mutex); + rw_lock_s_unlock(dict_operation_lock); mem_heap_free(heap); DBUG_RETURN(0); @@ -8009,18 +8001,16 @@ i_s_dict_fill_sys_tablespaces( ulint flags, /*!< in: tablespace flags */ TABLE* table_to_fill) /*!< in/out: fill this table */ { - Field** fields; - ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); - bool is_compressed = FSP_FLAGS_GET_ZIP_SSIZE(flags); - const char* row_format; - const page_size_t page_size(flags); - const char* space_type; + Field** fields; + ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); + const char* file_format; + const char* row_format; DBUG_ENTER("i_s_dict_fill_sys_tablespaces"); if (is_system_tablespace(space)) { - row_format = "Compact or Redundant"; - } else if (is_compressed) { + row_format = "Compact, Redundant or Dynamic"; + } else if (FSP_FLAGS_GET_ZIP_SSIZE(flags)) { row_format = "Compressed"; } else if (atomic_blobs) { row_format = "Dynamic"; @@ -8028,12 +8018,6 @@ i_s_dict_fill_sys_tablespaces( row_format = "Compact or Redundant"; } - if (is_system_tablespace(space)) { - space_type = "System"; - } else { - space_type = "Single"; - } - fields = table_to_fill->field; OK(fields[SYS_TABLESPACES_SPACE]->store(space, true)); @@ -8044,19 +8028,32 @@ i_s_dict_fill_sys_tablespaces( OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT], row_format)); + OK(field_store_string(fields[SYS_TABLESPACES_SPACE_TYPE], + is_system_tablespace(space) + ? "System" : "Single")); + + ulint cflags = fsp_flags_is_valid(flags, space) + ? flags : fsp_flags_convert_from_101(flags); + if (cflags == ULINT_UNDEFINED) { + fields[SYS_TABLESPACES_PAGE_SIZE]->set_null(); + fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->set_null(); + fields[SYS_TABLESPACES_FS_BLOCK_SIZE]->set_null(); + fields[SYS_TABLESPACES_FILE_SIZE]->set_null(); + fields[SYS_TABLESPACES_ALLOC_SIZE]->set_null(); + OK(schema_table_store_record(thd, table_to_fill)); + DBUG_RETURN(0); + } + + const page_size_t page_size(cflags); + OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store( - univ_page_size.physical(), true)); + page_size.logical(), true)); OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store( - page_size.is_compressed() - ? 
page_size.physical() - : 0, true)); - - OK(field_store_string(fields[SYS_TABLESPACES_SPACE_TYPE], - space_type)); + page_size.physical(), true)); char* filepath = NULL; - if (FSP_FLAGS_HAS_DATA_DIR(flags)) { + if (FSP_FLAGS_HAS_DATA_DIR(cflags)) { mutex_enter(&dict_sys->mutex); filepath = dict_get_first_path(space); mutex_exit(&dict_sys->mutex); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index e52dfe12b6a..c276a4ce137 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4212,7 +4212,7 @@ ibuf_delete( fprintf(stderr, "\nspace " UINT32PF " offset " UINT32PF " (%u records, index id %llu)\n" "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", + " to https://jira.mariadb.org/\n", block->page.id.space(), block->page.id.page_no(), (unsigned) page_get_n_recs(page), diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 0445d0ef59c..76f13325e2a 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -679,7 +679,6 @@ btr_store_big_rec_extern_fields( btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if btr_mtr is restarted, then this can be repositioned. */ - const upd_t* upd, /*!< in: update vector */ ulint* offsets, /*!< in/out: rec_get_offsets() on pcur. the "external storage" flags in offsets will correctly correspond diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic index 3d2f46ed877..a68f4829561 100644 --- a/storage/innobase/include/data0type.ic +++ b/storage/innobase/include/data0type.ic @@ -527,7 +527,7 @@ dtype_get_fixed_size_low( } } /* Treat as variable-length. */ - /* Fall through */ + /* fall through */ case DATA_VARCHAR: case DATA_BINARY: case DATA_DECIMAL: diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 03175936f7e..f14487f09d0 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -789,14 +789,6 @@ dict_table_get_n_user_cols( /*=======================*/ const dict_table_t* table) /*!< in: table */ MY_ATTRIBUTE((warn_unused_result)); -/** Gets the number of user-defined virtual and non-virtual columns in a table -in the dictionary cache. -@param[in] table table -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_tot_u_cols( - const dict_table_t* table); /********************************************************************//** Gets the number of all non-virtual columns (also system) in a table in the dictionary cache. diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 06cd2434942..26918251d8b 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -391,22 +391,6 @@ dict_table_get_n_user_cols( return(table->n_cols - DATA_N_SYS_COLS); } -/** Gets the number of user-defined virtual and non-virtual columns in a table -in the dictionary cache. 
-@param[in] table table -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_tot_u_cols( - const dict_table_t* table) -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(dict_table_get_n_user_cols(table) - + dict_table_get_n_v_cols(table)); -} - /********************************************************************//** Gets the number of all non-virtual columns (also system) in a table in the dictionary cache. @@ -930,6 +914,27 @@ dict_table_x_lock_indexes( } /*********************************************************************//** +Returns true if the particular FTS index in the table is still syncing +in the background, false otherwise. +@param [in] table Table containing FTS index +@return True if sync of fts index is still going in the background */ +UNIV_INLINE +bool +dict_fts_index_syncing( + dict_table_t* table) +{ + dict_index_t* index; + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (index->index_fts_syncing) { + return(true); + } + } + return(false); +} +/*********************************************************************//** Release the exclusive locks on all index tree. */ UNIV_INLINE void @@ -1536,14 +1541,15 @@ dict_table_t::acquire() ++n_ref_count; } -/** Release the table handle. */ +/** Release the table handle. +@return whether the last handle was released */ inline -void +bool dict_table_t::release() { ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(n_ref_count > 0); - --n_ref_count; + return !--n_ref_count; } /** Encode the number of columns and number of virtual columns in a diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index ecfe37f9cc7..ed39ed8a6c6 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -938,6 +938,9 @@ struct dict_index_t{ bool has_new_v_col; /*!< whether it has a newly added virtual column in ALTER */ + bool index_fts_syncing;/*!< Whether the fts index is + still syncing in the background; + FIXME: remove this and use MDL */ UT_LIST_NODE_T(dict_index_t) indexes;/*!< list of indexes of the table */ #ifdef BTR_CUR_ADAPT @@ -1428,8 +1431,9 @@ struct dict_table_t { /** Acquire the table handle. */ inline void acquire(); - /** Release the table handle. */ - inline void release(); + /** Release the table handle. + @return whether the last handle was released */ + inline bool release(); /** @return whether the table supports transactions */ bool no_rollback() const diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h index f99cbeed780..e66666b66a3 100644 --- a/storage/innobase/include/dict0stats_bg.h +++ b/storage/innobase/include/dict0stats_bg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under @@ -56,8 +56,8 @@ dict_stats_recalc_pool_del( /** Yield the data dictionary latch when waiting for the background thread to stop accessing a table. -@param trx transaction holding the data dictionary locks */ -#define DICT_STATS_BG_YIELD(trx) do { \ +@param trx transaction holding the data dictionary locks */ +#define DICT_BG_YIELD(trx) do { \ row_mysql_unlock_data_dictionary(trx); \ os_thread_sleep(250000); \ row_mysql_lock_data_dictionary(trx); \ diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index f1d53165cdd..813e34b43d3 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. All Rights reserved. +Copyright (c) 2016, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -456,20 +456,6 @@ fts_update_next_doc_id( doc_id_t doc_id); /*!< in: DOC ID to set */ /******************************************************************//** -Create a new document id . -@return DB_SUCCESS if all went well else error */ -dberr_t -fts_create_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: row is of this - table. */ - dtuple_t* row, /*!< in/out: add doc id - value to this row. This is the - current row that is being - inserted. */ - mem_heap_t* heap); /*!< in: heap */ - -/******************************************************************//** Create a new fts_doc_ids_t. @return new fts_doc_ids_t. */ fts_doc_ids_t* @@ -579,7 +565,6 @@ fts_commit( @param[in] query_str FTS query @param[in] query_len FTS query string len in bytes @param[in,out] result result doc ids -@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ dberr_t fts_query( @@ -588,8 +573,7 @@ fts_query( uint flags, const byte* query_str, ulint query_len, - fts_result_t** result, - ulonglong limit) + fts_result_t** result) MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** @@ -1033,4 +1017,3 @@ fts_check_corrupt( #endif /*!< fts0fts.h */ - diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index f6263ab66ef..ac24812cdfc 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -109,9 +109,6 @@ enum mlog_id_t { /** initialize a page in an undo log */ MLOG_UNDO_INIT = 22, - /** discard an update undo log header */ - MLOG_UNDO_HDR_DISCARD = 23, - /** create an undo log header */ MLOG_UNDO_HDR_CREATE = 25, diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 4cd8d79bb41..064430cbf4b 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1230,19 +1230,27 @@ os_file_get_size( os_file_t file) MY_ATTRIBUTE((warn_unused_result)); -/** Write the specified number of zeros to a newly created file. -@param[in] name name of the file or path as a null-terminated - string -@param[in] file handle to a file -@param[in] size file size -@param[in] read_only Enable read-only checks if true -@return true if success */ +/** Extend a file. + +On Windows, extending a file allocates blocks for the file, +unless the file is sparse. 
+ +On Unix, we will extend the file with ftruncate(), if +file needs to be sparse. Otherwise posix_fallocate() is used +when available, and if not, binary zeroes are added to the end +of file. + +@param[in] name file name +@param[in] file file handle +@param[in] size desired file size +@param[in] sparse whether to create a sparse file (no preallocating) +@return whether the operation succeeded */ bool os_file_set_size( const char* name, os_file_t file, os_offset_t size, - bool read_only) + bool is_sparse = false) MY_ATTRIBUTE((warn_unused_result)); /** Truncates a file at its current position. @@ -1573,8 +1581,10 @@ os_file_set_umask(ulint umask); Make file sparse, on Windows. @param[in] file file handle +@param[in] is_sparse if true, make file sparse, + otherwise "unsparse" the file @return true on success, false on error */ -bool os_file_set_sparse_win32(os_file_t file); +bool os_file_set_sparse_win32(os_file_t file, bool is_sparse = true); /** Changes file size on Windows diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h index b7f9dd02cb0..ca620cbef59 100644 --- a/storage/innobase/include/row0merge.h +++ b/storage/innobase/include/row0merge.h @@ -267,15 +267,12 @@ row_merge_rename_index_to_drop( @param[in] index_def the index definition @param[in] add_v new virtual columns added along with add index call -@param[in] col_names column names if columns are renamed - or NULL @return index, or NULL on error */ dict_index_t* row_merge_create_index( dict_table_t* table, const index_def_t* index_def, - const dict_add_v_col_t* add_v, - const char** col_names) + const dict_add_v_col_t* add_v) MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 8d3752974a6..a7a55d202e8 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -844,9 +844,6 @@ struct row_prebuilt_t { /** The MySQL table object */ TABLE* m_mysql_table; - - /** limit value to avoid fts result overflow */ - ulonglong m_fts_limit; }; /** Callback for row_mysql_sys_index_iterate() */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index c471ce5d57d..580a660cedc 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -489,10 +489,12 @@ enum srv_operation_mode { SRV_OPERATION_NORMAL, /** Mariabackup taking a backup */ SRV_OPERATION_BACKUP, - /** Mariabackup restoring a backup */ + /** Mariabackup restoring a backup for subsequent --copy-back */ SRV_OPERATION_RESTORE, /** Mariabackup restoring the incremental part of a backup */ - SRV_OPERATION_RESTORE_DELTA + SRV_OPERATION_RESTORE_DELTA, + /** Mariabackup restoring a backup for subsequent --export */ + SRV_OPERATION_RESTORE_EXPORT }; /** Current mode of operation */ diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 270bee99a72..51f8035d886 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -337,16 +337,6 @@ trx_undo_parse_page_header( const byte* end_ptr, page_t* page, mtr_t* mtr); -/***********************************************************//** -Parses the redo log entry of an undo log page header discard. 
-@return end of log record or NULL */ -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ /************************************************************************ Frees an undo log memory copy. */ void diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 8656b231a88..80c0b5476b0 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -41,7 +41,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 7 -#define INNODB_VERSION_BUGFIX 19 +#define INNODB_VERSION_BUGFIX 20 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index fe9d9683785..ed3281e1453 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6657,15 +6657,15 @@ lock_validate() Release both mutexes during the validation check. */ for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { - const lock_t* lock; ib_uint64_t limit = 0; - while ((lock = lock_rec_validate(i, &limit)) != 0) { - - ulint space = lock->un_member.rec_lock.space; - ulint page_no = lock->un_member.rec_lock.page_no; - - pages.insert(std::make_pair(space, page_no)); + while (const lock_t* lock = lock_rec_validate(i, &limit)) { + if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) { + /* The lock bitmap is empty; ignore it. */ + continue; + } + const lock_rec_t& l = lock->un_member.rec_lock; + pages.insert(std::make_pair(l.space, l.page_no)); } } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index cba08271bac..21450767689 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -452,35 +452,6 @@ recv_sys_close() recv_spaces.clear(); } -/********************************************************//** -Frees the recovery system memory. */ -void -recv_sys_mem_free(void) -/*===================*/ -{ - if (recv_sys != NULL) { - if (recv_sys->addr_hash != NULL) { - hash_table_free(recv_sys->addr_hash); - } - - if (recv_sys->heap != NULL) { - mem_heap_free(recv_sys->heap); - } - - if (recv_sys->flush_start != NULL) { - os_event_destroy(recv_sys->flush_start); - } - - if (recv_sys->flush_end != NULL) { - os_event_destroy(recv_sys->flush_end); - } - - ut_free(recv_sys->buf); - ut_free(recv_sys); - recv_sys = NULL; - } -} - /************************************************************ Reset the state of the recovery system variables. */ void @@ -1422,10 +1393,6 @@ parse_log: /* Allow anything in page_type when creating a page. 
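The lock_validate() change above skips record locks whose bitmap has no bit set, i.e. locks that no longer refer to any record heap number. A self-contained sketch of that kind of check with a hypothetical helper (not lock0lock.cc code), returning a "not found" sentinel when the bitmap is empty:

#include <cstddef>
#include <vector>

static const size_t BIT_NOT_FOUND = size_t(-1);	/* like ULINT_UNDEFINED */

/* Return the index of the first set bit, or BIT_NOT_FOUND if none is set. */
static size_t find_first_set_bit(const std::vector<unsigned char>& bitmap)
{
	for (size_t byte = 0; byte < bitmap.size(); byte++) {
		if (!bitmap[byte]) {
			continue;
		}
		for (unsigned bit = 0; bit < 8; bit++) {
			if (bitmap[byte] & (1u << bit)) {
				return byte * 8 + bit;
			}
		}
	}
	return BIT_NOT_FOUND;
}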
*/ ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); break; - case MLOG_UNDO_HDR_DISCARD: - ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); - break; case MLOG_UNDO_HDR_CREATE: ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); ptr = trx_undo_parse_page_header(ptr, end_ptr, page, mtr); @@ -1985,7 +1952,8 @@ void recv_apply_hashed_log_recs(bool last_batch) { ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); mutex_enter(&recv_sys->mutex); @@ -2004,7 +1972,8 @@ recv_apply_hashed_log_recs(bool last_batch) ut_ad(!last_batch == log_mutex_own()); recv_no_ibuf_operations = !last_batch - || srv_operation == SRV_OPERATION_RESTORE; + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT; ut_d(recv_no_log_write = recv_no_ibuf_operations); @@ -3023,7 +2992,8 @@ static dberr_t recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i) { - if (srv_operation == SRV_OPERATION_RESTORE) { + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { ib::warn() << "Tablespace " << i->first << " was not" " found at " << i->second.name << " when" " restoring a (partial?) backup. All redo log" @@ -3181,7 +3151,8 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) dberr_t err = DB_SUCCESS; ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); /* Initialize red-black tree for fast insertions into the flush_list during recovery process. */ @@ -3653,9 +3624,6 @@ get_mlog_string(mlog_id_t type) case MLOG_UNDO_INIT: return("MLOG_UNDO_INIT"); - case MLOG_UNDO_HDR_DISCARD: - return("MLOG_UNDO_HDR_DISCARD"); - case MLOG_UNDO_HDR_CREATE: return("MLOG_UNDO_HDR_CREATE"); diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 0fd98350579..c6b9def79a1 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -775,9 +775,9 @@ os_win32_device_io_control( OVERLAPPED overlapped = { 0 }; overlapped.hEvent = win_get_syncio_event(); BOOL result = DeviceIoControl(handle, code, inbuf, inbuf_size, outbuf, - outbuf_size, bytes_returned, &overlapped); + outbuf_size, NULL, &overlapped); - if (!result && (GetLastError() == ERROR_IO_PENDING)) { + if (result || (GetLastError() == ERROR_IO_PENDING)) { /* Wait for async io to complete */ result = GetOverlappedResult(handle, &overlapped, bytes_returned, TRUE); } @@ -3238,17 +3238,10 @@ os_file_close_func( @param[in] file handle to an open file @return file size, or (os_offset_t) -1 on failure */ os_offset_t -os_file_get_size( - os_file_t file) +os_file_get_size(os_file_t file) { - /* Store current position */ - os_offset_t pos = lseek(file, 0, SEEK_CUR); - os_offset_t file_size = lseek(file, 0, SEEK_END); - - /* Restore current position as the function should not change it */ - lseek(file, pos, SEEK_SET); - - return(file_size); + struct stat statbuf; + return fstat(file, &statbuf) ? os_offset_t(-1) : statbuf.st_size; } /** Gets a file size. 
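The os_file_get_size() rewrite above replaces the lseek() save/seek/restore sequence with a single fstat() call, which never disturbs the file offset. A minimal standalone equivalent, shown only as an illustration of the same system call:

#include <sys/stat.h>

/* Return the file size in bytes, or -1 on failure. The file offset is
left untouched, unlike the old lseek(SEEK_END)-based version. */
static long long file_size(int fd)
{
	struct stat statbuf;
	return fstat(fd, &statbuf) ? -1 : (long long) statbuf.st_size;
}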
@@ -3458,14 +3451,14 @@ SyncFileIO::execute(const IORequest& request) if (request.is_read()) { ret = ReadFile(m_fh, m_buf, - static_cast<DWORD>(m_n), &n_bytes, &seek); + static_cast<DWORD>(m_n), NULL, &seek); } else { ut_ad(request.is_write()); ret = WriteFile(m_fh, m_buf, - static_cast<DWORD>(m_n), &n_bytes, &seek); + static_cast<DWORD>(m_n), NULL, &seek); } - if (!ret && (GetLastError() == ERROR_IO_PENDING)) { + if (ret || (GetLastError() == ERROR_IO_PENDING)) { /* Wait for async io to complete */ ret = GetOverlappedResult(m_fh, &seek, &n_bytes, TRUE); } @@ -3485,17 +3478,17 @@ SyncFileIO::execute(Slot* slot) ret = ReadFile( slot->file, slot->ptr, slot->len, - &slot->n_bytes, &slot->control); + NULL, &slot->control); } else { ut_ad(slot->type.is_write()); ret = WriteFile( slot->file, slot->ptr, slot->len, - &slot->n_bytes, &slot->control); + NULL, &slot->control); } - if (!ret && (GetLastError() == ERROR_IO_PENDING)) { + if (ret || (GetLastError() == ERROR_IO_PENDING)) { /* Wait for async io to complete */ ret = GetOverlappedResult(slot->file, &slot->control, &slot->n_bytes, TRUE); } @@ -4743,11 +4736,20 @@ Sets a sparse flag on Windows file. @param[in] file file handle @return true on success, false on error */ -bool os_file_set_sparse_win32(os_file_t file) +#include <versionhelpers.h> +bool os_file_set_sparse_win32(os_file_t file, bool is_sparse) { - + if (!is_sparse && !IsWindows8OrGreater()) { + /* Cannot unset sparse flag on older Windows. + Until Windows8 it is documented to produce unpredictable results, + if there are unallocated ranges in file.*/ + return false; + } DWORD temp; - return os_win32_device_io_control(file, FSCTL_SET_SPARSE, 0, 0, 0, 0,&temp); + FILE_SET_SPARSE_BUFFER sparse_buffer; + sparse_buffer.SetSparse = is_sparse; + return os_win32_device_io_control(file, + FSCTL_SET_SPARSE, &sparse_buffer, sizeof(sparse_buffer), 0, 0,&temp); } @@ -5319,23 +5321,77 @@ short_warning: #endif /* _WIN32 */ -/** Write the specified number of zeros to a newly created file. -@param[in] name name of the file or path as a null-terminated - string -@param[in] file handle to a file -@param[in] size file size -@param[in] read_only Enable read-only checks if true -@return true if success */ +/** Extend a file. + +On Windows, extending a file allocates blocks for the file, +unless the file is sparse. + +On Unix, we will extend the file with ftruncate(), if +file needs to be sparse. Otherwise posix_fallocate() is used +when available, and if not, binary zeroes are added to the end +of file. + +@param[in] name file name +@param[in] file file handle +@param[in] size desired file size +@param[in] sparse whether to create a sparse file (no preallocating) +@return whether the operation succeeded */ bool os_file_set_size( const char* name, os_file_t file, os_offset_t size, - bool read_only) + bool is_sparse) { #ifdef _WIN32 + /* On Windows, changing file size works well and as expected for both + sparse and normal files. + + However, 10.2 up until 10.2.9 made every file sparse in innodb, + causing NTFS fragmentation issues(MDEV-13941). We try to undo + the damage, and unsparse the file.*/ + + if (!is_sparse && os_is_sparse_file_supported(file)) { + if (!os_file_set_sparse_win32(file, false)) + /* Unsparsing file failed. 
Fallback to writing binary + zeros, to avoid even higher fragmentation.*/ + goto fallback; + } + return os_file_change_size_win32(name, file, size); -#endif + +fallback: +#else + if (is_sparse) { + bool success = !ftruncate(file, size); + if (!success) { + ib::error() << "ftruncate of file " << name << " to " + << size << " bytes failed with error " + << errno; + } + return(success); + } + +# ifdef HAVE_POSIX_FALLOCATE + int err; + do { + os_offset_t current_size = os_file_get_size(file); + err = current_size >= size + ? 0 : posix_fallocate(file, current_size, + size - current_size); + } while (err == EINTR + && srv_shutdown_state == SRV_SHUTDOWN_NONE); + + if (err) { + ib::error() << "preallocating " + << size << " bytes for file " << name + << " failed with error " << err; + } + errno = err; + return(!err); +# endif /* HAVE_POSIX_ALLOCATE */ +#endif /* _WIN32*/ + /* Write up to 1 megabyte at a time. */ ulint buf_size = ut_min( static_cast<ulint>(64), @@ -5353,13 +5409,14 @@ os_file_set_size( /* Write buffer full of zeros */ memset(buf, 0, buf_size); - if (size >= (os_offset_t) 100 << 20) { + os_offset_t current_size = os_file_get_size(file); + bool write_progress_info = + (size - current_size >= (os_offset_t) 100 << 20); + if (write_progress_info) { ib::info() << "Progress in MB:"; } - os_offset_t current_size = 0; - while (current_size < size) { ulint n_bytes; @@ -5382,8 +5439,9 @@ os_file_set_size( } /* Print about progress for each 100 MB written */ - if ((current_size + n_bytes) / (100 << 20) - != current_size / (100 << 20)) { + if (write_progress_info && + ((current_size + n_bytes) / (100 << 20) + != current_size / (100 << 20))) { fprintf(stderr, " %lu00", (ulong) ((current_size + n_bytes) @@ -5393,7 +5451,7 @@ os_file_set_size( current_size += n_bytes; } - if (size >= (os_offset_t) 100 << 20) { + if (write_progress_info) { fprintf(stderr, "\n"); } @@ -5578,10 +5636,11 @@ os_is_sparse_file_supported(os_file_t fh) ); #ifdef _WIN32 - BY_HANDLE_FILE_INFORMATION info; - if (GetFileInformationByHandle(fh,&info)) { - if (info.dwFileAttributes != INVALID_FILE_ATTRIBUTES) { - return (info.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0; + FILE_ATTRIBUTE_TAG_INFO info; + if (GetFileInformationByHandleEx(fh, FileAttributeTagInfo, + &info, (DWORD)sizeof(info))) { + if (info.FileAttributes != INVALID_FILE_ATTRIBUTES) { + return (info.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0; } } return false; @@ -6667,7 +6726,7 @@ try_again: #ifdef WIN_ASYNC_IO ret = ReadFile( file, slot->ptr, slot->len, - &slot->n_bytes, &slot->control); + NULL, &slot->control); #elif defined(LINUX_NATIVE_AIO) if (!array->linux_dispatch(slot)) { goto err_exit; @@ -6685,7 +6744,7 @@ try_again: #ifdef WIN_ASYNC_IO ret = WriteFile( file, slot->ptr, slot->len, - &slot->n_bytes, &slot->control); + NULL, &slot->control); #elif defined(LINUX_NATIVE_AIO) if (!array->linux_dispatch(slot)) { goto err_exit; @@ -6701,8 +6760,7 @@ try_again: } #ifdef WIN_ASYNC_IO - if ((ret && slot->len == slot->n_bytes) - || (!ret && GetLastError() == ERROR_IO_PENDING)) { + if (ret || (GetLastError() == ERROR_IO_PENDING)) { /* aio completed or was queued successfully! 
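When neither a sparse ftruncate() nor posix_fallocate() is usable, the os_file_set_size() path shown earlier in this hunk falls back to appending buffered binary zeroes until the requested size is reached. A standalone sketch of that chunked zero-fill loop, assuming plain POSIX pwrite() rather than the server's own I/O wrappers:

#include <sys/types.h>
#include <unistd.h>
#include <vector>

/* Append zeroes from current_size up to size, one chunk at a time. */
static bool zero_fill(int fd, off_t current_size, off_t size)
{
	std::vector<char> buf(1 << 20, 0);	/* 1 MiB of zeroes */

	while (current_size < size) {
		size_t n = size_t(size - current_size);
		if (n > buf.size()) {
			n = buf.size();
		}
		ssize_t written = pwrite(fd, buf.data(), n, current_size);
		if (written <= 0) {
			return false;
		}
		current_size += written;
	}
	return true;
}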
*/ return(DB_SUCCESS); } diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index c9d35521708..70710a1e6ca 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -816,9 +816,10 @@ rec_get_offsets_func( case REC_STATUS_SUPREMUM: /* infimum or supremum record */ ut_ad(rec_get_heap_no_new(rec) - == (rec_get_status(rec) == REC_STATUS_INFIMUM - ? PAGE_HEAP_NO_INFIMUM - : PAGE_HEAP_NO_SUPREMUM)); + == ulint(rec_get_status(rec) + == REC_STATUS_INFIMUM + ? PAGE_HEAP_NO_INFIMUM + : PAGE_HEAP_NO_SUPREMUM)); n = 1; break; default: diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 88508cd8ce3..3729256c03b 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -398,7 +398,7 @@ row_ins_clust_index_entry_by_modify( DEBUG_SYNC_C("before_row_ins_upd_extern"); err = btr_store_big_rec_extern_fields( - pcur, update, *offsets, big_rec, mtr, + pcur, *offsets, big_rec, mtr, BTR_STORE_INSERT_UPDATE); DEBUG_SYNC_C("after_row_ins_upd_extern"); dtuple_big_rec_free(big_rec); @@ -1240,7 +1240,7 @@ row_ins_foreign_check_on_constraint( rec_print(stderr, clust_rec, clust_index); fputs("\n" "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com\n", stderr); + " https://jira.mariadb.org/\n", stderr); ut_ad(0); err = DB_SUCCESS; @@ -2504,7 +2504,7 @@ row_ins_index_entry_big_rec( DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern"); error = btr_store_big_rec_extern_fields( - &pcur, 0, offsets, big_rec, &mtr, BTR_STORE_INSERT); + &pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT); DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern"); if (error == DB_SUCCESS diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index edb55534ada..be4a7138ac7 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -2393,7 +2393,7 @@ func_exit_committed: if (big_rec) { if (error == DB_SUCCESS) { error = btr_store_big_rec_extern_fields( - &pcur, update, cur_offsets, big_rec, &mtr, + &pcur, cur_offsets, big_rec, &mtr, BTR_STORE_UPDATE); } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index a2442222d6d..6a6c65cd70c 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4401,15 +4401,12 @@ row_merge_rename_tables_dict( @param[in] index_def the index definition @param[in] add_v new virtual columns added along with add index call -@param[in] col_names column names if columns are renamed - or NULL @return index, or NULL on error */ dict_index_t* row_merge_create_index( dict_table_t* table, const index_def_t* index_def, - const dict_add_v_col_t* add_v, - const char** col_names) + const dict_add_v_col_t* add_v) { dict_index_t* index; ulint n_fields = index_def->n_fields; @@ -4446,20 +4443,7 @@ row_merge_create_index( table, ifield->col_no); } } else { - /* - Alter table renaming a column and then adding a index - to this new name e.g ALTER TABLE t - CHANGE COLUMN b c INT NOT NULL, ADD UNIQUE INDEX (c); - requires additional check as column names are not yet - changed when new index definitions are created. Table's - new column names are on a array of column name pointers - if any of the column names are changed. 
*/ - - if (col_names && col_names[i]) { - name = col_names[i]; - } else { - name = dict_table_get_col_name(table, ifield->col_no); - } + name = dict_table_get_col_name(table, ifield->col_no); } dict_mem_index_add_field(index, name, ifield->prefix_len); diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index f16155ef152..3f7c500c7f5 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -715,11 +715,33 @@ next_column: /* If there is a FTS doc id column and it is not user supplied ( generated by server) then assign it a new doc id. */ - if (prebuilt->table->fts) { + if (!prebuilt->table->fts) { + return; + } + + ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED); + + doc_id_t doc_id; + + if (!DICT_TF2_FLAG_IS_SET(prebuilt->table, DICT_TF2_FTS_HAS_DOC_ID)) { + if (prebuilt->table->fts->cache->first_doc_id + == FTS_NULL_DOC_ID) { + fts_get_next_doc_id(prebuilt->table, &doc_id); + } + return; + } - ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED); + dfield_t* fts_doc_id = dtuple_get_nth_field( + row, prebuilt->table->fts->doc_col); - fts_create_doc_id(prebuilt->table, row, prebuilt->heap); + if (fts_get_next_doc_id(prebuilt->table, &doc_id) == DB_SUCCESS) { + ut_a(doc_id != FTS_NULL_DOC_ID); + ut_ad(sizeof(doc_id) == fts_doc_id->type.len); + dfield_set_data(fts_doc_id, prebuilt->ins_upd_rec_buff + + prebuilt->mysql_row_len, 8); + fts_write_doc_id(fts_doc_id->data, doc_id); + } else { + dfield_set_null(fts_doc_id); } } @@ -1168,7 +1190,10 @@ row_get_prebuilt_insert_row( prebuilt->ins_upd_rec_buff = static_cast<byte*>( mem_heap_alloc( prebuilt->heap, - prebuilt->mysql_row_len)); + DICT_TF2_FLAG_IS_SET(prebuilt->table, + DICT_TF2_FTS_HAS_DOC_ID) + ? prebuilt->mysql_row_len + 8/* FTS_DOC_ID */ + : prebuilt->mysql_row_len)); } dtuple_t* row; @@ -3618,6 +3643,16 @@ row_drop_table_for_mysql( ut_ad(!table->fts->add_wq); ut_ad(lock_trx_has_sys_table_locks(trx) == 0); + for (;;) { + bool retry = false; + if (dict_fts_index_syncing(table)) { + retry = true; + } + if (!retry) { + break; + } + DICT_BG_YIELD(trx); + } row_mysql_unlock_data_dictionary(trx); fts_optimize_remove_table(table); row_mysql_lock_data_dictionary(trx); diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index ad583393d23..11823087f1f 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -2946,6 +2946,7 @@ row_sel_field_store_in_mysql_format_func( case DATA_SYS: /* These column types should never be shipped to MySQL. 
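In the row0mysql.cc hunk above, the insert row buffer is over-allocated by 8 bytes when the table has a hidden FTS_DOC_ID column, and the generated doc id is written just past the MySQL row image. A hypothetical standalone illustration of that buffer layout; the names, the 128-byte row length and the byte-order helper are assumptions for the example, not InnoDB functions:

#include <cstdint>
#include <vector>

/* Write an 8-byte value in big-endian order, similar in spirit to the
on-disk integer format the server uses for the doc id. */
static void write_be64(unsigned char* dst, uint64_t v)
{
	for (int i = 7; i >= 0; i--) {
		dst[i] = (unsigned char) (v & 0xff);
		v >>= 8;
	}
}

int main()
{
	const size_t mysql_row_len = 128;	/* assumed row image size */
	std::vector<unsigned char> buf(mysql_row_len + 8);	/* + FTS_DOC_ID */

	uint64_t doc_id = 42;
	write_be64(&buf[mysql_row_len], doc_id);	/* stored after the row */
}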
*/ ut_ad(0); + /* fall through */ case DATA_CHAR: case DATA_FIXBINARY: @@ -3457,7 +3458,7 @@ row_sel_get_clust_rec_for_mysql( trx_print(stderr, trx, 600); fputs("\n" "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); + " to https://jira.mariadb.org/\n", stderr); ut_ad(0); } diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a61a074e89b..26ee2849be3 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2907,8 +2907,7 @@ row_upd_clust_rec( DEBUG_SYNC_C("before_row_upd_extern"); err = btr_store_big_rec_extern_fields( - pcur, node->update, offsets, big_rec, mtr, - BTR_STORE_UPDATE); + pcur, offsets, big_rec, mtr, BTR_STORE_UPDATE); DEBUG_SYNC_C("after_row_upd_extern"); } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index e7af965ef28..e3a4eb05010 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1891,7 +1891,7 @@ loop: ib::error() << "Old log sequence number " << old_lsn << " was" << " greater than the new log sequence number " << new_lsn << ". Please submit a bug report to" - " http://bugs.mysql.com"; + " https://jira.mariadb.org/"; ut_ad(0); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index f16a3f4c178..25a5a964375 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -375,8 +375,7 @@ create_log_file( ib::info() << "Setting log file " << name << " size to " << srv_log_file_size << " bytes"; - ret = os_file_set_size(name, *file, srv_log_file_size, - srv_read_only_mode); + ret = os_file_set_size(name, *file, srv_log_file_size); if (!ret) { ib::error() << "Cannot set log file " << name << " size to " << srv_log_file_size << " bytes"; @@ -395,13 +394,14 @@ create_log_file( /** Delete all log files. @param[in,out] logfilename buffer for log file name @param[in] dirnamelen length of the directory path -@param[in] n_files number of files to delete */ +@param[in] n_files number of files to delete +@param[in] i first file to delete */ static void -delete_log_files(char* logfilename, size_t dirnamelen, unsigned n_files) +delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0) { /* Remove any old log files. 
*/ - for (unsigned i = 0; i < n_files; i++) { + for (; i < n_files; i++) { sprintf(logfilename + dirnamelen, "ib_logfile%u", i); /* Ignore errors about non-existent files or files @@ -655,8 +655,7 @@ srv_undo_tablespace_create( << "wait..."; ret = os_file_set_size( - name, fh, size << UNIV_PAGE_SIZE_SHIFT, - srv_read_only_mode); + name, fh, os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT); if (!ret) { ib::info() << "Error in creating " << name @@ -910,6 +909,7 @@ srv_undo_tablespaces_init(bool create_new_db) } /* fall through */ case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_EXPORT: ut_ad(!create_new_db); /* Check if any of the UNDO tablespace needs fix-up because @@ -1316,6 +1316,7 @@ srv_shutdown_all_bg_threads() break; case SRV_OPERATION_NORMAL: case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_EXPORT: if (!buf_page_cleaner_is_active && os_aio_all_slots_free()) { os_aio_wake_all_threads_at_shutdown(); @@ -1487,7 +1488,8 @@ innobase_start_or_create_for_mysql() unsigned i = 0; ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { srv_read_only_mode = true; @@ -1978,7 +1980,9 @@ innobase_start_or_create_for_mysql() if (err == DB_NOT_FOUND) { if (i == 0) { if (srv_operation - == SRV_OPERATION_RESTORE) { + == SRV_OPERATION_RESTORE + || srv_operation + == SRV_OPERATION_RESTORE_EXPORT) { return(DB_SUCCESS); } if (flushed_lsn @@ -2042,6 +2046,26 @@ innobase_start_or_create_for_mysql() } if (i == 0) { + if (size == 0 + && (srv_operation + == SRV_OPERATION_RESTORE + || srv_operation + == SRV_OPERATION_RESTORE_EXPORT)) { + /* Tolerate an empty ib_logfile0 + from a previous run of + mariabackup --prepare. */ + return(DB_SUCCESS); + } + /* The first log file must consist of + at least the following 512-byte pages: + header, checkpoint page 1, empty, + checkpoint page 2, redo log page(s) */ + if (size <= OS_FILE_LOG_BLOCK_SIZE * 4) { + ib::error() << "Log file " + << logfilename << " size " + << size << " is too small"; + return(srv_init_abort(DB_ERROR)); + } srv_log_file_size = size; } else if (size != srv_log_file_size) { @@ -2286,11 +2310,13 @@ files_checked: recv_recovery_from_checkpoint_finish(); - if (srv_operation == SRV_OPERATION_RESTORE) { + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { /* After applying the redo log from SRV_OPERATION_BACKUP, flush the changes - to the data files and delete the log file. - No further change to InnoDB files is needed. */ + to the data files and truncate or delete the log. + Unless --export is specified, no further change to + InnoDB files is needed. */ ut_ad(!srv_force_recovery); ut_ad(srv_n_log_files_found <= 1); ut_ad(recv_no_log_write); @@ -2300,8 +2326,18 @@ files_checked: fil_close_log_files(true); log_group_close_all(); if (err == DB_SUCCESS) { + bool trunc = srv_operation + == SRV_OPERATION_RESTORE; + /* Delete subsequent log files. */ delete_log_files(logfilename, dirnamelen, - srv_n_log_files_found); + srv_n_log_files_found, trunc); + if (trunc) { + /* Truncate the first log file. 
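The new size check above rejects an ib_logfile0 that cannot hold any redo payload: with the 512-byte log blocks named in the comment, the header, the two checkpoint pages and the empty page already occupy four blocks. A small worked illustration of that lower bound, using only the numbers stated in the comment:

#include <cstdio>

int main()
{
	const unsigned block = 512;		/* one redo log block */
	const unsigned reserved = 4 * block;	/* header, checkpoint 1,
						empty page, checkpoint 2 */

	printf("non-payload space in the first log file: %u bytes\n", reserved);
	printf("a first log file of <= %u bytes is rejected as too small\n",
	       reserved);
}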
*/ + strcpy(logfilename + dirnamelen, + "ib_logfile0"); + FILE* f = fopen(logfilename, "w"); + fclose(f); + } } return(err); } @@ -2745,6 +2781,7 @@ innodb_shutdown() case SRV_OPERATION_BACKUP: case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_DELTA: + case SRV_OPERATION_RESTORE_EXPORT: fil_close_all_files(); break; case SRV_OPERATION_NORMAL: diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc index 328094f5fd3..b7b68f98c19 100644 --- a/storage/innobase/sync/sync0rw.cc +++ b/storage/innobase/sync/sync0rw.cc @@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR): 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR) LOCK COMPATIBILITY MATRIX - S SX X - S + + - - SX + - - - X - - - + + | S|SX| X| + --+--+--+--+ + S| +| +| -| + --+--+--+--+ + SX| +| -| -| + --+--+--+--+ + X| -| -| -| + --+--+--+--+ The lock_word is always read and updated atomically and consistently, so that it always represents the state of the lock, and the state of the lock changes diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 1d4668e54f8..738f713298b 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1166,7 +1166,7 @@ trx_purge_rseg_get_next_history_log( " list, but its length is still reported as " << trx_sys->rseg_history_len << "! Make" " a detailed bug report, and submit it to" - " http://bugs.mysql.com"; + " https://jira.mariadb.org/"; ut_ad(0); } diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 931d50e4b82..50a321ea68b 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1214,7 +1214,25 @@ trx_undo_page_report_modify( const char* col_name = dict_table_get_col_name(table, col_no); - if (col->ord_part) { + if (!col->ord_part) { + continue; + } + + if (update) { + for (i = 0; i < update->n_fields; i++) { + const ulint field_no + = upd_get_nth_field(update, i) + ->field_no; + if (field_no >= index->n_fields + || dict_index_get_nth_field( + index, field_no)->col + == col) { + goto already_logged; + } + } + } + + if (true) { ulint pos; spatial_status_t spatial_status; @@ -1311,6 +1329,9 @@ trx_undo_page_report_modify( } } } + +already_logged: + continue; } for (col_no = 0; col_no < dict_table_get_n_v_cols(table); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 20fed90c712..75131847da5 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -616,10 +616,12 @@ trx_free_prepared( trx_t* trx) /*!< in, own: trx object */ { ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) - || (trx_state_eq(trx, TRX_STATE_ACTIVE) - && trx->is_recovered + || (trx->is_recovered + && (trx_state_eq(trx, TRX_STATE_ACTIVE) + || trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) && (!srv_was_started || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT || srv_read_only_mode || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO))); ut_a(trx->magic_n == TRX_MAGIC_N); diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index e193697139a..9b0c43e4609 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -116,15 +116,6 @@ trx_undo_mem_create( const XID* xid, /*!< in: X/Open XA transaction identification*/ ulint page_no,/*!< in: undo log header page number */ ulint offset);/*!< in: undo log header byte offset on page */ -/**********************************************************************//** -If an update undo 
log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. */ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /*!< in: header page of an undo log of size 1 */ - mtr_t* mtr); /*!< in: mtr */ /***********************************************************************//** Gets the previous record in an undo log from the previous page. @@ -674,80 +665,6 @@ trx_undo_parse_page_header( return(const_cast<byte*>(ptr)); } -/**********************************************************************//** -Writes the redo log entry of an update undo log header discard. */ -UNIV_INLINE -void -trx_undo_discard_latest_log( -/*========================*/ - page_t* undo_page, /*!< in: undo log header page */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr); -} - -/***********************************************************//** -Parses the redo log entry of an undo log page header discard. -@return end of log record or NULL */ -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ut_ad(end_ptr); - - if (page) { - trx_undo_discard_latest_update_undo(page, mtr); - } - - return(ptr); -} - -/**********************************************************************//** -If an update undo log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. */ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /*!< in: header page of an undo log of size 1 */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - trx_ulogf_t* log_hdr; - trx_ulogf_t* prev_log_hdr; - ulint free; - ulint prev_hdr_offset; - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - log_hdr = undo_page + free; - - prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG); - - if (prev_hdr_offset != 0) { - prev_log_hdr = undo_page + prev_hdr_offset; - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, - mach_read_from_2(prev_log_hdr - + TRX_UNDO_LOG_START)); - mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0); - } - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED); - mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset); - - trx_undo_discard_latest_log(undo_page, mtr); -} - /** Allocate an undo log page. 
@param[in,out] trx transaction @param[in,out] undo undo log @@ -1677,6 +1594,12 @@ trx_undo_free_prepared( switch (undo->state) { case TRX_UNDO_PREPARED: break; + case TRX_UNDO_CACHED: + case TRX_UNDO_TO_FREE: + case TRX_UNDO_TO_PURGE: + ut_ad(trx_state_eq(trx, + TRX_STATE_COMMITTED_IN_MEMORY)); + /* fall through */ case TRX_UNDO_ACTIVE: /* lock_trx_release_locks() assigns trx->is_recovered=false */ @@ -1697,6 +1620,12 @@ trx_undo_free_prepared( switch (undo->state) { case TRX_UNDO_PREPARED: break; + case TRX_UNDO_CACHED: + case TRX_UNDO_TO_FREE: + case TRX_UNDO_TO_PURGE: + ut_ad(trx_state_eq(trx, + TRX_STATE_COMMITTED_IN_MEMORY)); + /* fall through */ case TRX_UNDO_ACTIVE: /* lock_trx_release_locks() assigns trx->is_recovered=false */ diff --git a/storage/innobase/ut/ut0dbg.cc b/storage/innobase/ut/ut0dbg.cc index 28643645de9..9e596dcda81 100644 --- a/storage/innobase/ut/ut0dbg.cc +++ b/storage/innobase/ut/ut0dbg.cc @@ -48,7 +48,7 @@ ut_dbg_assertion_failed( fputs("InnoDB: We intentionally generate a memory trap.\n" "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com.\n" + " to https://jira.mariadb.org/\n" "InnoDB: If you get repeated assertion failures" " or crashes, even\n" "InnoDB: immediately after the mysqld startup, there may be\n" |