Diffstat (limited to 'extra/mariabackup/compact.cc')
-rw-r--r--	extra/mariabackup/compact.cc	1059
1 file changed, 1059 insertions, 0 deletions
diff --git a/extra/mariabackup/compact.cc b/extra/mariabackup/compact.cc
new file mode 100644
index 00000000000..5d08a6e02b2
--- /dev/null
+++ b/extra/mariabackup/compact.cc
@@ -0,0 +1,1059 @@
+/******************************************************
+XtraBackup: hot backup tool for InnoDB
+(c) 2009-2014 Percona LLC and/or its affiliates.
+Originally Created 3/3/2009 Yasufumi Kinoshita
+Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
+Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+*******************************************************/
+
+/* Compact backups implementation */
+
+#include <my_base.h>
+#include <table.h>
+
+#include <univ.i>
+#include <dict0mem.h>
+#include <dict0priv.h>
+#include <fsp0fsp.h>
+#include <handler0alter.h>
+#include <ibuf0ibuf.h>
+#include <page0page.h>
+#include <row0merge.h>
+#include "common.h"
+#include "write_filt.h"
+#include "fil_cur.h"
+#include "xtrabackup.h"
+#include "ds_buffer.h"
+#include "xb0xb.h"
+
+/* Number of the first primary key page in an .ibd file */
+#define XB_FIRST_CLUSTERED_INDEX_PAGE_NO 3
+
+/* Suffix for page map files */
+#define XB_PAGE_MAP_SUFFIX ".pmap"
+#define XB_TMPFILE_SUFFIX ".tmp"
+
+/* Page range */
+struct page_range_t {
+	ulint	from;	/*!< range start */
+	ulint	to;	/*!< range end */
+};
+
+/* Cursor in a page map file */
+struct page_map_cursor_t {
+	File		fd;	/*!< file descriptor */
+	IO_CACHE	cache;	/*!< IO_CACHE associated with fd */
+};
+
+/* Table descriptor for the index rebuild operation */
+struct index_rebuild_table_t {
+	char*	name;		/* table name */
+	ulint	space_id;	/* space ID */
+	UT_LIST_NODE_T(index_rebuild_table_t)	list;	/* list node */
+};
+
+/* Thread descriptor for the index rebuild operation */
+struct index_rebuild_thread_t {
+	ulint		num;	/* thread number */
+	pthread_t	id;	/* thread ID */
+};
+
+/* Empty page used to replace skipped pages in the data files */
+static byte		empty_page[UNIV_PAGE_SIZE_MAX];
+static const char	compacted_page_magic[] = "COMPACTP";
+static const size_t	compacted_page_magic_size =
+	sizeof(compacted_page_magic) - 1;
+static const ulint	compacted_page_magic_offset = FIL_PAGE_DATA;
+
+/* Mutex protecting table_list */
+static pthread_mutex_t	table_list_mutex;
+/* List of tablespaces to be processed by the index rebuild operation */
+static UT_LIST_BASE_NODE_T(index_rebuild_table_t)	table_list;
+
+
+/************************************************************************
+Compact page filter.
*/ +static my_bool wf_compact_init(xb_write_filt_ctxt_t *ctxt, char *dst_name, + xb_fil_cur_t *cursor); +static my_bool wf_compact_process(xb_write_filt_ctxt_t *ctxt, + ds_file_t *dstfile); +static my_bool wf_compact_finalize(xb_write_filt_ctxt_t *ctxt, + ds_file_t *dstfile); +xb_write_filt_t wf_compact = { + &wf_compact_init, + &wf_compact_process, + &wf_compact_finalize, + NULL +}; + +/************************************************************************ +Initialize the compact page filter. + +@return TRUE on success, FALSE on error. */ +static my_bool +wf_compact_init(xb_write_filt_ctxt_t *ctxt, + char *dst_name __attribute__((unused)), xb_fil_cur_t *cursor) +{ + xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt); + char page_map_name[FN_REFLEN]; + MY_STAT mystat; + + ctxt->cursor = cursor; + cp->clustered_index_found = FALSE; + cp->inside_skipped_range = FALSE; + cp->free_limit = 0; + + /* Don't compact the system table space */ + cp->skip = cursor->is_system; + if (cp->skip) { + return(TRUE); + } + + snprintf(page_map_name, sizeof(page_map_name), "%s%s", dst_name, + XB_PAGE_MAP_SUFFIX); + + cp->ds_buffer = ds_create(xtrabackup_target_dir, DS_TYPE_BUFFER); + if (cp->ds_buffer == NULL) { + return(FALSE); + } + + ds_set_pipe(cp->ds_buffer, ds_meta); + + memset(&mystat, 0, sizeof(mystat)); + mystat.st_mtime = my_time(0); + cp->buffer = ds_open(cp->ds_buffer, page_map_name, &mystat); + if (cp->buffer == NULL) { + msg("xtrabackup: Error: cannot open output stream for %s\n", + page_map_name); + return(FALSE); + } + + return(TRUE); +} + +/************************************************************************ +Check if the specified page should be skipped. We currently skip all +non-clustered index pages for compact backups. + +@return TRUE if the page should be skipped. */ +static my_bool +check_if_skip_page(xb_wf_compact_ctxt_t *cp, xb_fil_cur_t *cursor, ulint offset) +{ + byte *page; + ulint page_no; + ulint page_type; + index_id_t index_id; + + + xb_ad(cursor->is_system == FALSE); + + page = cursor->buf + cursor->page_size * offset; + page_no = cursor->buf_page_no + offset; + page_type = fil_page_get_type(page); + + if (UNIV_UNLIKELY(page_no == 0)) { + + cp->free_limit = mach_read_from_4(page + FSP_HEADER_OFFSET + + FSP_FREE_LIMIT); + } else if (UNIV_UNLIKELY(page_no == XB_FIRST_CLUSTERED_INDEX_PAGE_NO)) { + + xb_ad(cp->clustered_index_found == FALSE); + + if (page_type != FIL_PAGE_INDEX) { + + /* Uninitialized clustered index root page, there's + nothing we can do to compact the space.*/ + + msg("[%02u] Uninitialized page type value (%lu) in the " + "clustered index root page of tablespace %s. 
" + "Will not be compacted.\n", + cursor->thread_n, + page_type, cursor->rel_path); + + cp->skip = TRUE; + + return(FALSE); + } + + cp->clustered_index = + mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID); + cp->clustered_index_found = TRUE; + } else if (UNIV_UNLIKELY(page_no >= cp->free_limit)) { + + /* Skip unused pages above free limit, if that value is set in + the FSP header.*/ + + return(cp->free_limit > 0); + } else if (cp->clustered_index_found && page_type == FIL_PAGE_INDEX) { + + index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID); + if (index_id != cp->clustered_index) { + + ulint fseg_hdr_space = + mach_read_from_4(page + PAGE_HEADER + + PAGE_BTR_SEG_TOP); + ulint fseg_hdr_page_no = + mach_read_from_4(page + PAGE_HEADER + + PAGE_BTR_SEG_TOP + 4); + ulint fseg_hdr_offset = + mach_read_from_2(page + PAGE_HEADER + + PAGE_BTR_SEG_TOP + 8); + + /* Don't skip root index pages, i.e. the ones where the + above fields are defined. We need root index pages to be + able to correctly drop the indexes later, as they + contain fseg inode pointers. */ + + return(fseg_hdr_space == 0 && + fseg_hdr_page_no == 0 && + fseg_hdr_offset == 0); + } + } + + return(FALSE); +} + +/************************************************************************ +Run the next batch of pages through the compact page filter. + +@return TRUE on success, FALSE on error. */ +static my_bool +wf_compact_process(xb_write_filt_ctxt_t *ctxt, ds_file_t *dstfile) +{ + xb_fil_cur_t *cursor = ctxt->cursor; + ulint page_size = cursor->page_size; + byte *page; + byte *buf_end; + byte *write_from; + xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt); + ulint i; + ulint page_no; + byte tmp[4]; + + if (cp->skip) { + return(!ds_write(dstfile, cursor->buf, cursor->buf_read)); + } + + write_from = NULL; + buf_end = cursor->buf + cursor->buf_read; + for (i = 0, page = cursor->buf; page < buf_end; + i++, page += page_size) { + + page_no = cursor->buf_page_no + i; + + if (!check_if_skip_page(cp, cursor, i)) { + + if (write_from == NULL) { + write_from = page; + } + + if (cp->inside_skipped_range) { + cp->inside_skipped_range = FALSE; + + /* Write the last range endpoint to the + skipped pages map */ + + xb_ad(page_no > 0); + mach_write_to_4(tmp, page_no - 1); + if (ds_write(cp->buffer, tmp, sizeof(tmp))) { + return(FALSE); + } + } + continue; + } + + if (write_from != NULL) { + + /* The first skipped page in this block, write the + non-skipped ones to the data file */ + + if (ds_write(dstfile, write_from, page - write_from)) { + return(FALSE); + } + + write_from = NULL; + } + + if (!cp->inside_skipped_range) { + + /* The first skipped page in range, write the first + range endpoint to the skipped pages map */ + + cp->inside_skipped_range = TRUE; + + mach_write_to_4(tmp, page_no); + if (ds_write(cp->buffer, tmp, sizeof(tmp))) { + return(FALSE); + } + } + } + + /* Write the remaining pages in the buffer, if any */ + if (write_from != NULL && + ds_write(dstfile, write_from, buf_end - write_from)) { + return(FALSE); + } + + return(TRUE); +} + +/************************************************************************ +Close the compact filter's page map stream. + +@return TRUE on success, FALSE on error. 
*/ +static my_bool +wf_compact_finalize(xb_write_filt_ctxt_t *ctxt, + ds_file_t *dstfile __attribute__((unused))) +{ + xb_fil_cur_t *cursor = ctxt->cursor; + xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt); + my_bool rc = TRUE; + + /* Write the last endpoint of the current range, if the last pages of + the space have been skipped. */ + if (cp->inside_skipped_range) { + byte tmp[4]; + + mach_write_to_4(tmp, cursor->space_size - 1); + if (ds_write(cp->buffer, tmp, sizeof(tmp))) { + return(FALSE); + } + + cp->inside_skipped_range = FALSE; + } + + if (cp->buffer) { + if (ds_close(cp->buffer)) { + rc = FALSE; + } + } + if (cp->ds_buffer) { + ds_destroy(cp->ds_buffer); + } + + return(rc); +} + +/************************************************************************ +Open a page map file and return a cursor. + +@return page map cursor, or NULL if the file doesn't exist. */ +static page_map_cursor_t * +page_map_file_open(const char *path) +{ + MY_STAT statinfo; + page_map_cursor_t *pmap_cur; + int rc; + + if (my_stat(path, &statinfo, MYF(0)) == NULL) { + + return(NULL); + } + + /* The maximum possible page map file corresponds to a 64 TB tablespace + and the worst case when every other page was skipped. That is, 2^32/2 + page ranges = 16 GB. */ + xb_a(statinfo.st_size < (off_t) 16 * 1024 * 1024 * 1024); + + /* Must be a series of 8-byte tuples */ + xb_a(statinfo.st_size % 8 == 0); + + pmap_cur = (page_map_cursor_t *) my_malloc(sizeof(page_map_cursor_t), + MYF(MY_FAE)); + + pmap_cur->fd = my_open(path, O_RDONLY, MYF(MY_WME)); + xb_a(pmap_cur->fd != 0); + + rc = init_io_cache(&pmap_cur->cache, pmap_cur->fd, 0, READ_CACHE, + 0, 0, MYF(MY_WME)); + xb_a(rc == 0); + + return(pmap_cur); +} + +/************************************************************************ +Read the next range from a page map file and update the cursor. + +@return TRUE on success, FALSE on end-of-file. */ +static ibool +page_map_file_next(page_map_cursor_t *pmap_cur, page_range_t *range) +{ + byte buf[8]; + + xb_ad(pmap_cur != NULL); + + if (my_b_read(&pmap_cur->cache, buf, sizeof(buf))) { + return(FALSE); + } + + range->from = mach_read_from_4(buf); + range->to = mach_read_from_4(buf + 4); + + return(TRUE); +} + +/************************************************************************ +Close the page map cursor.*/ +static void +page_map_file_close(page_map_cursor_t *pmap_cur) +{ + int rc; + + xb_ad(pmap_cur != NULL); + + rc = end_io_cache(&pmap_cur->cache); + xb_a(rc == 0); + + posix_fadvise(pmap_cur->fd, 0, 0, POSIX_FADV_DONTNEED); + + rc = my_close(pmap_cur->fd, MY_WME); + xb_a(rc == 0); + + my_free(pmap_cur); +} + +/**************************************************************************** +Expand a single data file according to the skipped pages maps created by +--compact. + +@return TRUE on success, FALSE on failure. 
*/ +static my_bool +xb_expand_file(fil_node_t *node) +{ + char pmapfile_path[FN_REFLEN]; + char tmpfile_path[FN_REFLEN]; + xb_fil_cur_t cursor; + xb_fil_cur_result_t res; + ds_ctxt_t *ds_local; + ds_ctxt_t *ds_buffer; + ds_file_t *tmpfile; + my_bool success = FALSE; + ulint i; + byte *page; + ulint page_expected_no; + page_map_cursor_t *pmap_cur; + ibool have_next_range; + page_range_t pmap_range; + + xb_ad(trx_sys_sys_space(node->space->id) == FALSE); + + snprintf(pmapfile_path, sizeof(pmapfile_path), "%s%s", + node->name, XB_PAGE_MAP_SUFFIX); + + /* Skip files that don't have a corresponding page map file */ + + if (!(pmap_cur = page_map_file_open(pmapfile_path))) { + + msg("Not expanding %s\n", node->name); + + return(FALSE); + } + + msg("Expanding %s\n", node->name); + + ds_local = ds_create(".", DS_TYPE_LOCAL); + ds_buffer = ds_create(".", DS_TYPE_BUFFER); + + xb_a(ds_local != NULL && ds_buffer != NULL); + + ds_buffer_set_size(ds_buffer, FSP_EXTENT_SIZE * UNIV_PAGE_SIZE_MAX); + + ds_set_pipe(ds_buffer, ds_local); + + res = xb_fil_cur_open(&cursor, &rf_pass_through, node, 1); + xb_a(res == XB_FIL_CUR_SUCCESS); + + snprintf(tmpfile_path, sizeof(tmpfile_path), "%s%s", + node->name, XB_TMPFILE_SUFFIX); + + tmpfile = ds_open(ds_buffer, tmpfile_path, &cursor.statinfo); + if (tmpfile == NULL) { + + msg("Could not open temporary file '%s'\n", tmpfile_path); + goto error; + } + + have_next_range = page_map_file_next(pmap_cur, &pmap_range); + + page_expected_no = 0; + + /* Initialize and mark the empty page which is used to replace + skipped pages. */ + memset(empty_page, 0, cursor.page_size); + memcpy(empty_page + compacted_page_magic_offset, + compacted_page_magic, compacted_page_magic_size); + mach_write_to_4(empty_page + FIL_PAGE_SPACE_OR_CHKSUM, + BUF_NO_CHECKSUM_MAGIC); + mach_write_to_4(empty_page + cursor.page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM, + BUF_NO_CHECKSUM_MAGIC); + + + /* Main copy loop */ + + while ((res = xb_fil_cur_read(&cursor)) == XB_FIL_CUR_SUCCESS) { + + for (i = 0, page = cursor.buf; i < cursor.buf_npages; + i++, page += cursor.page_size) { + + ulint page_read_no; + + page_read_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + xb_a(!page_read_no || page_expected_no <= page_read_no); + + if (have_next_range && + page_expected_no == pmap_range.from) { + + xb_a(pmap_range.from <= pmap_range.to); + + /* Write empty pages instead of skipped ones, if + necessary. 
*/ + + while (page_expected_no <= pmap_range.to) { + + if (ds_write(tmpfile, empty_page, + cursor.page_size)) { + + goto write_error; + } + + page_expected_no++; + } + + have_next_range = + page_map_file_next(pmap_cur, + &pmap_range); + } + + /* Write the current page */ + + if (ds_write(tmpfile, page, cursor.page_size)) { + + goto write_error; + } + + page_expected_no++; + } + } + + if (res != XB_FIL_CUR_EOF) { + + goto error; + } + + /* Write empty pages instead of trailing skipped ones, if any */ + + if (have_next_range) { + + xb_a(page_expected_no == pmap_range.from); + xb_a(pmap_range.from <= pmap_range.to); + + while (page_expected_no <= pmap_range.to) { + + if (ds_write(tmpfile, empty_page, + cursor.page_size)) { + + goto write_error; + } + + page_expected_no++; + } + + xb_a(!page_map_file_next(pmap_cur, &pmap_range)); + } + + /* Replace the original .ibd file with the expanded file */ + if (my_rename(tmpfile_path, node->name, MYF(MY_WME))) { + + msg("Failed to rename '%s' to '%s'\n", + tmpfile_path, node->name); + goto error; + } + + my_delete(pmapfile_path, MYF(MY_WME)); + + if (!ds_close(tmpfile)) { + success = TRUE; + } + tmpfile = NULL; + + goto end; + +write_error: + msg("Write to '%s' failed\n", tmpfile_path); + +error: + if (tmpfile != NULL) { + + ds_close(tmpfile); + my_delete(tmpfile_path, MYF(MY_WME)); + } + +end: + ds_destroy(ds_buffer); + ds_destroy(ds_local); + + xb_fil_cur_close(&cursor); + + page_map_file_close(pmap_cur); + + return(success); +} + +/****************************************************************************** +Expand the data files according to the skipped pages maps created by --compact. +@return TRUE on success, FALSE on failure. */ +my_bool +xb_expand_datafiles(void) +/*=====================*/ +{ + ulint nfiles; + datafiles_iter_t *it = NULL; + fil_node_t *node; + fil_space_t *space; + + msg("Starting to expand compacted .ibd files.\n"); + + /* Initialize the tablespace cache */ + if (xb_data_files_init() != DB_SUCCESS) { + return(FALSE); + } + + nfiles = UT_LIST_GET_LEN(fil_system->space_list); + xb_a(nfiles > 0); + + it = datafiles_iter_new(fil_system); + if (it == NULL) { + msg("xtrabackup: error: datafiles_iter_new() failed.\n"); + goto error; + } + + while ((node = datafiles_iter_next(it)) != NULL) { + + space = node->space; + + /* System tablespace cannot be compacted */ + if (!fil_is_user_tablespace_id(space->id)) { + + continue; + } + + if (!xb_expand_file(node)) { + + goto error; + } + } + + datafiles_iter_free(it); + xb_data_files_close(); + + return(TRUE); + +error: + if (it != NULL) { + datafiles_iter_free(it); + } + + xb_data_files_close(); + + return(FALSE); +} + +/****************************************************************************** +Callback used in buf_page_io_complete() to detect compacted pages. +@return TRUE if the page is marked as compacted, FALSE otherwise. */ +ibool +buf_page_is_compacted( +/*==================*/ + const byte* page) /*!< in: a database page */ +{ + return !memcmp(page + compacted_page_magic_offset, + compacted_page_magic, compacted_page_magic_size); +} + +/***************************************************************************** +Builds an index definition corresponding to an index object. It is roughly +similar to innobase_create_index_def() / innobase_create_index_field_def() and +the opposite to dict_mem_index_create() / dict_mem_index_add_field(). 
*/ +static +void +xb_build_index_def( +/*=======================*/ + mem_heap_t* heap, /*!< in: heap */ + const dict_index_t* index, /*!< in: index */ + index_def_t* index_def) /*!< out: index definition */ +{ + index_field_t* fields; + ulint n_fields; + ulint i; + + ut_a(index->n_fields); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + /* Use n_user_defined_cols instead of n_fields, as the index will + contain a part of the primary key after n_user_defined_cols, and those + columns will be created automatically in + dict_index_build_internal_clust(). */ + n_fields = index->n_user_defined_cols; + + memset(index_def, 0, sizeof(*index_def)); + + index_def->name = mem_heap_strdup(heap, index->name); + index_def->ind_type = index->type; + + fields = static_cast<index_field_t *> + (mem_heap_alloc(heap, n_fields * sizeof(*fields))); + + for (i = 0; i < n_fields; i++) { + dict_field_t* field; + + field = dict_index_get_nth_field(index, i); + fields[i].col_no = dict_col_get_no(field->col); + fields[i].prefix_len = field->prefix_len; + } + + index_def->fields = fields; + index_def->n_fields = n_fields; +} + +/* A dummy autoc_inc sequence for row_merge_build_indexes(). */ +static ib_sequence_t null_seq(NULL, 0, 0); +/* A dummy table share and table for row_merge_build_indexes() error reporting. +Assumes that no errors are going to be reported. */ +static struct TABLE_SHARE dummy_table_share; +static struct TABLE dummy_table; + +/********************************************************************//** +Rebuild secondary indexes for a given table. */ +static +void +xb_rebuild_indexes_for_table( +/*=========================*/ + dict_table_t* table, /*!< in: table */ + trx_t* trx, /*!< in: transaction handle */ + ulint thread_n) /*!< in: thread number */ +{ + dict_index_t* index; + dict_index_t** indexes; + ulint n_indexes; + index_def_t* index_defs; + ulint i; + mem_heap_t* heap; + ulint error; + ulint* add_key_nums; + + ut_ad(!mutex_own(&(dict_sys->mutex))); + ut_ad(table); + + ut_a(UT_LIST_GET_LEN(table->indexes) > 0); + + n_indexes = UT_LIST_GET_LEN(table->indexes) - 1; + if (!n_indexes) { + /* Only the primary key, nothing to do. */ + return; + } + + heap = mem_heap_create(1024); + + indexes = (dict_index_t**) mem_heap_alloc(heap, + n_indexes * sizeof(*indexes)); + index_defs = (index_def_t*) mem_heap_alloc(heap, n_indexes * + sizeof(*index_defs)); + add_key_nums = static_cast<ulint *> + (mem_heap_alloc(heap, n_indexes * sizeof(*add_key_nums))); + + /* Skip the primary key. */ + index = dict_table_get_first_index(table); + ut_a(dict_index_is_clust(index)); + + row_mysql_lock_data_dictionary(trx); + + for (i = 0; (index = dict_table_get_next_index(index)); i++) { + + msg("[%02lu] Found index %s\n", thread_n, index->name); + + /* Pretend that it's the current trx that created this index. + Required to avoid 5.6+ debug assertions. */ + index->trx_id = trx->id; + + xb_build_index_def(heap, index, &index_defs[i]); + + /* In 5.6+, row_merge_drop_indexes() drops all the indexes on + the table that have the temp index prefix. It does not accept + an array of indexes to drop as in 5.5-. */ + row_merge_rename_index_to_drop(trx, table->id, index->id); + } + + ut_ad(i == n_indexes); + + row_merge_drop_indexes(trx, table, TRUE); + + index = dict_table_get_first_index(table); + ut_a(dict_index_is_clust(index)); + index = dict_table_get_next_index(index); + while (index) { + + /* In 5.6+, row_merge_drop_indexes() does not remove the + indexes from the dictionary cache nor from any foreign key + list. 
This may cause invalid dereferences as we try to access + the dropped indexes from other tables as FKs. */ + + dict_index_t* next_index = dict_table_get_next_index(index); + index->to_be_dropped = 1; + + /* Patch up any FK referencing this index with NULL */ + dict_foreign_replace_index(table, NULL, index); + + dict_index_remove_from_cache(table, index); + + index = next_index; + } + + msg("[%02lu] Rebuilding %lu index(es).\n", thread_n, n_indexes); + + error = row_merge_lock_table(trx, table, LOCK_X); + xb_a(error == DB_SUCCESS); + + for (i = 0; i < n_indexes; i++) { + indexes[i] = row_merge_create_index(trx, table, + &index_defs[i]); + add_key_nums[i] = index_defs[i].key_number; + } + + /* Commit trx to release latches on system tables */ + trx_commit_for_mysql(trx); + trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); + + row_mysql_unlock_data_dictionary(trx); + + /* Reacquire table lock for row_merge_build_indexes() */ + error = row_merge_lock_table(trx, table, LOCK_X); + xb_a(error == DB_SUCCESS); + + error = row_merge_build_indexes(trx, table, table, FALSE, indexes, + add_key_nums, n_indexes, &dummy_table, + NULL, NULL, ULINT_UNDEFINED, null_seq); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + + trx_commit_for_mysql(trx); + + trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); +} + +/************************************************************************** +Worker thread function for index rebuild. */ +static +void * +xb_rebuild_indexes_thread_func( +/*===========================*/ + void* arg) /* thread context */ +{ + dict_table_t* table; + index_rebuild_table_t* rebuild_table; + index_rebuild_thread_t* thread; + trx_t* trx; + + thread = (index_rebuild_thread_t *) arg; + + trx = trx_allocate_for_mysql(); + + /* Suppress foreign key checks, as we are going to drop and recreate all + secondary keys. */ + trx->check_foreigns = FALSE; + trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); + + /* Loop until there are no more tables in tables list */ + for (;;) { + pthread_mutex_lock(&table_list_mutex); + + rebuild_table = UT_LIST_GET_FIRST(table_list); + + if (rebuild_table == NULL) { + + pthread_mutex_unlock(&table_list_mutex); + break; + } + + UT_LIST_REMOVE(list, table_list, rebuild_table); + + pthread_mutex_unlock(&table_list_mutex); + + ut_ad(rebuild_table->name); + ut_ad(fil_is_user_tablespace_id(rebuild_table->space_id)); + + row_mysql_lock_data_dictionary(trx); + + table = dict_table_get_low(rebuild_table->name); + + ut_d(table->n_ref_count++); + + row_mysql_unlock_data_dictionary(trx); + + ut_a(table != NULL); + ut_a(table->space == rebuild_table->space_id); + + /* Discard change buffer entries for this space */ + ibuf_delete_for_discarded_space(rebuild_table->space_id); + + msg("[%02lu] Checking if there are indexes to rebuild in table " + "%s (space id: %lu)\n", + thread->num, + rebuild_table->name, rebuild_table->space_id); + + xb_rebuild_indexes_for_table(table, trx, thread->num); + + ut_d(table->n_ref_count--); + + mem_free(rebuild_table->name); + mem_free(rebuild_table); + } + + trx_commit_for_mysql(trx); + + trx_free_for_mysql(trx); + + return(NULL); +} + +/****************************************************************************** +Rebuild all secondary indexes in all tables in separate spaces. Called from +innobase_start_or_create_for_mysql(). 
*/
+void
+xb_compact_rebuild_indexes(void)
+/*=============================*/
+{
+	dict_table_t*		sys_tables;
+	dict_index_t*		sys_index;
+	btr_pcur_t		pcur;
+	const rec_t*		rec;
+	mtr_t			mtr;
+	const byte*		field;
+	ulint			len;
+	ulint			space_id;
+	trx_t*			trx;
+	index_rebuild_table_t*	rebuild_table;
+	index_rebuild_thread_t*	threads;
+	ulint			i;
+
+	/* Set up the dummy table for the index rebuild error reporting */
+	dummy_table_share.fields = 0;
+	dummy_table.s = &dummy_table_share;
+
+	/* Iterate all tables that are not in the system tablespace and add them
+	to the list of tables to be rebuilt later. */
+
+	trx = trx_allocate_for_mysql();
+	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+	row_mysql_lock_data_dictionary(trx);
+
+	/* Enlarge the fatal lock wait timeout during index rebuild
+	operation. */
+	os_increment_counter_by_amount(server_mutex,
+				       srv_fatal_semaphore_wait_threshold,
+				       7200);
+
+	mtr_start(&mtr);
+
+	sys_tables = dict_table_get_low("SYS_TABLES");
+	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+	ut_a(!dict_table_is_comp(sys_tables));
+
+	pthread_mutex_init(&table_list_mutex, NULL);
+	UT_LIST_INIT(table_list);
+
+	btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
+				    TRUE, 0, &mtr);
+	for (;;) {
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
+			/* end of index */
+
+			break;
+		}
+
+		if (rec_get_deleted_flag(rec, 0)) {
+			continue;
+		}
+
+		field = rec_get_nth_field_old(rec, 9, &len);
+		ut_a(len == 4);
+
+		space_id = mach_read_from_4(field);
+
+		/* Don't touch tables in the system tablespace */
+		if (!fil_is_user_tablespace_id(space_id)) {
+
+			continue;
+		}
+
+		field = rec_get_nth_field_old(rec, 0, &len);
+
+		rebuild_table = static_cast<index_rebuild_table_t *>
+			(mem_alloc(sizeof(*rebuild_table)));
+		rebuild_table->name = mem_strdupl((char*) field, len);
+		rebuild_table->space_id = space_id;
+
+		UT_LIST_ADD_LAST(list, table_list, rebuild_table);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	trx_commit_for_mysql(trx);
+
+	trx_free_for_mysql(trx);
+
+	/* Start worker threads for the index rebuild operation */
+	ut_ad(xtrabackup_rebuild_threads > 0);
+
+	if (xtrabackup_rebuild_threads > 1) {
+		msg("Starting %lu threads to rebuild indexes.\n",
+		    xtrabackup_rebuild_threads);
+	}
+
+	threads = (index_rebuild_thread_t *)
+		mem_alloc(sizeof(*threads) *
+			  xtrabackup_rebuild_threads);
+
+	for (i = 0; i < xtrabackup_rebuild_threads; i++) {
+
+		threads[i].num = i+1;
+		if (pthread_create(&threads[i].id, NULL,
+				   xb_rebuild_indexes_thread_func,
+				   &threads[i])) {
+
+			msg("error: pthread_create() failed: errno = %d\n",
+			    errno);
+			ut_a(0);
+		}
+	}
+
+	/* Wait for worker threads to finish */
+	for (i = 0; i < xtrabackup_rebuild_threads; i++) {
+		pthread_join(threads[i].id, NULL);
+	}
+
+	mem_free(threads);
+}
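
For reference, the .pmap page map files created by the compact write filter above have a trivial on-disk format: a flat sequence of 8-byte records, each describing one skipped page range as two big-endian 4-byte page numbers (from, to), both inclusive. That is exactly what wf_compact_process()/wf_compact_finalize() write and what page_map_file_next() reads back. The following standalone decoder is a minimal sketch under those assumptions; it is not part of this patch, and the helper read_be32() is invented here to mirror InnoDB's mach_read_from_4():

#include <cstdint>
#include <cstdio>

/* Big-endian 32-bit read, equivalent to InnoDB's mach_read_from_4(). */
static uint32_t read_be32(const unsigned char *b)
{
	return ((uint32_t) b[0] << 24) | ((uint32_t) b[1] << 16) |
	       ((uint32_t) b[2] << 8) | (uint32_t) b[3];
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <tablespace.ibd.pmap>\n", argv[0]);
		return 1;
	}
	FILE *f = fopen(argv[1], "rb");
	if (f == NULL) {
		perror("fopen");
		return 1;
	}
	unsigned char rec[8];
	/* Each record is [from, to], both endpoints inclusive, as written
	by wf_compact_process() and wf_compact_finalize(). */
	while (fread(rec, 1, sizeof(rec), f) == sizeof(rec)) {
		printf("skipped pages %u-%u\n",
		       (unsigned) read_be32(rec),
		       (unsigned) read_be32(rec + 4));
	}
	fclose(f);
	return 0;
}

Run against a .pmap file from a compact backup, this prints one line per skipped range, which is handy for checking how much of a tablespace --compact actually dropped.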
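Similarly, the marker page that xb_expand_file() writes in place of skipped pages can be reproduced and detected outside the server. The sketch below restates the relevant InnoDB constants (FIL_PAGE_DATA = 38, FIL_PAGE_END_LSN_OLD_CHKSUM = 8, BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF; verify these against your tree) and follows the same layout as the empty_page initialization and buf_page_is_compacted() above; it is an illustration, not code from the patch:

#include <cstdint>
#include <cstring>
#include <vector>

/* Constants restated from InnoDB headers; check them against your tree. */
static const size_t   kFilPageData            = 38;         /* FIL_PAGE_DATA */
static const size_t   kFilPageEndLsnOldChksum = 8;          /* FIL_PAGE_END_LSN_OLD_CHKSUM */
static const uint32_t kNoChecksumMagic        = 0xDEADBEEF; /* BUF_NO_CHECKSUM_MAGIC */
static const char     kCompactMagic[]         = "COMPACTP"; /* trailing NUL not stored */

/* Big-endian 32-bit write, equivalent to InnoDB's mach_write_to_4(). */
static void write_be32(unsigned char *b, uint32_t v)
{
	b[0] = (unsigned char) (v >> 24);
	b[1] = (unsigned char) (v >> 16);
	b[2] = (unsigned char) (v >> 8);
	b[3] = (unsigned char) v;
}

/* Build a marker page laid out the way xb_expand_file() builds empty_page:
zero-filled, magic at FIL_PAGE_DATA, and the no-checksum magic in both the
leading checksum field (offset 0) and the old-style trailer checksum. */
std::vector<unsigned char> make_marker_page(size_t page_size)
{
	std::vector<unsigned char> page(page_size, 0);
	memcpy(&page[kFilPageData], kCompactMagic, sizeof(kCompactMagic) - 1);
	write_be32(&page[0], kNoChecksumMagic);
	write_be32(&page[page_size - kFilPageEndLsnOldChksum],
		   kNoChecksumMagic);
	return page;
}

/* Same test as buf_page_is_compacted(): compare the magic at FIL_PAGE_DATA. */
bool page_is_compacted(const unsigned char *page)
{
	return memcmp(page + kFilPageData, kCompactMagic,
		      sizeof(kCompactMagic) - 1) == 0;
}

Writing the magic into both checksum fields means the marker pages pass InnoDB's checksum validation on read, which is why buf_page_io_complete() can use the memcmp-based callback above to recognize them.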