Diffstat (limited to 'extra/mariabackup/compact.cc')
-rw-r--r--  extra/mariabackup/compact.cc  1059
1 file changed, 1059 insertions, 0 deletions
diff --git a/extra/mariabackup/compact.cc b/extra/mariabackup/compact.cc
new file mode 100644
index 00000000000..5d08a6e02b2
--- /dev/null
+++ b/extra/mariabackup/compact.cc
@@ -0,0 +1,1059 @@
+/******************************************************
+XtraBackup: hot backup tool for InnoDB
+(c) 2009-2014 Percona LLC and/or its affiliates.
+Originally Created 3/3/2009 Yasufumi Kinoshita
+Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
+Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+*******************************************************/
+
+/* Compact backups implementation */
+
+#include <my_base.h>
+#include <table.h>
+
+#include <univ.i>
+#include <dict0mem.h>
+#include <dict0priv.h>
+#include <fsp0fsp.h>
+#include <handler0alter.h>
+#include <ibuf0ibuf.h>
+#include <page0page.h>
+#include <row0merge.h>
+#include "common.h"
+#include "write_filt.h"
+#include "fil_cur.h"
+#include "xtrabackup.h"
+#include "ds_buffer.h"
+#include "xb0xb.h"
+
+/* Page number of the clustered index root page in an .ibd file */
+#define XB_FIRST_CLUSTERED_INDEX_PAGE_NO 3
+
+/* Suffix for page map files */
+#define XB_PAGE_MAP_SUFFIX ".pmap"
+/* Suffix for temporary files created while expanding compacted files */
+#define XB_TMPFILE_SUFFIX ".tmp"
+
+/* Page range */
+struct page_range_t {
+ ulint from; /*!< range start */
+ ulint to; /*!< range end */
+};
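+
+/* On-disk layout of a page map (.pmap) file, as written by
+wf_compact_process() / wf_compact_finalize() below: a sequence of 8-byte
+tuples, each holding the first and last page of a skipped range as
+big-endian 32-bit page numbers (the mach_write_to_4() format). Both
+endpoints are inclusive and ranges appear in ascending page order. For
+example, if pages 3-5 and 8-9 of a tablespace were skipped, the file
+contains these 16 bytes:
+
+	00 00 00 03  00 00 00 05  00 00 00 08  00 00 00 09
+*/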
+
+/* Cursor in a page map file */
+struct page_map_cursor_t {
+ File fd; /*!< file descriptor */
+ IO_CACHE cache; /*!< IO_CACHE associated with fd */
+};
+
+/* Table descriptor for the index rebuild operation */
+struct index_rebuild_table_t {
+ char* name; /* table name */
+ ulint space_id; /* space ID */
+ UT_LIST_NODE_T(index_rebuild_table_t) list; /* list node */
+};
+
+/* Thread descriptor for the index rebuild operation */
+struct index_rebuild_thread_t {
+ ulint num; /* thread number */
+ pthread_t id; /* thread ID */
+};
+
+/* Empty page used to replace skipped pages in the data files */
+static byte empty_page[UNIV_PAGE_SIZE_MAX];
+static const char compacted_page_magic[] = "COMPACTP";
+static const size_t compacted_page_magic_size =
+ sizeof(compacted_page_magic) - 1;
+static const ulint compacted_page_magic_offset = FIL_PAGE_DATA;
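+
+/* Pages removed by --compact are later restored by xb_expand_file() as
+placeholder pages carrying the "COMPACTP" magic at FIL_PAGE_DATA and
+BUF_NO_CHECKSUM_MAGIC in both checksum fields, so they pass checksum
+validation when read back; buf_page_is_compacted() below recognizes them
+by the magic bytes. */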
+
+/* Mutex protecting table_list */
+static pthread_mutex_t table_list_mutex;
+/* List of tablespaces to process by the index rebuild operation */
+static UT_LIST_BASE_NODE_T(index_rebuild_table_t) table_list;
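+
+/* xb_compact_rebuild_indexes() below fills table_list with one entry per
+user tablespace found in SYS_TABLES; the index rebuild worker threads then
+pop entries under table_list_mutex and rebuild the secondary indexes of
+each table. */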
+
+
+/************************************************************************
+Compact page filter. */
+static my_bool wf_compact_init(xb_write_filt_ctxt_t *ctxt, char *dst_name,
+ xb_fil_cur_t *cursor);
+static my_bool wf_compact_process(xb_write_filt_ctxt_t *ctxt,
+ ds_file_t *dstfile);
+static my_bool wf_compact_finalize(xb_write_filt_ctxt_t *ctxt,
+ ds_file_t *dstfile);
+xb_write_filt_t wf_compact = {
+ &wf_compact_init,
+ &wf_compact_process,
+ &wf_compact_finalize,
+ NULL
+};
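+
+/* A sketch of how this filter is driven (illustrative only; the actual
+driver lives in the data file copy loop, and the member names follow the
+initializer order above rather than quoting write_filt.h verbatim):
+
+	xb_write_filt_ctxt_t ctxt;
+
+	wf_compact.init(&ctxt, dst_name, cursor);
+	while (xb_fil_cur_read(cursor) == XB_FIL_CUR_SUCCESS) {
+		wf_compact.process(&ctxt, dstfile);
+	}
+	wf_compact.finalize(&ctxt, dstfile);
+*/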
+
+/************************************************************************
+Initialize the compact page filter.
+
+@return TRUE on success, FALSE on error. */
+static my_bool
+wf_compact_init(xb_write_filt_ctxt_t *ctxt,
+ char *dst_name, xb_fil_cur_t *cursor)
+{
+ xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt);
+ char page_map_name[FN_REFLEN];
+ MY_STAT mystat;
+
+ ctxt->cursor = cursor;
+ cp->clustered_index_found = FALSE;
+ cp->inside_skipped_range = FALSE;
+ cp->free_limit = 0;
+
+ /* Don't compact the system table space */
+ cp->skip = cursor->is_system;
+ if (cp->skip) {
+ return(TRUE);
+ }
+
+ snprintf(page_map_name, sizeof(page_map_name), "%s%s", dst_name,
+ XB_PAGE_MAP_SUFFIX);
+
+ cp->ds_buffer = ds_create(xtrabackup_target_dir, DS_TYPE_BUFFER);
+ if (cp->ds_buffer == NULL) {
+ return(FALSE);
+ }
+
+ ds_set_pipe(cp->ds_buffer, ds_meta);
+
+ memset(&mystat, 0, sizeof(mystat));
+ mystat.st_mtime = my_time(0);
+ cp->buffer = ds_open(cp->ds_buffer, page_map_name, &mystat);
+ if (cp->buffer == NULL) {
+ msg("xtrabackup: Error: cannot open output stream for %s\n",
+ page_map_name);
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/************************************************************************
+Check if the specified page should be skipped. We currently skip all
+non-clustered index pages for compact backups.
+
+@return TRUE if the page should be skipped. */
+static my_bool
+check_if_skip_page(xb_wf_compact_ctxt_t *cp, xb_fil_cur_t *cursor, ulint offset)
+{
+ byte *page;
+ ulint page_no;
+ ulint page_type;
+ index_id_t index_id;
+
+
+ xb_ad(cursor->is_system == FALSE);
+
+ page = cursor->buf + cursor->page_size * offset;
+ page_no = cursor->buf_page_no + offset;
+ page_type = fil_page_get_type(page);
+
+ if (UNIV_UNLIKELY(page_no == 0)) {
+
+ cp->free_limit = mach_read_from_4(page + FSP_HEADER_OFFSET +
+ FSP_FREE_LIMIT);
+ } else if (UNIV_UNLIKELY(page_no == XB_FIRST_CLUSTERED_INDEX_PAGE_NO)) {
+
+ xb_ad(cp->clustered_index_found == FALSE);
+
+ if (page_type != FIL_PAGE_INDEX) {
+
+ /* The clustered index root page is uninitialized;
+ there's nothing we can do to compact the space. */
+
+ msg("[%02u] Uninitialized page type value (%lu) in the "
+ "clustered index root page of tablespace %s. "
+ "Will not be compacted.\n",
+ cursor->thread_n,
+ page_type, cursor->rel_path);
+
+ cp->skip = TRUE;
+
+ return(FALSE);
+ }
+
+ cp->clustered_index =
+ mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
+ cp->clustered_index_found = TRUE;
+ } else if (UNIV_UNLIKELY(page_no >= cp->free_limit)) {
+
+ /* Skip unused pages above the free limit, if that value is
+ set in the FSP header. */
+
+ return(cp->free_limit > 0);
+ } else if (cp->clustered_index_found && page_type == FIL_PAGE_INDEX) {
+
+ index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
+ if (index_id != cp->clustered_index) {
+
+ ulint fseg_hdr_space =
+ mach_read_from_4(page + PAGE_HEADER +
+ PAGE_BTR_SEG_TOP);
+ ulint fseg_hdr_page_no =
+ mach_read_from_4(page + PAGE_HEADER +
+ PAGE_BTR_SEG_TOP + 4);
+ ulint fseg_hdr_offset =
+ mach_read_from_2(page + PAGE_HEADER +
+ PAGE_BTR_SEG_TOP + 8);
+
+ /* Don't skip root index pages, i.e. the ones where the
+ above fields are defined. We need root index pages to be
+ able to correctly drop the indexes later, as they
+ contain fseg inode pointers. */
+
+ return(fseg_hdr_space == 0 &&
+ fseg_hdr_page_no == 0 &&
+ fseg_hdr_offset == 0);
+ }
+ }
+
+ return(FALSE);
+}
+
+/************************************************************************
+Run the next batch of pages through the compact page filter.
+
+@return TRUE on success, FALSE on error. */
+static my_bool
+wf_compact_process(xb_write_filt_ctxt_t *ctxt, ds_file_t *dstfile)
+{
+ xb_fil_cur_t *cursor = ctxt->cursor;
+ ulint page_size = cursor->page_size;
+ byte *page;
+ byte *buf_end;
+ byte *write_from;
+ xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt);
+ ulint i;
+ ulint page_no;
+ byte tmp[4];
+
+ if (cp->skip) {
+ return(!ds_write(dstfile, cursor->buf, cursor->buf_read));
+ }
+
+ write_from = NULL;
+ buf_end = cursor->buf + cursor->buf_read;
+ for (i = 0, page = cursor->buf; page < buf_end;
+ i++, page += page_size) {
+
+ page_no = cursor->buf_page_no + i;
+
+ if (!check_if_skip_page(cp, cursor, i)) {
+
+ if (write_from == NULL) {
+ write_from = page;
+ }
+
+ if (cp->inside_skipped_range) {
+ cp->inside_skipped_range = FALSE;
+
+ /* Write the last range endpoint to the
+ skipped pages map */
+
+ xb_ad(page_no > 0);
+ mach_write_to_4(tmp, page_no - 1);
+ if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
+ return(FALSE);
+ }
+ }
+ continue;
+ }
+
+ if (write_from != NULL) {
+
+ /* The first skipped page in this block, write the
+ non-skipped ones to the data file */
+
+ if (ds_write(dstfile, write_from, page - write_from)) {
+ return(FALSE);
+ }
+
+ write_from = NULL;
+ }
+
+ if (!cp->inside_skipped_range) {
+
+ /* The first skipped page in range, write the first
+ range endpoint to the skipped pages map */
+
+ cp->inside_skipped_range = TRUE;
+
+ mach_write_to_4(tmp, page_no);
+ if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
+ return(FALSE);
+ }
+ }
+ }
+
+ /* Write the remaining pages in the buffer, if any */
+ if (write_from != NULL &&
+ ds_write(dstfile, write_from, buf_end - write_from)) {
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
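+
+/* Example: in a 10-page space where check_if_skip_page() skips pages 3-5
+and 8-9, the loop above writes pages 0-2 and 6-7 to dstfile, emits 3 to
+the page map on entering the first skipped range and 5 on leaving it, then
+emits 8 for the second range. That last range is still open at the end of
+the space and is closed with space_size - 1 = 9 by wf_compact_finalize()
+below. */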
+
+/************************************************************************
+Close the compact filter's page map stream.
+
+@return TRUE on success, FALSE on error. */
+static my_bool
+wf_compact_finalize(xb_write_filt_ctxt_t *ctxt,
+ ds_file_t *dstfile __attribute__((unused)))
+{
+ xb_fil_cur_t *cursor = ctxt->cursor;
+ xb_wf_compact_ctxt_t *cp = &(ctxt->u.wf_compact_ctxt);
+ my_bool rc = TRUE;
+
+ /* Write the last endpoint of the current range, if the last pages of
+ the space have been skipped. */
+ if (cp->inside_skipped_range) {
+ byte tmp[4];
+
+ mach_write_to_4(tmp, cursor->space_size - 1);
+ if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
+ return(FALSE);
+ }
+
+ cp->inside_skipped_range = FALSE;
+ }
+
+ if (cp->buffer) {
+ if (ds_close(cp->buffer)) {
+ rc = FALSE;
+ }
+ }
+ if (cp->ds_buffer) {
+ ds_destroy(cp->ds_buffer);
+ }
+
+ return(rc);
+}
+
+/************************************************************************
+Open a page map file and return a cursor.
+
+@return page map cursor, or NULL if the file doesn't exist. */
+static page_map_cursor_t *
+page_map_file_open(const char *path)
+{
+ MY_STAT statinfo;
+ page_map_cursor_t *pmap_cur;
+ int rc;
+
+ if (my_stat(path, &statinfo, MYF(0)) == NULL) {
+
+ return(NULL);
+ }
+
+ /* The maximum possible page map file corresponds to a 64 TB tablespace
+ and the worst case when every other page was skipped. That is, 2^32/2
+ page ranges, 8 bytes each = 16 GB. */
+ xb_a(statinfo.st_size < (off_t) 16 * 1024 * 1024 * 1024);
+
+ /* Must be a series of 8-byte tuples */
+ xb_a(statinfo.st_size % 8 == 0);
+
+ pmap_cur = (page_map_cursor_t *) my_malloc(sizeof(page_map_cursor_t),
+ MYF(MY_FAE));
+
+ pmap_cur->fd = my_open(path, O_RDONLY, MYF(MY_WME));
+ xb_a(pmap_cur->fd >= 0);
+
+ rc = init_io_cache(&pmap_cur->cache, pmap_cur->fd, 0, READ_CACHE,
+ 0, 0, MYF(MY_WME));
+ xb_a(rc == 0);
+
+ return(pmap_cur);
+}
+
+/************************************************************************
+Read the next range from a page map file and update the cursor.
+
+@return TRUE on success, FALSE on end-of-file. */
+static ibool
+page_map_file_next(page_map_cursor_t *pmap_cur, page_range_t *range)
+{
+ byte buf[8];
+
+ xb_ad(pmap_cur != NULL);
+
+ if (my_b_read(&pmap_cur->cache, buf, sizeof(buf))) {
+ return(FALSE);
+ }
+
+ range->from = mach_read_from_4(buf);
+ range->to = mach_read_from_4(buf + 4);
+
+ return(TRUE);
+}
+
+/************************************************************************
+Close the page map cursor.*/
+static void
+page_map_file_close(page_map_cursor_t *pmap_cur)
+{
+ int rc;
+
+ xb_ad(pmap_cur != NULL);
+
+ rc = end_io_cache(&pmap_cur->cache);
+ xb_a(rc == 0);
+
+ posix_fadvise(pmap_cur->fd, 0, 0, POSIX_FADV_DONTNEED);
+
+ rc = my_close(pmap_cur->fd, MY_WME);
+ xb_a(rc == 0);
+
+ my_free(pmap_cur);
+}
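+
+/* For debugging, a .pmap file can be decoded outside this module with
+nothing but stdio. A minimal standalone sketch, assuming the 8-byte
+big-endian tuple layout described above (dump_pmap is a hypothetical
+helper, not part of this file's API):
+
+	#include <stdio.h>
+
+	static int dump_pmap(const char *path)
+	{
+		unsigned char buf[8];
+		FILE *f = fopen(path, "rb");
+
+		if (f == NULL) {
+			return(-1);
+		}
+		while (fread(buf, 1, sizeof(buf), f) == sizeof(buf)) {
+			unsigned long from, to;
+
+			from = ((unsigned long) buf[0] << 24) |
+				((unsigned long) buf[1] << 16) |
+				((unsigned long) buf[2] << 8) |
+				(unsigned long) buf[3];
+			to = ((unsigned long) buf[4] << 24) |
+				((unsigned long) buf[5] << 16) |
+				((unsigned long) buf[6] << 8) |
+				(unsigned long) buf[7];
+			printf("skipped pages %lu-%lu\n", from, to);
+		}
+		fclose(f);
+		return(0);
+	}
+*/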
+
+/****************************************************************************
+Expand a single data file according to the skipped pages maps created by
+--compact.
+
+@return TRUE on success, FALSE on failure. */
+static my_bool
+xb_expand_file(fil_node_t *node)
+{
+ char pmapfile_path[FN_REFLEN];
+ char tmpfile_path[FN_REFLEN];
+ xb_fil_cur_t cursor;
+ xb_fil_cur_result_t res;
+ ds_ctxt_t *ds_local;
+ ds_ctxt_t *ds_buffer;
+ ds_file_t *tmpfile;
+ my_bool success = FALSE;
+ ulint i;
+ byte *page;
+ ulint page_expected_no;
+ page_map_cursor_t *pmap_cur;
+ ibool have_next_range;
+ page_range_t pmap_range;
+
+ xb_ad(trx_sys_sys_space(node->space->id) == FALSE);
+
+ snprintf(pmapfile_path, sizeof(pmapfile_path), "%s%s",
+ node->name, XB_PAGE_MAP_SUFFIX);
+
+ /* Skip files that don't have a corresponding page map file; such
+ files were not compacted, so skipping them is not a failure */
+
+ if (!(pmap_cur = page_map_file_open(pmapfile_path))) {
+
+ msg("Not expanding %s\n", node->name);
+
+ return(TRUE);
+ }
+
+ msg("Expanding %s\n", node->name);
+
+ ds_local = ds_create(".", DS_TYPE_LOCAL);
+ ds_buffer = ds_create(".", DS_TYPE_BUFFER);
+
+ xb_a(ds_local != NULL && ds_buffer != NULL);
+
+ ds_buffer_set_size(ds_buffer, FSP_EXTENT_SIZE * UNIV_PAGE_SIZE_MAX);
+
+ ds_set_pipe(ds_buffer, ds_local);
+
+ res = xb_fil_cur_open(&cursor, &rf_pass_through, node, 1);
+ xb_a(res == XB_FIL_CUR_SUCCESS);
+
+ snprintf(tmpfile_path, sizeof(tmpfile_path), "%s%s",
+ node->name, XB_TMPFILE_SUFFIX);
+
+ tmpfile = ds_open(ds_buffer, tmpfile_path, &cursor.statinfo);
+ if (tmpfile == NULL) {
+
+ msg("Could not open temporary file '%s'\n", tmpfile_path);
+ goto error;
+ }
+
+ have_next_range = page_map_file_next(pmap_cur, &pmap_range);
+
+ page_expected_no = 0;
+
+ /* Initialize and mark the empty page which is used to replace
+ skipped pages. */
+ memset(empty_page, 0, cursor.page_size);
+ memcpy(empty_page + compacted_page_magic_offset,
+ compacted_page_magic, compacted_page_magic_size);
+ mach_write_to_4(empty_page + FIL_PAGE_SPACE_OR_CHKSUM,
+ BUF_NO_CHECKSUM_MAGIC);
+ mach_write_to_4(empty_page + cursor.page_size -
+ FIL_PAGE_END_LSN_OLD_CHKSUM,
+ BUF_NO_CHECKSUM_MAGIC);
+
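+ /* Example: with a page map of (3, 5) and (8, 9), a compacted file
+ holding pages 0-2 and 6-7 expands as follows: the loop below copies
+ pages 0-2, inserts three marker pages for 3-5, copies pages 6-7, and
+ the trailing-range loop after EOF appends two more marker pages for
+ 8-9, restoring the original 10-page layout. */
+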
+
+ /* Main copy loop */
+
+ while ((res = xb_fil_cur_read(&cursor)) == XB_FIL_CUR_SUCCESS) {
+
+ for (i = 0, page = cursor.buf; i < cursor.buf_npages;
+ i++, page += cursor.page_size) {
+
+ ulint page_read_no;
+
+ page_read_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
+ xb_a(!page_read_no || page_expected_no <= page_read_no);
+
+ if (have_next_range &&
+ page_expected_no == pmap_range.from) {
+
+ xb_a(pmap_range.from <= pmap_range.to);
+
+ /* Write empty pages instead of skipped ones, if
+ necessary. */
+
+ while (page_expected_no <= pmap_range.to) {
+
+ if (ds_write(tmpfile, empty_page,
+ cursor.page_size)) {
+
+ goto write_error;
+ }
+
+ page_expected_no++;
+ }
+
+ have_next_range =
+ page_map_file_next(pmap_cur,
+ &pmap_range);
+ }
+
+ /* Write the current page */
+
+ if (ds_write(tmpfile, page, cursor.page_size)) {
+
+ goto write_error;
+ }
+
+ page_expected_no++;
+ }
+ }
+
+ if (res != XB_FIL_CUR_EOF) {
+
+ goto error;
+ }
+
+ /* Write empty pages instead of trailing skipped ones, if any */
+
+ if (have_next_range) {
+
+ xb_a(page_expected_no == pmap_range.from);
+ xb_a(pmap_range.from <= pmap_range.to);
+
+ while (page_expected_no <= pmap_range.to) {
+
+ if (ds_write(tmpfile, empty_page,
+ cursor.page_size)) {
+
+ goto write_error;
+ }
+
+ page_expected_no++;
+ }
+
+ xb_a(!page_map_file_next(pmap_cur, &pmap_range));
+ }
+
+ /* Replace the original .ibd file with the expanded file */
+ if (my_rename(tmpfile_path, node->name, MYF(MY_WME))) {
+
+ msg("Failed to rename '%s' to '%s'\n",
+ tmpfile_path, node->name);
+ goto error;
+ }
+
+ my_delete(pmapfile_path, MYF(MY_WME));
+
+ if (!ds_close(tmpfile)) {
+ success = TRUE;
+ }
+ tmpfile = NULL;
+
+ goto end;
+
+write_error:
+ msg("Write to '%s' failed\n", tmpfile_path);
+
+error:
+ if (tmpfile != NULL) {
+
+ ds_close(tmpfile);
+ my_delete(tmpfile_path, MYF(MY_WME));
+ }
+
+end:
+ ds_destroy(ds_buffer);
+ ds_destroy(ds_local);
+
+ xb_fil_cur_close(&cursor);
+
+ page_map_file_close(pmap_cur);
+
+ return(success);
+}
+
+/******************************************************************************
+Expand the data files according to the skipped pages maps created by --compact.
+@return TRUE on success, FALSE on failure. */
+my_bool
+xb_expand_datafiles(void)
+/*=====================*/
+{
+ ulint nfiles;
+ datafiles_iter_t *it = NULL;
+ fil_node_t *node;
+ fil_space_t *space;
+
+ msg("Starting to expand compacted .ibd files.\n");
+
+ /* Initialize the tablespace cache */
+ if (xb_data_files_init() != DB_SUCCESS) {
+ return(FALSE);
+ }
+
+ nfiles = UT_LIST_GET_LEN(fil_system->space_list);
+ xb_a(nfiles > 0);
+
+ it = datafiles_iter_new(fil_system);
+ if (it == NULL) {
+ msg("xtrabackup: error: datafiles_iter_new() failed.\n");
+ goto error;
+ }
+
+ while ((node = datafiles_iter_next(it)) != NULL) {
+
+ space = node->space;
+
+ /* System tablespace cannot be compacted */
+ if (!fil_is_user_tablespace_id(space->id)) {
+
+ continue;
+ }
+
+ if (!xb_expand_file(node)) {
+
+ goto error;
+ }
+ }
+
+ datafiles_iter_free(it);
+ xb_data_files_close();
+
+ return(TRUE);
+
+error:
+ if (it != NULL) {
+ datafiles_iter_free(it);
+ }
+
+ xb_data_files_close();
+
+ return(FALSE);
+}
+
+/******************************************************************************
+Callback used in buf_page_io_complete() to detect compacted pages.
+@return TRUE if the page is marked as compacted, FALSE otherwise. */
+ibool
+buf_page_is_compacted(
+/*==================*/
+ const byte* page) /*!< in: a database page */
+{
+ return !memcmp(page + compacted_page_magic_offset,
+ compacted_page_magic, compacted_page_magic_size);
+}
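+
+/* During the prepare phase of a compact backup, reads of such marker pages
+must not be flagged as corruption; the pages carry no index data, and the
+secondary indexes they belonged to are rebuilt from the clustered index by
+xb_compact_rebuild_indexes() below. */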
+
+/*****************************************************************************
+Builds an index definition corresponding to an index object. It is roughly
+similar to innobase_create_index_def() / innobase_create_index_field_def() and
+the inverse of dict_mem_index_create() / dict_mem_index_add_field(). */
+static
+void
+xb_build_index_def(
+/*=======================*/
+ mem_heap_t* heap, /*!< in: heap */
+ const dict_index_t* index, /*!< in: index */
+ index_def_t* index_def) /*!< out: index definition */
+{
+ index_field_t* fields;
+ ulint n_fields;
+ ulint i;
+
+ ut_a(index->n_fields);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ /* Use n_user_defined_cols instead of n_fields, as the index will
+ contain a part of the primary key after n_user_defined_cols, and those
+ columns will be created automatically in
+ dict_index_build_internal_clust(). */
+ n_fields = index->n_user_defined_cols;
+
+ memset(index_def, 0, sizeof(*index_def));
+
+ index_def->name = mem_heap_strdup(heap, index->name);
+ index_def->ind_type = index->type;
+
+ fields = static_cast<index_field_t *>
+ (mem_heap_alloc(heap, n_fields * sizeof(*fields)));
+
+ for (i = 0; i < n_fields; i++) {
+ dict_field_t* field;
+
+ field = dict_index_get_nth_field(index, i);
+ fields[i].col_no = dict_col_get_no(field->col);
+ fields[i].prefix_len = field->prefix_len;
+ }
+
+ index_def->fields = fields;
+ index_def->n_fields = n_fields;
+}
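+
+/* Example: for a secondary index ix(a(10), b), the resulting index_def_t
+carries name "ix", the original ind_type, n_fields = 2 (user-defined
+columns only), fields[0] = { col_no of a, prefix_len = 10 } and
+fields[1] = { col_no of b, prefix_len = 0 }; the internal clustered-key
+suffix columns are added back automatically when the index is recreated. */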
+
+/* A dummy auto_inc sequence for row_merge_build_indexes(). */
+static ib_sequence_t null_seq(NULL, 0, 0);
+/* A dummy table share and table for row_merge_build_indexes() error reporting.
+Assumes that no errors are going to be reported. */
+static struct TABLE_SHARE dummy_table_share;
+static struct TABLE dummy_table;
+
+/********************************************************************//**
+Rebuild secondary indexes for a given table. */
+static
+void
+xb_rebuild_indexes_for_table(
+/*=========================*/
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx, /*!< in: transaction handle */
+ ulint thread_n) /*!< in: thread number */
+{
+ dict_index_t* index;
+ dict_index_t** indexes;
+ ulint n_indexes;
+ index_def_t* index_defs;
+ ulint i;
+ mem_heap_t* heap;
+ ulint error;
+ ulint* add_key_nums;
+
+ ut_ad(!mutex_own(&(dict_sys->mutex)));
+ ut_ad(table);
+
+ ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
+
+ n_indexes = UT_LIST_GET_LEN(table->indexes) - 1;
+ if (!n_indexes) {
+ /* Only the primary key, nothing to do. */
+ return;
+ }
+
+ heap = mem_heap_create(1024);
+
+ indexes = (dict_index_t**) mem_heap_alloc(heap,
+ n_indexes * sizeof(*indexes));
+ index_defs = (index_def_t*) mem_heap_alloc(heap, n_indexes *
+ sizeof(*index_defs));
+ add_key_nums = static_cast<ulint *>
+ (mem_heap_alloc(heap, n_indexes * sizeof(*add_key_nums)));
+
+ /* Skip the primary key. */
+ index = dict_table_get_first_index(table);
+ ut_a(dict_index_is_clust(index));
+
+ row_mysql_lock_data_dictionary(trx);
+
+ for (i = 0; (index = dict_table_get_next_index(index)); i++) {
+
+ msg("[%02lu] Found index %s\n", thread_n, index->name);
+
+ /* Pretend that it's the current trx that created this index.
+ Required to avoid 5.6+ debug assertions. */
+ index->trx_id = trx->id;
+
+ xb_build_index_def(heap, index, &index_defs[i]);
+
+ /* In 5.6+, row_merge_drop_indexes() drops all the indexes on
+ the table that have the temp index prefix. It does not accept
+ an array of indexes to drop as in 5.5-. */
+ row_merge_rename_index_to_drop(trx, table->id, index->id);
+ }
+
+ ut_ad(i == n_indexes);
+
+ row_merge_drop_indexes(trx, table, TRUE);
+
+ index = dict_table_get_first_index(table);
+ ut_a(dict_index_is_clust(index));
+ index = dict_table_get_next_index(index);
+ while (index) {
+
+ /* In 5.6+, row_merge_drop_indexes() does not remove the
+ indexes from the dictionary cache nor from any foreign key
+ list. This may cause invalid dereferences as we try to access
+ the dropped indexes from other tables as FKs. */
+
+ dict_index_t* next_index = dict_table_get_next_index(index);
+ index->to_be_dropped = 1;
+
+ /* Patch up any FK referencing this index with NULL */
+ dict_foreign_replace_index(table, NULL, index);
+
+ dict_index_remove_from_cache(table, index);
+
+ index = next_index;
+ }
+
+ msg("[%02lu] Rebuilding %lu index(es).\n", thread_n, n_indexes);
+
+ error = row_merge_lock_table(trx, table, LOCK_X);
+ xb_a(error == DB_SUCCESS);
+
+ for (i = 0; i < n_indexes; i++) {
+ indexes[i] = row_merge_create_index(trx, table,
+ &index_defs[i]);
+ add_key_nums[i] = index_defs[i].key_number;
+ }
+
+ /* Commit trx to release latches on system tables */
+ trx_commit_for_mysql(trx);
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ /* Reacquire table lock for row_merge_build_indexes() */
+ error = row_merge_lock_table(trx, table, LOCK_X);
+ xb_a(error == DB_SUCCESS);
+
+ error = row_merge_build_indexes(trx, table, table, FALSE, indexes,
+ add_key_nums, n_indexes, &dummy_table,
+ NULL, NULL, ULINT_UNDEFINED, null_seq);
+ ut_a(error == DB_SUCCESS);
+
+ mem_heap_free(heap);
+
+ trx_commit_for_mysql(trx);
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+}
+
+/**************************************************************************
+Worker thread function for index rebuild. */
+static
+void *
+xb_rebuild_indexes_thread_func(
+/*===========================*/
+ void* arg) /* thread context */
+{
+ dict_table_t* table;
+ index_rebuild_table_t* rebuild_table;
+ index_rebuild_thread_t* thread;
+ trx_t* trx;
+
+ thread = (index_rebuild_thread_t *) arg;
+
+ trx = trx_allocate_for_mysql();
+
+ /* Suppress foreign key checks, as we are going to drop and recreate all
+ secondary keys. */
+ trx->check_foreigns = FALSE;
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ /* Loop until there are no more tables in the table list */
+ for (;;) {
+ pthread_mutex_lock(&table_list_mutex);
+
+ rebuild_table = UT_LIST_GET_FIRST(table_list);
+
+ if (rebuild_table == NULL) {
+
+ pthread_mutex_unlock(&table_list_mutex);
+ break;
+ }
+
+ UT_LIST_REMOVE(list, table_list, rebuild_table);
+
+ pthread_mutex_unlock(&table_list_mutex);
+
+ ut_ad(rebuild_table->name);
+ ut_ad(fil_is_user_tablespace_id(rebuild_table->space_id));
+
+ row_mysql_lock_data_dictionary(trx);
+
+ table = dict_table_get_low(rebuild_table->name);
+
+ ut_a(table != NULL);
+ ut_a(table->space == rebuild_table->space_id);
+
+ ut_d(table->n_ref_count++);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ /* Discard change buffer entries for this space */
+ ibuf_delete_for_discarded_space(rebuild_table->space_id);
+
+ msg("[%02lu] Checking if there are indexes to rebuild in table "
+ "%s (space id: %lu)\n",
+ thread->num,
+ rebuild_table->name, rebuild_table->space_id);
+
+ xb_rebuild_indexes_for_table(table, trx, thread->num);
+
+ ut_d(table->n_ref_count--);
+
+ mem_free(rebuild_table->name);
+ mem_free(rebuild_table);
+ }
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_mysql(trx);
+
+ return(NULL);
+}
+
+/******************************************************************************
+Rebuild all secondary indexes in all tables in separate spaces. Called from
+innobase_start_or_create_for_mysql(). */
+void
+xb_compact_rebuild_indexes(void)
+/*=============================*/
+{
+ dict_table_t* sys_tables;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mtr_t mtr;
+ const byte* field;
+ ulint len;
+ ulint space_id;
+ trx_t* trx;
+ index_rebuild_table_t* rebuild_table;
+ index_rebuild_thread_t* threads;
+ ulint i;
+
+ /* Set up the dummy table for the index rebuild error reporting */
+ dummy_table_share.fields = 0;
+ dummy_table.s = &dummy_table_share;
+
+ /* Iterate all tables that are not in the system tablespace and add them
+ to the list of tables to be rebuilt later. */
+
+ trx = trx_allocate_for_mysql();
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Enlarge the fatal lock wait timeout during index rebuild
+ operation. */
+ os_increment_counter_by_amount(server_mutex,
+ srv_fatal_semaphore_wait_threshold,
+ 7200);
+
+ mtr_start(&mtr);
+
+ sys_tables = dict_table_get_low("SYS_TABLES");
+ sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+ ut_a(!dict_table_is_comp(sys_tables));
+
+ pthread_mutex_init(&table_list_mutex, NULL);
+ UT_LIST_INIT(table_list);
+
+ btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
+ TRUE, 0, &mtr);
+ for (;;) {
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ /* end of index */
+
+ break;
+ }
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ continue;
+ }
+
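+ /* The physical fields of a SYS_TABLES leaf record are NAME,
+ DB_TRX_ID, DB_ROLL_PTR, ID, N_COLS, TYPE, MIX_ID, MIX_LEN,
+ CLUSTER_NAME and SPACE, so field 9 below is the space ID and
+ field 0 further down is the table name. */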
+ field = rec_get_nth_field_old(rec, 9, &len);
+ ut_a(len == 4);
+
+ space_id = mach_read_from_4(field);
+
+ /* Don't touch tables in the system tablespace */
+ if (!fil_is_user_tablespace_id(space_id)) {
+
+ continue;
+ }
+
+ field = rec_get_nth_field_old(rec, 0, &len);
+
+ rebuild_table = static_cast<index_rebuild_table_t *>
+ (mem_alloc(sizeof(*rebuild_table)));
+ rebuild_table->name = mem_strdupl((char*) field, len);
+ rebuild_table->space_id = space_id;
+
+ UT_LIST_ADD_LAST(list, table_list, rebuild_table);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_mysql(trx);
+
+ /* Start worker threads for the index rebuild operation */
+ ut_ad(xtrabackup_rebuild_threads > 0);
+
+ if (xtrabackup_rebuild_threads > 1) {
+ msg("Starting %lu threads to rebuild indexes.\n",
+ xtrabackup_rebuild_threads);
+ }
+
+ threads = (index_rebuild_thread_t *)
+ mem_alloc(sizeof(*threads) *
+ xtrabackup_rebuild_threads);
+
+ for (i = 0; i < xtrabackup_rebuild_threads; i++) {
+
+ threads[i].num = i + 1;
+ if (pthread_create(&threads[i].id, NULL,
+ xb_rebuild_indexes_thread_func,
+ &threads[i])) {
+
+ msg("error: pthread_create() failed: errno = %d\n",
+ errno);
+ ut_a(0);
+ }
+ }
+
+ /* Wait for worker threads to finish */
+ for (i = 0; i < xtrabackup_rebuild_threads; i++) {
+ pthread_join(threads[i].id, NULL);
+ }
+
+ mem_free(threads);
+}