Diffstat (limited to 'storage/xtradb/row/row0merge.c')
-rw-r--r--	storage/xtradb/row/row0merge.c	2358
1 file changed, 2358 insertions, 0 deletions
diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c
new file mode 100644
index 00000000000..efed3d26e5b
--- /dev/null
+++ b/storage/xtradb/row/row0merge.c
@@ -0,0 +1,2358 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+New index creation routines using a merge sort
+
+Created 12/4/2005 Jan Lindstrom
+Completed by Sunny Bains and Marko Makela
+*******************************************************/
+
+#include "row0merge.h"
+#include "row0ext.h"
+#include "row0row.h"
+#include "row0upd.h"
+#include "row0ins.h"
+#include "row0sel.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0load.h"
+#include "btr0btr.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "read0read.h"
+#include "os0file.h"
+#include "lock0lock.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "que0que.h"
+#include "pars0pars.h"
+#include "mem0mem.h"
+#include "log0log.h"
+#include "ut0sort.h"
+#include "handler0alter.h"
+
+#ifdef UNIV_DEBUG
+/* Set these in order to enable debug printout. */
+static ibool row_merge_print_cmp;
+static ibool row_merge_print_read;
+static ibool row_merge_print_write;
+#endif /* UNIV_DEBUG */
+
+/* Block size for I/O operations in merge sort. The minimum is
+UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.
+
+When not creating a PRIMARY KEY that contains column prefixes, this
+can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
+ut_ad(data_size < sizeof(row_merge_block_t)). */
+
+typedef byte row_merge_block_t[1048576];
+
+/* Secondary buffer for I/O operations of merge records. This buffer
+is used for writing or reading a record that spans two row_merge_block_t.
+Thus, it must be able to hold one merge record, whose maximum size is
+the same as the minimum size of row_merge_block_t. */
+
+typedef byte mrec_buf_t[UNIV_PAGE_SIZE];
+
+/* Merge record in row_merge_block_t. The format is the same as a
+record in ROW_FORMAT=COMPACT with the exception that the
+REC_N_NEW_EXTRA_BYTES are omitted. */
+typedef byte mrec_t;
+
+/* Buffer for sorting in main memory. */
+struct row_merge_buf_struct {
+ mem_heap_t* heap; /* memory heap where allocated */
+ dict_index_t* index; /* the index the tuples belong to */
+ ulint total_size; /* total amount of data bytes */
+ ulint n_tuples; /* number of data tuples */
+ ulint max_tuples; /* maximum number of data tuples */
+ const dfield_t**tuples; /* array of pointers to
+ arrays of fields that form
+ the data tuples */
+ const dfield_t**tmp_tuples; /* temporary copy of tuples,
+ for sorting */
+};
+
+typedef struct row_merge_buf_struct row_merge_buf_t;
+
+/* Information about the temporary files used in merge sort is stored
+in this structure */
+
+struct merge_file_struct {
+ int fd; /* File descriptor */
+ ulint offset; /* File offset, in blocks */
+};
+
+typedef struct merge_file_struct merge_file_t;
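+
+/* A sketch of how these structures cooperate (illustrative only; the
+real control flow is in row_merge_read_clustered_index() and
+row_merge_build_indexes() below).  Rows are collected into a sort
+buffer, sorted in memory when the buffer fills up, and flushed to a
+merge file one block at a time:
+
+	row_merge_buf_t*	buf = row_merge_buf_create(index);
+
+	while (row_merge_buf_add(buf, row, ext)) {
+		(fetch the next row from the clustered index)
+	}
+
+	row_merge_buf_sort(buf, NULL);
+	row_merge_buf_write(buf, file, block);
+
+	if (row_merge_write(file->fd, file->offset++, block)) {
+		buf = row_merge_buf_empty(buf);
+	}
+*/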
+
+#ifdef UNIV_DEBUG
+/**********************************************************
+Display a merge tuple. */
+static
+void
+row_merge_tuple_print(
+/*==================*/
+ FILE* f, /* in: output stream */
+ const dfield_t* entry, /* in: tuple to print */
+ ulint n_fields)/* in: number of fields in the tuple */
+{
+ ulint j;
+
+ for (j = 0; j < n_fields; j++) {
+ const dfield_t* field = &entry[j];
+
+ if (dfield_is_null(field)) {
+ fputs("\n NULL;", f);
+ } else {
+ ulint field_len = dfield_get_len(field);
+ ulint len = ut_min(field_len, 20);
+ if (dfield_is_ext(field)) {
+ fputs("\nE", f);
+ } else {
+ fputs("\n ", f);
+ }
+ ut_print_buf(f, dfield_get_data(field), len);
+ if (len != field_len) {
+ fprintf(f, " (total %lu bytes)", field_len);
+ }
+ }
+ }
+ putc('\n', f);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************
+Allocate a sort buffer. */
+static
+row_merge_buf_t*
+row_merge_buf_create_low(
+/*=====================*/
+ /* out,own: sort buffer */
+ mem_heap_t* heap, /* in: heap where allocated */
+ dict_index_t* index, /* in: secondary index */
+ ulint max_tuples, /* in: maximum number of data tuples */
+ ulint buf_size) /* in: size of the buffer, in bytes */
+{
+ row_merge_buf_t* buf;
+
+ ut_ad(max_tuples > 0);
+ ut_ad(max_tuples <= sizeof(row_merge_block_t));
+ ut_ad(max_tuples < buf_size);
+
+ buf = mem_heap_zalloc(heap, buf_size);
+ buf->heap = heap;
+ buf->index = index;
+ buf->max_tuples = max_tuples;
+ buf->tuples = mem_heap_alloc(heap,
+ 2 * max_tuples * sizeof *buf->tuples);
+ buf->tmp_tuples = buf->tuples + max_tuples;
+
+ return(buf);
+}
+
+/**********************************************************
+Allocate a sort buffer. */
+static
+row_merge_buf_t*
+row_merge_buf_create(
+/*=================*/
+ /* out,own: sort buffer */
+ dict_index_t* index) /* in: secondary index */
+{
+ row_merge_buf_t* buf;
+ ulint max_tuples;
+ ulint buf_size;
+ mem_heap_t* heap;
+
+ max_tuples = sizeof(row_merge_block_t)
+ / ut_max(1, dict_index_get_min_size(index));
+
+ buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
+
+ heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));
+
+ buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);
+
+ return(buf);
+}
+
+/**********************************************************
+Empty a sort buffer. */
+static
+row_merge_buf_t*
+row_merge_buf_empty(
+/*================*/
+ /* out: sort buffer */
+ row_merge_buf_t* buf) /* in,own: sort buffer */
+{
+ ulint buf_size;
+ ulint max_tuples = buf->max_tuples;
+ mem_heap_t* heap = buf->heap;
+ dict_index_t* index = buf->index;
+
+ buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
+
+ mem_heap_empty(heap);
+
+ return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
+}
+
+/**********************************************************
+Deallocate a sort buffer. */
+static
+void
+row_merge_buf_free(
+/*===============*/
+ row_merge_buf_t* buf) /* in,own: sort buffer, to be freed */
+{
+ mem_heap_free(buf->heap);
+}
+
+/**********************************************************
+Insert a data tuple into a sort buffer. */
+static
+ibool
+row_merge_buf_add(
+/*==============*/
+ /* out: TRUE if added,
+ FALSE if out of space */
+ row_merge_buf_t* buf, /* in/out: sort buffer */
+ const dtuple_t* row, /* in: row in clustered index */
+ const row_ext_t* ext) /* in: cache of externally stored
+ column prefixes, or NULL */
+{
+ ulint i;
+ ulint n_fields;
+ ulint data_size;
+ ulint extra_size;
+ const dict_index_t* index;
+ dfield_t* entry;
+ dfield_t* field;
+
+ if (buf->n_tuples >= buf->max_tuples) {
+ return(FALSE);
+ }
+
+ UNIV_PREFETCH_R(row->fields);
+
+ index = buf->index;
+
+ n_fields = dict_index_get_n_fields(index);
+
+ entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
+ buf->tuples[buf->n_tuples] = entry;
+ field = entry;
+
+ data_size = 0;
+ extra_size = UT_BITS_IN_BYTES(index->n_nullable);
+
+ for (i = 0; i < n_fields; i++, field++) {
+ const dict_field_t* ifield;
+ const dict_col_t* col;
+ ulint col_no;
+ const dfield_t* row_field;
+ ulint len;
+
+ ifield = dict_index_get_nth_field(index, i);
+ col = ifield->col;
+ col_no = dict_col_get_no(col);
+ row_field = dtuple_get_nth_field(row, col_no);
+ dfield_copy(field, row_field);
+ len = dfield_get_len(field);
+
+ if (dfield_is_null(field)) {
+ ut_ad(!(col->prtype & DATA_NOT_NULL));
+ continue;
+ } else if (UNIV_LIKELY(!ext)) {
+ } else if (dict_index_is_clust(index)) {
+ /* Flag externally stored fields. */
+ const byte* buf = row_ext_lookup(ext, col_no,
+ &len);
+ if (UNIV_LIKELY_NULL(buf)) {
+ ut_a(buf != field_ref_zero);
+ if (i < dict_index_get_n_unique(index)) {
+ dfield_set_data(field, buf, len);
+ } else {
+ dfield_set_ext(field);
+ len = dfield_get_len(field);
+ }
+ }
+ } else {
+ const byte* buf = row_ext_lookup(ext, col_no,
+ &len);
+ if (UNIV_LIKELY_NULL(buf)) {
+ ut_a(buf != field_ref_zero);
+ dfield_set_data(field, buf, len);
+ }
+ }
+
+ /* If a column prefix index, take only the prefix */
+
+ if (ifield->prefix_len) {
+ len = dtype_get_at_most_n_mbchars(
+ col->prtype,
+ col->mbminlen, col->mbmaxlen,
+ ifield->prefix_len,
+ len, dfield_get_data(field));
+ dfield_set_len(field, len);
+ }
+
+ ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+
+ if (ifield->fixed_len) {
+ ut_ad(len == ifield->fixed_len);
+ ut_ad(!dfield_is_ext(field));
+ } else if (dfield_is_ext(field)) {
+ extra_size += 2;
+ } else if (len < 128
+ || (col->len < 256 && col->mtype != DATA_BLOB)) {
+ extra_size++;
+ } else {
+ /* For variable-length columns, we look up the
+ maximum length from the column itself. If this
+ is a prefix index column shorter than 256 bytes,
+ this will waste one byte. */
+ extra_size += 2;
+ }
+ data_size += len;
+ }
+
+#ifdef UNIV_DEBUG
+ {
+ ulint size;
+ ulint extra;
+
+ size = rec_get_converted_size_comp(index,
+ REC_STATUS_ORDINARY,
+ entry, n_fields, &extra);
+
+ ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
+ ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+ }
+#endif /* UNIV_DEBUG */
+
+ /* Add to the total size of the record in row_merge_block_t
+ the encoded length of extra_size and the extra bytes (extra_size).
+ See row_merge_buf_write() for the variable-length encoding
+ of extra_size. */
+ data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
+
+ /* The following assertion may fail if row_merge_block_t is
+ declared very small and a PRIMARY KEY is being created with
+ many prefix columns. In that case, the record may exceed the
+ page_zip_rec_needs_ext() limit. However, no further columns
+ will be moved to external storage until the record is inserted
+ to the clustered index B-tree. */
+ ut_ad(data_size < sizeof(row_merge_block_t));
+
+ /* Reserve one byte for the end marker of row_merge_block_t. */
+ if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
+ return(FALSE);
+ }
+
+ buf->total_size += data_size;
+ buf->n_tuples++;
+
+ field = entry;
+
+ /* Copy the data fields. */
+
+ do {
+ dfield_dup(field++, buf->heap);
+ } while (--n_fields);
+
+ return(TRUE);
+}
+
+/* Structure for reporting duplicate records. */
+struct row_merge_dup_struct {
+ const dict_index_t* index; /* index being sorted */
+ TABLE* table; /* MySQL table object */
+ ulint n_dup; /* number of duplicates */
+};
+
+typedef struct row_merge_dup_struct row_merge_dup_t;
+
+/*****************************************************************
+Report a duplicate key. */
+static
+void
+row_merge_dup_report(
+/*=================*/
+ row_merge_dup_t* dup, /* in/out: for reporting duplicates */
+ const dfield_t* entry) /* in: duplicate index entry */
+{
+ mrec_buf_t buf;
+ const dtuple_t* tuple;
+ dtuple_t tuple_store;
+ const rec_t* rec;
+ const dict_index_t* index = dup->index;
+ ulint n_fields = dict_index_get_n_fields(index);
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets;
+ ulint n_ext;
+
+ if (dup->n_dup++) {
+ /* Only report the first duplicate record,
+ but count all duplicate records. */
+ return;
+ }
+
+ rec_offs_init(offsets_);
+
+ /* Convert the tuple to a record and then to MySQL format. */
+
+ tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
+ n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
+
+ rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
+ offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
+ &heap);
+
+ innobase_rec_to_mysql(dup->table, rec, index, offsets);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+}
+
+/*****************************************************************
+Compare two tuples. */
+static
+int
+row_merge_tuple_cmp(
+/*================*/
+ /* out: 1, 0, -1 if a is greater,
+ equal, less, respectively, than b */
+ ulint n_field,/* in: number of fields */
+ const dfield_t* a, /* in: first tuple to be compared */
+ const dfield_t* b, /* in: second tuple to be compared */
+ row_merge_dup_t* dup) /* in/out: for reporting duplicates */
+{
+ int cmp;
+ const dfield_t* field = a;
+
+ /* Compare the fields of the tuples until a difference is
+ found or we run out of fields to compare. If !cmp at the
+ end, the tuples are equal. */
+ do {
+ cmp = cmp_dfield_dfield(a++, b++);
+ } while (!cmp && --n_field);
+
+ if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
+ /* Report a duplicate value error if the tuples are
+ logically equal. NULL columns are logically inequal,
+ although they are equal in the sorting order. Find
+ out if any of the fields are NULL. */
+ for (b = field; b != a; b++) {
+ if (dfield_is_null(b)) {
+
+ goto func_exit;
+ }
+ }
+
+ row_merge_dup_report(dup, field);
+ }
+
+func_exit:
+ return(cmp);
+}
+
+/**************************************************************************
+Merge sort the tuple buffer in main memory. */
+static
+void
+row_merge_tuple_sort(
+/*=================*/
+ ulint n_field,/* in: number of fields */
+ row_merge_dup_t* dup, /* in/out: for reporting duplicates */
+ const dfield_t** tuples, /* in/out: tuples */
+ const dfield_t** aux, /* in/out: work area */
+ ulint low, /* in: lower bound of the
+ sorting area, inclusive */
+ ulint high) /* in: upper bound of the
+ sorting area, exclusive */
+{
+#define row_merge_tuple_sort_ctx(a,b,c,d) \
+ row_merge_tuple_sort(n_field, dup, a, b, c, d)
+#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
+
+ UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
+ tuples, aux, low, high, row_merge_tuple_cmp_ctx);
+}
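+
+/* UT_SORT_FUNCTION_BODY in ut0sort.h expands to a recursive merge
+sort.  A rough sketch of the expansion (illustrative, not the literal
+macro output):
+
+	if (high - low > 1) {
+		mid = (low + high) / 2;
+		row_merge_tuple_sort_ctx(tuples, aux, low, mid);
+		row_merge_tuple_sort_ctx(tuples, aux, mid, high);
+		(merge tuples[low..mid) and tuples[mid..high) into
+		aux[low..high) using row_merge_tuple_cmp_ctx(),
+		then copy aux[low..high) back to tuples[low..high))
+	}
+*/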
+
+/**********************************************************
+Sort a buffer. */
+static
+void
+row_merge_buf_sort(
+/*===============*/
+ row_merge_buf_t* buf, /* in/out: sort buffer */
+ row_merge_dup_t* dup) /* in/out: for reporting duplicates */
+{
+ row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
+ buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
+}
+
+/**********************************************************
+Write a buffer to a block. */
+static
+void
+row_merge_buf_write(
+/*================*/
+ const row_merge_buf_t* buf, /* in: sorted buffer */
+#ifdef UNIV_DEBUG
+ const merge_file_t* of, /* in: output file */
+#endif /* UNIV_DEBUG */
+ row_merge_block_t* block) /* out: buffer for writing to file */
+#ifndef UNIV_DEBUG
+# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
+#endif /* !UNIV_DEBUG */
+{
+ const dict_index_t* index = buf->index;
+ ulint n_fields = dict_index_get_n_fields(index);
+ byte* b = &(*block)[0];
+
+ ulint i;
+
+ for (i = 0; i < buf->n_tuples; i++) {
+ ulint size;
+ ulint extra_size;
+ const dfield_t* entry = buf->tuples[i];
+
+ size = rec_get_converted_size_comp(index,
+ REC_STATUS_ORDINARY,
+ entry, n_fields,
+ &extra_size);
+ ut_ad(size > extra_size);
+ ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
+ extra_size -= REC_N_NEW_EXTRA_BYTES;
+ size -= REC_N_NEW_EXTRA_BYTES;
+
+ /* Encode extra_size + 1 */
+ if (extra_size + 1 < 0x80) {
+ *b++ = (byte) (extra_size + 1);
+ } else {
+ ut_ad((extra_size + 1) < 0x8000);
+ *b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
+ *b++ = (byte) (extra_size + 1);
+ }
+
+ ut_ad(b + size < block[1]);
+
+ rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
+ REC_STATUS_ORDINARY,
+ entry, n_fields);
+
+ b += size;
+
+#ifdef UNIV_DEBUG
+ if (row_merge_print_write) {
+ fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
+ (void*) b, of->fd, (ulong) of->offset,
+ (ulong) i);
+ row_merge_tuple_print(stderr, entry, n_fields);
+ }
+#endif /* UNIV_DEBUG */
+ }
+
+ /* Write an "end-of-chunk" marker. */
+ ut_a(b < block[1]);
+ ut_a(b == block[0] + buf->total_size);
+ *b++ = 0;
+#ifdef UNIV_DEBUG_VALGRIND
+ /* The rest of the block is uninitialized. Initialize it
+ to avoid bogus warnings. */
+ memset(b, 0xff, block[1] - b);
+#endif /* UNIV_DEBUG_VALGRIND */
+#ifdef UNIV_DEBUG
+ if (row_merge_print_write) {
+ fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
+ (void*) b, of->fd, (ulong) of->offset);
+ }
+#endif /* UNIV_DEBUG */
+}
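+
+/* A minimal standalone sketch (not part of the build) of the
+extra_size encoding written above and decoded in row_merge_read_rec():
+the value 0 marks the end of a list, values below 0x80 occupy one
+byte, and values up to 0x7fff occupy two bytes with the high bit of
+the first byte set:
+
+	static ulint
+	mrec_encode(byte* b, ulint e)	(e = extra_size + 1, nonzero)
+	{
+		if (e < 0x80) {
+			b[0] = (byte) e;
+			return(1);
+		}
+
+		ut_ad(e < 0x8000);
+		b[0] = (byte) (0x80 | (e >> 8));
+		b[1] = (byte) e;
+		return(2);
+	}
+
+	static ulint
+	mrec_decode(const byte* b, ulint* e)
+	{
+		if (b[0] < 0x80) {
+			*e = b[0];
+			return(1);
+		}
+
+		*e = ((ulint) (b[0] & 0x7f) << 8) | b[1];
+		return(2);
+	}
+*/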
+
+/**********************************************************
+Create a memory heap and allocate space for two merge record
+offset arrays. */
+static
+mem_heap_t*
+row_merge_heap_create(
+/*==================*/
+ /* out: memory heap */
+ const dict_index_t* index, /* in: record descriptor */
+ ulint** offsets1, /* out: offsets */
+ ulint** offsets2) /* out: offsets */
+{
+ ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index);
+ mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1);
+
+ *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
+ *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
+
+ (*offsets1)[0] = (*offsets2)[0] = i;
+ (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
+
+ return(heap);
+}
+
+/**************************************************************************
+Search an index object by name and column names. If several indexes match,
+return the index with the max id. */
+static
+dict_index_t*
+row_merge_dict_table_get_index(
+/*===========================*/
+ /* out: matching index,
+ NULL if not found */
+ dict_table_t* table, /* in: table */
+ const merge_index_def_t*index_def) /* in: index definition */
+{
+ ulint i;
+ dict_index_t* index;
+ const char** column_names;
+
+ column_names = mem_alloc(index_def->n_fields * sizeof *column_names);
+
+ for (i = 0; i < index_def->n_fields; ++i) {
+ column_names[i] = index_def->fields[i].field_name;
+ }
+
+ index = dict_table_get_index_by_max_id(
+ table, index_def->name, column_names, index_def->n_fields);
+
+ mem_free((void*) column_names);
+
+ return(index);
+}
+
+/************************************************************************
+Read a merge block from the file system. */
+static
+ibool
+row_merge_read(
+/*===========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ int fd, /* in: file descriptor */
+ ulint offset, /* in: offset where to read */
+ row_merge_block_t* buf) /* out: data */
+{
+ ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof *buf;
+ ibool success;
+
+ success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
+ (ulint) (ofs & 0xFFFFFFFF),
+ (ulint) (ofs >> 32),
+ sizeof *buf);
+ if (UNIV_UNLIKELY(!success)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: failed to read merge block at %llu\n", ofs);
+ }
+
+ return(UNIV_LIKELY(success));
+}
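+
+/* Example of the offset arithmetic above (illustrative): with the
+1 MiB row_merge_block_t, block offset 4100 maps to byte offset
+4100 << 20 = 0x100400000, which exceeds 32 bits; hence the ib_uint64_t
+arithmetic and the split into the low word 0x00400000 and the high
+word 0x1 expected by the file routines. */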
+
+/************************************************************************
+Write a merge block to the file system. */
+static
+ibool
+row_merge_write(
+/*============*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ int fd, /* in: file descriptor */
+ ulint offset, /* in: offset where to write */
+ const void* buf) /* in: data */
+{
+ ib_uint64_t ofs = ((ib_uint64_t) offset)
+ * sizeof(row_merge_block_t);
+
+ return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
+ (ulint) (ofs & 0xFFFFFFFF),
+ (ulint) (ofs >> 32),
+ sizeof(row_merge_block_t))));
+}
+
+/************************************************************************
+Read a merge record. */
+static
+const byte*
+row_merge_read_rec(
+/*===============*/
+ /* out: pointer to next record,
+ or NULL on I/O error
+ or end of list */
+ row_merge_block_t* block, /* in/out: file buffer */
+ mrec_buf_t* buf, /* in/out: secondary buffer */
+ const byte* b, /* in: pointer to record */
+ const dict_index_t* index, /* in: index of the record */
+ int fd, /* in: file descriptor */
+ ulint* foffs, /* in/out: file offset */
+ const mrec_t** mrec, /* out: pointer to merge record,
+ or NULL on end of list
+ (non-NULL on I/O error) */
+ ulint* offsets)/* out: offsets of mrec */
+{
+ ulint extra_size;
+ ulint data_size;
+ ulint avail_size;
+
+ ut_ad(block);
+ ut_ad(buf);
+ ut_ad(b >= block[0]);
+ ut_ad(b < block[1]);
+ ut_ad(index);
+ ut_ad(foffs);
+ ut_ad(mrec);
+ ut_ad(offsets);
+
+ ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index));
+
+ extra_size = *b++;
+
+ if (UNIV_UNLIKELY(!extra_size)) {
+ /* End of list */
+ *mrec = NULL;
+#ifdef UNIV_DEBUG
+ if (row_merge_print_read) {
+ fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
+ (const void*) b, (const void*) block,
+ fd, (ulong) *foffs);
+ }
+#endif /* UNIV_DEBUG */
+ return(NULL);
+ }
+
+ if (extra_size >= 0x80) {
+ /* Read another byte of extra_size. */
+
+ if (UNIV_UNLIKELY(b >= block[1])) {
+ if (!row_merge_read(fd, ++(*foffs), block)) {
+err_exit:
+ /* Signal I/O error. */
+ *mrec = b;
+ return(NULL);
+ }
+
+ /* Wrap around to the beginning of the buffer. */
+ b = block[0];
+ }
+
+ extra_size = (extra_size & 0x7f) << 8;
+ extra_size |= *b++;
+ }
+
+ /* Normalize extra_size. Above, value 0 signals "end of list". */
+ extra_size--;
+
+ /* Read the extra bytes. */
+
+ if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
+ /* The record spans two blocks. Copy the entire record
+ to the auxiliary buffer and handle this as a special
+ case. */
+
+ avail_size = block[1] - b;
+
+ memcpy(*buf, b, avail_size);
+
+ if (!row_merge_read(fd, ++(*foffs), block)) {
+
+ goto err_exit;
+ }
+
+ /* Wrap around to the beginning of the buffer. */
+ b = block[0];
+
+ /* Copy the record. */
+ memcpy(*buf + avail_size, b, extra_size - avail_size);
+ b += extra_size - avail_size;
+
+ *mrec = *buf + extra_size;
+
+ rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+
+ data_size = rec_offs_data_size(offsets);
+
+ /* These overflows should be impossible given that
+ records are much smaller than either buffer, and
+ the record starts near the beginning of each buffer. */
+ ut_a(extra_size + data_size < sizeof *buf);
+ ut_a(b + data_size < block[1]);
+
+ /* Copy the data bytes. */
+ memcpy(*buf + extra_size, b, data_size);
+ b += data_size;
+
+ goto func_exit;
+ }
+
+ *mrec = b + extra_size;
+
+ rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+
+ data_size = rec_offs_data_size(offsets);
+ ut_ad(extra_size + data_size < sizeof *buf);
+
+ b += extra_size + data_size;
+
+ if (UNIV_LIKELY(b < block[1])) {
+ /* The record fits entirely in the block.
+ This is the normal case. */
+ goto func_exit;
+ }
+
+ /* The record spans two blocks. Copy it to buf. */
+
+ b -= extra_size + data_size;
+ avail_size = block[1] - b;
+ memcpy(*buf, b, avail_size);
+ *mrec = *buf + extra_size;
+ rec_offs_make_valid(*mrec, index, offsets);
+
+ if (!row_merge_read(fd, ++(*foffs), block)) {
+
+ goto err_exit;
+ }
+
+ /* Wrap around to the beginning of the buffer. */
+ b = block[0];
+
+ /* Copy the rest of the record. */
+ memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
+ b += extra_size + data_size - avail_size;
+
+func_exit:
+#ifdef UNIV_DEBUG
+ if (row_merge_print_read) {
+ fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
+ (const void*) b, (const void*) block,
+ fd, (ulong) *foffs);
+ rec_print_comp(stderr, *mrec, offsets);
+ putc('\n', stderr);
+ }
+#endif /* UNIV_DEBUG */
+
+ return(b);
+}
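+
+/* Example of the split-record case above (illustrative): if a record
+has extra_size = 20 and data_size = 100 but only 30 of those 120
+bytes fit before the end of the current block, the available head is
+copied to *buf, the next block is read from the file, and the
+remaining 90 bytes are copied after the head, so that *mrec always
+points to a contiguous copy of the record. */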
+
+/************************************************************************
+Write a merge record. */
+static
+void
+row_merge_write_rec_low(
+/*====================*/
+ byte* b, /* out: buffer */
+ ulint e, /* in: encoded extra_size */
+#ifdef UNIV_DEBUG
+ ulint size, /* in: total size to write */
+ int fd, /* in: file descriptor */
+ ulint foffs, /* in: file offset */
+#endif /* UNIV_DEBUG */
+ const mrec_t* mrec, /* in: record to write */
+ const ulint* offsets)/* in: offsets of mrec */
+#ifndef UNIV_DEBUG
+# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets) \
+ row_merge_write_rec_low(b, e, mrec, offsets)
+#endif /* !UNIV_DEBUG */
+{
+#ifdef UNIV_DEBUG
+ const byte* const end = b + size;
+ ut_ad(e == rec_offs_extra_size(offsets) + 1);
+
+ if (row_merge_print_write) {
+ fprintf(stderr, "row_merge_write %p,%d,%lu ",
+ (void*) b, fd, (ulong) foffs);
+ rec_print_comp(stderr, mrec, offsets);
+ putc('\n', stderr);
+ }
+#endif /* UNIV_DEBUG */
+
+ if (e < 0x80) {
+ *b++ = (byte) e;
+ } else {
+ *b++ = (byte) (0x80 | (e >> 8));
+ *b++ = (byte) e;
+ }
+
+ memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
+ ut_ad(b + rec_offs_size(offsets) == end);
+}
+
+/************************************************************************
+Write a merge record. */
+static
+byte*
+row_merge_write_rec(
+/*================*/
+ /* out: pointer to end of block,
+ or NULL on error */
+ row_merge_block_t* block, /* in/out: file buffer */
+ mrec_buf_t* buf, /* in/out: secondary buffer */
+ byte* b, /* in: pointer to end of block */
+ int fd, /* in: file descriptor */
+ ulint* foffs, /* in/out: file offset */
+ const mrec_t* mrec, /* in: record to write */
+ const ulint* offsets)/* in: offsets of mrec */
+{
+ ulint extra_size;
+ ulint size;
+ ulint avail_size;
+
+ ut_ad(block);
+ ut_ad(buf);
+ ut_ad(b >= block[0]);
+ ut_ad(b < block[1]);
+ ut_ad(mrec);
+ ut_ad(foffs);
+ ut_ad(mrec < block[0] || mrec > block[1]);
+ ut_ad(mrec < buf[0] || mrec > buf[1]);
+
+ /* Normalize extra_size. Value 0 signals "end of list". */
+ extra_size = rec_offs_extra_size(offsets) + 1;
+
+ size = extra_size + (extra_size >= 0x80)
+ + rec_offs_data_size(offsets);
+
+ if (UNIV_UNLIKELY(b + size >= block[1])) {
+ /* The record spans two blocks.
+ Copy it to the temporary buffer first. */
+ avail_size = block[1] - b;
+
+ row_merge_write_rec_low(buf[0],
+ extra_size, size, fd, *foffs,
+ mrec, offsets);
+
+ /* Copy the head of the temporary buffer, write
+ the completed block, and copy the tail of the
+ record to the head of the new block. */
+ memcpy(b, buf[0], avail_size);
+
+ if (!row_merge_write(fd, (*foffs)++, block)) {
+ return(NULL);
+ }
+
+ UNIV_MEM_INVALID(block[0], sizeof block[0]);
+
+ /* Copy the rest. */
+ b = block[0];
+ memcpy(b, buf[0] + avail_size, size - avail_size);
+ b += size - avail_size;
+ } else {
+ row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
+ mrec, offsets);
+ b += size;
+ }
+
+ return(b);
+}
+
+/************************************************************************
+Write an end-of-list marker. */
+static
+byte*
+row_merge_write_eof(
+/*================*/
+ /* out: pointer to end of block,
+ or NULL on error */
+ row_merge_block_t* block, /* in/out: file buffer */
+ byte* b, /* in: pointer to end of block */
+ int fd, /* in: file descriptor */
+ ulint* foffs) /* in/out: file offset */
+{
+ ut_ad(block);
+ ut_ad(b >= block[0]);
+ ut_ad(b < block[1]);
+ ut_ad(foffs);
+#ifdef UNIV_DEBUG
+ if (row_merge_print_write) {
+ fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
+ (void*) b, (void*) block, fd, (ulong) *foffs);
+ }
+#endif /* UNIV_DEBUG */
+
+ *b++ = 0;
+ UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
+ UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
+#ifdef UNIV_DEBUG_VALGRIND
+ /* The rest of the block is uninitialized. Initialize it
+ to avoid bogus warnings. */
+ memset(b, 0xff, block[1] - b);
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ if (!row_merge_write(fd, (*foffs)++, block)) {
+ return(NULL);
+ }
+
+ UNIV_MEM_INVALID(block[0], sizeof block[0]);
+ return(block[0]);
+}
+
+/*****************************************************************
+Compare two merge records. */
+static
+int
+row_merge_cmp(
+/*==========*/
+ /* out: 1, 0, -1 if
+ mrec1 is greater, equal, less,
+ respectively, than mrec2 */
+ const mrec_t* mrec1, /* in: first merge
+ record to be compared */
+ const mrec_t* mrec2, /* in: second merge
+ record to be compared */
+ const ulint* offsets1, /* in: first record offsets */
+ const ulint* offsets2, /* in: second record offsets */
+ const dict_index_t* index) /* in: index */
+{
+ int cmp;
+
+ cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
+
+#ifdef UNIV_DEBUG
+ if (row_merge_print_cmp) {
+ fputs("row_merge_cmp1 ", stderr);
+ rec_print_comp(stderr, mrec1, offsets1);
+ fputs("\nrow_merge_cmp2 ", stderr);
+ rec_print_comp(stderr, mrec2, offsets2);
+ fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
+ }
+#endif /* UNIV_DEBUG */
+
+ return(cmp);
+}
+
+/************************************************************************
+Read the clustered index of the table and create temporary files
+containing the index entries for the indexes to be built. */
+static
+ulint
+row_merge_read_clustered_index(
+/*===========================*/
+ /* out: DB_SUCCESS or error */
+ trx_t* trx, /* in: transaction */
+ TABLE* table, /* in/out: MySQL table object,
+ for reporting erroneous records */
+ const dict_table_t* old_table,/* in: table where rows are
+ read from */
+ const dict_table_t* new_table,/* in: table where indexes are
+ created; identical to old_table
+ unless creating a PRIMARY KEY */
+ dict_index_t** index, /* in: indexes to be created */
+ merge_file_t* files, /* in: temporary files */
+ ulint n_index,/* in: number of indexes to create */
+ row_merge_block_t* block) /* in/out: file buffer */
+{
+ dict_index_t* clust_index; /* Clustered index */
+ mem_heap_t* row_heap; /* Heap memory to create
+ clustered index records */
+ row_merge_buf_t** merge_buf; /* Temporary list for records*/
+ btr_pcur_t pcur; /* Persistent cursor on the
+ clustered index */
+ mtr_t mtr; /* Mini transaction */
+ ulint err = DB_SUCCESS;/* Return code */
+ ulint i;
+ ulint n_nonnull = 0; /* number of columns
+ changed to NOT NULL */
+ ulint* nonnull = NULL; /* NOT NULL columns */
+
+ trx->op_info = "reading clustered index";
+
+ ut_ad(trx);
+ ut_ad(old_table);
+ ut_ad(new_table);
+ ut_ad(index);
+ ut_ad(files);
+
+ /* Create and initialize memory for record buffers */
+
+ merge_buf = mem_alloc(n_index * sizeof *merge_buf);
+
+ for (i = 0; i < n_index; i++) {
+ merge_buf[i] = row_merge_buf_create(index[i]);
+ }
+
+ mtr_start(&mtr);
+
+ /* Find the clustered index and create a persistent cursor
+ based on that. */
+
+ clust_index = dict_table_get_first_index(old_table);
+
+ btr_pcur_open_at_index_side(
+ TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+ if (UNIV_UNLIKELY(old_table != new_table)) {
+ ulint n_cols = dict_table_get_n_cols(old_table);
+
+ /* A primary key will be created. Identify the
+ columns that were flagged NOT NULL in the new table,
+ so that we can quickly check that the records in the
+ (old) clustered index do not violate the added NOT
+ NULL constraints. */
+
+ ut_a(n_cols == dict_table_get_n_cols(new_table));
+
+ nonnull = mem_alloc(n_cols * sizeof *nonnull);
+
+ for (i = 0; i < n_cols; i++) {
+ if (dict_table_get_nth_col(old_table, i)->prtype
+ & DATA_NOT_NULL) {
+
+ continue;
+ }
+
+ if (dict_table_get_nth_col(new_table, i)->prtype
+ & DATA_NOT_NULL) {
+
+ nonnull[n_nonnull++] = i;
+ }
+ }
+
+ if (!n_nonnull) {
+ mem_free(nonnull);
+ nonnull = NULL;
+ }
+ }
+
+ row_heap = mem_heap_create(sizeof(mrec_buf_t));
+
+ /* Scan the clustered index. */
+ for (;;) {
+ const rec_t* rec;
+ ulint* offsets;
+ dtuple_t* row = NULL;
+ row_ext_t* ext;
+ ibool has_next = TRUE;
+
+ btr_pcur_move_to_next_on_page(&pcur);
+
+ /* When switching pages, commit the mini-transaction
+ in order to release the latch on the old page. */
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ btr_pcur_store_position(&pcur, &mtr);
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ if (UNIV_LIKELY(has_next)) {
+ rec = btr_pcur_get_rec(&pcur);
+ offsets = rec_get_offsets(rec, clust_index, NULL,
+ ULINT_UNDEFINED, &row_heap);
+
+ /* Skip delete marked records. */
+ if (rec_get_deleted_flag(
+ rec, dict_table_is_comp(old_table))) {
+ continue;
+ }
+
+ srv_n_rows_inserted++;
+
+ /* Build a row based on the clustered index. */
+
+ row = row_build(ROW_COPY_POINTERS, clust_index,
+ rec, offsets,
+ new_table, &ext, row_heap);
+
+ if (UNIV_LIKELY_NULL(nonnull)) {
+ for (i = 0; i < n_nonnull; i++) {
+ dfield_t* field
+ = &row->fields[nonnull[i]];
+ dtype_t* field_type
+ = dfield_get_type(field);
+
+ ut_a(!(field_type->prtype
+ & DATA_NOT_NULL));
+
+ if (dfield_is_null(field)) {
+ err = DB_PRIMARY_KEY_IS_NULL;
+ i = 0;
+ goto err_exit;
+ }
+
+ field_type->prtype |= DATA_NOT_NULL;
+ }
+ }
+ }
+
+ /* Build all entries for all the indexes to be created
+ in a single scan of the clustered index. */
+
+ for (i = 0; i < n_index; i++) {
+ row_merge_buf_t* buf = merge_buf[i];
+ merge_file_t* file = &files[i];
+ const dict_index_t* index = buf->index;
+
+ if (UNIV_LIKELY
+ (row && row_merge_buf_add(buf, row, ext))) {
+ continue;
+ }
+
+ /* The buffer must be sufficiently large
+ to hold at least one record. */
+ ut_ad(buf->n_tuples || !has_next);
+
+ /* We have enough data tuples to form a block.
+ Sort them and write to disk. */
+
+ if (buf->n_tuples) {
+ if (dict_index_is_unique(index)) {
+ row_merge_dup_t dup;
+ dup.index = buf->index;
+ dup.table = table;
+ dup.n_dup = 0;
+
+ row_merge_buf_sort(buf, &dup);
+
+ if (dup.n_dup) {
+ err = DB_DUPLICATE_KEY;
+err_exit:
+ trx->error_key_num = i;
+ goto func_exit;
+ }
+ } else {
+ row_merge_buf_sort(buf, NULL);
+ }
+ }
+
+ row_merge_buf_write(buf, file, block);
+
+ if (!row_merge_write(file->fd, file->offset++,
+ block)) {
+ err = DB_OUT_OF_FILE_SPACE;
+ goto err_exit;
+ }
+
+ UNIV_MEM_INVALID(block[0], sizeof block[0]);
+ merge_buf[i] = row_merge_buf_empty(buf);
+
+ /* Try writing the record again, now that
+ the buffer has been written out and emptied. */
+
+ if (UNIV_UNLIKELY
+ (row && !row_merge_buf_add(buf, row, ext))) {
+ /* An empty buffer should have enough
+ room for at least one record. */
+ ut_error;
+ }
+ }
+
+ mem_heap_empty(row_heap);
+
+ if (UNIV_UNLIKELY(!has_next)) {
+ goto func_exit;
+ }
+ }
+
+func_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(row_heap);
+
+ if (UNIV_LIKELY_NULL(nonnull)) {
+ mem_free(nonnull);
+ }
+
+ for (i = 0; i < n_index; i++) {
+ row_merge_buf_free(merge_buf[i]);
+ }
+
+ mem_free(merge_buf);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*****************************************************************
+Merge two blocks of linked lists on disk and write a bigger block. */
+static
+ulint
+row_merge_blocks(
+/*=============*/
+ /* out: DB_SUCCESS or error code */
+ const dict_index_t* index, /* in: index being created */
+ merge_file_t* file, /* in/out: file containing
+ index entries */
+ row_merge_block_t* block, /* in/out: 3 buffers */
+ ulint* foffs0, /* in/out: offset of first
+ source list in the file */
+ ulint* foffs1, /* in/out: offset of second
+ source list in the file */
+ merge_file_t* of, /* in/out: output file */
+ TABLE* table) /* in/out: MySQL table, for
+ reporting erroneous key value
+ if applicable */
+{
+ mem_heap_t* heap; /* memory heap for offsets0, offsets1 */
+
+ mrec_buf_t buf[3]; /* buffer for handling split mrec in block[] */
+ const byte* b0; /* pointer to block[0] */
+ const byte* b1; /* pointer to block[1] */
+ byte* b2; /* pointer to block[2] */
+ const mrec_t* mrec0; /* merge rec, points to block[0] or buf[0] */
+ const mrec_t* mrec1; /* merge rec, points to block[1] or buf[1] */
+ ulint* offsets0;/* offsets of mrec0 */
+ ulint* offsets1;/* offsets of mrec1 */
+
+ heap = row_merge_heap_create(index, &offsets0, &offsets1);
+
+ /* Write a record and read the next record. Split the output
+ file in two halves, which can be merged on the following pass. */
+#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END) \
+ do { \
+ b2 = row_merge_write_rec(&block[2], &buf[2], b2, \
+ of->fd, &of->offset, \
+ mrec##N, offsets##N); \
+ if (UNIV_UNLIKELY(!b2)) { \
+ goto corrupt; \
+ } \
+ b##N = row_merge_read_rec(&block[N], &buf[N], \
+ b##N, index, \
+ file->fd, foffs##N, \
+ &mrec##N, offsets##N); \
+ if (UNIV_UNLIKELY(!b##N)) { \
+ if (mrec##N) { \
+ goto corrupt; \
+ } \
+ AT_END; \
+ } \
+ } while (0)
+
+ if (!row_merge_read(file->fd, *foffs0, &block[0])
+ || !row_merge_read(file->fd, *foffs1, &block[1])) {
+corrupt:
+ mem_heap_free(heap);
+ return(DB_CORRUPTION);
+ }
+
+ b0 = block[0];
+ b1 = block[1];
+ b2 = block[2];
+
+ b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
+ foffs0, &mrec0, offsets0);
+ b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
+ foffs1, &mrec1, offsets1);
+ if (UNIV_UNLIKELY(!b0 && mrec0)
+ || UNIV_UNLIKELY(!b1 && mrec1)) {
+
+ goto corrupt;
+ }
+
+ while (mrec0 && mrec1) {
+ switch (row_merge_cmp(mrec0, mrec1,
+ offsets0, offsets1, index)) {
+ case 0:
+ if (UNIV_UNLIKELY
+ (dict_index_is_unique(index))) {
+ innobase_rec_to_mysql(table, mrec0,
+ index, offsets0);
+ mem_heap_free(heap);
+ return(DB_DUPLICATE_KEY);
+ }
+ /* fall through */
+ case -1:
+ ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
+ break;
+ case 1:
+ ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
+ break;
+ default:
+ ut_error;
+ }
+
+ }
+
+merged:
+ if (mrec0) {
+ /* append all mrec0 to output */
+ for (;;) {
+ ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ }
+ }
+done0:
+ if (mrec1) {
+ /* append all mrec1 to output */
+ for (;;) {
+ ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
+ }
+ }
+done1:
+
+ mem_heap_free(heap);
+ b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
+ return(b2 ? DB_SUCCESS : DB_CORRUPTION);
+}
+
+/*****************************************************************
+Perform one merge pass over a file: merge runs from its first half
+with runs from its second half into an output file. */
+static
+ulint
+row_merge(
+/*======*/
+ /* out: DB_SUCCESS or error code */
+ const dict_index_t* index, /* in: index being created */
+ merge_file_t* file, /* in/out: file containing
+ index entries */
+ ulint half, /* in: half the file */
+ row_merge_block_t* block, /* in/out: 3 buffers */
+ int* tmpfd, /* in/out: temporary file handle */
+ TABLE* table) /* in/out: MySQL table, for
+ reporting erroneous key value
+ if applicable */
+{
+ ulint foffs0; /* first input offset */
+ ulint foffs1; /* second input offset */
+ ulint error; /* error code */
+ merge_file_t of; /* output file */
+
+ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
+ ut_ad(half > 0);
+
+ of.fd = *tmpfd;
+ of.offset = 0;
+
+ /* Merge blocks to the output file. */
+ foffs0 = 0;
+ foffs1 = half;
+
+ for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
+ error = row_merge_blocks(index, file, block,
+ &foffs0, &foffs1, &of, table);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+ }
+
+ /* Copy the last block, if there is one. */
+ while (foffs0 < half) {
+ if (!row_merge_read(file->fd, foffs0++, block)
+ || !row_merge_write(of.fd, of.offset++, block)) {
+ return(DB_CORRUPTION);
+ }
+ }
+ while (foffs1 < file->offset) {
+ if (!row_merge_read(file->fd, foffs1++, block)
+ || !row_merge_write(of.fd, of.offset++, block)) {
+ return(DB_CORRUPTION);
+ }
+ }
+
+ /* Swap file descriptors for the next pass. */
+ *tmpfd = file->fd;
+ *file = of;
+
+ UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
+
+ return(DB_SUCCESS);
+}
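+
+/* Note on the descriptor swap above (illustrative): pass 1 reads from
+file->fd and writes to *tmpfd; after the swap, pass 2 reads what pass
+1 wrote and reuses the old input file as scratch space, so only two
+temporary files are needed regardless of the number of passes. */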
+
+/*****************************************************************
+Merge-sort a file: perform merge passes with doubling run size until
+the entire file is a single sorted run. */
+static
+ulint
+row_merge_sort(
+/*===========*/
+ /* out: DB_SUCCESS or error code */
+ const dict_index_t* index, /* in: index being created */
+ merge_file_t* file, /* in/out: file containing
+ index entries */
+ row_merge_block_t* block, /* in/out: 3 buffers */
+ int* tmpfd, /* in/out: temporary file handle */
+ TABLE* table) /* in/out: MySQL table, for
+ reporting erroneous key value
+ if applicable */
+{
+ ulint blksz; /* block size */
+
+ for (blksz = 1; blksz < file->offset; blksz *= 2) {
+ ulint half;
+ ulint error;
+
+ ut_ad(ut_is_2pow(blksz));
+ half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
+ error = row_merge(index, file, half, block, tmpfd, table);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
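+
+/* Worked example of the pass arithmetic above (illustrative), using
+the identity ut_2pow_round(n, m) == n & ~(m - 1): with file->offset
+= 7 blocks and blksz = 2, half = ut_2pow_round((7 + 1) / 2, 2)
+= ut_2pow_round(4, 2) = 4, so the lists starting at block 0 are
+merged with the lists starting at block 4 and the split point stays
+aligned to the current list size. */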
+
+/*****************************************************************
+Copy externally stored columns to the data tuple. */
+static
+void
+row_merge_copy_blobs(
+/*=================*/
+ const mrec_t* mrec, /* in: merge record */
+ const ulint* offsets,/* in: offsets of mrec */
+ ulint zip_size,/* in: compressed page size in bytes, or 0 */
+ dtuple_t* tuple, /* in/out: data tuple */
+ mem_heap_t* heap) /* in/out: memory heap */
+{
+ ulint i;
+ ulint n_fields = dtuple_get_n_fields(tuple);
+
+ for (i = 0; i < n_fields; i++) {
+ ulint len;
+ const void* data;
+ dfield_t* field = dtuple_get_nth_field(tuple, i);
+
+ if (!dfield_is_ext(field)) {
+ continue;
+ }
+
+ ut_ad(!dfield_is_null(field));
+
+ /* The table is locked during index creation.
+ Therefore, externally stored columns cannot possibly
+ be freed between the time the BLOB pointers are read
+ (row_merge_read_clustered_index()) and dereferenced
+ (below). */
+ data = btr_rec_copy_externally_stored_field(
+ mrec, offsets, zip_size, i, &len, heap);
+
+ dfield_set_data(field, data, len);
+ }
+}
+
+/************************************************************************
+Read the sorted file containing index data tuples and insert these
+data tuples into the index */
+static
+ulint
+row_merge_insert_index_tuples(
+/*==========================*/
+ /* out: DB_SUCCESS or error number */
+ trx_t* trx, /* in: transaction */
+ dict_index_t* index, /* in: index */
+ dict_table_t* table, /* in: new table */
+ ulint zip_size,/* in: compressed page size of
+ the old table, or 0 if uncompressed */
+ int fd, /* in: file descriptor */
+ row_merge_block_t* block) /* in/out: file buffer */
+{
+ mrec_buf_t buf;
+ const byte* b;
+ que_thr_t* thr;
+ ins_node_t* node;
+ mem_heap_t* tuple_heap;
+ mem_heap_t* graph_heap;
+ ulint error = DB_SUCCESS;
+ ulint foffs = 0;
+ ulint* offsets;
+
+ ut_ad(trx);
+ ut_ad(index);
+ ut_ad(table);
+
+ /* We use the insert query graph as the dummy graph
+ needed in the row module call */
+
+ trx->op_info = "inserting index entries";
+
+ graph_heap = mem_heap_create(500);
+ node = ins_node_create(INS_DIRECT, table, graph_heap);
+
+ thr = pars_complete_graph_for_exec(node, trx, graph_heap);
+
+ que_thr_move_to_run_state_for_mysql(thr, trx);
+
+ tuple_heap = mem_heap_create(1000);
+
+ {
+ ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index);
+ offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
+ offsets[0] = i;
+ offsets[1] = dict_index_get_n_fields(index);
+ }
+
+ b = *block;
+
+ if (!row_merge_read(fd, foffs, block)) {
+ error = DB_CORRUPTION;
+ } else {
+ for (;;) {
+ const mrec_t* mrec;
+ dtuple_t* dtuple;
+ ulint n_ext;
+
+ b = row_merge_read_rec(block, &buf, b, index,
+ fd, &foffs, &mrec, offsets);
+ if (UNIV_UNLIKELY(!b)) {
+ /* End of list, or I/O error */
+ if (mrec) {
+ error = DB_CORRUPTION;
+ }
+ break;
+ }
+
+ dtuple = row_rec_to_index_entry_low(
+ mrec, index, offsets, &n_ext, tuple_heap);
+
+ if (UNIV_UNLIKELY(n_ext)) {
+ row_merge_copy_blobs(mrec, offsets, zip_size,
+ dtuple, tuple_heap);
+ }
+
+ node->row = dtuple;
+ node->table = table;
+ node->trx_id = trx->id;
+
+ ut_ad(dtuple_validate(dtuple));
+
+ do {
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ error = row_ins_index_entry(index, dtuple,
+ 0, FALSE, thr);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+
+ goto next_rec;
+ }
+
+ thr->lock_state = QUE_THR_LOCK_ROW;
+ trx->error_state = error;
+ que_thr_stop_for_mysql(thr);
+ thr->lock_state = QUE_THR_LOCK_NOLOCK;
+ } while (row_mysql_handle_errors(&error, trx,
+ thr, NULL));
+
+ goto err_exit;
+next_rec:
+ mem_heap_empty(tuple_heap);
+ }
+ }
+
+ que_thr_stop_for_mysql_no_error(thr, trx);
+err_exit:
+ que_graph_free(thr->graph);
+
+ trx->op_info = "";
+
+ mem_heap_free(tuple_heap);
+
+ return(error);
+}
+
+/*************************************************************************
+Set a lock on a table (LOCK_X or LOCK_S), for the duration of
+creating indexes. */
+UNIV_INTERN
+ulint
+row_merge_lock_table(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in/out: transaction */
+ dict_table_t* table, /* in: table to lock */
+ enum lock_mode mode) /* in: LOCK_X or LOCK_S */
+{
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ ulint err;
+ sel_node_t* node;
+
+ ut_ad(trx);
+ ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
+
+ heap = mem_heap_create(512);
+
+ trx->op_info = "setting table lock for creating or dropping index";
+
+ node = sel_node_create(heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap);
+ thr->graph->state = QUE_FORK_ACTIVE;
+
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
+
+ thr = que_fork_get_first_thr(que_node_get_parent(thr));
+ que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ err = lock_table(0, table, mode, thr);
+
+ trx->error_state = err;
+
+ if (UNIV_LIKELY(err == DB_SUCCESS)) {
+ que_thr_stop_for_mysql_no_error(thr, trx);
+ } else {
+ que_thr_stop_for_mysql(thr);
+
+ if (err != DB_QUE_THR_SUSPENDED) {
+ ibool was_lock_wait;
+
+ was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, NULL);
+
+ if (was_lock_wait) {
+ goto run_again;
+ }
+ } else {
+ que_thr_t* run_thr;
+ que_node_t* parent;
+
+ parent = que_node_get_parent(thr);
+ run_thr = que_fork_start_command(parent);
+
+ ut_a(run_thr == thr);
+
+ /* There was a lock wait but the thread was not
+ in a ready to run or running state. */
+ trx->error_state = DB_LOCK_WAIT;
+
+ goto run_again;
+ }
+ }
+
+ que_graph_free(thr->graph);
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*************************************************************************
+Drop an index from the InnoDB system tables. The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_index(
+/*=================*/
+ dict_index_t* index, /* in: index to be removed */
+ dict_table_t* table, /* in: table */
+ trx_t* trx) /* in: transaction handle */
+{
+ ulint err;
+ pars_info_t* info = pars_info_create();
+
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in deleting the dictionary data from system
+ tables in Innobase. Deleting a row from SYS_INDEXES table also
+ frees the file segments of the B-tree associated with the index. */
+
+ static const char str1[] =
+ "PROCEDURE DROP_INDEX_PROC () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
+ "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
+ " AND TABLE_ID = :tableid;\n"
+ "END;\n";
+
+ ut_ad(index && table && trx);
+
+ pars_info_add_dulint_literal(info, "indexid", index->id);
+ pars_info_add_dulint_literal(info, "tableid", table->id);
+
+ trx_start_if_not_started(trx);
+ trx->op_info = "dropping index";
+
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+ err = que_eval_sql(info, str1, FALSE, trx);
+
+ ut_a(err == DB_SUCCESS);
+
+ /* Replace this index with another equivalent index for all
+ foreign key constraints on this table where this index is used */
+
+ dict_table_replace_index_in_foreign_list(table, index);
+ dict_index_remove_from_cache(table, index);
+
+ trx->op_info = "";
+}
+
+/*************************************************************************
+Drop those indexes which were created before an error occurred when
+building an index. The data dictionary must have been locked
+exclusively by the caller, because the transaction will not be
+committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes(
+/*===================*/
+ trx_t* trx, /* in: transaction */
+ dict_table_t* table, /* in: table containing the indexes */
+ dict_index_t** index, /* in: indexes to drop */
+ ulint num_created) /* in: number of elements in index[] */
+{
+ ulint key_num;
+
+ for (key_num = 0; key_num < num_created; key_num++) {
+ row_merge_drop_index(index[key_num], table, trx);
+ }
+}
+
+/*************************************************************************
+Drop all partially created indexes during crash recovery. */
+UNIV_INTERN
+void
+row_merge_drop_temp_indexes(void)
+/*=============================*/
+{
+ trx_t* trx;
+ ulint err;
+
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in deleting the dictionary data from system
+ tables in Innobase. Deleting a row from SYS_INDEXES table also
+ frees the file segments of the B-tree associated with the index. */
+#if TEMP_INDEX_PREFIX != '\377'
+# error "TEMP_INDEX_PREFIX != '\377'"
+#endif
+ static const char drop_temp_indexes[] =
+ "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
+ "indexid CHAR;\n"
+ "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
+ "WHERE SUBSTR(NAME,0,1)='\377';\n"
+ "BEGIN\n"
+ "\tOPEN c;\n"
+ "\tWHILE 1=1 LOOP\n"
+ "\t\tFETCH c INTO indexid;\n"
+ "\t\tIF (SQL % NOTFOUND) THEN\n"
+ "\t\t\tEXIT;\n"
+ "\t\tEND IF;\n"
+ "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
+ "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
+ "\tEND LOOP;\n"
+ "\tCLOSE c;\n"
+ "\tCOMMIT WORK;\n"
+ "END;\n";
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "dropping partially created indexes";
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Incomplete transactions may be holding some locks on the
+ data dictionary tables. However, they should never have been
+ able to lock the records corresponding to the partially
+ created indexes that we are attempting to delete, because the
+ table was locked when the indexes were being created. We will
+ drop the partially created indexes before the rollback of
+ incomplete transactions is initiated. Thus, this should not
+ interfere with the incomplete transactions. */
+ trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+ err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
+ ut_a(err == DB_SUCCESS);
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_background(trx);
+}
+
+/*************************************************************************
+Create a merge file. */
+static
+void
+row_merge_file_create(
+/*==================*/
+ merge_file_t* merge_file) /* out: merge file structure */
+{
+ merge_file->fd = innobase_mysql_tmpfile();
+ merge_file->offset = 0;
+}
+
+/*************************************************************************
+Destroy a merge file. */
+static
+void
+row_merge_file_destroy(
+/*===================*/
+ merge_file_t* merge_file) /* in/out: merge file structure */
+{
+ if (merge_file->fd != -1) {
+ close(merge_file->fd);
+ merge_file->fd = -1;
+ }
+}
+
+/*************************************************************************
+Determine the precise type of a column that is added to a temporary
+table for creating a primary key, ORing in DATA_NOT_NULL if the
+column must be constrained NOT NULL. */
+UNIV_INLINE
+ulint
+row_merge_col_prtype(
+/*=================*/
+ /* out: col->prtype, possibly
+ ORed with DATA_NOT_NULL */
+ const dict_col_t* col, /* in: column */
+ const char* col_name, /* in: name of the column */
+ const merge_index_def_t*index_def) /* in: the index definition
+ of the primary key */
+{
+ ulint prtype = col->prtype;
+ ulint i;
+
+ ut_ad(index_def->ind_type & DICT_CLUSTERED);
+
+ if (prtype & DATA_NOT_NULL) {
+
+ return(prtype);
+ }
+
+ /* All columns that are included
+ in the PRIMARY KEY must be NOT NULL. */
+
+ for (i = 0; i < index_def->n_fields; i++) {
+ if (!strcmp(col_name, index_def->fields[i].field_name)) {
+ return(prtype | DATA_NOT_NULL);
+ }
+ }
+
+ return(prtype);
+}
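+
+/* Example (illustrative): if the new table is defined with
+PRIMARY KEY(a, b), row_merge_col_prtype() returns
+col->prtype | DATA_NOT_NULL for the columns a and b, and the
+unmodified prtype for every other column. */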
+
+/*************************************************************************
+Create a temporary table for creating a primary key, using the definition
+of an existing table. */
+UNIV_INTERN
+dict_table_t*
+row_merge_create_temporary_table(
+/*=============================*/
+ /* out: table,
+ or NULL on error */
+ const char* table_name, /* in: new table name */
+ const merge_index_def_t*index_def, /* in: the index definition
+ of the primary key */
+ const dict_table_t* table, /* in: old table definition */
+ trx_t* trx) /* in/out: transaction
+ (sets error_state) */
+{
+ ulint i;
+ dict_table_t* new_table = NULL;
+ ulint n_cols = dict_table_get_n_user_cols(table);
+ ulint error;
+ mem_heap_t* heap = mem_heap_create(1000);
+
+ ut_ad(table_name);
+ ut_ad(index_def);
+ ut_ad(table);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);
+
+ for (i = 0; i < n_cols; i++) {
+ const dict_col_t* col;
+ const char* col_name;
+
+ col = dict_table_get_nth_col(table, i);
+ col_name = dict_table_get_col_name(table, i);
+
+ dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
+ row_merge_col_prtype(col, col_name,
+ index_def),
+ col->len);
+ }
+
+ error = row_create_table_for_mysql(new_table, trx);
+ mem_heap_free(heap);
+
+ if (error != DB_SUCCESS) {
+ trx->error_state = error;
+ new_table = NULL;
+ }
+
+ return(new_table);
+}
+
+/*************************************************************************
+Rename the temporary indexes in the dictionary to permanent ones. The
+data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+UNIV_INTERN
+ulint
+row_merge_rename_indexes(
+/*=====================*/
+ /* out: DB_SUCCESS if all OK */
+ trx_t* trx, /* in/out: transaction */
+ dict_table_t* table) /* in/out: table with new indexes */
+{
+ ulint err = DB_SUCCESS;
+ pars_info_t* info = pars_info_create();
+
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in renaming indexes. */
+
+#if TEMP_INDEX_PREFIX != '\377'
+# error "TEMP_INDEX_PREFIX != '\377'"
+#endif
+
+ static const char rename_indexes[] =
+ "PROCEDURE RENAME_INDEXES_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
+ "WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='\377';\n"
+ "END;\n";
+
+ ut_ad(table);
+ ut_ad(trx);
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+ trx->op_info = "renaming indexes";
+
+ pars_info_add_dulint_literal(info, "tableid", table->id);
+
+ err = que_eval_sql(info, rename_indexes, FALSE, trx);
+
+ if (err == DB_SUCCESS) {
+ dict_index_t* index = dict_table_get_first_index(table);
+ do {
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ index->name++;
+ }
+ index = dict_table_get_next_index(index);
+ } while (index);
+ }
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*************************************************************************
+Rename the tables in the data dictionary. The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed. */
+UNIV_INTERN
+ulint
+row_merge_rename_tables(
+/*====================*/
+ /* out: error code or DB_SUCCESS */
+ dict_table_t* old_table, /* in/out: old table, renamed to
+ tmp_name */
+ dict_table_t* new_table, /* in/out: new table, renamed to
+ old_table->name */
+ const char* tmp_name, /* in: new name for old_table */
+ trx_t* trx) /* in: transaction handle */
+{
+ ulint err = DB_ERROR;
+ pars_info_t* info;
+ const char* old_name = old_table->name;
+
+ ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ ut_ad(old_table != new_table);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+ trx->op_info = "renaming tables";
+
+ /* We use the private SQL parser of Innobase to generate the query
+ graphs needed in updating the dictionary data in system tables. */
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "new_name", new_table->name);
+ pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "tmp_name", tmp_name);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_TABLES () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
+ " WHERE NAME = :old_name;\n"
+ "UPDATE SYS_TABLES SET NAME = :old_name\n"
+ " WHERE NAME = :new_name;\n"
+ "END;\n", FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+
+ goto err_exit;
+ }
+
+ /* The following calls will also rename the .ibd data files if
+ the tables are stored in a single-table tablespace */
+
+ if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
+ || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
+
+ err = DB_ERROR;
+ goto err_exit;
+ }
+
+ err = dict_load_foreigns(old_name, TRUE);
+
+ if (err != DB_SUCCESS) {
+err_exit:
+ trx->error_state = DB_SUCCESS;
+ trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx->error_state = DB_SUCCESS;
+ }
+
+ trx->op_info = "";
+
+ return(err);
+}
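+
+/* Example of the rename above, with hypothetical names (illustrative):
+for old_table "test/t", new_table "test/t$new" and tmp_name
+"test/t$old", the two UPDATE statements first move "test/t" to
+"test/t$old" and then "test/t$new" to "test/t", so the rebuilt table
+takes over the original name within a single transaction. */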
+
+/*************************************************************************
+Create and execute a query graph for creating an index. */
+static
+ulint
+row_merge_create_index_graph(
+/*=========================*/
+ /* out: DB_SUCCESS or error code */
+ trx_t* trx, /* in: trx */
+ dict_table_t* table, /* in: table */
+ dict_index_t* index) /* in: index */
+{
+ ind_node_t* node; /* Index creation node */
+ mem_heap_t* heap; /* Memory heap */
+ que_thr_t* thr; /* Query thread */
+ ulint err;
+
+ ut_ad(trx);
+ ut_ad(table);
+ ut_ad(index);
+
+ heap = mem_heap_create(512);
+
+ index->table = table;
+ node = ind_create_graph_create(index, heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap);
+
+ ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
+
+ que_run_threads(thr);
+
+ err = trx->error_state;
+
+ que_graph_free((que_t*) que_node_get_parent(thr));
+
+ return(err);
+}
+
+/*************************************************************************
+Create the index and load it into the data dictionary. */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_index(
+/*===================*/
+ /* out: index, or NULL on error */
+ trx_t* trx, /* in/out: trx (sets error_state) */
+ dict_table_t* table, /* in: the index is on this table */
+	const merge_index_def_t*index_def) /* in: the index definition */
+{
+ dict_index_t* index;
+ ulint err;
+ ulint n_fields = index_def->n_fields;
+ ulint i;
+
+ /* Create the index prototype, using the passed in def, this is not
+ a persistent operation. We pass 0 as the space id, and determine at
+ a lower level the space id where to store the table. */
+
+ index = dict_mem_index_create(table->name, index_def->name,
+ 0, index_def->ind_type, n_fields);
+
+ ut_a(index);
+
+ for (i = 0; i < n_fields; i++) {
+ merge_index_field_t* ifield = &index_def->fields[i];
+
+ dict_mem_index_add_field(index, ifield->field_name,
+ ifield->prefix_len);
+ }
+
+ /* Add the index to SYS_INDEXES, using the index prototype. */
+ err = row_merge_create_index_graph(trx, table, index);
+
+ if (err == DB_SUCCESS) {
+
+ index = row_merge_dict_table_get_index(
+ table, index_def);
+
+ ut_a(index);
+
+#ifdef ROW_MERGE_IS_INDEX_USABLE
+ /* Note the id of the transaction that created this
+ index, we use it to restrict readers from accessing
+ this index, to ensure read consistency. */
+ index->trx_id = trx->id;
+#endif /* ROW_MERGE_IS_INDEX_USABLE */
+ } else {
+ index = NULL;
+ }
+
+ return(index);
+}
+
+#ifdef ROW_MERGE_IS_INDEX_USABLE
+/*************************************************************************
+Check if a transaction can use an index. */
+UNIV_INTERN
+ibool
+row_merge_is_index_usable(
+/*======================*/
+ const trx_t* trx, /* in: transaction */
+ const dict_index_t* index) /* in: index to check */
+{
+ if (!trx->read_view) {
+ return(TRUE);
+ }
+
+ return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
+}
+#endif /* ROW_MERGE_IS_INDEX_USABLE */
+
+/*************************************************************************
+Drop the old table. */
+UNIV_INTERN
+ulint
+row_merge_drop_table(
+/*=================*/
+ /* out: DB_SUCCESS or error code */
+ trx_t* trx, /* in: transaction */
+ dict_table_t* table) /* in: table to drop */
+{
+ /* There must be no open transactions on the table. */
+ ut_a(table->n_mysql_handles_opened == 0);
+
+ return(row_drop_table_for_mysql(table->name, trx, FALSE));
+}
+
+/*************************************************************************
+Build indexes on a table by reading its clustered index, creating
+temporary files containing the index entries, merge sorting those
+entries, and inserting the sorted entries into the indexes. */
+UNIV_INTERN
+ulint
+row_merge_build_indexes(
+/*====================*/
+ /* out: DB_SUCCESS or error code */
+ trx_t* trx, /* in: transaction */
+ dict_table_t* old_table, /* in: table where rows are
+ read from */
+ dict_table_t* new_table, /* in: table where indexes are
+ created; identical to old_table
+ unless creating a PRIMARY KEY */
+ dict_index_t** indexes, /* in: indexes to be created */
+ ulint n_indexes, /* in: size of indexes[] */
+ TABLE* table) /* in/out: MySQL table, for
+ reporting erroneous key value
+ if applicable */
+{
+ merge_file_t* merge_files;
+ row_merge_block_t* block;
+ ulint block_size;
+ ulint i;
+ ulint error;
+ int tmpfd;
+
+ ut_ad(trx);
+ ut_ad(old_table);
+ ut_ad(new_table);
+ ut_ad(indexes);
+ ut_ad(n_indexes);
+
+ trx_start_if_not_started(trx);
+
+ /* Allocate memory for merge file data structure and initialize
+ fields */
+
+ merge_files = mem_alloc(n_indexes * sizeof *merge_files);
+ block_size = 3 * sizeof *block;
+ block = os_mem_alloc_large(&block_size);
+
+ for (i = 0; i < n_indexes; i++) {
+
+ row_merge_file_create(&merge_files[i]);
+ }
+
+ tmpfd = innobase_mysql_tmpfile();
+
+ /* Reset the MySQL row buffer that is used when reporting
+ duplicate keys. */
+ innobase_rec_reset(table);
+
+ /* Read clustered index of the table and create files for
+ secondary index entries for merge sort */
+
+ error = row_merge_read_clustered_index(
+ trx, table, old_table, new_table, indexes,
+ merge_files, n_indexes, block);
+
+ if (error != DB_SUCCESS) {
+
+ goto func_exit;
+ }
+
+ /* Now we have files containing index entries ready for
+ sorting and inserting. */
+
+ for (i = 0; i < n_indexes; i++) {
+ error = row_merge_sort(indexes[i], &merge_files[i],
+ block, &tmpfd, table);
+
+ if (error == DB_SUCCESS) {
+ error = row_merge_insert_index_tuples(
+ trx, indexes[i], new_table,
+ dict_table_zip_size(old_table),
+ merge_files[i].fd, block);
+ }
+
+ /* Close the temporary file to free up space. */
+ row_merge_file_destroy(&merge_files[i]);
+
+ if (error != DB_SUCCESS) {
+ trx->error_key_num = i;
+ goto func_exit;
+ }
+ }
+
+func_exit:
+ close(tmpfd);
+
+ for (i = 0; i < n_indexes; i++) {
+ row_merge_file_destroy(&merge_files[i]);
+ }
+
+ mem_free(merge_files);
+ os_mem_free_large(block, block_size);
+
+ return(error);
+}
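+
+/* Sketch of how the public entry points in this file fit together
+(illustrative; the actual caller is the fast index creation code in
+handler/handler0alter.cc, and error handling is simplified here):
+
+	err = row_merge_lock_table(trx, old_table, LOCK_S);
+
+	for (i = 0; i < n_indexes; i++) {
+		indexes[i] = row_merge_create_index(trx, new_table,
+						    &index_defs[i]);
+	}
+
+	err = row_merge_build_indexes(trx, old_table, new_table,
+				      indexes, n_indexes, table);
+
+	if (err == DB_SUCCESS) {
+		err = row_merge_rename_indexes(trx, new_table);
+	} else {
+		row_merge_drop_indexes(trx, new_table, indexes,
+				       n_indexes);
+	}
+*/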