summaryrefslogtreecommitdiff
path: root/storage/innobase/row
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/row')
-rw-r--r--storage/innobase/row/row0ext.cc32
-rw-r--r--storage/innobase/row/row0ftsort.cc4
-rw-r--r--storage/innobase/row/row0import.cc6
-rw-r--r--storage/innobase/row/row0ins.cc58
-rw-r--r--storage/innobase/row/row0log.cc529
-rw-r--r--storage/innobase/row/row0merge.cc383
-rw-r--r--storage/innobase/row/row0mysql.cc82
-rw-r--r--storage/innobase/row/row0purge.cc104
-rw-r--r--storage/innobase/row/row0quiesce.cc11
-rw-r--r--storage/innobase/row/row0sel.cc12
-rw-r--r--storage/innobase/row/row0uins.cc28
-rw-r--r--storage/innobase/row/row0umod.cc48
-rw-r--r--storage/innobase/row/row0upd.cc4
13 files changed, 715 insertions, 586 deletions
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index f084fa09c5a..32b78391d6a 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,14 +58,28 @@ row_ext_cache_fill(
/* The BLOB pointer is not set: we cannot fetch it */
ext->len[i] = 0;
} else {
- /* Fetch at most ext->max_len of the column.
- The column should be non-empty. However,
- trx_rollback_or_clean_all_recovered() may try to
- access a half-deleted BLOB if the server previously
- crashed during the execution of
- btr_free_externally_stored_field(). */
- ext->len[i] = btr_copy_externally_stored_field_prefix(
- buf, ext->max_len, zip_size, field, f_len);
+ if (ext->max_len == REC_VERSION_56_MAX_INDEX_COL_LEN
+ && f_len > BTR_EXTERN_FIELD_REF_SIZE) {
+			/* In this case, the field is in format B or beyond
+			(refer to the definition of row_ext_t.max_len),
+			and the field is already filled with the prefix;
+			otherwise f_len would be BTR_EXTERN_FIELD_REF_SIZE.
+			So there is no need to re-read the prefix externally;
+			just copy the local prefix to buf. Note that
+			if ext->len[i] is zero, it indicates an error
+			as above. */
+ memcpy(buf, field, f_len - BTR_EXTERN_FIELD_REF_SIZE);
+ ext->len[i] = f_len - BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+ /* Fetch at most ext->max_len of the column.
+ The column should be non-empty. However,
+ trx_rollback_or_clean_all_recovered() may try to
+ access a half-deleted BLOB if the server previously
+ crashed during the execution of
+ btr_free_externally_stored_field(). */
+ ext->len[i] = btr_copy_externally_stored_field_prefix(
+ buf, ext->max_len, zip_size, field, f_len);
+ }
}
}
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 9a6af50e09d..275fedbfb5d 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -96,7 +96,7 @@ row_merge_create_fts_sort_index(
field->prefix_len = 0;
field->col = static_cast<dict_col_t*>(
mem_heap_alloc(new_index->heap, sizeof(dict_col_t)));
- field->col->len = fts_max_token_size;
+ field->col->len = FTS_MAX_WORD_LEN;
if (strcmp(charset->name, "latin1_swedish_ci") == 0) {
field->col->mtype = DATA_VARCHAR;
@@ -450,7 +450,7 @@ row_merge_fts_doc_tokenize(
field->type.prtype = word_dtype->prtype | DATA_NOT_NULL;
/* Variable length field, set to max size. */
- field->type.len = fts_max_token_size;
+ field->type.len = FTS_MAX_WORD_LEN;
field->type.mbminmaxlen = word_dtype->mbminmaxlen;
cur_len += len;
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index f5eb31191a5..b753574158a 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1944,7 +1944,7 @@ PageConverter::update_index_page(
page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
- if (page_get_n_recs(block->frame) == 0) {
+ if (page_is_empty(block->frame)) {
/* Only a root page can be empty. */
if (!is_root_page(block->frame)) {
@@ -2269,7 +2269,7 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
return(err);
}
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index c1c27152831..49fb374e2aa 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1705,16 +1705,22 @@ do_possible_lock_wait:
/* We had temporarily released dict_operation_lock in
above lock sleep wait, now we have the lock again, and
we will need to re-check whether the foreign key has been
- dropped */
- for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST(
- table->referenced_list);
- check_foreign;
- check_foreign = UT_LIST_GET_NEXT(
- referenced_list, check_foreign)) {
- if (check_foreign == foreign) {
- verified = true;
- break;
+ dropped. We only need to verify if the table is referenced
+ table case (check_ref == 0), since MDL lock will prevent
+ concurrent DDL and DML on the same table */
+ if (!check_ref) {
+ for (const dict_foreign_t* check_foreign
+ = UT_LIST_GET_FIRST( table->referenced_list);
+ check_foreign;
+ check_foreign = UT_LIST_GET_NEXT(
+ referenced_list, check_foreign)) {
+ if (check_foreign == foreign) {
+ verified = true;
+ break;
+ }
}
+ } else {
+ verified = true;
}
if (!verified) {
@@ -1938,6 +1944,7 @@ row_ins_scan_sec_index_for_duplicate(
do {
const rec_t* rec = btr_pcur_get_rec(&pcur);
const buf_block_t* block = btr_pcur_get_block(&pcur);
+ ulint lock_type;
if (page_rec_is_infimum(rec)) {
@@ -1947,6 +1954,16 @@ row_ins_scan_sec_index_for_duplicate(
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &offsets_heap);
+ /* If the transaction isolation level is no stronger than
+ READ COMMITTED, then avoid gap locks. */
+ if (!page_rec_is_supremum(rec)
+ && thr_get_trx(thr)->isolation_level
+ <= TRX_ISO_READ_COMMITTED) {
+ lock_type = LOCK_REC_NOT_GAP;
+ } else {
+ lock_type = LOCK_ORDINARY;
+ }
+
if (flags & BTR_NO_LOCKING_FLAG) {
/* Set no locks when applying log
in online table rebuild. */
@@ -1958,13 +1975,11 @@ row_ins_scan_sec_index_for_duplicate(
INSERT ON DUPLICATE KEY UPDATE). */
err = row_ins_set_exclusive_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
+ lock_type, block, rec, index, offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
+ lock_type, block, rec, index, offsets, thr);
}
switch (err) {
@@ -1990,6 +2005,19 @@ row_ins_scan_sec_index_for_duplicate(
thr_get_trx(thr)->error_info = index;
+ /* If the duplicate is on hidden FTS_DOC_ID,
+ state so in the error log */
+ if (DICT_TF2_FLAG_IS_SET(
+ index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)
+ && strcmp(index->name,
+ FTS_DOC_ID_INDEX_NAME) == 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Duplicate FTS_DOC_ID value"
+ " on table %s",
+ index->table->name);
+ }
+
goto end_scan;
}
} else {
@@ -2484,7 +2512,7 @@ err_exit:
DBUG_EXECUTE_IF(
"row_ins_extern_checkpoint",
log_make_checkpoint_at(
- IB_ULONGLONG_MAX, TRUE););
+ LSN_MAX, TRUE););
err = row_ins_index_entry_big_rec(
entry, big_rec, offsets, &offsets_heap, index,
thr_get_trx(thr)->mysql_thd,
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 01270300924..170358147b1 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,7 +38,7 @@ Created 2011-05-26 Marko Makela
#include "que0que.h"
#include "handler0alter.h"
-#include<set>
+#include<map>
/** Table row modification operations during online table rebuild.
Delete-marked records are not copied to the rebuilt table. */
@@ -72,18 +72,86 @@ static bool row_log_apply_print;
/** Size of the modification log entry header, in bytes */
#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
-/** Log block for modifications during online index creation */
+/** Log block for modifications during online ALTER TABLE */
struct row_log_buf_t {
byte* block; /*!< file block buffer */
mrec_buf_t buf; /*!< buffer for accessing a record
that spans two blocks */
ulint blocks; /*!< current position in blocks */
ulint bytes; /*!< current position within buf */
+ ulonglong total; /*!< logical position, in bytes from
+ the start of the row_log_table log;
+ 0 for row_log_online_op() and
+ row_log_apply(). */
};
-/** Set of transactions that rolled back inserts of BLOBs during
-online table rebuild */
-typedef std::set<trx_id_t> trx_id_set;
+/** Tracks BLOB allocation during online ALTER TABLE */
+class row_log_table_blob_t {
+public:
+ /** Constructor (declaring a BLOB freed)
+ @param offset_arg row_log_t::tail::total */
+#ifdef UNIV_DEBUG
+ row_log_table_blob_t(ulonglong offset_arg) :
+ old_offset (0), free_offset (offset_arg),
+ offset (BLOB_FREED) {}
+#else /* UNIV_DEBUG */
+ row_log_table_blob_t() :
+ offset (BLOB_FREED) {}
+#endif /* UNIV_DEBUG */
+
+ /** Declare a BLOB freed again.
+ @param offset_arg row_log_t::tail::total */
+#ifdef UNIV_DEBUG
+ void blob_free(ulonglong offset_arg)
+#else /* UNIV_DEBUG */
+ void blob_free()
+#endif /* UNIV_DEBUG */
+ {
+ ut_ad(offset < offset_arg);
+ ut_ad(offset != BLOB_FREED);
+ ut_d(old_offset = offset);
+ ut_d(free_offset = offset_arg);
+ offset = BLOB_FREED;
+ }
+ /** Declare a freed BLOB reused.
+ @param offset_arg row_log_t::tail::total */
+ void blob_alloc(ulonglong offset_arg) {
+ ut_ad(free_offset <= offset_arg);
+ ut_d(old_offset = offset);
+ offset = offset_arg;
+ }
+ /** Determine if a BLOB was freed at a given log position
+ @param offset_arg row_log_t::head::total after the log record
+ @return true if freed */
+ bool is_freed(ulonglong offset_arg) const {
+ /* This is supposed to be the offset at the end of the
+ current log record. */
+ ut_ad(offset_arg > 0);
+ /* We should never get anywhere close the magic value. */
+ ut_ad(offset_arg < BLOB_FREED);
+ return(offset_arg < offset);
+ }
+private:
+ /** Magic value for a freed BLOB */
+ static const ulonglong BLOB_FREED = ~0ULL;
+#ifdef UNIV_DEBUG
+ /** Old offset, in case a page was freed, reused, freed, ... */
+ ulonglong old_offset;
+ /** Offset of last blob_free() */
+ ulonglong free_offset;
+#endif /* UNIV_DEBUG */
+ /** Byte offset to the log file */
+ ulonglong offset;
+};
+
+/** @brief Map of off-page column page numbers to 0 or log byte offsets.
+
+If there is no mapping for a page number, it is safe to access.
+If a page number maps to 0, it is an off-page column that has been freed.
+If a page number maps to a nonzero number, the number is a byte offset
+into the index->online_log, indicating that the page is safe to access
+when applying log records starting from that offset. */
+typedef std::map<ulint, row_log_table_blob_t> page_no_map;
/** @brief Buffer for logging modifications during online index creation
@@ -99,11 +167,12 @@ directly. When also head.bytes == tail.bytes, both counts will be
reset to 0 and the file will be truncated. */
struct row_log_t {
int fd; /*!< file descriptor */
- ib_mutex_t mutex; /*!< mutex protecting trx_log, error,
+ ib_mutex_t mutex; /*!< mutex protecting error,
max_trx and tail */
- trx_id_set* trx_rb; /*!< set of transactions that rolled back
- inserts of BLOBs during online table rebuild;
- protected by mutex */
+ page_no_map* blobs; /*!< map of page numbers of off-page columns
+ that have been freed during table-rebuilding
+ ALTER TABLE (row_log_table_*); protected by
+ index->lock X-latch only */
dict_table_t* table; /*!< table that is being rebuilt,
or NULL when this is a secondary
index that is being created online */
@@ -347,6 +416,7 @@ write_failed:
ut_ad(b == log->tail.block + log->tail.bytes);
}
+ log->tail.total += size;
UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
mutex_exit(&log->mutex);
}
@@ -371,6 +441,7 @@ row_log_table_delete(
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool purge, /*!< in: true=purging BLOBs */
trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
it was deleted */
{
@@ -460,6 +531,7 @@ row_log_table_delete(
ut_ad(dfield_get_type(dfield)->prtype
== (DATA_NOT_NULL | DATA_TRX_ID));
ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN);
+ dfield_dup(dfield, heap);
trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id);
}
@@ -473,27 +545,25 @@ row_log_table_delete(
mrec_size = 4 + old_pk_size;
- /* If the row is marked as rollback, we will need to
- log the enough prefix of the BLOB unless both the
- old and new table are in COMPACT or REDUNDANT format */
- if ((dict_table_get_format(index->table) >= UNIV_FORMAT_B
- || dict_table_get_format(new_table) >= UNIV_FORMAT_B)
- && row_log_table_is_rollback(index, trx_id)) {
- if (rec_offs_any_extern(offsets)) {
- /* Build a cache of those off-page column
- prefixes that are referenced by secondary
- indexes. It can be that none of the off-page
- columns are needed. */
- row_build(ROW_COPY_DATA, index, rec,
- offsets, NULL, NULL, NULL, &ext, heap);
- if (ext) {
- /* Log the row_ext_t, ext->ext and ext->buf */
- ext_size = ext->n_ext * ext->max_len
- + sizeof(*ext)
- + ext->n_ext * sizeof(ulint)
- + (ext->n_ext - 1) * sizeof ext->len;
- mrec_size += ext_size;
- }
+ /* Log enough prefix of the BLOB unless both the
+ old and new table are in COMPACT or REDUNDANT format,
+ which store the prefix in the clustered index record. */
+ if (purge && rec_offs_any_extern(offsets)
+ && (dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
+
+ /* Build a cache of those off-page column prefixes
+ that are referenced by secondary indexes. It can be
+ that none of the off-page columns are needed. */
+ row_build(ROW_COPY_DATA, index, rec,
+ offsets, NULL, NULL, NULL, &ext, heap);
+ if (ext) {
+ /* Log the row_ext_t, ext->ext and ext->buf */
+ ext_size = ext->n_ext * ext->max_len
+ + sizeof(*ext)
+ + ext->n_ext * sizeof(ulint)
+ + (ext->n_ext - 1) * sizeof ext->len;
+ mrec_size += ext_size;
}
}
@@ -548,7 +618,7 @@ row_log_table_delete(
/******************************************************//**
Logs an insert or update to a table that is being rebuilt. */
-static __attribute__((nonnull(1,2,3)))
+static
void
row_log_table_low_redundant(
/*========================*/
@@ -557,7 +627,6 @@ row_log_table_low_redundant(
page X-latched */
dict_index_t* index, /*!< in/out: clustered index, S-latched
or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
bool insert, /*!< in: true if insert,
false if update */
const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value
@@ -578,6 +647,9 @@ row_log_table_low_redundant(
ut_ad(!page_is_comp(page_align(rec)));
ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
+ ut_ad(dict_tf_is_valid(index->table->flags));
+ ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */
+ ut_ad(dict_index_is_clust(new_index));
heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
tuple = dtuple_create(heap, index->n_fields);
@@ -712,7 +784,7 @@ row_log_table_low(
if (!rec_offs_comp(offsets)) {
row_log_table_low_redundant(
- rec, index, offsets, insert, old_pk, new_index);
+ rec, index, insert, old_pk, new_index);
return;
}
@@ -723,8 +795,8 @@ row_log_table_low(
extra_size = rec_offs_extra_size(offsets) - omit_size;
- mrec_size = rec_offs_size(offsets) - omit_size
- + ROW_LOG_HEADER_SIZE + (extra_size >= 0x80);
+ mrec_size = ROW_LOG_HEADER_SIZE
+ + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size;
if (insert || index->online_log->same_pk) {
ut_ad(!old_pk);
@@ -793,6 +865,93 @@ row_log_table_update(
row_log_table_low(rec, index, offsets, false, old_pk);
}
+/** Gets the old table column of a PRIMARY KEY column.
+@param table old table (before ALTER TABLE)
+@param col_map mapping of old column numbers to new ones
+@param col_no column position in the new table
+@return old table column, or NULL if this is an added column */
+static
+const dict_col_t*
+row_log_table_get_pk_old_col(
+/*=========================*/
+ const dict_table_t* table,
+ const ulint* col_map,
+ ulint col_no)
+{
+ for (ulint i = 0; i < table->n_cols; i++) {
+ if (col_no == col_map[i]) {
+ return(dict_table_get_nth_col(table, i));
+ }
+ }
+
+ return(NULL);
+}
+
+/** Maps an old table column of a PRIMARY KEY column.
+@param col old table column (before ALTER TABLE)
+@param ifield clustered index field in the new table (after ALTER TABLE)
+@param dfield clustered index tuple field in the new table
+@param heap memory heap for allocating dfield contents
+@param rec clustered index leaf page record in the old table
+@param offsets rec_get_offsets(rec)
+@param i rec field corresponding to col
+@param zip_size compressed page size of the old table, or 0 for uncompressed
+@param max_len maximum length of dfield
+@retval DB_INVALID_NULL if a NULL value is encountered
+@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
+static
+dberr_t
+row_log_table_get_pk_col(
+/*=====================*/
+ const dict_col_t* col,
+ const dict_field_t* ifield,
+ dfield_t* dfield,
+ mem_heap_t* heap,
+ const rec_t* rec,
+ const ulint* offsets,
+ ulint i,
+ ulint zip_size,
+ ulint max_len)
+{
+ const byte* field;
+ ulint len;
+
+ ut_ad(ut_is_2pow(zip_size));
+
+ field = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len == UNIV_SQL_NULL) {
+ return(DB_INVALID_NULL);
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint field_len = ifield->prefix_len;
+ byte* blob_field;
+
+ if (!field_len) {
+ field_len = ifield->fixed_len;
+ if (!field_len) {
+ field_len = max_len + 1;
+ }
+ }
+
+ blob_field = static_cast<byte*>(
+ mem_heap_alloc(heap, field_len));
+
+ len = btr_copy_externally_stored_field_prefix(
+ blob_field, field_len, zip_size, field, len);
+ if (len >= max_len + 1) {
+ return(DB_TOO_BIG_INDEX_COL);
+ }
+
+ dfield_set_data(dfield, blob_field, len);
+ } else {
+ dfield_set_data(dfield, mem_heap_dup(heap, field, len), len);
+ }
+
+ return(DB_SUCCESS);
+}
+
/******************************************************//**
Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
of a table that is being rebuilt.
@@ -865,95 +1024,69 @@ row_log_table_get_pk(
dict_index_copy_types(tuple, new_index, tuple->n_fields);
dtuple_set_n_fields_cmp(tuple, new_n_uniq);
+ const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table);
+ const ulint zip_size = dict_table_zip_size(index->table);
+
for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
- dict_field_t* ifield;
- dfield_t* dfield;
- const dict_col_t* new_col;
- const dict_col_t* col;
- ulint col_no;
- ulint i;
- ulint len;
- const byte* field;
+ dict_field_t* ifield;
+ dfield_t* dfield;
+ ulint prtype;
+ ulint mbminmaxlen;
ifield = dict_index_get_nth_field(new_index, new_i);
dfield = dtuple_get_nth_field(tuple, new_i);
- new_col = dict_field_get_col(ifield);
- col_no = new_col->ind;
-
- for (ulint old_i = 0; old_i < index->table->n_cols;
- old_i++) {
- if (col_no == log->col_map[old_i]) {
- col_no = old_i;
- goto copy_col;
- }
- }
-
- /* No matching column was found in the old
- table, so this must be an added column.
- Copy the default value. */
- ut_ad(log->add_cols);
- dfield_copy(dfield,
- dtuple_get_nth_field(
- log->add_cols, col_no));
- continue;
-
-copy_col:
- col = dict_table_get_nth_col(index->table, col_no);
- i = dict_col_get_clust_pos(col, index);
+ const ulint col_no
+ = dict_field_get_col(ifield)->ind;
- if (i == ULINT_UNDEFINED) {
- ut_ad(0);
- log->error = DB_CORRUPTION;
- tuple = NULL;
- goto func_exit;
- }
+ if (const dict_col_t* col
+ = row_log_table_get_pk_old_col(
+ index->table, log->col_map, col_no)) {
+ ulint i = dict_col_get_clust_pos(col, index);
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len == UNIV_SQL_NULL) {
- log->error = DB_INVALID_NULL;
- tuple = NULL;
- goto func_exit;
- }
-
- if (rec_offs_nth_extern(offsets, i)) {
- ulint field_len = ifield->prefix_len;
- byte* blob_field;
- const ulint max_len =
- DICT_MAX_FIELD_LEN_BY_FORMAT(
- new_table);
-
- if (!field_len) {
- field_len = ifield->fixed_len;
- if (!field_len) {
- field_len = max_len + 1;
- }
+ if (i == ULINT_UNDEFINED) {
+ ut_ad(0);
+ log->error = DB_CORRUPTION;
+ goto err_exit;
}
- blob_field = static_cast<byte*>(
- mem_heap_alloc(*heap, field_len));
+ log->error = row_log_table_get_pk_col(
+ col, ifield, dfield, *heap,
+ rec, offsets, i, zip_size, max_len);
- len = btr_copy_externally_stored_field_prefix(
- blob_field, field_len,
- dict_table_zip_size(index->table),
- field, len);
- if (len == max_len + 1) {
- log->error = DB_TOO_BIG_INDEX_COL;
+ if (log->error != DB_SUCCESS) {
+err_exit:
tuple = NULL;
goto func_exit;
}
- dfield_set_data(dfield, blob_field, len);
+ mbminmaxlen = col->mbminmaxlen;
+ prtype = col->prtype;
} else {
- if (ifield->prefix_len
- && ifield->prefix_len < len) {
- len = ifield->prefix_len;
- }
+ /* No matching column was found in the old
+ table, so this must be an added column.
+ Copy the default value. */
+ ut_ad(log->add_cols);
+
+ dfield_copy(dfield, dtuple_get_nth_field(
+ log->add_cols, col_no));
+ mbminmaxlen = dfield->type.mbminmaxlen;
+ prtype = dfield->type.prtype;
+ }
+
+ ut_ad(!dfield_is_ext(dfield));
+ ut_ad(!dfield_is_null(dfield));
- dfield_set_data(
- dfield,
- mem_heap_dup(*heap, field, len), len);
+ if (ifield->prefix_len) {
+ ulint len = dtype_get_at_most_n_mbchars(
+ prtype, mbminmaxlen,
+ ifield->prefix_len,
+ dfield_get_len(dfield),
+ static_cast<const char*>(
+ dfield_get_data(dfield)));
+
+ ut_ad(len <= dfield_get_len(dfield));
+ dfield_set_len(dfield, len);
}
}
@@ -988,66 +1121,80 @@ row_log_table_insert(
}
/******************************************************//**
-Notes that a transaction is being rolled back. */
+Notes that a BLOB is being freed during online ALTER TABLE. */
UNIV_INTERN
void
-row_log_table_rollback(
-/*===================*/
- dict_index_t* index, /*!< in/out: clustered index */
- trx_id_t trx_id) /*!< in: transaction being rolled back */
+row_log_table_blob_free(
+/*====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
{
ut_ad(dict_index_is_clust(index));
-#ifdef UNIV_DEBUG
- ibool corrupt = FALSE;
- ut_ad(trx_rw_is_active(trx_id, &corrupt));
- ut_ad(!corrupt);
-#endif /* UNIV_DEBUG */
+ ut_ad(dict_index_is_online_ddl(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(page_no != FIL_NULL);
- /* Protect transitions of index->online_status and access to
- index->online_log. */
- rw_lock_s_lock(&index->lock);
+ if (index->online_log->error != DB_SUCCESS) {
+ return;
+ }
- if (dict_index_is_online_ddl(index)) {
- ut_ad(index->online_log);
- ut_ad(index->online_log->table);
- mutex_enter(&index->online_log->mutex);
- trx_id_set* trxs = index->online_log->trx_rb;
+ page_no_map* blobs = index->online_log->blobs;
- if (!trxs) {
- index->online_log->trx_rb = trxs = new trx_id_set();
- }
+ if (!blobs) {
+ index->online_log->blobs = blobs = new page_no_map();
+ }
- trxs->insert(trx_id);
+#ifdef UNIV_DEBUG
+ const ulonglong log_pos = index->online_log->tail.total;
+#else
+# define log_pos /* empty */
+#endif /* UNIV_DEBUG */
- mutex_exit(&index->online_log->mutex);
- }
+ const page_no_map::value_type v(page_no,
+ row_log_table_blob_t(log_pos));
- rw_lock_s_unlock(&index->lock);
+ std::pair<page_no_map::iterator,bool> p = blobs->insert(v);
+
+ if (!p.second) {
+ /* Update the existing mapping. */
+ ut_ad(p.first->first == page_no);
+ p.first->second.blob_free(log_pos);
+ }
+#undef log_pos
}
/******************************************************//**
-Check if a transaction rollback has been initiated.
-@return true if inserts of this transaction were rolled back */
+Notes that a BLOB is being allocated during online ALTER TABLE. */
UNIV_INTERN
-bool
-row_log_table_is_rollback(
-/*======================*/
- const dict_index_t* index, /*!< in: clustered index */
- trx_id_t trx_id) /*!< in: transaction id */
+void
+row_log_table_blob_alloc(
+/*=====================*/
+ dict_index_t* index, /*!< in/out: clustered index, X-latched */
+ ulint page_no)/*!< in: starting page number of the BLOB */
{
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
- ut_ad(index->online_log);
-
- if (const trx_id_set* trxs = index->online_log->trx_rb) {
- mutex_enter(&index->online_log->mutex);
- bool is_rollback = trxs->find(trx_id) != trxs->end();
- mutex_exit(&index->online_log->mutex);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(page_no != FIL_NULL);
- return(is_rollback);
+ if (index->online_log->error != DB_SUCCESS) {
+ return;
}
- return(false);
+ /* Only track allocations if the same page has been freed
+ earlier. Double allocation without a free is not allowed. */
+ if (page_no_map* blobs = index->online_log->blobs) {
+ page_no_map::iterator p = blobs->find(page_no);
+
+ if (p != blobs->end()) {
+ ut_ad(p->first == page_no);
+ p->second.blob_alloc(index->online_log->tail.total);
+ }
+ }
}
/******************************************************//**
@@ -1069,17 +1216,6 @@ row_log_table_apply_convert_mrec(
{
dtuple_t* row;
-#ifdef UNIV_SYNC_DEBUG
- /* This prevents BLOBs from being freed, in case an insert
- transaction rollback starts after row_log_table_is_rollback(). */
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (row_log_table_is_rollback(index, trx_id)) {
- row = NULL;
- goto func_exit;
- }
-
/* This is based on row_build(). */
if (log->add_cols) {
row = dtuple_copy(log->add_cols, heap);
@@ -1121,15 +1257,43 @@ row_log_table_apply_convert_mrec(
dfield_t* dfield
= dtuple_get_nth_field(row, col_no);
ulint len;
- const void* data;
+ const byte* data= NULL;
if (rec_offs_nth_extern(offsets, i)) {
ut_ad(rec_offs_any_extern(offsets));
- data = btr_rec_copy_externally_stored_field(
- mrec, offsets,
- dict_table_zip_size(index->table),
- i, &len, heap);
- ut_a(data);
+ rw_lock_x_lock(dict_index_get_lock(index));
+
+ if (const page_no_map* blobs = log->blobs) {
+ data = rec_get_nth_field(
+ mrec, offsets, i, &len);
+ ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ ulint page_no = mach_read_from_4(
+ data + len - (BTR_EXTERN_FIELD_REF_SIZE
+ - BTR_EXTERN_PAGE_NO));
+ page_no_map::const_iterator p = blobs->find(
+ page_no);
+ if (p != blobs->end()
+ && p->second.is_freed(log->head.total)) {
+ /* This BLOB has been freed.
+ We must not access the row. */
+ row = NULL;
+ }
+ }
+
+ if (row) {
+ data = btr_rec_copy_externally_stored_field(
+ mrec, offsets,
+ dict_table_zip_size(index->table),
+ i, &len, heap);
+ ut_a(data);
+ }
+
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ if (!row) {
+ goto func_exit;
+ }
} else {
data = rec_get_nth_field(mrec, offsets, i, &len);
}
@@ -1685,7 +1849,7 @@ delete_insert:
| BTR_KEEP_POS_FLAG,
btr_pcur_get_btr_cur(&pcur),
&cur_offsets, &offsets_heap, heap, &big_rec,
- update, 0, NULL, 0, &mtr);
+ update, 0, thr, 0, &mtr);
if (big_rec) {
if (error == DB_SUCCESS) {
@@ -1783,7 +1947,7 @@ row_log_table_apply_op(
ulint* offsets) /*!< in/out: work area
for parsing mrec */
{
- const row_log_t*log = dup->index->online_log;
+ row_log_t* log = dup->index->online_log;
dict_index_t* new_index = dict_table_get_first_index(log->table);
ulint extra_size;
const mrec_t* next_mrec;
@@ -1793,6 +1957,7 @@ row_log_table_apply_op(
ut_ad(dict_index_is_clust(dup->index));
ut_ad(dup->index->table != log->table);
+ ut_ad(log->head.total <= log->tail.total);
*error = DB_SUCCESS;
@@ -1801,6 +1966,8 @@ row_log_table_apply_op(
return(NULL);
}
+ const mrec_t* const mrec_start = mrec;
+
switch (*mrec++) {
default:
ut_ad(0);
@@ -1830,6 +1997,8 @@ row_log_table_apply_op(
if (next_mrec > mrec_end) {
return(NULL);
} else {
+ log->head.total += next_mrec - mrec_start;
+
ulint len;
const byte* db_trx_id
= rec_get_nth_field(
@@ -1863,6 +2032,8 @@ row_log_table_apply_op(
return(NULL);
}
+ log->head.total += next_mrec - mrec_start;
+
/* If there are external fields, retrieve those logged
prefix info and reconstruct the row_ext_t */
if (ext_size) {
@@ -2019,6 +2190,7 @@ row_log_table_apply_op(
}
ut_ad(next_mrec <= mrec_end);
+ log->head.total += next_mrec - mrec_start;
dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
{
@@ -2036,6 +2208,7 @@ row_log_table_apply_op(
break;
}
+ ut_ad(log->head.total <= log->tail.total);
mem_heap_empty(offsets_heap);
mem_heap_empty(heap);
return(next_mrec);
@@ -2423,6 +2596,10 @@ row_log_table_apply(
};
error = row_log_table_apply_ops(thr, &dup);
+
+ ut_ad(error != DB_SUCCESS
+ || clust_index->online_log->head.total
+ == clust_index->online_log->tail.total);
}
rw_lock_x_unlock(dict_index_get_lock(clust_index));
@@ -2451,6 +2628,7 @@ row_log_allocate(
byte* buf;
row_log_t* log;
ulint size;
+ DBUG_ENTER("row_log_allocate");
ut_ad(!dict_index_is_online_ddl(index));
ut_ad(dict_index_is_clust(index) == !!table);
@@ -2464,7 +2642,7 @@ row_log_allocate(
size = 2 * srv_sort_buf_size + sizeof *log;
buf = (byte*) os_mem_alloc_large(&size);
if (!buf) {
- return(false);
+ DBUG_RETURN(false);
}
log = (row_log_t*) &buf[2 * srv_sort_buf_size];
@@ -2472,11 +2650,11 @@ row_log_allocate(
log->fd = row_merge_file_create_low();
if (log->fd < 0) {
os_mem_free_large(buf, size);
- return(false);
+ DBUG_RETURN(false);
}
mutex_create(index_online_log_key, &log->mutex,
SYNC_INDEX_ONLINE_LOG);
- log->trx_rb = NULL;
+ log->blobs = NULL;
log->table = table;
log->same_pk = same_pk;
log->add_cols = add_cols;
@@ -2486,7 +2664,9 @@ row_log_allocate(
log->head.block = buf;
log->tail.block = buf + srv_sort_buf_size;
log->tail.blocks = log->tail.bytes = 0;
+ log->tail.total = 0;
log->head.blocks = log->head.bytes = 0;
+ log->head.total = 0;
dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
index->online_log = log;
@@ -2495,7 +2675,7 @@ row_log_allocate(
atomic operations in both cases. */
MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX);
- return(true);
+ DBUG_RETURN(true);
}
/******************************************************//**
@@ -2508,7 +2688,7 @@ row_log_free(
{
MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
- delete log->trx_rb;
+ delete log->blobs;
row_merge_file_destroy_low(log->fd);
mutex_free(&log->mutex);
os_mem_free_large(log->head.block, log->size);
@@ -3183,6 +3363,7 @@ row_log_apply(
dberr_t error;
row_log_t* log;
row_merge_dup_t dup = { index, table, NULL, 0 };
+ DBUG_ENTER("row_log_apply");
ut_ad(dict_index_is_online_ddl(index));
ut_ad(!dict_index_is_clust(index));
@@ -3225,5 +3406,5 @@ row_log_apply(
row_log_free(log);
- return(error);
+ DBUG_RETURN(error);
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index a509e2c5ca8..a0c0fd2c8c3 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -260,14 +260,15 @@ row_merge_buf_add(
ulint bucket = 0;
doc_id_t write_doc_id;
ulint n_row_added = 0;
+ DBUG_ENTER("row_merge_buf_add");
if (buf->n_tuples >= buf->max_tuples) {
- return(0);
+ DBUG_RETURN(0);
}
DBUG_EXECUTE_IF(
"ib_row_merge_buf_add_two",
- if (buf->n_tuples >= 2) return(0););
+ if (buf->n_tuples >= 2) DBUG_RETURN(0););
UNIV_PREFETCH_R(row->fields);
@@ -325,18 +326,12 @@ row_merge_buf_add(
fts_doc_item_t* doc_item;
byte* value;
- if (dfield_is_null(field)) {
- n_row_added = 1;
- continue;
- }
-
- doc_item = static_cast<fts_doc_item_t*>(
- mem_heap_alloc(
- buf->heap,
- sizeof(fts_doc_item_t)));
-
/* fetch Doc ID if it already exists
- in the row, and not supplied by the caller */
+ in the row, and not supplied by the
+ caller. Even if the value column is
+ NULL, we still need to get the Doc
+ ID so to maintain the correct max
+ Doc ID */
if (*doc_id == 0) {
const dfield_t* doc_field;
doc_field = dtuple_get_nth_field(
@@ -347,14 +342,23 @@ row_merge_buf_add(
dfield_get_data(doc_field)));
if (*doc_id == 0) {
- fprintf(stderr, "InnoDB FTS: "
- "User supplied Doc ID "
- "is zero. Record "
- "Skipped\n");
- return(0);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "FTS Doc ID is zero. "
+ "Record Skipped");
+ DBUG_RETURN(0);
}
}
+ if (dfield_is_null(field)) {
+ n_row_added = 1;
+ continue;
+ }
+
+ doc_item = static_cast<fts_doc_item_t*>(
+ mem_heap_alloc(
+ buf->heap,
+ sizeof(*doc_item)));
+
value = static_cast<byte*>(
ut_malloc(field->len));
memcpy(value, field->data, field->len);
@@ -458,7 +462,7 @@ row_merge_buf_add(
/* If this is FTS index, we already populated the sort buffer, return
here */
if (index->type & DICT_FTS) {
- return(n_row_added);
+ DBUG_RETURN(n_row_added);
}
#ifdef UNIV_DEBUG
@@ -484,7 +488,7 @@ row_merge_buf_add(
/* Reserve one byte for the end marker of row_merge_block_t. */
if (buf->total_size + data_size >= srv_sort_buf_size - 1) {
- return(0);
+ DBUG_RETURN(0);
}
buf->total_size += data_size;
@@ -499,7 +503,7 @@ row_merge_buf_add(
dfield_dup(field++, buf->heap);
} while (--n_fields);
- return(n_row_added);
+ DBUG_RETURN(n_row_added);
}
/*************************************************************//**
@@ -1180,6 +1184,7 @@ row_merge_read_clustered_index(
os_event_t fts_parallel_sort_event = NULL;
ibool fts_pll_sort = FALSE;
ib_int64_t sig_count = 0;
+ DBUG_ENTER("row_merge_read_clustered_index");
ut_ad((old_table == new_table) == !col_map);
ut_ad(!add_cols || col_map);
@@ -1396,13 +1401,26 @@ end_of_index:
offsets = rec_get_offsets(rec, clust_index, NULL,
ULINT_UNDEFINED, &row_heap);
- if (online && new_table != old_table) {
- /* When rebuilding the table online, perform a
- REPEATABLE READ, so that row_log_table_apply()
- will not see a newer state of the table when
- applying the log. This is mainly to prevent
- false duplicate key errors, because the log
- will identify records by the PRIMARY KEY. */
+ if (online) {
+ /* Perform a REPEATABLE READ.
+
+ When rebuilding the table online,
+ row_log_table_apply() must not see a newer
+ state of the table when applying the log.
+ This is mainly to prevent false duplicate key
+ errors, because the log will identify records
+ by the PRIMARY KEY, and also to prevent unsafe
+ BLOB access.
+
+ When creating a secondary index online, this
+ table scan must not see records that have only
+ been inserted to the clustered index, but have
+ not been written to the online_log of
+ index[]. If we performed READ UNCOMMITTED, it
+ could happen that the ADD INDEX reaches
+ ONLINE_INDEX_COMPLETE state between the time
+ the DML thread has updated the clustered index
+ but has not yet accessed secondary index. */
ut_ad(trx->read_view);
if (!read_view_sees_trx_id(
@@ -1445,38 +1463,13 @@ end_of_index:
would make it tricky to detect duplicate
keys. */
continue;
- } else if (UNIV_LIKELY_NULL(rec_offs_any_null_extern(
- rec, offsets))) {
- /* This is essentially a READ UNCOMMITTED to
- fetch the most recent version of the record. */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- trx_id_t trx_id;
- ulint trx_id_offset;
-
- /* It is possible that the record was
- just inserted and the off-page columns
- have not yet been written. We will
- ignore the record if this is the case,
- because it should be covered by the
- index->info.online log in that case. */
-
- trx_id_offset = clust_index->trx_id_offset;
- if (!trx_id_offset) {
- trx_id_offset = row_get_trx_id_offset(
- clust_index, offsets);
- }
-
- trx_id = trx_read_trx_id(rec + trx_id_offset);
- ut_a(trx_rw_is_active(trx_id, NULL));
- ut_a(trx_undo_trx_id_is_insert(rec + trx_id_offset));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- /* When !online, we are holding an X-lock on
- old_table, preventing any inserts. */
- ut_ad(online);
- continue;
}
+ /* When !online, we are holding a lock on old_table, preventing
+ any inserts that could have written a record 'stub' before
+ writing out off-page columns. */
+ ut_ad(!rec_offs_any_null_extern(rec, offsets));
+
/* Build a row based on the clustered index. */
row = row_build(ROW_COPY_POINTERS, clust_index,
@@ -1692,10 +1685,16 @@ all_done:
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
#endif
if (fts_pll_sort) {
+ bool all_exit = false;
+ ulint trial_count = 0;
+ const ulint max_trial_count = 10000;
+
+ /* Tell all children that parent has done scanning */
for (ulint i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].state = FTS_PARENT_COMPLETE;
}
wait_again:
+ /* Now wait all children to report back to be completed */
os_event_wait_time_low(fts_parallel_sort_event,
1000000, sig_count);
@@ -1707,6 +1706,31 @@ wait_again:
goto wait_again;
}
}
+
+ /* Now all children should complete, wait a bit until
+ they all finish setting the event, before we free everything.
+ This has a 10 second timeout */
+ do {
+ all_exit = true;
+
+ for (ulint j = 0; j < fts_sort_pll_degree; j++) {
+ if (psort_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ } while (!all_exit && trial_count < max_trial_count);
+
+ if (!all_exit) {
+ ut_ad(0);
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Not all child sort threads exited"
+ " when creating FTS index '%s'",
+ fts_sort_idx->name);
+ }
}
#ifdef FTS_INTERNAL_DIAG_PRINT
@@ -1731,7 +1755,7 @@ wait_again:
trx->op_info = "";
- return(err);
+ DBUG_RETURN(err);
}
/** Write a record via buffer 2 and read the next record to buffer N.
@@ -2092,13 +2116,14 @@ row_merge_sort(
ulint num_runs;
ulint* run_offset;
dberr_t error = DB_SUCCESS;
+ DBUG_ENTER("row_merge_sort");
/* Record the number of merge runs we need to perform */
num_runs = file->offset;
/* If num_runs are less than 1, nothing to merge */
if (num_runs <= 1) {
- return(error);
+ DBUG_RETURN(error);
}
/* "run_offset" records each run's first offset number */
@@ -2126,24 +2151,7 @@ row_merge_sort(
mem_free(run_offset);
- return(error);
-}
-
-/*************************************************************//**
-Set blob fields empty */
-static __attribute__((nonnull))
-void
-row_merge_set_blob_empty(
-/*=====================*/
- dtuple_t* tuple) /*!< in/out: data tuple */
-{
- for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
- dfield_t* field = dtuple_get_nth_field(tuple, i);
-
- if (dfield_is_ext(field)) {
- dfield_set_data(field, NULL, 0);
- }
- }
+ DBUG_RETURN(error);
}
/*************************************************************//**
@@ -2211,6 +2219,7 @@ row_merge_insert_index_tuples(
ulint foffs = 0;
ulint* offsets;
mrec_buf_t* buf;
+ DBUG_ENTER("row_merge_insert_index_tuples");
ut_ad(!srv_read_only_mode);
ut_ad(!(index->type & DICT_FTS));
@@ -2272,52 +2281,31 @@ row_merge_insert_index_tuples(
if (!n_ext) {
/* There are no externally stored columns. */
- } else if (!dict_index_is_online_ddl(old_index)) {
+ } else {
ut_ad(dict_index_is_clust(index));
- /* Modifications to the table are
- blocked while we are not rebuilding it
- or creating indexes. Off-page columns
- can be fetched safely. */
+ /* Off-page columns can be fetched safely
+ when concurrent modifications to the table
+ are disabled. (Purge can process delete-marked
+ records, but row_merge_read_clustered_index()
+ would have skipped them.)
+
+ When concurrent modifications are enabled,
+ row_merge_read_clustered_index() will
+ only see rows from transactions that were
+ committed before the ALTER TABLE started
+ (REPEATABLE READ).
+
+ Any modifications after the
+ row_merge_read_clustered_index() scan
+ will go through row_log_table_apply().
+ Any modifications to off-page columns
+ will be tracked by
+ row_log_table_blob_alloc() and
+ row_log_table_blob_free(). */
row_merge_copy_blobs(
mrec, offsets,
dict_table_zip_size(old_table),
dtuple, tuple_heap);
- } else {
- ut_ad(dict_index_is_clust(index));
-
- ulint offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(
- index, offsets);
- }
-
- /* Copy the off-page columns while
- holding old_index->lock, so
- that they cannot be freed by
- a rollback of a fresh insert. */
- rw_lock_s_lock(&old_index->lock);
-
- if (row_log_table_is_rollback(
- old_index,
- trx_read_trx_id(mrec + offset))) {
- /* The row and BLOB could
- already be freed. They
- will be deleted by
- row_undo_ins_remove_clust_rec
- when rolling back a fresh
- insert. So, no need to retrieve
- the off-page column. */
- row_merge_set_blob_empty(
- dtuple);
- } else {
- row_merge_copy_blobs(
- mrec, offsets,
- dict_table_zip_size(old_table),
- dtuple, tuple_heap);
- }
-
- rw_lock_s_unlock(&old_index->lock);
}
ut_ad(dtuple_validate(dtuple));
@@ -2415,7 +2403,7 @@ err_exit:
mem_heap_free(ins_heap);
mem_heap_free(heap);
- return(error);
+ DBUG_RETURN(error);
}
/*********************************************************************//**
@@ -2903,7 +2891,7 @@ row_merge_file_create_low(void)
if (fd < 0) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Cannot create temporary merge file");
- return -1;
+ return (-1);
}
return(fd);
}
@@ -3114,48 +3102,34 @@ will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
dberr_t
-row_merge_rename_tables(
-/*====================*/
+row_merge_rename_tables_dict(
+/*=========================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
tmp_name */
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx) /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: dictionary transaction */
{
dberr_t err = DB_ERROR;
pars_info_t* info;
- char old_name[MAX_FULL_NAME_LEN + 1];
ut_ad(!srv_read_only_mode);
ut_ad(old_table != new_table);
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
-
- /* store the old/current name to an automatic variable */
- if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
- memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Too long table name: '%s', max length is %d",
- old_table->name, MAX_FULL_NAME_LEN);
- ut_error;
- }
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE
+ || trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
trx->op_info = "renaming tables";
- DBUG_EXECUTE_IF(
- "ib_rebuild_cannot_rename",
- err = DB_ERROR; goto err_exit;);
-
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data in system tables. */
info = pars_info_create();
pars_info_add_str_literal(info, "new_name", new_table->name);
- pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_name", old_table->name);
pars_info_add_str_literal(info, "tmp_name", tmp_name);
err = que_eval_sql(info,
@@ -3200,11 +3174,12 @@ row_merge_rename_tables(
table is in a non-system tablespace where space > 0. */
if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
/* Make pathname to update SYS_DATAFILES. */
- char* old_path = row_make_new_pathname(new_table, old_name);
+ char* old_path = row_make_new_pathname(
+ new_table, old_table->name);
info = pars_info_create();
- pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_name", old_table->name);
pars_info_add_str_literal(info, "old_path", old_path);
pars_info_add_int4_literal(info, "new_space",
(lint) new_table->space);
@@ -3223,75 +3198,9 @@ row_merge_rename_tables(
mem_free(old_path);
}
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
-
- /* Generate the redo logs for file operations */
- fil_mtr_rename_log(old_table->space, old_name,
- new_table->space, new_table->name, tmp_name);
-
- /* What if the redo logs are flushed to disk here? This is
- tested with following crash point */
- DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- /* File operations cannot be rolled back. So, before proceeding
- with file operations, commit the dictionary changes.*/
- trx_commit_for_mysql(trx);
-
- /* If server crashes here, the dictionary in InnoDB and MySQL
- will differ. The .ibd files and the .frm files must be swapped
- manually by the administrator. No loss of data. */
- DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
-
- /* Ensure that the redo logs are flushed to disk. The config
- innodb_flush_log_at_trx_commit must not affect this. */
- log_buffer_flush_to_disk();
-
- /* The following calls will also rename the .ibd data files if
- the tables are stored in a single-table tablespace */
-
- err = dict_table_rename_in_cache(old_table, tmp_name, FALSE);
-
- if (err == DB_SUCCESS) {
-
- ut_ad(dict_table_is_discarded(old_table)
- == dict_table_is_discarded(new_table));
-
- err = dict_table_rename_in_cache(new_table, old_name, FALSE);
-
- if (err != DB_SUCCESS) {
-
- if (dict_table_rename_in_cache(
- old_table, old_name, FALSE)
- != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot undo the rename in cache "
- "from %s to %s", old_name, tmp_name);
- }
-
- goto err_exit;
- }
-
- if (dict_table_is_discarded(new_table)) {
-
- err = row_import_update_discarded_flag(
- trx, new_table->id, true, true);
- }
- }
-
- DBUG_EXECUTE_IF("ib_rebuild_cannot_load_fk",
- err = DB_ERROR; goto err_exit;);
-
- err = dict_load_foreigns(old_name, FALSE, TRUE);
-
- if (err != DB_SUCCESS) {
-err_exit:
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
+ if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) {
+ err = row_import_update_discarded_flag(
+ trx, new_table->id, true, true);
}
trx->op_info = "";
@@ -3417,7 +3326,7 @@ row_merge_is_index_usable(
/*********************************************************************//**
Drop a table. The caller must have ensured that the background stats
thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
@@ -3475,11 +3384,12 @@ row_merge_build_indexes(
ulint i;
ulint j;
dberr_t error;
- int tmpfd;
+ int tmpfd = -1;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ib_int64_t sig_count = 0;
+ DBUG_ENTER("row_merge_build_indexes");
ut_ad(!srv_read_only_mode);
ut_ad((old_table == new_table) == !col_map);
@@ -3493,7 +3403,7 @@ row_merge_build_indexes(
os_mem_alloc_large(&block_size));
if (block == NULL) {
- return(DB_OUT_OF_MEMORY);
+ DBUG_RETURN(DB_OUT_OF_MEMORY);
}
trx_start_if_not_started_xa(trx);
@@ -3501,6 +3411,14 @@ row_merge_build_indexes(
merge_files = static_cast<merge_file_t*>(
mem_alloc(n_indexes * sizeof *merge_files));
+ /* Initialize all the merge file descriptors, so that we
+ don't call row_merge_file_destroy() on uninitialized
+ merge file descriptor */
+
+ for (i = 0; i < n_indexes; i++) {
+ merge_files[i].fd = -1;
+ }
+
for (i = 0; i < n_indexes; i++) {
if (row_merge_file_create(&merge_files[i]) < 0) {
error = DB_OUT_OF_MEMORY;
@@ -3565,41 +3483,16 @@ row_merge_build_indexes(
if (indexes[i]->type & DICT_FTS) {
os_event_t fts_parallel_merge_event;
- bool all_exit = false;
- ulint trial_count = 0;
sort_idx = fts_sort_idx;
- /* Now all children should complete, wait
- a bit until they all finish using event */
- while (!all_exit && trial_count < 10000) {
- all_exit = true;
-
- for (j = 0; j < fts_sort_pll_degree;
- j++) {
- if (psort_info[j].child_status
- != FTS_CHILD_EXITING) {
- all_exit = false;
- os_thread_sleep(1000);
- break;
- }
- }
- trial_count++;
- }
-
- if (!all_exit) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Not all child sort threads exited"
- " when creating FTS index '%s'",
- indexes[i]->name);
- }
-
fts_parallel_merge_event
= merge_info[0].psort_common->merge_event;
if (FTS_PLL_MERGE) {
- trial_count = 0;
- all_exit = false;
+ ulint trial_count = 0;
+ bool all_exit = false;
+
os_event_reset(fts_parallel_merge_event);
row_fts_start_parallel_merge(merge_info);
wait_again:
@@ -3763,5 +3656,5 @@ func_exit:
}
}
- return(error);
+ DBUG_RETURN(error);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 808bd0aaeb5..9aceb305493 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2000, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -62,6 +62,7 @@ Created 9/17/2000 Heikki Tuuri
#include "row0import.h"
#include "m_string.h"
#include "my_sys.h"
+#include "ha_prototypes.h"
/** Provide optional 4.x backwards compatibility for 5.0 and above */
UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
@@ -618,8 +619,8 @@ handle_new_error:
case DB_INTERRUPTED:
case DB_DICT_CHANGED:
if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
+ /* Roll back the latest, possibly incomplete insertion
+ or update */
trx_rollback_to_savepoint(trx, savept);
}
@@ -2521,7 +2522,8 @@ row_table_add_foreign_constraints(
if (err == DB_SUCCESS) {
/* Check that also referencing constraints are ok */
- err = dict_load_foreigns(name, FALSE, TRUE);
+ err = dict_load_foreigns(name, NULL, false, true,
+ DICT_ERR_IGNORE_NONE);
}
if (err != DB_SUCCESS) {
@@ -2801,7 +2803,7 @@ row_discard_tablespace_begin(
name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
if (table) {
- dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
ut_a(table->space != TRX_SYS_SPACE);
ut_a(table->n_foreign_key_checks_running == 0);
}
@@ -2874,13 +2876,13 @@ row_discard_tablespace_end(
}
DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
DBUG_SUICIDE(););
trx_commit_for_mysql(trx);
DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(LSN_MAX, TRUE);
DBUG_SUICIDE(););
row_mysql_unlock_data_dictionary(trx);
@@ -3246,7 +3248,7 @@ row_truncate_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
@@ -3796,8 +3798,8 @@ row_drop_table_for_mysql(
tables since we know temp tables do not use persistent
stats. */
if (!dict_table_is_temporary(table)) {
- dict_stats_wait_bg_to_stop_using_tables(
- table, NULL, trx);
+ dict_stats_wait_bg_to_stop_using_table(
+ table, trx);
}
}
@@ -4167,6 +4169,11 @@ check_next_foreign:
DICT_TF2_FTS flag set. So keep this out of above
dict_table_has_fts_index condition */
if (table->fts) {
+ /* Need to set TABLE_DICT_LOCKED bit, since
+ fts_que_graph_free_check_lock would try to acquire
+ dict mutex lock */
+ table->fts->fts_status |= TABLE_DICT_LOCKED;
+
fts_free(table);
}
@@ -4503,14 +4510,31 @@ loop:
}
- if (row_is_mysql_tmp_table_name(table->name)) {
- /* There could be an orphan temp table left from
- interupted alter table rebuild operation */
- dict_table_close(table, TRUE, FALSE);
- } else {
- ut_a(!table->can_be_evicted || table->ibd_file_missing);
+ if (!row_is_mysql_tmp_table_name(table->name)) {
+ /* There could be orphan temp tables left from
+ interrupted alter table. Leave them, and handle
+ the rest.*/
+ if (table->can_be_evicted) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Orphan table encountered during "
+ "DROP DATABASE. This is possible if "
+ "'%s.frm' was lost.", table->name);
+ }
+
+ if (table->ibd_file_missing) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Missing %s.ibd file for table %s.",
+ table->name, table->name);
+ }
}
+ dict_table_close(table, TRUE, FALSE);
+
+ /* The dict_table_t object must not be accessed before
+ dict_table_open() or after dict_table_close(). But this is OK
+ if we are holding, the dict_sys->mutex. */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
/* Wait until MySQL does not have any queries running on
the table */
@@ -4668,6 +4692,7 @@ row_rename_table_for_mysql(
ut_a(old_name != NULL);
ut_a(new_name != NULL);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
if (srv_created_new_raw || srv_force_recovery) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
@@ -4692,7 +4717,6 @@ row_rename_table_for_mysql(
}
trx->op_info = "renaming table";
- trx_start_if_not_started_xa(trx);
old_is_tmp = row_is_mysql_tmp_table_name(old_name);
new_is_tmp = row_is_mysql_tmp_table_name(new_name);
@@ -4945,6 +4969,24 @@ row_rename_table_for_mysql(
}
}
+ if (dict_table_has_fts_index(table)
+ && !dict_tables_have_same_db(old_name, new_name)) {
+ err = fts_rename_aux_tables(table, new_name, trx);
+
+ if (err != DB_SUCCESS && (table->space != 0)) {
+ char* orig_name = table->name;
+
+ /* If rename fails and table has its own tablespace,
+ we need to call fts_rename_aux_tables again to
+ revert the ibd file rename, which is not under the
+ control of trx. Also notice the parent table name
+ in cache is not changed yet. */
+ table->name = const_cast<char*>(new_name);
+ fts_rename_aux_tables(table, old_name, trx);
+ table->name = orig_name;
+ }
+ }
+
end:
if (err != DB_SUCCESS) {
if (err == DB_DUPLICATE_KEY) {
@@ -5003,7 +5045,9 @@ end:
an ALTER, not in a RENAME. */
err = dict_load_foreigns(
- new_name, FALSE, !old_is_tmp || trx->check_foreigns);
+ new_name, NULL,
+ false, !old_is_tmp || trx->check_foreigns,
+ DICT_ERR_IGNORE_NONE);
if (err != DB_SUCCESS) {
ut_print_timestamp(stderr);
@@ -5052,7 +5096,6 @@ end:
}
funct_exit:
-
if (table != NULL) {
dict_table_close(table, dict_locked, FALSE);
}
@@ -5182,6 +5225,7 @@ func_exit:
dtuple_get_nth_field(prev_entry, i))) {
contains_null = TRUE;
+ break;
}
}
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index ee603be453a..1b836c26c25 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -112,28 +112,19 @@ row_purge_reposition_pcur(
return(node->found_clust);
}
-/** Status of row_purge_remove_clust() */
-enum row_purge_status {
- ROW_PURGE_DONE, /*!< The row has been removed. */
- ROW_PURGE_FAIL, /*!< The purge was not successful. */
- ROW_PURGE_SUSPEND/*!< Cannot purge now, due to online rebuild. */
-};
-
/***********************************************************//**
Removes a delete marked clustered index record if possible.
-@retval ROW_PURGE_DONE if the row was not found, or it was successfully removed
-@retval ROW_PURGE_FAIL if the row was modified after the delete marking
-@retval ROW_PURGE_SUSPEND if the row refers to an off-page column and
-an online ALTER TABLE (table rebuild) is in progress. */
+@retval true if the row was not found, or it was successfully removed
+@retval false if the row was modified after the delete marking */
static __attribute__((nonnull, warn_unused_result))
-enum row_purge_status
+bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
dict_index_t* index;
- enum row_purge_status status = ROW_PURGE_DONE;
+ bool success = true;
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
@@ -165,16 +156,9 @@ row_purge_remove_clust_if_poss_low(
goto func_exit;
}
- if (dict_index_get_online_status(index) == ONLINE_INDEX_CREATION
- && rec_offs_any_extern(offsets)) {
- status = ROW_PURGE_SUSPEND;
- goto func_exit;
- }
-
if (mode == BTR_MODIFY_LEAF) {
- status = btr_cur_optimistic_delete(
- btr_pcur_get_btr_cur(&node->pcur), 0, &mtr)
- ? ROW_PURGE_DONE : ROW_PURGE_FAIL;
+ success = btr_cur_optimistic_delete(
+ btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
} else {
dberr_t err;
ut_ad(mode == BTR_MODIFY_TREE);
@@ -186,7 +170,7 @@ row_purge_remove_clust_if_poss_low(
case DB_SUCCESS:
break;
case DB_OUT_OF_FILE_SPACE:
- status = ROW_PURGE_FAIL;
+ success = false;
break;
default:
ut_error;
@@ -200,43 +184,34 @@ func_exit:
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- return(status);
+ return(success);
}
/***********************************************************//**
Removes a clustered index record if it has not been modified after the delete
marking.
@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended, either because of
-running out of file space or because the row refers to an off-page
-column and an online ALTER TABLE (table rebuild) is in progress. */
+@retval false the purge needs to be suspended because of running out
+of file space. */
static __attribute__((nonnull, warn_unused_result))
bool
row_purge_remove_clust_if_poss(
/*===========================*/
purge_node_t* node) /*!< in/out: row purge node */
{
- switch (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
- case ROW_PURGE_DONE:
+ if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
return(true);
- case ROW_PURGE_SUSPEND:
- return(false);
- case ROW_PURGE_FAIL:
- break;
}
for (ulint n_tries = 0;
n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
n_tries++) {
- switch (row_purge_remove_clust_if_poss_low(
- node, BTR_MODIFY_TREE)) {
- case ROW_PURGE_DONE:
+ if (row_purge_remove_clust_if_poss_low(
+ node, BTR_MODIFY_TREE)) {
return(true);
- case ROW_PURGE_SUSPEND:
- return(false);
- case ROW_PURGE_FAIL:
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
}
+
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
}
return(false);
@@ -529,9 +504,8 @@ retry:
/***********************************************************//**
Purges a delete marking of a record.
@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended, either because of
-running out of file space or because the row refers to an off-page
-column and an online ALTER TABLE (table rebuild) is in progress. */
+@retval false the purge needs to be suspended because of
+running out of file space */
static __attribute__((nonnull, warn_unused_result))
bool
row_purge_del_mark(
@@ -567,10 +541,9 @@ row_purge_del_mark(
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field.
-@return true if purged, false if skipped */
-static __attribute__((nonnull, warn_unused_result))
-bool
+marked record if that record contained an externally stored field. */
+static
+void
row_purge_upd_exist_or_extern_func(
/*===============================*/
#ifdef UNIV_DEBUG
@@ -585,20 +558,6 @@ row_purge_upd_exist_or_extern_func(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
- if (dict_index_get_online_status(dict_table_get_first_index(
- node->table))
- == ONLINE_INDEX_CREATION) {
- for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
-
- const upd_field_t* ufield
- = upd_get_nth_field(node->update, i);
-
- if (dfield_is_ext(&ufield->new_val)) {
- return(false);
- }
- }
- }
-
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|| (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@@ -675,16 +634,7 @@ skip_secondaries:
index = dict_table_get_first_index(node->table);
mtr_x_lock(dict_index_get_lock(index), &mtr);
-#ifdef UNIV_DEBUG
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_CREATION:
- case ONLINE_INDEX_ABORTED_DROPPED:
- ut_ad(0);
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_ABORTED:
- break;
- }
-#endif /* UNIV_DEBUG */
+
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
@@ -714,8 +664,6 @@ skip_secondaries:
mtr_commit(&mtr);
}
}
-
- return(true);
}
#ifdef UNIV_DEBUG
@@ -771,7 +719,8 @@ row_purge_parse_undo_rec(
rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
- node->table = dict_table_open_on_id(table_id, FALSE, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, FALSE, DICT_TABLE_OP_NORMAL);
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
@@ -866,10 +815,7 @@ row_purge_record_func(
}
/* fall through */
case TRX_UNDO_UPD_EXIST_REC:
- purged = row_purge_upd_exist_or_extern(thr, node, undo_rec);
- if (!purged) {
- break;
- }
+ row_purge_upd_exist_or_extern(thr, node, undo_rec);
MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
break;
}
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 72e0bf43d77..79cced1c533 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -532,10 +532,11 @@ row_quiesce_table_start(
ut_a(table->id > 0);
- ulint count = 0;
-
- while (ibuf_contract_in_background(table->id, TRUE) != 0) {
- if (!(++count % 20)) {
+ for (ulint count = 0;
+ ibuf_contract_in_background(table->id, TRUE) != 0
+ && !trx_is_interrupted(trx);
+ ++count) {
+ if (!(count % 20)) {
ib_logf(IB_LOG_LEVEL_INFO,
"Merging change buffer entries for '%s'",
table_name);
@@ -610,7 +611,7 @@ row_quiesce_table_complete(
srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
- os_file_delete_if_exists(cfg_name);
+ os_file_delete_if_exists(innodb_file_data_key, cfg_name);
ib_logf(IB_LOG_LEVEL_INFO,
"Deleting the meta-data file '%s'", cfg_name);
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index cd98d13082b..690c6e958fe 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -57,6 +57,8 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "buf0lru.h"
#include "ha_prototypes.h"
+#include "m_string.h" /* for my_sys.h */
+#include "my_sys.h" /* DEBUG_SYNC_C */
#include "my_compare.h" /* enum icp_result */
@@ -2957,9 +2959,7 @@ row_sel_store_mysql_rec(
&& dict_index_is_clust(index)) {
prebuilt->fts_doc_id = fts_get_doc_id_from_rec(
- prebuilt->table,
- rec,
- prebuilt->heap);
+ prebuilt->table, rec, NULL);
}
return(TRUE);
@@ -4154,7 +4154,9 @@ wait_table_again:
}
rec_loop:
+ DEBUG_SYNC_C("row_search_rec_loop");
if (trx_is_interrupted(trx)) {
+ btr_pcur_store_position(pcur, &mtr);
err = DB_INTERRUPTED;
goto normal_return;
}
@@ -5333,7 +5335,7 @@ row_search_max_autoinc(
btr_pcur_open_at_index_side(
false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
+ if (!page_is_empty(btr_pcur_get_page(&pcur))) {
const rec_t* rec;
rec = row_search_autoinc_get_rec(&pcur, &mtr);
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 27881c1f4c3..7b50d8b62ae 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -79,12 +79,11 @@ row_undo_ins_remove_clust_rec(
mtr_start(&mtr);
- /* This is similar to row_undo_mod_clust(). Even though we
- call row_log_table_rollback() elsewhere, the DDL thread may
- already have copied this row to the sort buffers or to the new
- table. We must log the removal, so that the row will be
- correctly purged. However, we can log the removal out of sync
- with the B-tree modification. */
+ /* This is similar to row_undo_mod_clust(). The DDL thread may
+ already have copied this row from the log to the new table.
+ We must log the removal, so that the row will be correctly
+ purged. However, we can log the removal out of sync with the
+ B-tree modification. */
online = dict_index_is_online_ddl(index);
if (online) {
@@ -111,9 +110,7 @@ row_undo_ins_remove_clust_rec(
const ulint* offsets = rec_get_offsets(
rec, index, NULL, ULINT_UNDEFINED, &heap);
row_log_table_delete(
- rec, index, offsets,
- trx_read_trx_id(row_get_trx_id_offset(index, offsets)
- + rec));
+ rec, index, offsets, true, node->trx->id);
mem_heap_free(heap);
}
@@ -319,7 +316,8 @@ row_undo_ins_parse_undo_rec(
node->rec_type = type;
node->update = NULL;
- node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, dict_locked, DICT_TABLE_OP_NORMAL);
/* Skip the UNDO if we can't find the table or the .ibd file. */
if (UNIV_UNLIKELY(node->table == NULL)) {
@@ -441,14 +439,6 @@ row_undo_ins(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
-
- if (dict_index_is_online_ddl(node->index)) {
- /* Note that we are rolling back this transaction, so
- that all inserts and updates with this DB_TRX_ID can
- be skipped. */
- row_log_table_rollback(node->index, node->trx->id);
- }
-
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 2fd8a11b35a..efcd63a4d29 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -208,6 +208,36 @@ row_undo_mod_remove_clust_low(
return(DB_SUCCESS);
}
+ trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
+
+ if (!trx_id_offset) {
+ mem_heap_t* heap = NULL;
+ ulint trx_id_col;
+ const ulint* offsets;
+ ulint len;
+
+ trx_id_col = dict_index_get_sys_col_pos(
+ btr_cur_get_index(btr_cur), DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
+ NULL, trx_id_col + 1, &heap);
+
+ trx_id_offset = rec_get_nth_field_offs(
+ offsets, trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ mem_heap_free(heap);
+ }
+
+ if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
+ != node->new_trx_id) {
+ /* The record must have been purged and then replaced
+ with a different one. */
+ return(DB_SUCCESS);
+ }
+
/* We are about to remove an old, delete-marked version of the
record that may have been delete-marked by a different transaction
than the rolling-back one. */
@@ -323,7 +353,7 @@ row_undo_mod_clust(
case TRX_UNDO_UPD_DEL_REC:
row_log_table_delete(
btr_pcur_get_rec(pcur), index, offsets,
- node->trx->id);
+ true, node->trx->id);
break;
default:
ut_ad(0);
@@ -331,6 +361,9 @@ row_undo_mod_clust(
}
}
+ ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index)
+ == node->new_trx_id);
+
btr_pcur_commit_specify_mtr(pcur, &mtr);
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
@@ -1044,7 +1077,8 @@ row_undo_mod_parse_undo_rec(
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;
- node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
+ node->table = dict_table_open_on_id(
+ table_id, dict_locked, DICT_TABLE_OP_NORMAL);
/* TODO: other fixes associated with DROP TABLE + rollback in the
same table by another user */
@@ -1119,14 +1153,6 @@ row_undo_mod(
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
-
- if (dict_index_is_online_ddl(node->index)) {
- /* Note that we are rolling back this transaction, so
- that all inserts and updates with this DB_TRX_ID can
- be skipped. */
- row_log_table_rollback(node->index, node->trx->id);
- }
-
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index f97c0c3c82b..ccb905b36f4 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -2437,6 +2437,10 @@ row_upd_clust_step(
}
}
+ ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
+ btr_pcur_get_block(pcur),
+ page_rec_get_heap_no(rec)));
+
/* NOTE: the following function calls will also commit mtr */
if (node->is_delete) {