Diffstat (limited to 'storage/innobase/row')
-rw-r--r--  storage/innobase/row/row0ext.cc     |  32
-rw-r--r--  storage/innobase/row/row0ftsort.cc  |   4
-rw-r--r--  storage/innobase/row/row0import.cc  |   6
-rw-r--r--  storage/innobase/row/row0ins.cc     |  58
-rw-r--r--  storage/innobase/row/row0log.cc     | 529
-rw-r--r--  storage/innobase/row/row0merge.cc   | 383
-rw-r--r--  storage/innobase/row/row0mysql.cc   |  82
-rw-r--r--  storage/innobase/row/row0purge.cc   | 104
-rw-r--r--  storage/innobase/row/row0quiesce.cc |  11
-rw-r--r--  storage/innobase/row/row0sel.cc     |  12
-rw-r--r--  storage/innobase/row/row0uins.cc    |  28
-rw-r--r--  storage/innobase/row/row0umod.cc    |  48
-rw-r--r--  storage/innobase/row/row0upd.cc     |   4
13 files changed, 715 insertions, 586 deletions
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc index f084fa09c5a..32b78391d6a 100644 --- a/storage/innobase/row/row0ext.cc +++ b/storage/innobase/row/row0ext.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -58,14 +58,28 @@ row_ext_cache_fill( /* The BLOB pointer is not set: we cannot fetch it */ ext->len[i] = 0; } else { - /* Fetch at most ext->max_len of the column. - The column should be non-empty. However, - trx_rollback_or_clean_all_recovered() may try to - access a half-deleted BLOB if the server previously - crashed during the execution of - btr_free_externally_stored_field(). */ - ext->len[i] = btr_copy_externally_stored_field_prefix( - buf, ext->max_len, zip_size, field, f_len); + if (ext->max_len == REC_VERSION_56_MAX_INDEX_COL_LEN + && f_len > BTR_EXTERN_FIELD_REF_SIZE) { + /* In this case, the field is in B format or beyond, + (refer to the definition of row_ext_t.max_len) + and the field is already fill with prefix, otherwise + f_len would be BTR_EXTERN_FIELD_REF_SIZE. + So there is no need to re-read the prefix externally, + but just copy the local prefix to buf. Please note + if the ext->len[i] is zero, it means an error + as above. */ + memcpy(buf, field, f_len - BTR_EXTERN_FIELD_REF_SIZE); + ext->len[i] = f_len - BTR_EXTERN_FIELD_REF_SIZE; + } else { + /* Fetch at most ext->max_len of the column. + The column should be non-empty. However, + trx_rollback_or_clean_all_recovered() may try to + access a half-deleted BLOB if the server previously + crashed during the execution of + btr_free_externally_stored_field(). */ + ext->len[i] = btr_copy_externally_stored_field_prefix( + buf, ext->max_len, zip_size, field, f_len); + } } } diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 9a6af50e09d..275fedbfb5d 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -96,7 +96,7 @@ row_merge_create_fts_sort_index( field->prefix_len = 0; field->col = static_cast<dict_col_t*>( mem_heap_alloc(new_index->heap, sizeof(dict_col_t))); - field->col->len = fts_max_token_size; + field->col->len = FTS_MAX_WORD_LEN; if (strcmp(charset->name, "latin1_swedish_ci") == 0) { field->col->mtype = DATA_VARCHAR; @@ -450,7 +450,7 @@ row_merge_fts_doc_tokenize( field->type.prtype = word_dtype->prtype | DATA_NOT_NULL; /* Variable length field, set to max size. */ - field->type.len = fts_max_token_size; + field->type.len = FTS_MAX_WORD_LEN; field->type.mbminmaxlen = word_dtype->mbminmaxlen; cur_len += len; diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index f5eb31191a5..b753574158a 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1944,7 +1944,7 @@ PageConverter::update_index_page( page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0); - if (page_get_n_recs(block->frame) == 0) { + if (page_is_empty(block->frame)) { /* Only a root page can be empty. */ if (!is_root_page(block->frame)) { @@ -2269,7 +2269,7 @@ row_import_cleanup( DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE();); - log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); + log_make_checkpoint_at(LSN_MAX, TRUE); return(err); } diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index c1c27152831..49fb374e2aa 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1705,16 +1705,22 @@ do_possible_lock_wait: /* We had temporarily released dict_operation_lock in above lock sleep wait, now we have the lock again, and we will need to re-check whether the foreign key has been - dropped */ - for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST( - table->referenced_list); - check_foreign; - check_foreign = UT_LIST_GET_NEXT( - referenced_list, check_foreign)) { - if (check_foreign == foreign) { - verified = true; - break; + dropped. We only need to verify if the table is referenced + table case (check_ref == 0), since MDL lock will prevent + concurrent DDL and DML on the same table */ + if (!check_ref) { + for (const dict_foreign_t* check_foreign + = UT_LIST_GET_FIRST( table->referenced_list); + check_foreign; + check_foreign = UT_LIST_GET_NEXT( + referenced_list, check_foreign)) { + if (check_foreign == foreign) { + verified = true; + break; + } } + } else { + verified = true; } if (!verified) { @@ -1938,6 +1944,7 @@ row_ins_scan_sec_index_for_duplicate( do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); + ulint lock_type; if (page_rec_is_infimum(rec)) { @@ -1947,6 +1954,16 @@ row_ins_scan_sec_index_for_duplicate( offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &offsets_heap); + /* If the transaction isolation level is no stronger than + READ COMMITTED, then avoid gap locks. */ + if (!page_rec_is_supremum(rec) + && thr_get_trx(thr)->isolation_level + <= TRX_ISO_READ_COMMITTED) { + lock_type = LOCK_REC_NOT_GAP; + } else { + lock_type = LOCK_ORDINARY; + } + if (flags & BTR_NO_LOCKING_FLAG) { /* Set no locks when applying log in online table rebuild. */ @@ -1958,13 +1975,11 @@ row_ins_scan_sec_index_for_duplicate( INSERT ON DUPLICATE KEY UPDATE). 
*/ err = row_ins_set_exclusive_rec_lock( - LOCK_ORDINARY, block, - rec, index, offsets, thr); + lock_type, block, rec, index, offsets, thr); } else { err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, block, - rec, index, offsets, thr); + lock_type, block, rec, index, offsets, thr); } switch (err) { @@ -1990,6 +2005,19 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; + /* If the duplicate is on hidden FTS_DOC_ID, + state so in the error log */ + if (DICT_TF2_FLAG_IS_SET( + index->table, + DICT_TF2_FTS_HAS_DOC_ID) + && strcmp(index->name, + FTS_DOC_ID_INDEX_NAME) == 0) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Duplicate FTS_DOC_ID value" + " on table %s", + index->table->name); + } + goto end_scan; } } else { @@ -2484,7 +2512,7 @@ err_exit: DBUG_EXECUTE_IF( "row_ins_extern_checkpoint", log_make_checkpoint_at( - IB_ULONGLONG_MAX, TRUE);); + LSN_MAX, TRUE);); err = row_ins_index_entry_big_rec( entry, big_rec, offsets, &offsets_heap, index, thr_get_trx(thr)->mysql_thd, diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 01270300924..170358147b1 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,7 +38,7 @@ Created 2011-05-26 Marko Makela #include "que0que.h" #include "handler0alter.h" -#include<set> +#include<map> /** Table row modification operations during online table rebuild. Delete-marked records are not copied to the rebuilt table. */ @@ -72,18 +72,86 @@ static bool row_log_apply_print; /** Size of the modification log entry header, in bytes */ #define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/ -/** Log block for modifications during online index creation */ +/** Log block for modifications during online ALTER TABLE */ struct row_log_buf_t { byte* block; /*!< file block buffer */ mrec_buf_t buf; /*!< buffer for accessing a record that spans two blocks */ ulint blocks; /*!< current position in blocks */ ulint bytes; /*!< current position within buf */ + ulonglong total; /*!< logical position, in bytes from + the start of the row_log_table log; + 0 for row_log_online_op() and + row_log_apply(). */ }; -/** Set of transactions that rolled back inserts of BLOBs during -online table rebuild */ -typedef std::set<trx_id_t> trx_id_set; +/** Tracks BLOB allocation during online ALTER TABLE */ +class row_log_table_blob_t { +public: + /** Constructor (declaring a BLOB freed) + @param offset_arg row_log_t::tail::total */ +#ifdef UNIV_DEBUG + row_log_table_blob_t(ulonglong offset_arg) : + old_offset (0), free_offset (offset_arg), + offset (BLOB_FREED) {} +#else /* UNIV_DEBUG */ + row_log_table_blob_t() : + offset (BLOB_FREED) {} +#endif /* UNIV_DEBUG */ + + /** Declare a BLOB freed again. + @param offset_arg row_log_t::tail::total */ +#ifdef UNIV_DEBUG + void blob_free(ulonglong offset_arg) +#else /* UNIV_DEBUG */ + void blob_free() +#endif /* UNIV_DEBUG */ + { + ut_ad(offset < offset_arg); + ut_ad(offset != BLOB_FREED); + ut_d(old_offset = offset); + ut_d(free_offset = offset_arg); + offset = BLOB_FREED; + } + /** Declare a freed BLOB reused. 
+ @param offset_arg row_log_t::tail::total */ + void blob_alloc(ulonglong offset_arg) { + ut_ad(free_offset <= offset_arg); + ut_d(old_offset = offset); + offset = offset_arg; + } + /** Determine if a BLOB was freed at a given log position + @param offset_arg row_log_t::head::total after the log record + @return true if freed */ + bool is_freed(ulonglong offset_arg) const { + /* This is supposed to be the offset at the end of the + current log record. */ + ut_ad(offset_arg > 0); + /* We should never get anywhere close the magic value. */ + ut_ad(offset_arg < BLOB_FREED); + return(offset_arg < offset); + } +private: + /** Magic value for a freed BLOB */ + static const ulonglong BLOB_FREED = ~0ULL; +#ifdef UNIV_DEBUG + /** Old offset, in case a page was freed, reused, freed, ... */ + ulonglong old_offset; + /** Offset of last blob_free() */ + ulonglong free_offset; +#endif /* UNIV_DEBUG */ + /** Byte offset to the log file */ + ulonglong offset; +}; + +/** @brief Map of off-page column page numbers to 0 or log byte offsets. + +If there is no mapping for a page number, it is safe to access. +If a page number maps to 0, it is an off-page column that has been freed. +If a page number maps to a nonzero number, the number is a byte offset +into the index->online_log, indicating that the page is safe to access +when applying log records starting from that offset. */ +typedef std::map<ulint, row_log_table_blob_t> page_no_map; /** @brief Buffer for logging modifications during online index creation @@ -99,11 +167,12 @@ directly. When also head.bytes == tail.bytes, both counts will be reset to 0 and the file will be truncated. */ struct row_log_t { int fd; /*!< file descriptor */ - ib_mutex_t mutex; /*!< mutex protecting trx_log, error, + ib_mutex_t mutex; /*!< mutex protecting error, max_trx and tail */ - trx_id_set* trx_rb; /*!< set of transactions that rolled back - inserts of BLOBs during online table rebuild; - protected by mutex */ + page_no_map* blobs; /*!< map of page numbers of off-page columns + that have been freed during table-rebuilding + ALTER TABLE (row_log_table_*); protected by + index->lock X-latch only */ dict_table_t* table; /*!< table that is being rebuilt, or NULL when this is a secondary index that is being created online */ @@ -347,6 +416,7 @@ write_failed: ut_ad(b == log->tail.block + log->tail.bytes); } + log->tail.total += size; UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); mutex_exit(&log->mutex); } @@ -371,6 +441,7 @@ row_log_table_delete( dict_index_t* index, /*!< in/out: clustered index, S-latched or X-latched */ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ + bool purge, /*!< in: true=purging BLOBs */ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before it was deleted */ { @@ -460,6 +531,7 @@ row_log_table_delete( ut_ad(dfield_get_type(dfield)->prtype == (DATA_NOT_NULL | DATA_TRX_ID)); ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN); + dfield_dup(dfield, heap); trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id); } @@ -473,27 +545,25 @@ row_log_table_delete( mrec_size = 4 + old_pk_size; - /* If the row is marked as rollback, we will need to - log the enough prefix of the BLOB unless both the - old and new table are in COMPACT or REDUNDANT format */ - if ((dict_table_get_format(index->table) >= UNIV_FORMAT_B - || dict_table_get_format(new_table) >= UNIV_FORMAT_B) - && row_log_table_is_rollback(index, trx_id)) { - if (rec_offs_any_extern(offsets)) { - /* Build a cache of those off-page column - prefixes that are referenced by secondary 
- indexes. It can be that none of the off-page - columns are needed. */ - row_build(ROW_COPY_DATA, index, rec, - offsets, NULL, NULL, NULL, &ext, heap); - if (ext) { - /* Log the row_ext_t, ext->ext and ext->buf */ - ext_size = ext->n_ext * ext->max_len - + sizeof(*ext) - + ext->n_ext * sizeof(ulint) - + (ext->n_ext - 1) * sizeof ext->len; - mrec_size += ext_size; - } + /* Log enough prefix of the BLOB unless both the + old and new table are in COMPACT or REDUNDANT format, + which store the prefix in the clustered index record. */ + if (purge && rec_offs_any_extern(offsets) + && (dict_table_get_format(index->table) >= UNIV_FORMAT_B + || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) { + + /* Build a cache of those off-page column prefixes + that are referenced by secondary indexes. It can be + that none of the off-page columns are needed. */ + row_build(ROW_COPY_DATA, index, rec, + offsets, NULL, NULL, NULL, &ext, heap); + if (ext) { + /* Log the row_ext_t, ext->ext and ext->buf */ + ext_size = ext->n_ext * ext->max_len + + sizeof(*ext) + + ext->n_ext * sizeof(ulint) + + (ext->n_ext - 1) * sizeof ext->len; + mrec_size += ext_size; } } @@ -548,7 +618,7 @@ row_log_table_delete( /******************************************************//** Logs an insert or update to a table that is being rebuilt. */ -static __attribute__((nonnull(1,2,3))) +static void row_log_table_low_redundant( /*========================*/ @@ -557,7 +627,6 @@ row_log_table_low_redundant( page X-latched */ dict_index_t* index, /*!< in/out: clustered index, S-latched or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ bool insert, /*!< in: true if insert, false if update */ const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value @@ -578,6 +647,9 @@ row_log_table_low_redundant( ut_ad(!page_is_comp(page_align(rec))); ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec)); + ut_ad(dict_tf_is_valid(index->table->flags)); + ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */ + ut_ad(dict_index_is_clust(new_index)); heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields)); tuple = dtuple_create(heap, index->n_fields); @@ -712,7 +784,7 @@ row_log_table_low( if (!rec_offs_comp(offsets)) { row_log_table_low_redundant( - rec, index, offsets, insert, old_pk, new_index); + rec, index, insert, old_pk, new_index); return; } @@ -723,8 +795,8 @@ row_log_table_low( extra_size = rec_offs_extra_size(offsets) - omit_size; - mrec_size = rec_offs_size(offsets) - omit_size - + ROW_LOG_HEADER_SIZE + (extra_size >= 0x80); + mrec_size = ROW_LOG_HEADER_SIZE + + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size; if (insert || index->online_log->same_pk) { ut_ad(!old_pk); @@ -793,6 +865,93 @@ row_log_table_update( row_log_table_low(rec, index, offsets, false, old_pk); } +/** Gets the old table column of a PRIMARY KEY column. +@param table old table (before ALTER TABLE) +@param col_map mapping of old column numbers to new ones +@param col_no column position in the new table +@return old table column, or NULL if this is an added column */ +static +const dict_col_t* +row_log_table_get_pk_old_col( +/*=========================*/ + const dict_table_t* table, + const ulint* col_map, + ulint col_no) +{ + for (ulint i = 0; i < table->n_cols; i++) { + if (col_no == col_map[i]) { + return(dict_table_get_nth_col(table, i)); + } + } + + return(NULL); +} + +/** Maps an old table column of a PRIMARY KEY column. 
+@param col old table column (before ALTER TABLE) +@param ifield clustered index field in the new table (after ALTER TABLE) +@param dfield clustered index tuple field in the new table +@param heap memory heap for allocating dfield contents +@param rec clustered index leaf page record in the old table +@param offsets rec_get_offsets(rec) +@param i rec field corresponding to col +@param zip_size compressed page size of the old table, or 0 for uncompressed +@param max_len maximum length of dfield +@retval DB_INVALID_NULL if a NULL value is encountered +@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */ +static +dberr_t +row_log_table_get_pk_col( +/*=====================*/ + const dict_col_t* col, + const dict_field_t* ifield, + dfield_t* dfield, + mem_heap_t* heap, + const rec_t* rec, + const ulint* offsets, + ulint i, + ulint zip_size, + ulint max_len) +{ + const byte* field; + ulint len; + + ut_ad(ut_is_2pow(zip_size)); + + field = rec_get_nth_field(rec, offsets, i, &len); + + if (len == UNIV_SQL_NULL) { + return(DB_INVALID_NULL); + } + + if (rec_offs_nth_extern(offsets, i)) { + ulint field_len = ifield->prefix_len; + byte* blob_field; + + if (!field_len) { + field_len = ifield->fixed_len; + if (!field_len) { + field_len = max_len + 1; + } + } + + blob_field = static_cast<byte*>( + mem_heap_alloc(heap, field_len)); + + len = btr_copy_externally_stored_field_prefix( + blob_field, field_len, zip_size, field, len); + if (len >= max_len + 1) { + return(DB_TOO_BIG_INDEX_COL); + } + + dfield_set_data(dfield, blob_field, len); + } else { + dfield_set_data(dfield, mem_heap_dup(heap, field, len), len); + } + + return(DB_SUCCESS); +} + /******************************************************//** Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR of a table that is being rebuilt. @@ -865,95 +1024,69 @@ row_log_table_get_pk( dict_index_copy_types(tuple, new_index, tuple->n_fields); dtuple_set_n_fields_cmp(tuple, new_n_uniq); + const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table); + const ulint zip_size = dict_table_zip_size(index->table); + for (ulint new_i = 0; new_i < new_n_uniq; new_i++) { - dict_field_t* ifield; - dfield_t* dfield; - const dict_col_t* new_col; - const dict_col_t* col; - ulint col_no; - ulint i; - ulint len; - const byte* field; + dict_field_t* ifield; + dfield_t* dfield; + ulint prtype; + ulint mbminmaxlen; ifield = dict_index_get_nth_field(new_index, new_i); dfield = dtuple_get_nth_field(tuple, new_i); - new_col = dict_field_get_col(ifield); - col_no = new_col->ind; - - for (ulint old_i = 0; old_i < index->table->n_cols; - old_i++) { - if (col_no == log->col_map[old_i]) { - col_no = old_i; - goto copy_col; - } - } - - /* No matching column was found in the old - table, so this must be an added column. - Copy the default value. 
*/ - ut_ad(log->add_cols); - dfield_copy(dfield, - dtuple_get_nth_field( - log->add_cols, col_no)); - continue; - -copy_col: - col = dict_table_get_nth_col(index->table, col_no); - i = dict_col_get_clust_pos(col, index); + const ulint col_no + = dict_field_get_col(ifield)->ind; - if (i == ULINT_UNDEFINED) { - ut_ad(0); - log->error = DB_CORRUPTION; - tuple = NULL; - goto func_exit; - } + if (const dict_col_t* col + = row_log_table_get_pk_old_col( + index->table, log->col_map, col_no)) { + ulint i = dict_col_get_clust_pos(col, index); - field = rec_get_nth_field(rec, offsets, i, &len); - - if (len == UNIV_SQL_NULL) { - log->error = DB_INVALID_NULL; - tuple = NULL; - goto func_exit; - } - - if (rec_offs_nth_extern(offsets, i)) { - ulint field_len = ifield->prefix_len; - byte* blob_field; - const ulint max_len = - DICT_MAX_FIELD_LEN_BY_FORMAT( - new_table); - - if (!field_len) { - field_len = ifield->fixed_len; - if (!field_len) { - field_len = max_len + 1; - } + if (i == ULINT_UNDEFINED) { + ut_ad(0); + log->error = DB_CORRUPTION; + goto err_exit; } - blob_field = static_cast<byte*>( - mem_heap_alloc(*heap, field_len)); + log->error = row_log_table_get_pk_col( + col, ifield, dfield, *heap, + rec, offsets, i, zip_size, max_len); - len = btr_copy_externally_stored_field_prefix( - blob_field, field_len, - dict_table_zip_size(index->table), - field, len); - if (len == max_len + 1) { - log->error = DB_TOO_BIG_INDEX_COL; + if (log->error != DB_SUCCESS) { +err_exit: tuple = NULL; goto func_exit; } - dfield_set_data(dfield, blob_field, len); + mbminmaxlen = col->mbminmaxlen; + prtype = col->prtype; } else { - if (ifield->prefix_len - && ifield->prefix_len < len) { - len = ifield->prefix_len; - } + /* No matching column was found in the old + table, so this must be an added column. + Copy the default value. */ + ut_ad(log->add_cols); + + dfield_copy(dfield, dtuple_get_nth_field( + log->add_cols, col_no)); + mbminmaxlen = dfield->type.mbminmaxlen; + prtype = dfield->type.prtype; + } + + ut_ad(!dfield_is_ext(dfield)); + ut_ad(!dfield_is_null(dfield)); - dfield_set_data( - dfield, - mem_heap_dup(*heap, field, len), len); + if (ifield->prefix_len) { + ulint len = dtype_get_at_most_n_mbchars( + prtype, mbminmaxlen, + ifield->prefix_len, + dfield_get_len(dfield), + static_cast<const char*>( + dfield_get_data(dfield))); + + ut_ad(len <= dfield_get_len(dfield)); + dfield_set_len(dfield, len); } } @@ -988,66 +1121,80 @@ row_log_table_insert( } /******************************************************//** -Notes that a transaction is being rolled back. */ +Notes that a BLOB is being freed during online ALTER TABLE. */ UNIV_INTERN void -row_log_table_rollback( -/*===================*/ - dict_index_t* index, /*!< in/out: clustered index */ - trx_id_t trx_id) /*!< in: transaction being rolled back */ +row_log_table_blob_free( +/*====================*/ + dict_index_t* index, /*!< in/out: clustered index, X-latched */ + ulint page_no)/*!< in: starting page number of the BLOB */ { ut_ad(dict_index_is_clust(index)); -#ifdef UNIV_DEBUG - ibool corrupt = FALSE; - ut_ad(trx_rw_is_active(trx_id, &corrupt)); - ut_ad(!corrupt); -#endif /* UNIV_DEBUG */ + ut_ad(dict_index_is_online_ddl(index)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(page_no != FIL_NULL); - /* Protect transitions of index->online_status and access to - index->online_log. 
*/ - rw_lock_s_lock(&index->lock); + if (index->online_log->error != DB_SUCCESS) { + return; + } - if (dict_index_is_online_ddl(index)) { - ut_ad(index->online_log); - ut_ad(index->online_log->table); - mutex_enter(&index->online_log->mutex); - trx_id_set* trxs = index->online_log->trx_rb; + page_no_map* blobs = index->online_log->blobs; - if (!trxs) { - index->online_log->trx_rb = trxs = new trx_id_set(); - } + if (!blobs) { + index->online_log->blobs = blobs = new page_no_map(); + } - trxs->insert(trx_id); +#ifdef UNIV_DEBUG + const ulonglong log_pos = index->online_log->tail.total; +#else +# define log_pos /* empty */ +#endif /* UNIV_DEBUG */ - mutex_exit(&index->online_log->mutex); - } + const page_no_map::value_type v(page_no, + row_log_table_blob_t(log_pos)); - rw_lock_s_unlock(&index->lock); + std::pair<page_no_map::iterator,bool> p = blobs->insert(v); + + if (!p.second) { + /* Update the existing mapping. */ + ut_ad(p.first->first == page_no); + p.first->second.blob_free(log_pos); + } +#undef log_pos } /******************************************************//** -Check if a transaction rollback has been initiated. -@return true if inserts of this transaction were rolled back */ +Notes that a BLOB is being allocated during online ALTER TABLE. */ UNIV_INTERN -bool -row_log_table_is_rollback( -/*======================*/ - const dict_index_t* index, /*!< in: clustered index */ - trx_id_t trx_id) /*!< in: transaction id */ +void +row_log_table_blob_alloc( +/*=====================*/ + dict_index_t* index, /*!< in/out: clustered index, X-latched */ + ulint page_no)/*!< in: starting page number of the BLOB */ { ut_ad(dict_index_is_clust(index)); ut_ad(dict_index_is_online_ddl(index)); - ut_ad(index->online_log); - - if (const trx_id_set* trxs = index->online_log->trx_rb) { - mutex_enter(&index->online_log->mutex); - bool is_rollback = trxs->find(trx_id) != trxs->end(); - mutex_exit(&index->online_log->mutex); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(page_no != FIL_NULL); - return(is_rollback); + if (index->online_log->error != DB_SUCCESS) { + return; } - return(false); + /* Only track allocations if the same page has been freed + earlier. Double allocation without a free is not allowed. */ + if (page_no_map* blobs = index->online_log->blobs) { + page_no_map::iterator p = blobs->find(page_no); + + if (p != blobs->end()) { + ut_ad(p->first == page_no); + p->second.blob_alloc(index->online_log->tail.total); + } + } } /******************************************************//** @@ -1069,17 +1216,6 @@ row_log_table_apply_convert_mrec( { dtuple_t* row; -#ifdef UNIV_SYNC_DEBUG - /* This prevents BLOBs from being freed, in case an insert - transaction rollback starts after row_log_table_is_rollback(). */ - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (row_log_table_is_rollback(index, trx_id)) { - row = NULL; - goto func_exit; - } - /* This is based on row_build(). 
*/ if (log->add_cols) { row = dtuple_copy(log->add_cols, heap); @@ -1121,15 +1257,43 @@ row_log_table_apply_convert_mrec( dfield_t* dfield = dtuple_get_nth_field(row, col_no); ulint len; - const void* data; + const byte* data= NULL; if (rec_offs_nth_extern(offsets, i)) { ut_ad(rec_offs_any_extern(offsets)); - data = btr_rec_copy_externally_stored_field( - mrec, offsets, - dict_table_zip_size(index->table), - i, &len, heap); - ut_a(data); + rw_lock_x_lock(dict_index_get_lock(index)); + + if (const page_no_map* blobs = log->blobs) { + data = rec_get_nth_field( + mrec, offsets, i, &len); + ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); + + ulint page_no = mach_read_from_4( + data + len - (BTR_EXTERN_FIELD_REF_SIZE + - BTR_EXTERN_PAGE_NO)); + page_no_map::const_iterator p = blobs->find( + page_no); + if (p != blobs->end() + && p->second.is_freed(log->head.total)) { + /* This BLOB has been freed. + We must not access the row. */ + row = NULL; + } + } + + if (row) { + data = btr_rec_copy_externally_stored_field( + mrec, offsets, + dict_table_zip_size(index->table), + i, &len, heap); + ut_a(data); + } + + rw_lock_x_unlock(dict_index_get_lock(index)); + + if (!row) { + goto func_exit; + } } else { data = rec_get_nth_field(mrec, offsets, i, &len); } @@ -1685,7 +1849,7 @@ delete_insert: | BTR_KEEP_POS_FLAG, btr_pcur_get_btr_cur(&pcur), &cur_offsets, &offsets_heap, heap, &big_rec, - update, 0, NULL, 0, &mtr); + update, 0, thr, 0, &mtr); if (big_rec) { if (error == DB_SUCCESS) { @@ -1783,7 +1947,7 @@ row_log_table_apply_op( ulint* offsets) /*!< in/out: work area for parsing mrec */ { - const row_log_t*log = dup->index->online_log; + row_log_t* log = dup->index->online_log; dict_index_t* new_index = dict_table_get_first_index(log->table); ulint extra_size; const mrec_t* next_mrec; @@ -1793,6 +1957,7 @@ row_log_table_apply_op( ut_ad(dict_index_is_clust(dup->index)); ut_ad(dup->index->table != log->table); + ut_ad(log->head.total <= log->tail.total); *error = DB_SUCCESS; @@ -1801,6 +1966,8 @@ row_log_table_apply_op( return(NULL); } + const mrec_t* const mrec_start = mrec; + switch (*mrec++) { default: ut_ad(0); @@ -1830,6 +1997,8 @@ row_log_table_apply_op( if (next_mrec > mrec_end) { return(NULL); } else { + log->head.total += next_mrec - mrec_start; + ulint len; const byte* db_trx_id = rec_get_nth_field( @@ -1863,6 +2032,8 @@ row_log_table_apply_op( return(NULL); } + log->head.total += next_mrec - mrec_start; + /* If there are external fields, retrieve those logged prefix info and reconstruct the row_ext_t */ if (ext_size) { @@ -2019,6 +2190,7 @@ row_log_table_apply_op( } ut_ad(next_mrec <= mrec_end); + log->head.total += next_mrec - mrec_start; dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq); { @@ -2036,6 +2208,7 @@ row_log_table_apply_op( break; } + ut_ad(log->head.total <= log->tail.total); mem_heap_empty(offsets_heap); mem_heap_empty(heap); return(next_mrec); @@ -2423,6 +2596,10 @@ row_log_table_apply( }; error = row_log_table_apply_ops(thr, &dup); + + ut_ad(error != DB_SUCCESS + || clust_index->online_log->head.total + == clust_index->online_log->tail.total); } rw_lock_x_unlock(dict_index_get_lock(clust_index)); @@ -2451,6 +2628,7 @@ row_log_allocate( byte* buf; row_log_t* log; ulint size; + DBUG_ENTER("row_log_allocate"); ut_ad(!dict_index_is_online_ddl(index)); ut_ad(dict_index_is_clust(index) == !!table); @@ -2464,7 +2642,7 @@ row_log_allocate( size = 2 * srv_sort_buf_size + sizeof *log; buf = (byte*) os_mem_alloc_large(&size); if (!buf) { - return(false); + DBUG_RETURN(false); } log = (row_log_t*) 
&buf[2 * srv_sort_buf_size]; @@ -2472,11 +2650,11 @@ row_log_allocate( log->fd = row_merge_file_create_low(); if (log->fd < 0) { os_mem_free_large(buf, size); - return(false); + DBUG_RETURN(false); } mutex_create(index_online_log_key, &log->mutex, SYNC_INDEX_ONLINE_LOG); - log->trx_rb = NULL; + log->blobs = NULL; log->table = table; log->same_pk = same_pk; log->add_cols = add_cols; @@ -2486,7 +2664,9 @@ row_log_allocate( log->head.block = buf; log->tail.block = buf + srv_sort_buf_size; log->tail.blocks = log->tail.bytes = 0; + log->tail.total = 0; log->head.blocks = log->head.bytes = 0; + log->head.total = 0; dict_index_set_online_status(index, ONLINE_INDEX_CREATION); index->online_log = log; @@ -2495,7 +2675,7 @@ row_log_allocate( atomic operations in both cases. */ MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX); - return(true); + DBUG_RETURN(true); } /******************************************************//** @@ -2508,7 +2688,7 @@ row_log_free( { MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX); - delete log->trx_rb; + delete log->blobs; row_merge_file_destroy_low(log->fd); mutex_free(&log->mutex); os_mem_free_large(log->head.block, log->size); @@ -3183,6 +3363,7 @@ row_log_apply( dberr_t error; row_log_t* log; row_merge_dup_t dup = { index, table, NULL, 0 }; + DBUG_ENTER("row_log_apply"); ut_ad(dict_index_is_online_ddl(index)); ut_ad(!dict_index_is_clust(index)); @@ -3225,5 +3406,5 @@ row_log_apply( row_log_free(log); - return(error); + DBUG_RETURN(error); } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index a509e2c5ca8..a0c0fd2c8c3 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -260,14 +260,15 @@ row_merge_buf_add( ulint bucket = 0; doc_id_t write_doc_id; ulint n_row_added = 0; + DBUG_ENTER("row_merge_buf_add"); if (buf->n_tuples >= buf->max_tuples) { - return(0); + DBUG_RETURN(0); } DBUG_EXECUTE_IF( "ib_row_merge_buf_add_two", - if (buf->n_tuples >= 2) return(0);); + if (buf->n_tuples >= 2) DBUG_RETURN(0);); UNIV_PREFETCH_R(row->fields); @@ -325,18 +326,12 @@ row_merge_buf_add( fts_doc_item_t* doc_item; byte* value; - if (dfield_is_null(field)) { - n_row_added = 1; - continue; - } - - doc_item = static_cast<fts_doc_item_t*>( - mem_heap_alloc( - buf->heap, - sizeof(fts_doc_item_t))); - /* fetch Doc ID if it already exists - in the row, and not supplied by the caller */ + in the row, and not supplied by the + caller. Even if the value column is + NULL, we still need to get the Doc + ID so to maintain the correct max + Doc ID */ if (*doc_id == 0) { const dfield_t* doc_field; doc_field = dtuple_get_nth_field( @@ -347,14 +342,23 @@ row_merge_buf_add( dfield_get_data(doc_field))); if (*doc_id == 0) { - fprintf(stderr, "InnoDB FTS: " - "User supplied Doc ID " - "is zero. Record " - "Skipped\n"); - return(0); + ib_logf(IB_LOG_LEVEL_WARN, + "FTS Doc ID is zero. 
" + "Record Skipped"); + DBUG_RETURN(0); } } + if (dfield_is_null(field)) { + n_row_added = 1; + continue; + } + + doc_item = static_cast<fts_doc_item_t*>( + mem_heap_alloc( + buf->heap, + sizeof(*doc_item))); + value = static_cast<byte*>( ut_malloc(field->len)); memcpy(value, field->data, field->len); @@ -458,7 +462,7 @@ row_merge_buf_add( /* If this is FTS index, we already populated the sort buffer, return here */ if (index->type & DICT_FTS) { - return(n_row_added); + DBUG_RETURN(n_row_added); } #ifdef UNIV_DEBUG @@ -484,7 +488,7 @@ row_merge_buf_add( /* Reserve one byte for the end marker of row_merge_block_t. */ if (buf->total_size + data_size >= srv_sort_buf_size - 1) { - return(0); + DBUG_RETURN(0); } buf->total_size += data_size; @@ -499,7 +503,7 @@ row_merge_buf_add( dfield_dup(field++, buf->heap); } while (--n_fields); - return(n_row_added); + DBUG_RETURN(n_row_added); } /*************************************************************//** @@ -1180,6 +1184,7 @@ row_merge_read_clustered_index( os_event_t fts_parallel_sort_event = NULL; ibool fts_pll_sort = FALSE; ib_int64_t sig_count = 0; + DBUG_ENTER("row_merge_read_clustered_index"); ut_ad((old_table == new_table) == !col_map); ut_ad(!add_cols || col_map); @@ -1396,13 +1401,26 @@ end_of_index: offsets = rec_get_offsets(rec, clust_index, NULL, ULINT_UNDEFINED, &row_heap); - if (online && new_table != old_table) { - /* When rebuilding the table online, perform a - REPEATABLE READ, so that row_log_table_apply() - will not see a newer state of the table when - applying the log. This is mainly to prevent - false duplicate key errors, because the log - will identify records by the PRIMARY KEY. */ + if (online) { + /* Perform a REPEATABLE READ. + + When rebuilding the table online, + row_log_table_apply() must not see a newer + state of the table when applying the log. + This is mainly to prevent false duplicate key + errors, because the log will identify records + by the PRIMARY KEY, and also to prevent unsafe + BLOB access. + + When creating a secondary index online, this + table scan must not see records that have only + been inserted to the clustered index, but have + not been written to the online_log of + index[]. If we performed READ UNCOMMITTED, it + could happen that the ADD INDEX reaches + ONLINE_INDEX_COMPLETE state between the time + the DML thread has updated the clustered index + but has not yet accessed secondary index. */ ut_ad(trx->read_view); if (!read_view_sees_trx_id( @@ -1445,38 +1463,13 @@ end_of_index: would make it tricky to detect duplicate keys. */ continue; - } else if (UNIV_LIKELY_NULL(rec_offs_any_null_extern( - rec, offsets))) { - /* This is essentially a READ UNCOMMITTED to - fetch the most recent version of the record. */ -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - trx_id_t trx_id; - ulint trx_id_offset; - - /* It is possible that the record was - just inserted and the off-page columns - have not yet been written. We will - ignore the record if this is the case, - because it should be covered by the - index->info.online log in that case. */ - - trx_id_offset = clust_index->trx_id_offset; - if (!trx_id_offset) { - trx_id_offset = row_get_trx_id_offset( - clust_index, offsets); - } - - trx_id = trx_read_trx_id(rec + trx_id_offset); - ut_a(trx_rw_is_active(trx_id, NULL)); - ut_a(trx_undo_trx_id_is_insert(rec + trx_id_offset)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - /* When !online, we are holding an X-lock on - old_table, preventing any inserts. 
*/ - ut_ad(online); - continue; } + /* When !online, we are holding a lock on old_table, preventing + any inserts that could have written a record 'stub' before + writing out off-page columns. */ + ut_ad(!rec_offs_any_null_extern(rec, offsets)); + /* Build a row based on the clustered index. */ row = row_build(ROW_COPY_POINTERS, clust_index, @@ -1692,10 +1685,16 @@ all_done: DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n"); #endif if (fts_pll_sort) { + bool all_exit = false; + ulint trial_count = 0; + const ulint max_trial_count = 10000; + + /* Tell all children that parent has done scanning */ for (ulint i = 0; i < fts_sort_pll_degree; i++) { psort_info[i].state = FTS_PARENT_COMPLETE; } wait_again: + /* Now wait all children to report back to be completed */ os_event_wait_time_low(fts_parallel_sort_event, 1000000, sig_count); @@ -1707,6 +1706,31 @@ wait_again: goto wait_again; } } + + /* Now all children should complete, wait a bit until + they all finish setting the event, before we free everything. + This has a 10 second timeout */ + do { + all_exit = true; + + for (ulint j = 0; j < fts_sort_pll_degree; j++) { + if (psort_info[j].child_status + != FTS_CHILD_EXITING) { + all_exit = false; + os_thread_sleep(1000); + break; + } + } + trial_count++; + } while (!all_exit && trial_count < max_trial_count); + + if (!all_exit) { + ut_ad(0); + ib_logf(IB_LOG_LEVEL_FATAL, + "Not all child sort threads exited" + " when creating FTS index '%s'", + fts_sort_idx->name); + } } #ifdef FTS_INTERNAL_DIAG_PRINT @@ -1731,7 +1755,7 @@ wait_again: trx->op_info = ""; - return(err); + DBUG_RETURN(err); } /** Write a record via buffer 2 and read the next record to buffer N. @@ -2092,13 +2116,14 @@ row_merge_sort( ulint num_runs; ulint* run_offset; dberr_t error = DB_SUCCESS; + DBUG_ENTER("row_merge_sort"); /* Record the number of merge runs we need to perform */ num_runs = file->offset; /* If num_runs are less than 1, nothing to merge */ if (num_runs <= 1) { - return(error); + DBUG_RETURN(error); } /* "run_offset" records each run's first offset number */ @@ -2126,24 +2151,7 @@ row_merge_sort( mem_free(run_offset); - return(error); -} - -/*************************************************************//** -Set blob fields empty */ -static __attribute__((nonnull)) -void -row_merge_set_blob_empty( -/*=====================*/ - dtuple_t* tuple) /*!< in/out: data tuple */ -{ - for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) { - dfield_t* field = dtuple_get_nth_field(tuple, i); - - if (dfield_is_ext(field)) { - dfield_set_data(field, NULL, 0); - } - } + DBUG_RETURN(error); } /*************************************************************//** @@ -2211,6 +2219,7 @@ row_merge_insert_index_tuples( ulint foffs = 0; ulint* offsets; mrec_buf_t* buf; + DBUG_ENTER("row_merge_insert_index_tuples"); ut_ad(!srv_read_only_mode); ut_ad(!(index->type & DICT_FTS)); @@ -2272,52 +2281,31 @@ row_merge_insert_index_tuples( if (!n_ext) { /* There are no externally stored columns. */ - } else if (!dict_index_is_online_ddl(old_index)) { + } else { ut_ad(dict_index_is_clust(index)); - /* Modifications to the table are - blocked while we are not rebuilding it - or creating indexes. Off-page columns - can be fetched safely. */ + /* Off-page columns can be fetched safely + when concurrent modifications to the table + are disabled. (Purge can process delete-marked + records, but row_merge_read_clustered_index() + would have skipped them.) 
+ + When concurrent modifications are enabled, + row_merge_read_clustered_index() will + only see rows from transactions that were + committed before the ALTER TABLE started + (REPEATABLE READ). + + Any modifications after the + row_merge_read_clustered_index() scan + will go through row_log_table_apply(). + Any modifications to off-page columns + will be tracked by + row_log_table_blob_alloc() and + row_log_table_blob_free(). */ row_merge_copy_blobs( mrec, offsets, dict_table_zip_size(old_table), dtuple, tuple_heap); - } else { - ut_ad(dict_index_is_clust(index)); - - ulint offset = index->trx_id_offset; - - if (!offset) { - offset = row_get_trx_id_offset( - index, offsets); - } - - /* Copy the off-page columns while - holding old_index->lock, so - that they cannot be freed by - a rollback of a fresh insert. */ - rw_lock_s_lock(&old_index->lock); - - if (row_log_table_is_rollback( - old_index, - trx_read_trx_id(mrec + offset))) { - /* The row and BLOB could - already be freed. They - will be deleted by - row_undo_ins_remove_clust_rec - when rolling back a fresh - insert. So, no need to retrieve - the off-page column. */ - row_merge_set_blob_empty( - dtuple); - } else { - row_merge_copy_blobs( - mrec, offsets, - dict_table_zip_size(old_table), - dtuple, tuple_heap); - } - - rw_lock_s_unlock(&old_index->lock); } ut_ad(dtuple_validate(dtuple)); @@ -2415,7 +2403,7 @@ err_exit: mem_heap_free(ins_heap); mem_heap_free(heap); - return(error); + DBUG_RETURN(error); } /*********************************************************************//** @@ -2903,7 +2891,7 @@ row_merge_file_create_low(void) if (fd < 0) { ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create temporary merge file"); - return -1; + return (-1); } return(fd); } @@ -3114,48 +3102,34 @@ will not be committed. @return error code or DB_SUCCESS */ UNIV_INTERN dberr_t -row_merge_rename_tables( -/*====================*/ +row_merge_rename_tables_dict( +/*=========================*/ dict_table_t* old_table, /*!< in/out: old table, renamed to tmp_name */ dict_table_t* new_table, /*!< in/out: new table, renamed to old_table->name */ const char* tmp_name, /*!< in: new name for old_table */ - trx_t* trx) /*!< in: transaction handle */ + trx_t* trx) /*!< in/out: dictionary transaction */ { dberr_t err = DB_ERROR; pars_info_t* info; - char old_name[MAX_FULL_NAME_LEN + 1]; ut_ad(!srv_read_only_mode); ut_ad(old_table != new_table); ut_ad(mutex_own(&dict_sys->mutex)); ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE); - - /* store the old/current name to an automatic variable */ - if (strlen(old_table->name) + 1 <= sizeof(old_name)) { - memcpy(old_name, old_table->name, strlen(old_table->name) + 1); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Too long table name: '%s', max length is %d", - old_table->name, MAX_FULL_NAME_LEN); - ut_error; - } + ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE + || trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); trx->op_info = "renaming tables"; - DBUG_EXECUTE_IF( - "ib_rebuild_cannot_rename", - err = DB_ERROR; goto err_exit;); - /* We use the private SQL parser of Innobase to generate the query graphs needed in updating the dictionary data in system tables. 
*/ info = pars_info_create(); pars_info_add_str_literal(info, "new_name", new_table->name); - pars_info_add_str_literal(info, "old_name", old_name); + pars_info_add_str_literal(info, "old_name", old_table->name); pars_info_add_str_literal(info, "tmp_name", tmp_name); err = que_eval_sql(info, @@ -3200,11 +3174,12 @@ row_merge_rename_tables( table is in a non-system tablespace where space > 0. */ if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) { /* Make pathname to update SYS_DATAFILES. */ - char* old_path = row_make_new_pathname(new_table, old_name); + char* old_path = row_make_new_pathname( + new_table, old_table->name); info = pars_info_create(); - pars_info_add_str_literal(info, "old_name", old_name); + pars_info_add_str_literal(info, "old_name", old_table->name); pars_info_add_str_literal(info, "old_path", old_path); pars_info_add_int4_literal(info, "new_space", (lint) new_table->space); @@ -3223,75 +3198,9 @@ row_merge_rename_tables( mem_free(old_path); } - if (err != DB_SUCCESS) { - goto err_exit; - } - - /* Generate the redo logs for file operations */ - fil_mtr_rename_log(old_table->space, old_name, - new_table->space, new_table->name, tmp_name); - - /* What if the redo logs are flushed to disk here? This is - tested with following crash point */ - DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - - /* File operations cannot be rolled back. So, before proceeding - with file operations, commit the dictionary changes.*/ - trx_commit_for_mysql(trx); - - /* If server crashes here, the dictionary in InnoDB and MySQL - will differ. The .ibd files and the .frm files must be swapped - manually by the administrator. No loss of data. */ - DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE();); - - /* Ensure that the redo logs are flushed to disk. The config - innodb_flush_log_at_trx_commit must not affect this. */ - log_buffer_flush_to_disk(); - - /* The following calls will also rename the .ibd data files if - the tables are stored in a single-table tablespace */ - - err = dict_table_rename_in_cache(old_table, tmp_name, FALSE); - - if (err == DB_SUCCESS) { - - ut_ad(dict_table_is_discarded(old_table) - == dict_table_is_discarded(new_table)); - - err = dict_table_rename_in_cache(new_table, old_name, FALSE); - - if (err != DB_SUCCESS) { - - if (dict_table_rename_in_cache( - old_table, old_name, FALSE) - != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot undo the rename in cache " - "from %s to %s", old_name, tmp_name); - } - - goto err_exit; - } - - if (dict_table_is_discarded(new_table)) { - - err = row_import_update_discarded_flag( - trx, new_table->id, true, true); - } - } - - DBUG_EXECUTE_IF("ib_rebuild_cannot_load_fk", - err = DB_ERROR; goto err_exit;); - - err = dict_load_foreigns(old_name, FALSE, TRUE); - - if (err != DB_SUCCESS) { -err_exit: - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; + if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) { + err = row_import_update_discarded_flag( + trx, new_table->id, true, true); } trx->op_info = ""; @@ -3417,7 +3326,7 @@ row_merge_is_index_usable( /*********************************************************************//** Drop a table. The caller must have ensured that the background stats thread is not processing the table. 
This can be done by calling -dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and +dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and before calling this function. @return DB_SUCCESS or error code */ UNIV_INTERN @@ -3475,11 +3384,12 @@ row_merge_build_indexes( ulint i; ulint j; dberr_t error; - int tmpfd; + int tmpfd = -1; dict_index_t* fts_sort_idx = NULL; fts_psort_t* psort_info = NULL; fts_psort_t* merge_info = NULL; ib_int64_t sig_count = 0; + DBUG_ENTER("row_merge_build_indexes"); ut_ad(!srv_read_only_mode); ut_ad((old_table == new_table) == !col_map); @@ -3493,7 +3403,7 @@ row_merge_build_indexes( os_mem_alloc_large(&block_size)); if (block == NULL) { - return(DB_OUT_OF_MEMORY); + DBUG_RETURN(DB_OUT_OF_MEMORY); } trx_start_if_not_started_xa(trx); @@ -3501,6 +3411,14 @@ row_merge_build_indexes( merge_files = static_cast<merge_file_t*>( mem_alloc(n_indexes * sizeof *merge_files)); + /* Initialize all the merge file descriptors, so that we + don't call row_merge_file_destroy() on uninitialized + merge file descriptor */ + + for (i = 0; i < n_indexes; i++) { + merge_files[i].fd = -1; + } + for (i = 0; i < n_indexes; i++) { if (row_merge_file_create(&merge_files[i]) < 0) { error = DB_OUT_OF_MEMORY; @@ -3565,41 +3483,16 @@ row_merge_build_indexes( if (indexes[i]->type & DICT_FTS) { os_event_t fts_parallel_merge_event; - bool all_exit = false; - ulint trial_count = 0; sort_idx = fts_sort_idx; - /* Now all children should complete, wait - a bit until they all finish using event */ - while (!all_exit && trial_count < 10000) { - all_exit = true; - - for (j = 0; j < fts_sort_pll_degree; - j++) { - if (psort_info[j].child_status - != FTS_CHILD_EXITING) { - all_exit = false; - os_thread_sleep(1000); - break; - } - } - trial_count++; - } - - if (!all_exit) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Not all child sort threads exited" - " when creating FTS index '%s'", - indexes[i]->name); - } - fts_parallel_merge_event = merge_info[0].psort_common->merge_event; if (FTS_PLL_MERGE) { - trial_count = 0; - all_exit = false; + ulint trial_count = 0; + bool all_exit = false; + os_event_reset(fts_parallel_merge_event); row_fts_start_parallel_merge(merge_info); wait_again: @@ -3763,5 +3656,5 @@ func_exit: } } - return(error); + DBUG_RETURN(error); } diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 808bd0aaeb5..9aceb305493 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2013, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -62,6 +62,7 @@ Created 9/17/2000 Heikki Tuuri #include "row0import.h" #include "m_string.h" #include "my_sys.h" +#include "ha_prototypes.h" /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; @@ -618,8 +619,8 @@ handle_new_error: case DB_INTERRUPTED: case DB_DICT_CHANGED: if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ + /* Roll back the latest, possibly incomplete insertion + or update */ trx_rollback_to_savepoint(trx, savept); } @@ -2521,7 +2522,8 @@ row_table_add_foreign_constraints( if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ - err = dict_load_foreigns(name, FALSE, TRUE); + err = dict_load_foreigns(name, NULL, false, true, + DICT_ERR_IGNORE_NONE); } if (err != DB_SUCCESS) { @@ -2801,7 +2803,7 @@ row_discard_tablespace_begin( name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); if (table) { - dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx); + dict_stats_wait_bg_to_stop_using_table(table, trx); ut_a(table->space != TRX_SYS_SPACE); ut_a(table->n_foreign_key_checks_running == 0); } @@ -2874,13 +2876,13 @@ row_discard_tablespace_end( } DBUG_EXECUTE_IF("ib_discard_before_commit_crash", - log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); + log_make_checkpoint_at(LSN_MAX, TRUE); DBUG_SUICIDE();); trx_commit_for_mysql(trx); DBUG_EXECUTE_IF("ib_discard_after_commit_crash", - log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); + log_make_checkpoint_at(LSN_MAX, TRUE); DBUG_SUICIDE();); row_mysql_unlock_data_dictionary(trx); @@ -3246,7 +3248,7 @@ row_truncate_table_for_mysql( ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx); + dict_stats_wait_bg_to_stop_using_table(table, trx); /* Check if the table is referenced by foreign key constraints from some other table (not the table itself) */ @@ -3796,8 +3798,8 @@ row_drop_table_for_mysql( tables since we know temp tables do not use persistent stats. */ if (!dict_table_is_temporary(table)) { - dict_stats_wait_bg_to_stop_using_tables( - table, NULL, trx); + dict_stats_wait_bg_to_stop_using_table( + table, trx); } } @@ -4167,6 +4169,11 @@ check_next_foreign: DICT_TF2_FTS flag set. So keep this out of above dict_table_has_fts_index condition */ if (table->fts) { + /* Need to set TABLE_DICT_LOCKED bit, since + fts_que_graph_free_check_lock would try to acquire + dict mutex lock */ + table->fts->fts_status |= TABLE_DICT_LOCKED; + fts_free(table); } @@ -4503,14 +4510,31 @@ loop: } - if (row_is_mysql_tmp_table_name(table->name)) { - /* There could be an orphan temp table left from - interupted alter table rebuild operation */ - dict_table_close(table, TRUE, FALSE); - } else { - ut_a(!table->can_be_evicted || table->ibd_file_missing); + if (!row_is_mysql_tmp_table_name(table->name)) { + /* There could be orphan temp tables left from + interrupted alter table. Leave them, and handle + the rest.*/ + if (table->can_be_evicted) { + ib_logf(IB_LOG_LEVEL_WARN, + "Orphan table encountered during " + "DROP DATABASE. 
This is possible if " + "'%s.frm' was lost.", table->name); + } + + if (table->ibd_file_missing) { + ib_logf(IB_LOG_LEVEL_WARN, + "Missing %s.ibd file for table %s.", + table->name, table->name); + } } + dict_table_close(table, TRUE, FALSE); + + /* The dict_table_t object must not be accessed before + dict_table_open() or after dict_table_close(). But this is OK + if we are holding, the dict_sys->mutex. */ + ut_ad(mutex_own(&dict_sys->mutex)); + /* Wait until MySQL does not have any queries running on the table */ @@ -4668,6 +4692,7 @@ row_rename_table_for_mysql( ut_a(old_name != NULL); ut_a(new_name != NULL); + ut_ad(trx->state == TRX_STATE_ACTIVE); if (srv_created_new_raw || srv_force_recovery) { fputs("InnoDB: A new raw disk partition was initialized or\n" @@ -4692,7 +4717,6 @@ row_rename_table_for_mysql( } trx->op_info = "renaming table"; - trx_start_if_not_started_xa(trx); old_is_tmp = row_is_mysql_tmp_table_name(old_name); new_is_tmp = row_is_mysql_tmp_table_name(new_name); @@ -4945,6 +4969,24 @@ row_rename_table_for_mysql( } } + if (dict_table_has_fts_index(table) + && !dict_tables_have_same_db(old_name, new_name)) { + err = fts_rename_aux_tables(table, new_name, trx); + + if (err != DB_SUCCESS && (table->space != 0)) { + char* orig_name = table->name; + + /* If rename fails and table has its own tablespace, + we need to call fts_rename_aux_tables again to + revert the ibd file rename, which is not under the + control of trx. Also notice the parent table name + in cache is not changed yet. */ + table->name = const_cast<char*>(new_name); + fts_rename_aux_tables(table, old_name, trx); + table->name = orig_name; + } + } + end: if (err != DB_SUCCESS) { if (err == DB_DUPLICATE_KEY) { @@ -5003,7 +5045,9 @@ end: an ALTER, not in a RENAME. */ err = dict_load_foreigns( - new_name, FALSE, !old_is_tmp || trx->check_foreigns); + new_name, NULL, + false, !old_is_tmp || trx->check_foreigns, + DICT_ERR_IGNORE_NONE); if (err != DB_SUCCESS) { ut_print_timestamp(stderr); @@ -5052,7 +5096,6 @@ end: } funct_exit: - if (table != NULL) { dict_table_close(table, dict_locked, FALSE); } @@ -5182,6 +5225,7 @@ func_exit: dtuple_get_nth_field(prev_entry, i))) { contains_null = TRUE; + break; } } diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index ee603be453a..1b836c26c25 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -112,28 +112,19 @@ row_purge_reposition_pcur( return(node->found_clust); } -/** Status of row_purge_remove_clust() */ -enum row_purge_status { - ROW_PURGE_DONE, /*!< The row has been removed. */ - ROW_PURGE_FAIL, /*!< The purge was not successful. */ - ROW_PURGE_SUSPEND/*!< Cannot purge now, due to online rebuild. */ -}; - /***********************************************************//** Removes a delete marked clustered index record if possible. -@retval ROW_PURGE_DONE if the row was not found, or it was successfully removed -@retval ROW_PURGE_FAIL if the row was modified after the delete marking -@retval ROW_PURGE_SUSPEND if the row refers to an off-page column and -an online ALTER TABLE (table rebuild) is in progress. 
*/ +@retval true if the row was not found, or it was successfully removed +@retval false if the row was modified after the delete marking */ static __attribute__((nonnull, warn_unused_result)) -enum row_purge_status +bool row_purge_remove_clust_if_poss_low( /*===============================*/ purge_node_t* node, /*!< in/out: row purge node */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { dict_index_t* index; - enum row_purge_status status = ROW_PURGE_DONE; + bool success = true; mtr_t mtr; rec_t* rec; mem_heap_t* heap = NULL; @@ -165,16 +156,9 @@ row_purge_remove_clust_if_poss_low( goto func_exit; } - if (dict_index_get_online_status(index) == ONLINE_INDEX_CREATION - && rec_offs_any_extern(offsets)) { - status = ROW_PURGE_SUSPEND; - goto func_exit; - } - if (mode == BTR_MODIFY_LEAF) { - status = btr_cur_optimistic_delete( - btr_pcur_get_btr_cur(&node->pcur), 0, &mtr) - ? ROW_PURGE_DONE : ROW_PURGE_FAIL; + success = btr_cur_optimistic_delete( + btr_pcur_get_btr_cur(&node->pcur), 0, &mtr); } else { dberr_t err; ut_ad(mode == BTR_MODIFY_TREE); @@ -186,7 +170,7 @@ row_purge_remove_clust_if_poss_low( case DB_SUCCESS: break; case DB_OUT_OF_FILE_SPACE: - status = ROW_PURGE_FAIL; + success = false; break; default: ut_error; @@ -200,43 +184,34 @@ func_exit: btr_pcur_commit_specify_mtr(&node->pcur, &mtr); - return(status); + return(success); } /***********************************************************//** Removes a clustered index record if it has not been modified after the delete marking. @retval true if the row was not found, or it was successfully removed -@retval false the purge needs to be suspended, either because of -running out of file space or because the row refers to an off-page -column and an online ALTER TABLE (table rebuild) is in progress. */ +@retval false the purge needs to be suspended because of running out +of file space. */ static __attribute__((nonnull, warn_unused_result)) bool row_purge_remove_clust_if_poss( /*===========================*/ purge_node_t* node) /*!< in/out: row purge node */ { - switch (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) { - case ROW_PURGE_DONE: + if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) { return(true); - case ROW_PURGE_SUSPEND: - return(false); - case ROW_PURGE_FAIL: - break; } for (ulint n_tries = 0; n_tries < BTR_CUR_RETRY_DELETE_N_TIMES; n_tries++) { - switch (row_purge_remove_clust_if_poss_low( - node, BTR_MODIFY_TREE)) { - case ROW_PURGE_DONE: + if (row_purge_remove_clust_if_poss_low( + node, BTR_MODIFY_TREE)) { return(true); - case ROW_PURGE_SUSPEND: - return(false); - case ROW_PURGE_FAIL: - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); } + + os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); } return(false); @@ -529,9 +504,8 @@ retry: /***********************************************************//** Purges a delete marking of a record. @retval true if the row was not found, or it was successfully removed -@retval false the purge needs to be suspended, either because of -running out of file space or because the row refers to an off-page -column and an online ALTER TABLE (table rebuild) is in progress. */ +@retval false the purge needs to be suspended because of +running out of file space */ static __attribute__((nonnull, warn_unused_result)) bool row_purge_del_mark( @@ -567,10 +541,9 @@ row_purge_del_mark( /***********************************************************//** Purges an update of an existing record. 
Also purges an update of a delete -marked record if that record contained an externally stored field. -@return true if purged, false if skipped */ -static __attribute__((nonnull, warn_unused_result)) -bool +marked record if that record contained an externally stored field. */ +static +void row_purge_upd_exist_or_extern_func( /*===============================*/ #ifdef UNIV_DEBUG @@ -585,20 +558,6 @@ row_purge_upd_exist_or_extern_func( ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - if (dict_index_get_online_status(dict_table_get_first_index( - node->table)) - == ONLINE_INDEX_CREATION) { - for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { - - const upd_field_t* ufield - = upd_get_nth_field(node->update, i); - - if (dfield_is_ext(&ufield->new_val)) { - return(false); - } - } - } - if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { @@ -675,16 +634,7 @@ skip_secondaries: index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); -#ifdef UNIV_DEBUG - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_CREATION: - case ONLINE_INDEX_ABORTED_DROPPED: - ut_ad(0); - case ONLINE_INDEX_COMPLETE: - case ONLINE_INDEX_ABORTED: - break; - } -#endif /* UNIV_DEBUG */ + /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, @@ -714,8 +664,6 @@ skip_secondaries: mtr_commit(&mtr); } } - - return(true); } #ifdef UNIV_DEBUG @@ -771,7 +719,8 @@ row_purge_parse_undo_rec( rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__); - node->table = dict_table_open_on_id(table_id, FALSE, FALSE); + node->table = dict_table_open_on_id( + table_id, FALSE, DICT_TABLE_OP_NORMAL); if (node->table == NULL) { /* The table has been dropped: no need to do purge */ @@ -866,10 +815,7 @@ row_purge_record_func( } /* fall through */ case TRX_UNDO_UPD_EXIST_REC: - purged = row_purge_upd_exist_or_extern(thr, node, undo_rec); - if (!purged) { - break; - } + row_purge_upd_exist_or_extern(thr, node, undo_rec); MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN); break; } diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc index 72e0bf43d77..79cced1c533 100644 --- a/storage/innobase/row/row0quiesce.cc +++ b/storage/innobase/row/row0quiesce.cc @@ -532,10 +532,11 @@ row_quiesce_table_start( ut_a(table->id > 0); - ulint count = 0; - - while (ibuf_contract_in_background(table->id, TRUE) != 0) { - if (!(++count % 20)) { + for (ulint count = 0; + ibuf_contract_in_background(table->id, TRUE) != 0 + && !trx_is_interrupted(trx); + ++count) { + if (!(count % 20)) { ib_logf(IB_LOG_LEVEL_INFO, "Merging change buffer entries for '%s'", table_name); @@ -610,7 +611,7 @@ row_quiesce_table_complete( srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name)); - os_file_delete_if_exists(cfg_name); + os_file_delete_if_exists(innodb_file_data_key, cfg_name); ib_logf(IB_LOG_LEVEL_INFO, "Deleting the meta-data file '%s'", cfg_name); diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index cd98d13082b..690c6e958fe 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. 
Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -57,6 +57,8 @@ Created 12/19/1997 Heikki Tuuri #include "read0read.h" #include "buf0lru.h" #include "ha_prototypes.h" +#include "m_string.h" /* for my_sys.h */ +#include "my_sys.h" /* DEBUG_SYNC_C */ #include "my_compare.h" /* enum icp_result */ @@ -2957,9 +2959,7 @@ row_sel_store_mysql_rec( && dict_index_is_clust(index)) { prebuilt->fts_doc_id = fts_get_doc_id_from_rec( - prebuilt->table, - rec, - prebuilt->heap); + prebuilt->table, rec, NULL); } return(TRUE); @@ -4154,7 +4154,9 @@ wait_table_again: } rec_loop: + DEBUG_SYNC_C("row_search_rec_loop"); if (trx_is_interrupted(trx)) { + btr_pcur_store_position(pcur, &mtr); err = DB_INTERRUPTED; goto normal_return; } @@ -5333,7 +5335,7 @@ row_search_max_autoinc( btr_pcur_open_at_index_side( false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) { + if (!page_is_empty(btr_pcur_get_page(&pcur))) { const rec_t* rec; rec = row_search_autoinc_get_rec(&pcur, &mtr); diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 27881c1f4c3..7b50d8b62ae 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -79,12 +79,11 @@ row_undo_ins_remove_clust_rec( mtr_start(&mtr); - /* This is similar to row_undo_mod_clust(). Even though we - call row_log_table_rollback() elsewhere, the DDL thread may - already have copied this row to the sort buffers or to the new - table. We must log the removal, so that the row will be - correctly purged. However, we can log the removal out of sync - with the B-tree modification. */ + /* This is similar to row_undo_mod_clust(). The DDL thread may + already have copied this row from the log to the new table. + We must log the removal, so that the row will be correctly + purged. However, we can log the removal out of sync with the + B-tree modification. */ online = dict_index_is_online_ddl(index); if (online) { @@ -111,9 +110,7 @@ row_undo_ins_remove_clust_rec( const ulint* offsets = rec_get_offsets( rec, index, NULL, ULINT_UNDEFINED, &heap); row_log_table_delete( - rec, index, offsets, - trx_read_trx_id(row_get_trx_id_offset(index, offsets) - + rec)); + rec, index, offsets, true, node->trx->id); mem_heap_free(heap); } @@ -319,7 +316,8 @@ row_undo_ins_parse_undo_rec( node->rec_type = type; node->update = NULL; - node->table = dict_table_open_on_id(table_id, dict_locked, FALSE); + node->table = dict_table_open_on_id( + table_id, dict_locked, DICT_TABLE_OP_NORMAL); /* Skip the UNDO if we can't find the table or the .ibd file. */ if (UNIV_UNLIKELY(node->table == NULL)) { @@ -441,14 +439,6 @@ row_undo_ins( node->index = dict_table_get_first_index(node->table); ut_ad(dict_index_is_clust(node->index)); - - if (dict_index_is_online_ddl(node->index)) { - /* Note that we are rolling back this transaction, so - that all inserts and updates with this DB_TRX_ID can - be skipped. 
*/ - row_log_table_rollback(node->index, node->trx->id); - } - /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 2fd8a11b35a..efcd63a4d29 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -208,6 +208,36 @@ row_undo_mod_remove_clust_low( return(DB_SUCCESS); } + trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset; + + if (!trx_id_offset) { + mem_heap_t* heap = NULL; + ulint trx_id_col; + const ulint* offsets; + ulint len; + + trx_id_col = dict_index_get_sys_col_pos( + btr_cur_get_index(btr_cur), DATA_TRX_ID); + ut_ad(trx_id_col > 0); + ut_ad(trx_id_col != ULINT_UNDEFINED); + + offsets = rec_get_offsets( + btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur), + NULL, trx_id_col + 1, &heap); + + trx_id_offset = rec_get_nth_field_offs( + offsets, trx_id_col, &len); + ut_ad(len == DATA_TRX_ID_LEN); + mem_heap_free(heap); + } + + if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset) + != node->new_trx_id) { + /* The record must have been purged and then replaced + with a different one. */ + return(DB_SUCCESS); + } + /* We are about to remove an old, delete-marked version of the record that may have been delete-marked by a different transaction than the rolling-back one. */ @@ -323,7 +353,7 @@ row_undo_mod_clust( case TRX_UNDO_UPD_DEL_REC: row_log_table_delete( btr_pcur_get_rec(pcur), index, offsets, - node->trx->id); + true, node->trx->id); break; default: ut_ad(0); @@ -331,6 +361,9 @@ row_undo_mod_clust( } } + ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index) + == node->new_trx_id); + btr_pcur_commit_specify_mtr(pcur, &mtr); if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) { @@ -1044,7 +1077,8 @@ row_undo_mod_parse_undo_rec( &dummy_extern, &undo_no, &table_id); node->rec_type = type; - node->table = dict_table_open_on_id(table_id, dict_locked, FALSE); + node->table = dict_table_open_on_id( + table_id, dict_locked, DICT_TABLE_OP_NORMAL); /* TODO: other fixes associated with DROP TABLE + rollback in the same table by another user */ @@ -1119,14 +1153,6 @@ row_undo_mod( node->index = dict_table_get_first_index(node->table); ut_ad(dict_index_is_clust(node->index)); - - if (dict_index_is_online_ddl(node->index)) { - /* Note that we are rolling back this transaction, so - that all inserts and updates with this DB_TRX_ID can - be skipped. */ - row_log_table_rollback(node->index, node->trx->id); - } - /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index f97c0c3c82b..ccb905b36f4 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2437,6 +2437,10 @@ row_upd_clust_step( } } + ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table, + btr_pcur_get_block(pcur), + page_rec_get_heap_no(rec))); + /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { |
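
The row0mysql.cc hunk above compensates for a failed rename of the FTS auxiliary tables: because the underlying file rename is not covered by the transaction, the code calls fts_rename_aux_tables() a second time in the opposite direction, temporarily pointing the cached table name at the new name so the reverse rename finds the files that were actually moved. A minimal standalone sketch of that compensate-on-failure pattern, with hypothetical stand-ins (rename_aux_tables() and the toy failure condition are not the InnoDB API):

#include <cstdio>
#include <string>

// Stand-in for fts_rename_aux_tables(); name and behaviour are assumptions.
// Toy failure condition: renaming into a name containing '#' fails.
static bool rename_aux_tables(const std::string& from, const std::string& to)
{
    std::printf("renaming FTS aux tables: %s -> %s\n", from.c_str(), to.c_str());
    return to.find('#') == std::string::npos;
}

// Mirrors the revert logic added to row_rename_table_for_mysql(): on failure,
// rename back while the cached table name is temporarily swapped, so the
// reverse rename targets the files that were actually moved.
static bool rename_with_revert(std::string& cached_name, const std::string& new_name)
{
    const std::string old_name = cached_name;

    if (rename_aux_tables(old_name, new_name)) {
        cached_name = new_name;
        return true;
    }

    cached_name = new_name;                 // pretend the cache already changed
    rename_aux_tables(new_name, old_name);  // undo the file-level rename
    cached_name = old_name;                 // restore the cached name
    return false;
}

int main()
{
    std::string name = "db1/t1";
    rename_with_revert(name, "db2/#bad#");
    std::printf("cached name is still: %s\n", name.c_str());
    return 0;
}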
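
In the row0purge.cc hunks, row_purge_remove_clust_if_poss_low() now reports a plain bool, and its caller keeps the two-phase shape: one optimistic BTR_MODIFY_LEAF attempt, then up to BTR_CUR_RETRY_DELETE_N_TIMES pessimistic BTR_MODIFY_TREE attempts with a sleep in between, since after this patch the pessimistic path can only fail for lack of file space. A self-contained sketch of that control flow with assumed stand-ins (try_optimistic_delete, try_pessimistic_delete, MAX_RETRIES and RETRY_SLEEP_MS are illustrative names and values, not the InnoDB ones):

#include <chrono>
#include <cstdio>
#include <thread>

// Stand-ins for btr_cur_optimistic_delete()/btr_cur_pessimistic_delete();
// the bodies only simulate one failing leaf delete and a tree delete that
// succeeds on its third attempt.
static int  attempts = 0;
static bool try_optimistic_delete()  { return false; }
static bool try_pessimistic_delete() { return ++attempts >= 3; }

static const int MAX_RETRIES    = 100; // plays the role of BTR_CUR_RETRY_DELETE_N_TIMES
static const int RETRY_SLEEP_MS = 50;  // plays the role of BTR_CUR_RETRY_SLEEP_TIME

// Mirrors the post-patch row_purge_remove_clust_if_poss() control flow:
// true = record gone (or never found), false = give up for now and let the
// purge be retried later.
static bool remove_if_possible()
{
    if (try_optimistic_delete()) {
        return true;
    }

    for (int n_tries = 0; n_tries < MAX_RETRIES; n_tries++) {
        if (try_pessimistic_delete()) {
            return true;
        }
        // The only retried failure is running out of file space, so wait a
        // moment before the next tree-modifying attempt.
        std::this_thread::sleep_for(std::chrono::milliseconds(RETRY_SLEEP_MS));
    }

    return false;
}

int main()
{
    std::printf("removed: %s after %d tree attempts\n",
                remove_if_possible() ? "yes" : "no", attempts);
    return 0;
}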
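
The row0quiesce.cc change rewrites the change-buffer merge as a loop that also stops when the transaction is interrupted, and prints a progress line on every 20th pass, including pass 0. A small sketch of the same interruptible-progress pattern; merge_some_entries() and is_interrupted() are hypothetical stand-ins for ibuf_contract_in_background() and trx_is_interrupted():

#include <atomic>
#include <cstdio>

// Stand-ins; neither the names nor the signatures are the InnoDB ones.
static int work_left = 65;
static int merge_some_entries() { return work_left > 0 ? work_left-- : 0; }

static std::atomic<bool> interrupted{false};
static bool is_interrupted() { return interrupted.load(); }

int main()
{
    // Keep merging until nothing is left or the caller is interrupted,
    // logging on iterations 0, 20, 40, ...
    for (unsigned long count = 0;
         merge_some_entries() != 0 && !is_interrupted();
         ++count) {
        if (!(count % 20)) {
            std::printf("Merging change buffer entries for 't1' (pass %lu)\n",
                        count);
        }
    }
    std::printf("done\n");
    return 0;
}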
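
The new block in row_undo_mod_remove_clust_low() (row0umod.cc above) locates DB_TRX_ID either at the index's fixed trx_id_offset or through rec_get_offsets(), and returns early when the id stored in the record no longer matches node->new_trx_id, i.e. the old version was already purged and the slot reused by another record. Below is a minimal sketch of just that comparison step, assuming the usual 6-byte big-endian DB_TRX_ID encoding; read_trx_id() is a stand-in for trx_read_trx_id() and the record layout is invented for the example:

#include <cstdint>
#include <cstdio>

static const int TRX_ID_LEN = 6;  // DB_TRX_ID is stored in 6 bytes, big-endian

// Stand-in for trx_read_trx_id(): decode a 48-bit big-endian id at rec + offset.
static uint64_t read_trx_id(const unsigned char* rec, unsigned long trx_id_offset)
{
    uint64_t id = 0;
    for (int i = 0; i < TRX_ID_LEN; i++) {
        id = (id << 8) | rec[trx_id_offset + i];
    }
    return id;
}

int main()
{
    // A fake clustered index record: two key bytes, then DB_TRX_ID = 0x2A.
    const unsigned char rec[] = { 0x11, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A };
    const unsigned long trx_id_offset = 2;   // would come from the index or rec_get_offsets()
    const uint64_t      new_trx_id    = 0x2A;

    if (read_trx_id(rec, trx_id_offset) != new_trx_id) {
        std::printf("record was purged and replaced; nothing to roll back\n");
    } else {
        std::printf("record still carries our DB_TRX_ID; proceed with the removal\n");
    }
    return 0;
}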