diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-05-07 17:33:33 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-05-07 17:33:33 +0200 |
commit | a2807e41e8fcac00711cf4465e910327bfd69fe2 (patch) | |
tree | c94b0a32226b09e8675f8b9b559a610c554cda6b /storage/xtradb/row | |
parent | 8ee9d19607d84aeebf97b704a19453f6a772299b (diff) | |
parent | 6cb3146af896eb7d27aed6815428008f105e8ae8 (diff) | |
download | mariadb-git-a2807e41e8fcac00711cf4465e910327bfd69fe2.tar.gz |
xtradb 5.6.17-65.0
Diffstat (limited to 'storage/xtradb/row')
-rw-r--r-- | storage/xtradb/row/row0ftsort.cc | 21 | ||||
-rw-r--r-- | storage/xtradb/row/row0ins.cc | 6 | ||||
-rw-r--r-- | storage/xtradb/row/row0log.cc | 373 | ||||
-rw-r--r-- | storage/xtradb/row/row0quiesce.cc | 10 | ||||
-rw-r--r-- | storage/xtradb/row/row0uins.cc | 5 | ||||
-rw-r--r-- | storage/xtradb/row/row0umod.cc | 15 | ||||
-rw-r--r-- | storage/xtradb/row/row0upd.cc | 51 | ||||
-rw-r--r-- | storage/xtradb/row/row0vers.cc | 43 |
8 files changed, 354 insertions, 170 deletions
diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index be62aa34a07..54f6f7bcc0f 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -872,7 +872,9 @@ func_exit: mutex_exit(&psort_info->mutex); if (UT_LIST_GET_LEN(psort_info->fts_doc_list) > 0) { - ut_ad(error != DB_SUCCESS); + /* child can exit either with error or told by parent. */ + ut_ad(error != DB_SUCCESS + || psort_info->state == FTS_PARENT_EXITING); } /* Free fts doc list in case of error. */ @@ -1194,7 +1196,7 @@ row_fts_sel_tree_propagate( sel_tree[parent] = selected; - return(parent); + return(static_cast<int>(parent)); } /*********************************************************************//** @@ -1214,8 +1216,8 @@ row_fts_sel_tree_update( ulint i; for (i = 1; i <= height; i++) { - propagated = row_fts_sel_tree_propagate( - propagated, sel_tree, mrec, offsets, index); + propagated = static_cast<ulint>(row_fts_sel_tree_propagate( + static_cast<int>(propagated), sel_tree, mrec, offsets, index)); } return(sel_tree[0]); @@ -1239,8 +1241,8 @@ row_fts_build_sel_tree_level( ulint i; ulint num_item; - start = (1 << level) - 1; - num_item = (1 << level); + start = static_cast<ulint>((1 << level) - 1); + num_item = static_cast<ulint>(1 << level); for (i = 0; i < num_item; i++) { child_left = sel_tree[(start + i) * 2 + 1]; @@ -1315,8 +1317,9 @@ row_fts_build_sel_tree( sel_tree[i + start] = i; } - for (i = treelevel - 1; i >=0; i--) { - row_fts_build_sel_tree_level(sel_tree, i, mrec, offsets, index); + for (i = static_cast<int>(treelevel) - 1; i >= 0; i--) { + row_fts_build_sel_tree_level( + sel_tree, static_cast<ulint>(i), mrec, offsets, index); } return(treelevel); @@ -1501,7 +1504,7 @@ row_fts_merge_insert( mrec[i], mrec[min_rec], offsets[i], offsets[min_rec], index, NULL) < 0) { - min_rec = i; + min_rec = static_cast<int>(i); } } } else { diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc index 34e34925b9a..f8ca40fac12 100644 --- a/storage/xtradb/row/row0ins.cc +++ b/storage/xtradb/row/row0ins.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2995,6 +2995,10 @@ row_ins_index_entry( dtuple_t* entry, /*!< in/out: index entry to insert */ que_thr_t* thr) /*!< in: query thread */ { + DBUG_EXECUTE_IF("row_ins_index_entry_timeout", { + DBUG_SET("-d,row_ins_index_entry_timeout"); + return(DB_LOCK_WAIT);}); + if (dict_index_is_clust(index)) { return(row_ins_clust_index_entry(index, entry, thr, 0)); } else { diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 3a01b5ed55a..1240cf7fcc5 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -493,9 +493,8 @@ row_log_table_delete( dict_index_t* index, /*!< in/out: clustered index, S-latched or X-latched */ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ - bool purge, /*!< in: true=purging BLOBs */ - trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before - it was deleted */ + const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should + be logged, or NULL to use those in rec */ { ulint old_pk_extra_size; ulint old_pk_size; @@ -527,22 +526,21 @@ row_log_table_delete( ut_ad(dict_index_is_clust(new_index)); ut_ad(!dict_index_is_online_ddl(new_index)); - /* Create the tuple PRIMARY KEY, DB_TRX_ID in the new_table. */ + /* Create the tuple PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in new_table. */ if (index->online_log->same_pk) { - byte* db_trx_id; dtuple_t* tuple; ut_ad(new_index->n_uniq == index->n_uniq); - /* The PRIMARY KEY and DB_TRX_ID are in the first + /* The PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR are in the first fields of the record. */ heap = mem_heap_create( DATA_TRX_ID_LEN - + DTUPLE_EST_ALLOC(new_index->n_uniq + 1)); - old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 1); + + DTUPLE_EST_ALLOC(new_index->n_uniq + 2)); + old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 2); dict_index_copy_types(tuple, new_index, tuple->n_fields); dtuple_set_n_fields_cmp(tuple, new_index->n_uniq); - for (ulint i = 0; i < new_index->n_uniq; i++) { + for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) { ulint len; const void* field = rec_get_nth_field( rec, offsets, i, &len); @@ -553,42 +551,33 @@ row_log_table_delete( dfield_set_data(dfield, field, len); } - db_trx_id = static_cast<byte*>( - mem_heap_alloc(heap, DATA_TRX_ID_LEN)); - trx_write_trx_id(db_trx_id, trx_id); - - dfield_set_data(dtuple_get_nth_field(tuple, new_index->n_uniq), - db_trx_id, DATA_TRX_ID_LEN); + if (sys) { + dfield_set_data( + dtuple_get_nth_field(tuple, + new_index->n_uniq), + sys, DATA_TRX_ID_LEN); + dfield_set_data( + dtuple_get_nth_field(tuple, + new_index->n_uniq + 1), + sys + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN); + } } else { /* The PRIMARY KEY has changed. Translate the tuple. */ - dfield_t* dfield; - - old_pk = row_log_table_get_pk(rec, index, offsets, &heap); + old_pk = row_log_table_get_pk( + rec, index, offsets, NULL, &heap); if (!old_pk) { ut_ad(index->online_log->error != DB_SUCCESS); + if (heap) { + goto func_exit; + } return; } - - /* Remove DB_ROLL_PTR. */ - ut_ad(dtuple_get_n_fields_cmp(old_pk) - == dict_index_get_n_unique(new_index)); - ut_ad(dtuple_get_n_fields(old_pk) - == dict_index_get_n_unique(new_index) + 2); - const_cast<ulint&>(old_pk->n_fields)--; - - /* Overwrite DB_TRX_ID with the old trx_id. */ - dfield = dtuple_get_nth_field(old_pk, new_index->n_uniq); - ut_ad(dfield_get_type(dfield)->mtype == DATA_SYS); - ut_ad(dfield_get_type(dfield)->prtype - == (DATA_NOT_NULL | DATA_TRX_ID)); - ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN); - dfield_dup(dfield, heap); - trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id); } - ut_ad(dtuple_get_n_fields(old_pk) > 1); ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field( + old_pk, old_pk->n_fields - 2)->len); + ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field( old_pk, old_pk->n_fields - 1)->len); old_pk_size = rec_get_converted_size_temp( new_index, old_pk->fields, old_pk->n_fields, @@ -600,7 +589,7 @@ row_log_table_delete( /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, which store the prefix in the clustered index record. */ - if (purge && rec_offs_any_extern(offsets) + if (rec_offs_any_extern(offsets) && (dict_table_get_format(index->table) >= UNIV_FORMAT_B || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) { @@ -665,6 +654,7 @@ row_log_table_delete( index->online_log, b, mrec_size, avail_size); } +func_exit: mem_heap_free(heap); } @@ -1018,6 +1008,8 @@ row_log_table_get_pk( dict_index_t* index, /*!< in/out: clustered index, S-latched or X-latched */ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ + byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for + row_log_table_delete(), or NULL */ mem_heap_t** heap) /*!< in/out: memory heap where allocated */ { dtuple_t* tuple = NULL; @@ -1036,6 +1028,31 @@ row_log_table_get_pk( if (log->same_pk) { /* The PRIMARY KEY columns are unchanged. */ + if (sys) { + /* Store the DB_TRX_ID,DB_ROLL_PTR. */ + ulint trx_id_offs = index->trx_id_offset; + + if (!trx_id_offs) { + ulint pos = dict_index_get_sys_col_pos( + index, DATA_TRX_ID); + ulint len; + ut_ad(pos > 0); + + if (!offsets) { + offsets = rec_get_offsets( + rec, index, NULL, pos + 1, + heap); + } + + trx_id_offs = rec_get_nth_field_offs( + offsets, pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + } + + memcpy(sys, rec + trx_id_offs, + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + } + return(NULL); } @@ -1145,6 +1162,20 @@ err_exit: const byte* trx_roll = rec + row_get_trx_id_offset(index, offsets); + /* Copy the fields, because the fields will be updated + or the record may be moved somewhere else in the B-tree + as part of the upcoming operation. */ + if (sys) { + memcpy(sys, trx_roll, + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + trx_roll = sys; + } else { + trx_roll = static_cast<const byte*>( + mem_heap_dup( + *heap, trx_roll, + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)); + } + dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq), trx_roll, DATA_TRX_ID_LEN); dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1), @@ -1263,10 +1294,13 @@ row_log_table_apply_convert_mrec( mem_heap_t* heap, /*!< in/out: memory heap */ trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */ dberr_t* error) /*!< out: DB_SUCCESS or + DB_MISSING_HISTORY or reason of failure */ { dtuple_t* row; + *error = DB_SUCCESS; + /* This is based on row_build(). */ if (log->add_cols) { row = dtuple_copy(log->add_cols, heap); @@ -1308,7 +1342,7 @@ row_log_table_apply_convert_mrec( dfield_t* dfield = dtuple_get_nth_field(row, col_no); ulint len; - const byte* data= NULL; + const byte* data; if (rec_offs_nth_extern(offsets, i)) { ut_ad(rec_offs_any_extern(offsets)); @@ -1328,29 +1362,26 @@ row_log_table_apply_convert_mrec( && p->second.is_freed(log->head.total)) { /* This BLOB has been freed. We must not access the row. */ - row = NULL; + *error = DB_MISSING_HISTORY; + dfield_set_data(dfield, data, len); + dfield_set_ext(dfield); + goto blob_done; } } - if (row) { - data = btr_rec_copy_externally_stored_field( - mrec, offsets, - dict_table_zip_size(index->table), - i, &len, heap); - ut_a(data); - } - + data = btr_rec_copy_externally_stored_field( + mrec, offsets, + dict_table_zip_size(index->table), + i, &len, heap); + ut_a(data); + dfield_set_data(dfield, data, len); +blob_done: rw_lock_x_unlock(dict_index_get_lock(index)); - - if (!row) { - goto func_exit; - } } else { data = rec_get_nth_field(mrec, offsets, i, &len); + dfield_set_data(dfield, data, len); } - dfield_set_data(dfield, data, len); - /* See if any columns were changed to NULL or NOT NULL. */ const dict_col_t* new_col = dict_table_get_nth_col(log->table, col_no); @@ -1379,8 +1410,6 @@ row_log_table_apply_convert_mrec( dfield_get_type(dfield))); } -func_exit: - *error = DB_SUCCESS; return(row); } @@ -1479,22 +1508,32 @@ row_log_table_apply_insert( const dtuple_t* row = row_log_table_apply_convert_mrec( mrec, dup->index, offsets, log, heap, trx_id, &error); - ut_ad(error == DB_SUCCESS || !row); - /* Handling of duplicate key error requires storing - of offending key in a record buffer. */ - ut_ad(error != DB_DUPLICATE_KEY); - - if (error != DB_SUCCESS) + switch (error) { + case DB_MISSING_HISTORY: + ut_ad(log->blobs); + /* Because some BLOBs are missing, we know that the + transaction was rolled back later (a rollback of + an insert can free BLOBs). + We can simply skip the insert: the subsequent + ROW_T_DELETE will be ignored, or a ROW_T_UPDATE will + be interpreted as ROW_T_INSERT. */ + return(DB_SUCCESS); + case DB_SUCCESS: + ut_ad(row != NULL); + break; + default: + ut_ad(0); + case DB_INVALID_NULL: + ut_ad(row == NULL); return(error); + } - if (row) { - error = row_log_table_apply_insert_low( - thr, row, trx_id, offsets_heap, heap, dup); - if (error != DB_SUCCESS) { - /* Report the erroneous row using the new - version of the table. */ - innobase_row_to_mysql(dup->table, log->table, row); - } + error = row_log_table_apply_insert_low( + thr, row, trx_id, offsets_heap, heap, dup); + if (error != DB_SUCCESS) { + /* Report the erroneous row using the new + version of the table. */ + innobase_row_to_mysql(dup->table, log->table, row); } return(error); } @@ -1613,10 +1652,11 @@ row_log_table_apply_delete( mem_heap_t* offsets_heap, /*!< in/out: memory heap that can be emptied */ mem_heap_t* heap, /*!< in/out: memory heap */ - dict_table_t* new_table, /*!< in: rebuilt table */ + const row_log_t* log, /*!< in: online log */ const row_ext_t* save_ext) /*!< in: saved external field info, or NULL */ { + dict_table_t* new_table = log->table; dict_index_t* index = dict_table_get_first_index(new_table); dtuple_t* old_pk; mtr_t mtr; @@ -1624,15 +1664,14 @@ row_log_table_apply_delete( ulint* offsets; ut_ad(rec_offs_n_fields(moffsets) - == dict_index_get_n_unique(index) + 1); + == dict_index_get_n_unique(index) + 2); ut_ad(!rec_offs_any_extern(moffsets)); /* Convert the row to a search tuple. */ - old_pk = dtuple_create(heap, index->n_uniq + 1); - dict_index_copy_types(old_pk, index, old_pk->n_fields); - dtuple_set_n_fields_cmp(old_pk, index->n_uniq); + old_pk = dtuple_create(heap, index->n_uniq); + dict_index_copy_types(old_pk, index, index->n_uniq); - for (ulint i = 0; i <= index->n_uniq; i++) { + for (ulint i = 0; i < index->n_uniq; i++) { ulint len; const void* field; field = rec_get_nth_field(mrec, moffsets, i, &len); @@ -1666,6 +1705,10 @@ flag_ok: all_done: mtr_commit(&mtr); /* The record was not found. All done. */ + /* This should only happen when an earlier + ROW_T_INSERT was skipped or + ROW_T_UPDATE was interpreted as ROW_T_DELETE + due to BLOBs having been freed by rollback. */ return(DB_SUCCESS); } @@ -1675,19 +1718,38 @@ all_done: ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets)); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - /* Only remove the record if DB_TRX_ID matches what was - buffered. */ + /* Only remove the record if DB_TRX_ID,DB_ROLL_PTR match. */ { ulint len; - const void* mrec_trx_id + const byte* mrec_trx_id = rec_get_nth_field(mrec, moffsets, trx_id_col, &len); ut_ad(len == DATA_TRX_ID_LEN); - const void* rec_trx_id + const byte* rec_trx_id = rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets, trx_id_col, &len); ut_ad(len == DATA_TRX_ID_LEN); - if (memcmp(mrec_trx_id, rec_trx_id, DATA_TRX_ID_LEN)) { + + ut_ad(rec_get_nth_field(mrec, moffsets, trx_id_col + 1, &len) + == mrec_trx_id + DATA_TRX_ID_LEN); + ut_ad(len == DATA_ROLL_PTR_LEN); + ut_ad(rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets, + trx_id_col + 1, &len) + == rec_trx_id + DATA_TRX_ID_LEN); + ut_ad(len == DATA_ROLL_PTR_LEN); + + if (memcmp(mrec_trx_id, rec_trx_id, + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) { + /* The ROW_T_DELETE was logged for a different + PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR. + This is possible if a ROW_T_INSERT was skipped + or a ROW_T_UPDATE was interpreted as ROW_T_DELETE + because some BLOBs were missing due to + (1) rolling back the initial insert, or + (2) purging the BLOB for a later ROW_T_DELETE + (3) purging 'old values' for a later ROW_T_UPDATE + or ROW_T_DELETE. */ + ut_ad(!log->same_pk); goto all_done; } } @@ -1731,17 +1793,32 @@ row_log_table_apply_update( == dict_index_get_n_unique(index)); ut_ad(dtuple_get_n_fields(old_pk) == dict_index_get_n_unique(index) - + (dup->index->online_log->same_pk ? 0 : 2)); + + (log->same_pk ? 0 : 2)); row = row_log_table_apply_convert_mrec( mrec, dup->index, offsets, log, heap, trx_id, &error); - ut_ad(error == DB_SUCCESS || !row); - /* Handling of duplicate key error requires storing - of offending key in a record buffer. */ - ut_ad(error != DB_DUPLICATE_KEY); - - if (!row) { + switch (error) { + case DB_MISSING_HISTORY: + /* The record contained BLOBs that are now missing. */ + ut_ad(log->blobs); + /* Whether or not we are updating the PRIMARY KEY, we + know that there should be a subsequent + ROW_T_DELETE for rolling back a preceding ROW_T_INSERT, + overriding this ROW_T_UPDATE record. (*1) + + This allows us to interpret this ROW_T_UPDATE + as ROW_T_DELETE. + + When applying the subsequent ROW_T_DELETE, no matching + record will be found. */ + case DB_SUCCESS: + ut_ad(row != NULL); + break; + default: + ut_ad(0); + case DB_INVALID_NULL: + ut_ad(row == NULL); return(error); } @@ -1764,10 +1841,57 @@ row_log_table_apply_update( if (page_rec_is_infimum(btr_pcur_get_rec(&pcur)) || btr_pcur_get_low_match(&pcur) < index->n_uniq) { - ut_ad(0); - error = DB_CORRUPTION; + /* The record was not found. This should only happen + when an earlier ROW_T_INSERT or ROW_T_UPDATE was + diverted because BLOBs were freed when the insert was + later rolled back. */ + + ut_ad(log->blobs); + + if (error == DB_SUCCESS) { + /* An earlier ROW_T_INSERT could have been + skipped because of a missing BLOB, like this: + + BEGIN; + INSERT INTO t SET blob_col='blob value'; + UPDATE t SET blob_col=''; + ROLLBACK; + + This would generate the following records: + ROW_T_INSERT (referring to 'blob value') + ROW_T_UPDATE + ROW_T_UPDATE (referring to 'blob value') + ROW_T_DELETE + [ROLLBACK removes the 'blob value'] + + The ROW_T_INSERT would have been skipped + because of a missing BLOB. Now we are + executing the first ROW_T_UPDATE. + The second ROW_T_UPDATE (for the ROLLBACK) + would be interpreted as ROW_T_DELETE, because + the BLOB would be missing. + + We could probably assume that the transaction + has been rolled back and simply skip the + 'insert' part of this ROW_T_UPDATE record. + However, there might be some complex scenario + that could interfere with such a shortcut. + So, we will insert the row (and risk + introducing a bogus duplicate key error + for the ALTER TABLE), and a subsequent + ROW_T_UPDATE or ROW_T_DELETE will delete it. */ + mtr_commit(&mtr); + error = row_log_table_apply_insert_low( + thr, row, trx_id, offsets_heap, heap, dup); + } else { + /* Some BLOBs are missing, so we are interpreting + this ROW_T_UPDATE as ROW_T_DELETE (see *1). + Because the record was not found, we do nothing. */ + ut_ad(error == DB_MISSING_HISTORY); + error = DB_SUCCESS; func_exit: - mtr_commit(&mtr); + mtr_commit(&mtr); + } func_exit_committed: ut_ad(mtr.state == MTR_COMMITTED); @@ -1780,19 +1904,76 @@ func_exit_committed: return(error); } - /* Update the record. */ + /* Prepare to update (or delete) the record. */ ulint* cur_offsets = rec_get_offsets( btr_pcur_get_rec(&pcur), index, NULL, ULINT_UNDEFINED, &offsets_heap); + if (!log->same_pk) { + /* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what + was buffered. */ + ulint len; + const void* rec_trx_id + = rec_get_nth_field(btr_pcur_get_rec(&pcur), + cur_offsets, index->n_uniq, &len); + ut_ad(len == DATA_TRX_ID_LEN); + ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq)->len + == DATA_TRX_ID_LEN); + ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq + 1)->len + == DATA_ROLL_PTR_LEN); + ut_ad(DATA_TRX_ID_LEN + static_cast<const char*>( + dtuple_get_nth_field(old_pk, + index->n_uniq)->data) + == dtuple_get_nth_field(old_pk, + index->n_uniq + 1)->data); + if (memcmp(rec_trx_id, + dtuple_get_nth_field(old_pk, index->n_uniq)->data, + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) { + /* The ROW_T_UPDATE was logged for a different + DB_TRX_ID,DB_ROLL_PTR. This is possible if an + earlier ROW_T_INSERT or ROW_T_UPDATE was diverted + because some BLOBs were missing due to rolling + back the initial insert or due to purging + the old BLOB values of an update. */ + ut_ad(log->blobs); + if (error != DB_SUCCESS) { + ut_ad(error == DB_MISSING_HISTORY); + /* Some BLOBs are missing, so we are + interpreting this ROW_T_UPDATE as + ROW_T_DELETE (see *1). + Because this is a different row, + we will do nothing. */ + error = DB_SUCCESS; + } else { + /* Because the user record is missing due to + BLOBs that were missing when processing + an earlier log record, we should + interpret the ROW_T_UPDATE as ROW_T_INSERT. + However, there is a different user record + with the same PRIMARY KEY value already. */ + error = DB_DUPLICATE_KEY; + } + + goto func_exit; + } + } + + if (error != DB_SUCCESS) { + ut_ad(error == DB_MISSING_HISTORY); + ut_ad(log->blobs); + /* Some BLOBs are missing, so we are interpreting + this ROW_T_UPDATE as ROW_T_DELETE (see *1). */ + error = row_log_table_apply_delete_low( + &pcur, cur_offsets, NULL, heap, &mtr); + goto func_exit_committed; + } + dtuple_t* entry = row_build_index_entry( row, NULL, index, heap); const upd_t* update = row_upd_build_difference_binary( index, entry, btr_pcur_get_rec(&pcur), cur_offsets, false, NULL, heap); - error = DB_SUCCESS; - if (!update->n_fields) { /* Nothing to do. */ goto func_exit; @@ -1808,7 +1989,7 @@ func_exit_committed: allow purge to free any orphaned externally stored columns. */ - if (pk_updated && dup->index->online_log->same_pk) { + if (pk_updated && log->same_pk) { /* The ROW_T_UPDATE log record should only be written when the PRIMARY KEY fields of the record did not change in the old table. We @@ -2034,7 +2215,7 @@ row_log_table_apply_op( For fixed-length PRIMARY key columns, it is 0. */ mrec += extra_size; - rec_offs_set_n_fields(offsets, new_index->n_uniq + 1); + rec_offs_set_n_fields(offsets, new_index->n_uniq + 2); rec_init_offsets_temp(mrec, new_index, offsets); next_mrec = mrec + rec_offs_data_size(offsets) + ext_size; if (next_mrec > mrec_end) { @@ -2069,7 +2250,7 @@ row_log_table_apply_op( *error = row_log_table_apply_delete( thr, new_trx_id_col, mrec, offsets, offsets_heap, heap, - log->table, ext); + log, ext); break; case ROW_T_UPDATE: diff --git a/storage/xtradb/row/row0quiesce.cc b/storage/xtradb/row/row0quiesce.cc index a59a6088ad6..1d67d5a9717 100644 --- a/storage/xtradb/row/row0quiesce.cc +++ b/storage/xtradb/row/row0quiesce.cc @@ -71,7 +71,7 @@ row_quiesce_write_index_fields( } /* Include the NUL byte in the length. */ - ib_uint32_t len = strlen(field->name) + 1; + ib_uint32_t len = static_cast<ib_uint32_t>(strlen(field->name) + 1); ut_a(len > 1); mach_write_to_4(row, len); @@ -180,7 +180,7 @@ row_quiesce_write_indexes( /* Write the length of the index name. NUL byte is included in the length. */ - ib_uint32_t len = strlen(index->name) + 1; + ib_uint32_t len = static_cast<ib_uint32_t>(strlen(index->name) + 1); ut_a(len > 1); mach_write_to_4(row, len); @@ -267,7 +267,7 @@ row_quiesce_write_table( col_name = dict_table_get_col_name(table, dict_col_get_no(col)); /* Include the NUL byte in the length. */ - len = strlen(col_name) + 1; + len = static_cast<ib_uint32_t>(strlen(col_name) + 1); ut_a(len > 1); mach_write_to_4(row, len); @@ -333,7 +333,7 @@ row_quiesce_write_header( } /* The server hostname includes the NUL byte. */ - len = strlen(hostname) + 1; + len = static_cast<ib_uint32_t>(strlen(hostname) + 1); mach_write_to_4(value, len); DBUG_EXECUTE_IF("ib_export_io_write_failure_5", close(fileno(file));); @@ -351,7 +351,7 @@ row_quiesce_write_header( /* The table name includes the NUL byte. */ ut_a(table->name != 0); - len = strlen(table->name) + 1; + len = static_cast<ib_uint32_t>(strlen(table->name) + 1); /* Write the table name. */ mach_write_to_4(value, len); diff --git a/storage/xtradb/row/row0uins.cc b/storage/xtradb/row/row0uins.cc index 7b50d8b62ae..849bf096492 100644 --- a/storage/xtradb/row/row0uins.cc +++ b/storage/xtradb/row/row0uins.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -109,8 +109,7 @@ row_undo_ins_remove_clust_rec( mem_heap_t* heap = NULL; const ulint* offsets = rec_get_offsets( rec, index, NULL, ULINT_UNDEFINED, &heap); - row_log_table_delete( - rec, index, offsets, true, node->trx->id); + row_log_table_delete(rec, index, offsets, NULL); mem_heap_free(heap); } diff --git a/storage/xtradb/row/row0umod.cc b/storage/xtradb/row/row0umod.cc index 3c70c3e662b..29252c7834a 100644 --- a/storage/xtradb/row/row0umod.cc +++ b/storage/xtradb/row/row0umod.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -86,6 +86,8 @@ row_undo_mod_clust_low( before the update, or NULL if the table is not being rebuilt online or the PRIMARY KEY definition does not change */ + byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR + for row_log_table_delete() */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr, /*!< in: mtr; must be committed before latching any further pages */ @@ -115,7 +117,7 @@ row_undo_mod_clust_low( && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) { *rebuilt_old_pk = row_log_table_get_pk( btr_cur_get_rec(btr_cur), - btr_cur_get_index(btr_cur), NULL, &heap); + btr_cur_get_index(btr_cur), NULL, sys, &heap); } else { *rebuilt_old_pk = NULL; } @@ -277,12 +279,13 @@ row_undo_mod_clust( mem_heap_t* offsets_heap = NULL; ulint* offsets = NULL; const dtuple_t* rebuilt_old_pk; + byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]; /* Try optimistic processing of the record, keeping changes within the index page */ err = row_undo_mod_clust_low(node, &offsets, &offsets_heap, - heap, &rebuilt_old_pk, + heap, &rebuilt_old_pk, sys, thr, &mtr, online ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED : BTR_MODIFY_LEAF); @@ -296,7 +299,8 @@ row_undo_mod_clust( mtr_start(&mtr); err = row_undo_mod_clust_low( - node, &offsets, &offsets_heap, heap, &rebuilt_old_pk, + node, &offsets, &offsets_heap, + heap, &rebuilt_old_pk, sys, thr, &mtr, BTR_MODIFY_TREE); ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); } @@ -322,8 +326,7 @@ row_undo_mod_clust( break; case TRX_UNDO_UPD_DEL_REC: row_log_table_delete( - btr_pcur_get_rec(pcur), index, offsets, - true, node->trx->id); + btr_pcur_get_rec(pcur), index, offsets, sys); break; default: ut_ad(0); diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc index 4cf1c604c47..3ead385c2cd 100644 --- a/storage/xtradb/row/row0upd.cc +++ b/storage/xtradb/row/row0upd.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1948,9 +1948,7 @@ row_upd_clust_rec_by_insert_inherit_func( data += len - BTR_EXTERN_FIELD_REF_SIZE; /* The pointer must not be zero. */ ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); - /* The BLOB must be owned. */ - ut_a(!(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); - + data[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG; data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; /* The BTR_EXTERN_INHERITED_FLAG only matters in rollback. Purge will always free the extern fields of @@ -2055,7 +2053,13 @@ err_exit: rec, offsets, entry, node->update); if (change_ownership) { - btr_pcur_store_position(pcur, mtr); + /* The blobs are disowned here, expecting the + insert down below to inherit them. But if the + insert fails, then this disown will be undone + when the operation is rolled back. */ + btr_cur_disown_inherited_fields( + btr_cur_get_page_zip(btr_cur), + rec, index, offsets, node->update, mtr); } } @@ -2081,41 +2085,6 @@ err_exit: ? UPD_NODE_INSERT_BLOB : UPD_NODE_INSERT_CLUSTERED; - if (err == DB_SUCCESS && change_ownership) { - /* Mark the non-updated fields disowned by the old record. */ - - /* NOTE: this transaction has an x-lock on the record - and therefore other transactions cannot modify the - record when we have no latch on the page. In addition, - we assume that other query threads of the same - transaction do not modify the record in the meantime. - Therefore we can assert that the restoration of the - cursor succeeds. */ - - mtr_start(mtr); - - if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr)) { - ut_error; - } - - rec = btr_cur_get_rec(btr_cur); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - - btr_cur_disown_inherited_fields( - btr_cur_get_page_zip(btr_cur), - rec, index, offsets, node->update, mtr); - - /* It is not necessary to call row_log_table for - this, because during online table rebuild, purge will - not free any BLOBs in the table, whether or not they - are owned by the clustered index record. */ - - mtr_commit(mtr); - } - mem_heap_free(heap); return(err); @@ -2158,7 +2127,7 @@ row_upd_clust_rec( if (dict_index_is_online_ddl(index)) { rebuilt_old_pk = row_log_table_get_pk( - btr_cur_get_rec(btr_cur), index, offsets, &heap); + btr_cur_get_rec(btr_cur), index, offsets, NULL, &heap); } /* Try optimistic updating of the record, keeping changes within diff --git a/storage/xtradb/row/row0vers.cc b/storage/xtradb/row/row0vers.cc index bde796831c6..9f1fc13ee09 100644 --- a/storage/xtradb/row/row0vers.cc +++ b/storage/xtradb/row/row0vers.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -73,6 +73,8 @@ row_vers_impl_x_locked_low( ulint* clust_offsets; mem_heap_t* heap; + DBUG_ENTER("row_vers_impl_x_locked_low"); + ut_ad(rec_offs_validate(rec, index, offsets)); heap = mem_heap_create(1024); @@ -92,7 +94,7 @@ row_vers_impl_x_locked_low( trx_sys_get_max_trx_id()); } mem_heap_free(heap); - return(0); + DBUG_RETURN(0); } comp = page_rec_is_comp(rec); @@ -131,17 +133,37 @@ row_vers_impl_x_locked_low( clust_rec, mtr, version, clust_index, clust_offsets, heap, &prev_version); - /* Free version and clust_offsets. */ + /* The oldest visible clustered index version must not be + delete-marked, because we never start a transaction by + inserting a delete-marked record. */ + ut_ad(prev_version + || !rec_get_deleted_flag(version, comp) + || !trx_rw_is_active(trx_id, NULL)); + /* Free version and clust_offsets. */ mem_heap_free(old_heap); if (prev_version == NULL) { - /* clust_rec should be a fresh insert, because - no previous version was found or the transaction - has committed. The caller has to recheck as the - synopsis of this function states, whether trx_id - is active or not. */ + /* We reached the oldest visible version without + finding an older version of clust_rec that would + match the secondary index record. If the secondary + index record is not delete marked, then clust_rec + is considered the correct match of the secondary + index record and hence holds the implicit lock. */ + + if (rec_del) { + /* The secondary index record is del marked. + So, the implicit lock holder of clust_rec + did not modify the secondary index record yet, + and is not holding an implicit lock on it. + + This assumes that whenever a row is inserted + or updated, the leaf page record always is + created with a clear delete-mark flag. + (We never insert a delete-marked record.) */ + trx_id = 0; + } break; } @@ -237,8 +259,11 @@ row_vers_impl_x_locked_low( } } + DBUG_PRINT("info", ("Implicit lock is held by trx:%lu", + static_cast<unsigned long>(trx_id))); + mem_heap_free(heap); - return(trx_id); + DBUG_RETURN(trx_id); } /*****************************************************************//** |