diff options
author | unknown <heikki@donna.mysql.fi> | 2001-08-29 19:42:23 +0300 |
---|---|---|
committer | unknown <heikki@donna.mysql.fi> | 2001-08-29 19:42:23 +0300 |
commit | dd9fa926b390d59a13f4d2a8e2fef6d653372651 (patch) | |
tree | 9d78d40bd578ed463aac576cc0fbd077bbe9cd20 | |
parent | fc6696de0357612a35a968b824cbcd1819af4fa4 (diff) | |
download | mariadb-git-dd9fa926b390d59a13f4d2a8e2fef6d653372651.tar.gz |
trx0roll.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
trx0sys.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
trx0trx.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0mysql.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0purge.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0sel.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0uins.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0umod.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
row0upd.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
srv0srv.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
srv0start.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
sync0arr.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
fil0fil.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
ibuf0ibuf.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
lock0lock.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
os0file.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
btr0btr.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
btr0cur.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
btr0sea.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
buf0buf.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
data0data.c Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
srv0srv.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
trx0sys.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
trx0trx.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
btr0cur.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
buf0buf.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
data0data.h Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
ha_innobase.cc Fix the auto-inc+REPLACE+replication bug, improve InnoDB Monitor prints
sql/ha_innobase.cc:
Fix the auto-inc+REPLACE+replication bug, improve InnoDB Monitor prints
innobase/include/btr0cur.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/include/buf0buf.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/include/data0data.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/include/srv0srv.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/include/trx0sys.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/include/trx0trx.h:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/btr/btr0btr.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/btr/btr0cur.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/btr/btr0sea.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/buf/buf0buf.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/data/data0data.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/fil/fil0fil.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/ibuf/ibuf0ibuf.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/lock/lock0lock.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/os/os0file.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0mysql.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0purge.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0sel.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0uins.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0umod.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/row/row0upd.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/srv/srv0srv.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/srv/srv0start.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/sync/sync0arr.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/trx/trx0roll.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/trx/trx0sys.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
innobase/trx/trx0trx.c:
Fix the primary key update + BLOB bug, improve InnoDB Monitor prints
28 files changed, 1027 insertions, 151 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index af2029bf1e8..6da323867fb 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -1738,8 +1738,8 @@ btr_node_ptr_delete( btr_cur_position(UT_LIST_GET_FIRST(tree->tree_indexes), node_ptr, &cursor); - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, mtr); - + compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, + mtr); ut_a(err == DB_SUCCESS); if (!compressed) { diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 47a67d425cd..7783f618d6d 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -80,6 +80,9 @@ btr_rec_free_updated_extern_fields( X-latched */ rec_t* rec, /* in: record */ upd_t* update, /* in: update vector */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr); /* in: mini-transaction handle which contains an X-latch to record page and to the tree */ @@ -813,7 +816,7 @@ calculate_sizes_again: /* The record is so big that we have to store some fields externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry); + big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); if (big_rec_vec == NULL) { @@ -1021,7 +1024,7 @@ btr_cur_pessimistic_insert( /* The record is so big that we have to store some fields externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry); + big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); if (big_rec_vec == NULL) { @@ -1242,6 +1245,7 @@ btr_cur_update_in_place( rec_t* rec; dulint roll_ptr; trx_t* trx; + ibool was_delete_marked; /* Only clustered index records are updated using this function */ ut_ad((cursor->index)->type & DICT_CLUSTERED); @@ -1270,6 +1274,8 @@ btr_cur_update_in_place( /* FIXME: in a mixed tree, all records may not have enough ordering fields for btr search: */ + + was_delete_marked = rec_get_deleted_flag(rec); 
row_upd_rec_in_place(rec, update); @@ -1279,6 +1285,13 @@ btr_cur_update_in_place( btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, mtr); + if (was_delete_marked && !rec_get_deleted_flag(rec)) { + /* The new updated record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + return(DB_SUCCESS); } @@ -1434,6 +1447,13 @@ btr_cur_optimistic_update( ut_a(rec); /* <- We calculated above the insert would fit */ + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + /* Restore the old explicit lock state on the record */ lock_rec_restore_from_page_infimum(rec, page); @@ -1655,11 +1675,15 @@ btr_cur_pessimistic_update( if (flags & BTR_NO_UNDO_LOG_FLAG) { /* We are in a transaction rollback undoing a row update: we must free possible externally stored fields - which got new values in the update */ + which got new values in the update, if they are not + inherited values. They can be inherited if we have + updated the primary key to another value, and then + update it back again. 
*/ ut_a(big_rec_vec == NULL); - btr_rec_free_updated_extern_fields(index, rec, update, mtr); + btr_rec_free_updated_extern_fields(index, rec, update, + TRUE, mtr); } /* We have to set appropriate extern storage bits in the new @@ -1676,8 +1700,8 @@ btr_cur_pessimistic_update( page_get_free_space_of_empty() / 2) || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) { - big_rec_vec = dtuple_convert_big_rec(index, new_entry); - + big_rec_vec = dtuple_convert_big_rec(index, new_entry, + ext_vect, n_ext_vect); if (big_rec_vec == NULL) { mem_heap_free(heap); @@ -1694,6 +1718,13 @@ btr_cur_pessimistic_update( lock_rec_restore_from_page_infimum(rec, page); rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; @@ -1725,6 +1756,13 @@ btr_cur_pessimistic_update( rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + if (!rec_get_deleted_flag(rec)) { + /* The new inserted record owns its possible externally + stored fields */ + + btr_cur_unmark_extern_fields(rec, mtr); + } + lock_rec_restore_from_page_infimum(rec, page); /* If necessary, restore also the correct lock state for a new, @@ -2183,6 +2221,7 @@ btr_cur_pessimistic_delete( if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit */ + ibool in_rollback,/* in: TRUE if called in rollback */ mtr_t* mtr) /* in: mtr */ { page_t* page; @@ -2218,7 +2257,8 @@ btr_cur_pessimistic_delete( } btr_rec_free_externally_stored_fields(cursor->index, - btr_cur_get_rec(cursor), mtr); + btr_cur_get_rec(cursor), in_rollback, mtr); + if ((page_get_n_recs(page) < 2) && (dict_tree_get_page(btr_cur_get_tree(cursor)) != buf_frame_get_page_no(page))) { @@ -2517,6 +2557,199 @@ btr_estimate_number_of_different_key_vals( /*================== EXTERNAL 
STORAGE OF BIG FIELDS ===================*/ /*********************************************************************** +Sets the ownership bit of an externally stored field in a record. */ +static +void +btr_cur_set_ownership_of_extern_field( +/*==================================*/ + rec_t* rec, /* in: clustered index record */ + ulint i, /* in: field number */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ +{ + byte* data; + ulint local_len; + ulint byte_val; + + data = rec_get_nth_field(rec, i, &local_len); + + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + local_len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN); + + if (val) { + byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); + } else { + byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; + } + + mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, + MLOG_1BYTE, mtr); +} + +/*********************************************************************** +Marks not updated extern fields as not-owned by this record. The ownership +is transferred to the updated record which is inserted elsewhere in the +index tree. In purge only the owner of externally stored field is allowed +to free the field. 
*/ + +void +btr_cur_mark_extern_inherited_fields( +/*=================================*/ + rec_t* rec, /* in: record in a clustered index */ + upd_t* update, /* in: update vector */ + mtr_t* mtr) /* in: mtr */ +{ + ibool is_updated; + ulint n; + ulint j; + ulint i; + + n = rec_get_n_fields(rec); + + for (i = 0; i < n; i++) { + if (rec_get_nth_field_extern_bit(rec, i)) { + + /* Check it is not in updated fields */ + is_updated = FALSE; + + if (update) { + for (j = 0; j < upd_get_n_fields(update); + j++) { + if (upd_get_nth_field(update, j) + ->field_no == i) { + is_updated = TRUE; + } + } + } + + if (!is_updated) { + btr_cur_set_ownership_of_extern_field(rec, i, + FALSE, mtr); + } + } + } +} + +/*********************************************************************** +The complement of the previous function: in an update entry may inherit +some externally stored fields from a record. We must mark them as inherited +in entry, so that they are not freed in a rollback. */ + +void +btr_cur_mark_dtuple_inherited_extern( +/*=================================*/ + dtuple_t* entry, /* in: updated entry to be inserted to + clustered index */ + ulint* ext_vec, /* in: array of extern fields in the + original record */ + ulint n_ext_vec, /* in: number of elements in ext_vec */ + upd_t* update) /* in: update vector */ +{ + dfield_t* dfield; + ulint byte_val; + byte* data; + ulint len; + ibool is_updated; + ulint j; + ulint i; + + if (ext_vec == NULL) { + + return; + } + + for (i = 0; i < n_ext_vec; i++) { + + /* Check ext_vec[i] is in updated fields */ + is_updated = FALSE; + + for (j = 0; j < upd_get_n_fields(update); j++) { + if (upd_get_nth_field(update, j)->field_no + == ext_vec[i]) { + is_updated = TRUE; + } + } + + if (!is_updated) { + dfield = dtuple_get_nth_field(entry, ext_vec[i]); + + data = dfield_get_data(dfield); + len = dfield_get_len(dfield); + + len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + len + + BTR_EXTERN_LEN); + + byte_val = byte_val 
| BTR_EXTERN_INHERITED_FLAG; + + mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val); + } + } +} + +/*********************************************************************** +Marks all extern fields in a record as owned by the record. This function +should be called if the delete mark of a record is removed: a not delete +marked record always owns all its extern fields. */ + +void +btr_cur_unmark_extern_fields( +/*=========================*/ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr) /* in: mtr */ +{ + ulint n; + ulint i; + + n = rec_get_n_fields(rec); + + for (i = 0; i < n; i++) { + if (rec_get_nth_field_extern_bit(rec, i)) { + + btr_cur_set_ownership_of_extern_field(rec, i, + TRUE, mtr); + } + } +} + +/*********************************************************************** +Marks all extern fields in a dtuple as owned by the record. */ + +void +btr_cur_unmark_dtuple_extern_fields( +/*================================*/ + dtuple_t* entry, /* in: clustered index entry */ + ulint* ext_vec, /* in: array of numbers of fields + which have been stored externally */ + ulint n_ext_vec) /* in: number of elements in ext_vec */ +{ + dfield_t* dfield; + ulint byte_val; + byte* data; + ulint len; + ulint i; + + for (i = 0; i < n_ext_vec; i++) { + dfield = dtuple_get_nth_field(entry, ext_vec[i]); + + data = dfield_get_data(dfield); + len = dfield_get_len(dfield); + + len -= BTR_EXTERN_FIELD_REF_SIZE; + + byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN); + + byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); + + mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val); + } +} + +/*********************************************************************** Stores the positions of the fields marked as extern storage in the update vector, and also those fields who are marked as extern storage in rec and not mentioned in updated fields. 
We use this function to remember @@ -2766,7 +2999,9 @@ btr_store_big_rec_extern_fields( /*********************************************************************** Frees the space in an externally stored field to the file space -management. */ +management if the field in data owns the externally stored field; +in a rollback we may have the additional condition that the field must +not be inherited. */ void btr_free_externally_stored_field( @@ -2777,6 +3012,9 @@ btr_free_externally_stored_field( + reference to the externally stored part */ ulint local_len, /* in: length of data */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* local_mtr) /* in: mtr containing the latch to data an an X-latch to the index tree */ @@ -2828,6 +3066,26 @@ btr_free_externally_stored_field( return; } + if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN) + & BTR_EXTERN_OWNER_FLAG) { + /* This field does not own the externally + stored field: do not free! */ + + mtr_commit(&mtr); + + return; + } + + if (do_not_free_inherited + && mach_read_from_1(data + local_len + BTR_EXTERN_LEN) + & BTR_EXTERN_INHERITED_FLAG) { + /* Rollback and inherited field: do not free! 
*/ + + mtr_commit(&mtr); + + return; + } + page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr); buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE); @@ -2872,6 +3130,9 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr) /* in: mini-transaction handle which contains an X-latch to record page and to the index tree */ @@ -2896,7 +3157,8 @@ btr_rec_free_externally_stored_fields( if (rec_get_nth_field_extern_bit(rec, i)) { data = rec_get_nth_field(rec, i, &len); - btr_free_externally_stored_field(index, data, len, mtr); + btr_free_externally_stored_field(index, data, len, + do_not_free_inherited, mtr); } } } @@ -2912,6 +3174,9 @@ btr_rec_free_updated_extern_fields( X-latched */ rec_t* rec, /* in: record */ upd_t* update, /* in: update vector */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr) /* in: mini-transaction handle which contains an X-latch to record page and to the tree */ { @@ -2938,7 +3203,8 @@ btr_rec_free_updated_extern_fields( if (rec_get_nth_field_extern_bit(rec, ufield->field_no)) { data = rec_get_nth_field(rec, ufield->field_no, &len); - btr_free_externally_stored_field(index, data, len, mtr); + btr_free_externally_stored_field(index, data, len, + do_not_free_inherited, mtr); } } } diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index ac4e7c5ba3f..616f8911aba 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -769,6 +769,11 @@ btr_search_guess_on_hash( buf_page_make_young(page); } + /* Increment the page get statistics though we did not really + fix the page: for user info only */ + + buf_pool->n_page_gets++; + return(TRUE); /*-------------------------------------------*/ diff --git a/innobase/buf/buf0buf.c 
b/innobase/buf/buf0buf.c index 3fabe6c6d0e..f485088a5b7 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -349,6 +349,10 @@ buf_pool_create( buf_pool->n_pages_written = 0; buf_pool->n_pages_created = 0; + buf_pool->n_page_gets = 0; + buf_pool->n_page_gets_old = 0; + buf_pool->n_pages_read_old = 0; + /* 2. Initialize flushing fields ---------------------------- */ UT_LIST_INIT(buf_pool->flush_list); @@ -667,6 +671,7 @@ buf_page_get_gen( #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, offset)); #endif + buf_pool->n_page_gets++; loop: mutex_enter_fast(&(buf_pool->mutex)); @@ -846,6 +851,8 @@ buf_page_optimistic_get_func( ut_ad(mtr && guess); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + buf_pool->n_page_gets++; + block = buf_block_align(guess); mutex_enter(&(buf_pool->mutex)); @@ -976,6 +983,8 @@ buf_page_get_known_nowait( ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + buf_pool->n_page_gets++; + block = buf_block_align(guess); mutex_enter(&(buf_pool->mutex)); @@ -1643,6 +1652,18 @@ buf_print_io(void) printf("Pages read %lu, created %lu, written %lu\n", buf_pool->n_pages_read, buf_pool->n_pages_created, buf_pool->n_pages_written); + + if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { + printf("Buffer pool hit rate %lu / 1000\n", + 1000 + - ((1000 * + (buf_pool->n_pages_read - buf_pool->n_pages_read_old)) + / (buf_pool->n_page_gets - buf_pool->n_page_gets_old))); + } + + buf_pool->n_page_gets_old = buf_pool->n_page_gets; + buf_pool->n_pages_read_old = buf_pool->n_pages_read; + mutex_exit(&(buf_pool->mutex)); } diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index 4172fb9c8ce..97db9d72f98 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -395,7 +395,12 @@ dtuple_convert_big_rec( the entry enough, i.e., if there are too many short fields in entry */ dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry */ + dtuple_t* entry, 
/* in: index entry */ + ulint* ext_vec,/* in: array of externally stored fields, + or NULL: if a field already is externally + stored, then we cannot move it to the vector + this function returns */ + ulint n_ext_vec)/* in: number of elements in ext_vec */ { mem_heap_t* heap; big_rec_t* vector; @@ -404,7 +409,9 @@ dtuple_convert_big_rec( ulint n_fields; ulint longest; ulint longest_i; + ibool is_externally_stored; ulint i; + ulint j; size = rec_get_converted_size(entry); @@ -431,9 +438,23 @@ dtuple_convert_big_rec( for (i = dict_index_get_n_unique_in_tree(index); i < dtuple_get_n_fields(entry); i++) { + /* Skip over fields which already are externally + stored */ + + is_externally_stored = FALSE; + + if (ext_vec) { + for (j = 0; j < n_ext_vec; j++) { + if (ext_vec[j] == i) { + is_externally_stored = TRUE; + } + } + } + /* Skip over fields which are ordering in some index */ - if (dict_field_get_col( + if (!is_externally_stored && + dict_field_get_col( dict_index_get_nth_field(index, i)) ->ord_part == 0) { diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index 5c783627721..b386f224d11 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -19,6 +19,7 @@ Created 10/25/1995 Heikki Tuuri #include "log0log.h" #include "log0recv.h" #include "fsp0fsp.h" +#include "srv0srv.h" /* IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM @@ -1152,6 +1153,7 @@ fil_aio_wait( ut_ad(fil_validate()); if (os_aio_use_native_aio) { + srv_io_thread_op_info[segment] = "native aio handle"; #ifdef WIN_ASYNC_IO ret = os_aio_windows_handle(segment, 0, &fil_node, &message, &type); @@ -1161,12 +1163,16 @@ fil_aio_wait( ut_a(0); #endif } else { + srv_io_thread_op_info[segment] = "simulated aio handle"; + ret = os_aio_simulated_handle(segment, (void**) &fil_node, &message, &type); } ut_a(ret); - + + srv_io_thread_op_info[segment] = "complete io for fil node"; + mutex_enter(&(system->mutex)); fil_node_complete_io(fil_node, fil_system, type); @@ -1178,9 +1184,10 @@ fil_aio_wait( /* 
Do the i/o handling */ if (buf_pool_is_block(message)) { - + srv_io_thread_op_info[segment] = "complete io for buf page"; buf_page_io_complete(message); } else { + srv_io_thread_op_info[segment] = "complete io for log"; log_io_complete(message); } } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index fd7b415551f..fa1c630dc08 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -2341,7 +2341,7 @@ ibuf_delete_rec( root = ibuf_tree_root_get(ibuf_data, space, mtr); btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - mtr); + FALSE, mtr); ut_a(err == DB_SUCCESS); #ifdef UNIV_IBUF_DEBUG diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index ffae434a5d9..f56a5662253 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -353,6 +353,7 @@ btr_cur_pessimistic_delete( if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit */ + ibool in_rollback,/* in: TRUE if called in rollback */ mtr_t* mtr); /* in: mtr */ /*************************************************************** Parses a redo log record of updating a record in-place. */ @@ -418,6 +419,52 @@ btr_estimate_number_of_different_key_vals( /* out: estimated number of key values */ dict_index_t* index); /* in: index */ /*********************************************************************** +Marks not updated extern fields as not-owned by this record. The ownership +is transferred to the updated record which is inserted elsewhere in the +index tree. In purge only the owner of externally stored field is allowed +to free the field. 
*/ + +void +btr_cur_mark_extern_inherited_fields( +/*=================================*/ + rec_t* rec, /* in: record in a clustered index */ + upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************************** +The complement of the previous function: in an update entry may inherit +some externally stored fields from a record. We must mark them as inherited +in entry, so that they are not freed in a rollback. */ + +void +btr_cur_mark_dtuple_inherited_extern( +/*=================================*/ + dtuple_t* entry, /* in: updated entry to be inserted to + clustered index */ + ulint* ext_vec, /* in: array of extern fields in the + original record */ + ulint n_ext_vec, /* in: number of elements in ext_vec */ + upd_t* update); /* in: update vector */ +/*********************************************************************** +Marks all extern fields in a record as owned by the record. This function +should be called if the delete mark of a record is removed: a not delete +marked record always owns all its extern fields. */ + +void +btr_cur_unmark_extern_fields( +/*=========================*/ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************************** +Marks all extern fields in a dtuple as owned by the record. */ + +void +btr_cur_unmark_dtuple_extern_fields( +/*================================*/ + dtuple_t* entry, /* in: clustered index entry */ + ulint* ext_vec, /* in: array of numbers of fields + which have been stored externally */ + ulint n_ext_vec); /* in: number of elements in ext_vec */ +/*********************************************************************** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The fields are stored on pages allocated from leaf node file segment of the index tree. 
*/ @@ -435,7 +482,9 @@ btr_store_big_rec_extern_fields( rec and to the tree */ /*********************************************************************** Frees the space in an externally stored field to the file space -management. */ +management if the field in data owns the externally stored field; +in a rollback we may have the additional condition that the field must +not be inherited. */ void btr_free_externally_stored_field( @@ -446,6 +495,9 @@ btr_free_externally_stored_field( + reference to the externally stored part */ ulint local_len, /* in: length of data */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* local_mtr); /* in: mtr containing the latch to data an an X-latch to the index tree */ @@ -458,6 +510,9 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + ibool do_not_free_inherited,/* in: TRUE if called in a + rollback and we do not want to free + inherited fields */ mtr_t* mtr); /* in: mini-transaction handle which contains an X-latch to record page and to the index tree */ @@ -620,10 +675,21 @@ and sleep this many microseconds in between */ on that page */ #define BTR_EXTERN_LEN 12 /* 8 bytes containing the length of the externally - stored part of the BLOB */ + stored part of the BLOB. + The 2 highest bits are + reserved for the flags below. */ /*--------------------------------------*/ #define BTR_EXTERN_FIELD_REF_SIZE 20 +/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte +at lowest address) is set to 1 if this field does not 'own' the externally +stored field; only the owner field is allowed to free the field in purge! +If the 2nd highest bit is 1 then it means that the externally stored field +was inherited from an earlier version of the row. In rollback we are not +allowed to free an inherited external field. 
*/ + +#define BTR_EXTERN_OWNER_FLAG 128 +#define BTR_EXTERN_INHERITED_FLAG 64 extern ulint btr_cur_n_non_sea; diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 8b22561adf8..66071030402 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -771,6 +771,17 @@ struct buf_pool_struct{ ulint n_pages_written;/* number write operations */ ulint n_pages_created;/* number of pages created in the pool with no read */ + ulint n_page_gets; /* number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_page_gets_old;/* n_page_gets when buf_print was + last time called: used to calculate + hit rate */ + ulint n_pages_read_old;/* n_pages_read when buf_print was + last time called */ /* 2. Page flushing algorithm fields */ UT_LIST_BASE_NODE_T(buf_block_t) flush_list; diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index f695e0989a5..c314281d758 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -329,7 +329,12 @@ dtuple_convert_big_rec( the entry enough, i.e., if there are too many short fields in entry */ dict_index_t* index, /* in: index */ - dtuple_t* entry); /* in: index entry */ + dtuple_t* entry, /* in: index entry */ + ulint* ext_vec,/* in: array of externally stored fields, + or NULL: if a field already is externally + stored, then we cannot move it to the vector + this function returns */ + ulint n_ext_vec);/* in: number of elements in ext_vec */ /****************************************************************** Puts back to entry the data stored in vector. 
Note that to ensure the fields in entry can accommodate the data, vector must have been created diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index e635964e5ec..3f014adb76c 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -62,7 +62,15 @@ extern int srv_query_thread_priority; /*-------------------------------------------*/ +extern ulint srv_n_rows_inserted; +extern ulint srv_n_rows_updated; +extern ulint srv_n_rows_deleted; +extern ulint srv_n_rows_read; + extern ibool srv_print_innodb_monitor; +extern ibool srv_print_innodb_lock_monitor; +extern ibool srv_print_innodb_tablespace_monitor; + extern ulint srv_n_spin_wait_rounds; extern ulint srv_spin_wait_delay; extern ibool srv_priority_boost; @@ -105,13 +113,19 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, it from dynamic memory to get it to the same DRAM page as other hotspot semaphores */ #define kernel_mutex (*kernel_mutex_temp) + +#define SRV_MAX_N_IO_THREADS 100 +/* Array of English strings describing the current state of an +i/o handler thread */ +extern char* srv_io_thread_op_info[]; + typedef struct srv_sys_struct srv_sys_t; /* The server system */ extern srv_sys_t* srv_sys; -/* Alternatives for fiel flush option in Unix; see the InnoDB manual about +/* Alternatives for the file flush option in Unix; see the InnoDB manual about what these mean */ #define SRV_UNIX_FDATASYNC 1 #define SRV_UNIX_O_DSYNC 2 diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h index e26f7e19850..0295cd6abff 100644 --- a/innobase/include/trx0sys.h +++ b/innobase/include/trx0sys.h @@ -315,6 +315,9 @@ struct trx_sys_struct{ /* List of active and committed in memory transactions, sorted on trx id, biggest first */ + UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list; + /* List of transactions created + for MySQL */ UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list; /* List of rollback segment objects */ trx_rseg_t* latest_rseg; /* Latest rollback 
segment in the diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index f67ba43162d..fdef041e929 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -130,6 +130,14 @@ void trx_mark_sql_stat_end( /*==================*/ trx_t* trx); /* in: trx handle */ +/************************************************************************** +Marks the latest SQL statement ended but does not start a new transaction +if the trx is not started. */ + +void +trx_mark_sql_stat_end_do_not_start_new( +/*===================================*/ + trx_t* trx); /* in: trx handle */ /************************************************************************ Assigns a read view for a consistent read query. All the consistent reads within the same transaction will get the same read view, which is created @@ -236,6 +244,14 @@ trx_commit_step( /*============*/ /* out: query thread to run next, or NULL */ que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Prints info about a transaction to the standard output. The caller must +own the kernel mutex. 
*/ + +void +trx_print( +/*======*/ + trx_t* trx); /* in: transaction */ /* Signal to a transaction */ @@ -270,6 +286,9 @@ rolling back after a database recovery */ struct trx_struct{ /* All the next fields are protected by the kernel mutex, except the undo logs which are protected by undo_mutex */ + char* op_info; /* English text describing the + current operation, or an empty + string */ ulint type; /* TRX_USER, TRX_PURGE */ ulint conc_state; /* state of the trx from the point of view of concurrency control: @@ -284,6 +303,8 @@ struct trx_struct{ table */ dulint table_id; /* table id if the preceding field is TRUE */ + void* mysql_thd; /* MySQL thread handle corresponding + to this trx, or NULL */ os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated with this transaction object */ ulint n_mysql_tables_in_use; /* number of Innobase tables @@ -302,6 +323,9 @@ struct trx_struct{ of a duplicate key error */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ + UT_LIST_NODE_T(trx_t) + mysql_trx_list; /* list of transactions created for + MySQL */ /*------------------------------*/ mutex_t undo_mutex; /* mutex protecting the fields in this section (down to undo_no_arr), EXCEPT diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index 5f8f538f392..819c559ceb4 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -13,6 +13,7 @@ Created 5/7/1996 Heikki Tuuri #endif #include "usr0sess.h" +#include "trx0purge.h" /* When releasing transaction locks, this specifies how often we release the kernel mutex for a moment to give also others access to it */ @@ -3184,7 +3185,7 @@ lock_table_print( ut_ad(mutex_own(&kernel_mutex)); ut_a(lock_get_type(lock) == LOCK_TABLE); - printf("\nTABLE LOCK table %s trx id %lu %lu", + printf("TABLE LOCK table %s trx id %lu %lu", lock->un_member.tab_lock.table->name, (lock->trx)->id.high, (lock->trx)->id.low); @@ -3220,6 +3221,8 @@ lock_rec_print( ulint page_no; ulint i; ulint count = 0; + 
ulint len; + char buf[200]; mtr_t mtr; ut_ad(mutex_own(&kernel_mutex)); @@ -3228,7 +3231,7 @@ lock_rec_print( space = lock->un_member.rec_lock.space; page_no = lock->un_member.rec_lock.page_no; - printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu", + printf("RECORD LOCKS space id %lu page no %lu n bits %lu", space, page_no, lock_rec_get_n_bits(lock)); printf(" table %s index %s trx id %lu %lu", @@ -3251,10 +3254,10 @@ lock_rec_print( printf(" waiting"); } - printf("\n"); - mtr_start(&mtr); + printf("\n"); + /* If the page is not in the buffer pool, we cannot load it because we have the kernel mutex and ibuf operations would break the latching order */ @@ -3280,12 +3283,14 @@ lock_rec_print( printf("Record lock, heap no %lu ", i); if (page) { - rec_print(page_find_rec_with_heap_no(page, i)); + len = rec_sprintf(buf, 190, + page_find_rec_with_heap_no(page, i)); + buf[len] = '\0'; + printf("%s", buf); } - count++; - printf("\n"); + count++; } if (count >= 3) { @@ -3342,12 +3347,32 @@ lock_print_info(void) ulint nth_lock = 0; ulint i; mtr_t mtr; + + printf( + "Purge done for all trx's with n:o < %lu %lu, undo n:o < %lu %lu\n", + ut_dulint_get_high(purge_sys->purge_trx_no), + ut_dulint_get_low(purge_sys->purge_trx_no), + ut_dulint_get_high(purge_sys->purge_undo_no), + ut_dulint_get_low(purge_sys->purge_undo_no)); - lock_mutex_enter_kernel(); + lock_mutex_enter_kernel(); - printf("LOCK INFO:\n"); - printf("Number of locks in the record hash table %lu\n", + printf("Total number of lock structs in row lock hash table %lu\n", lock_get_n_rec_locks()); + + /* First print info on non-active transactions */ + + trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + + while (trx) { + if (trx->conc_state == TRX_NOT_STARTED) { + printf("---"); + trx_print(trx); + } + + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); + } + loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); @@ -3367,11 +3392,21 @@ loop: } if (nth_lock == 0) { - printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", 
trx->id.high, - trx->id.low); + printf("---"); + trx_print(trx); + + if (trx->read_view) { + printf( + "Trx read view will not see trx with id >= %lu %lu, sees < %lu %lu\n", + ut_dulint_get_high(trx->read_view->low_limit_id), + ut_dulint_get_low(trx->read_view->low_limit_id), + ut_dulint_get_high(trx->read_view->up_limit_id), + ut_dulint_get_low(trx->read_view->up_limit_id)); + } + if (trx->que_state == TRX_QUE_LOCK_WAIT) { printf( - "################# TRX IS WAITING FOR THE LOCK: ###\n"); + "------------------TRX IS WAITING FOR THE LOCK:\n"); if (lock_get_type(trx->wait_lock) == LOCK_REC) { lock_rec_print(trx->wait_lock); @@ -3380,10 +3415,15 @@ loop: } printf( - "##################################################\n"); + "------------------\n"); } } + if (!srv_print_innodb_lock_monitor) { + nth_trx++; + goto loop; + } + i = 0; lock = UT_LIST_GET_FIRST(trx->trx_locks); @@ -3431,9 +3471,9 @@ loop: nth_lock++; - if (nth_lock >= 25) { + if (nth_lock >= 10) { printf( - "25 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n"); + "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n"); nth_trx++; nth_lock = 0; diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index 0525fd7b59a..d4d30f6aabc 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -1577,6 +1577,7 @@ os_aio_windows_handle( void** message2, ulint* type) /* out: OS_FILE_WRITE or ..._READ */ { + ulint orig_seg = segment; os_aio_array_t* array; os_aio_slot_t* slot; ulint n; @@ -1602,10 +1603,14 @@ os_aio_windows_handle( n = array->n_slots / array->n_segments; if (array == os_aio_sync_array) { + srv_io_thread_op_info[orig_seg] = "wait windows aio for 1 page"; + ut_ad(pos < array->n_slots); os_event_wait(array->events[pos]); i = pos; } else { + srv_io_thread_op_info[orig_seg] = + "wait windows aio for n pages"; i = os_event_wait_multiple(n, (array->events) + segment * n); } @@ -1615,6 +1620,7 @@ os_aio_windows_handle( ut_a(slot->reserved); + srv_io_thread_op_info[orig_seg] = "get 
windows aio return value"; ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); *message1 = slot->message1; @@ -1887,6 +1893,8 @@ consecutive_loop: } } + srv_io_thread_op_info[global_segment] = "doing file i/o"; + /* Do the i/o with ordinary, synchronous i/o functions: */ if (slot->type == OS_FILE_WRITE) { ret = os_file_write(slot->name, slot->file, combined_buf, @@ -1897,7 +1905,8 @@ consecutive_loop: } ut_a(ret); - + srv_io_thread_op_info[global_segment] = "file i/o done"; + /* printf("aio: %lu consecutive %lu:th segment, first offs %lu blocks\n", n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE); */ @@ -1953,6 +1962,8 @@ wait_for_io: os_mutex_exit(array->mutex); + srv_io_thread_op_info[global_segment] = "waiting for i/o request"; + os_event_wait(os_aio_segment_wait_events[global_segment]); goto restart; @@ -2023,7 +2034,12 @@ os_aio_print(void) ulint n_reserved; ulint i; - printf("Pending normal aio reads:\n"); + for (i = 0; i < srv_n_file_io_threads; i++) { + printf("I/O thread %lu state: %s\n", i, + srv_io_thread_op_info[i]); + } + + printf("Pending normal aio reads: "); array = os_aio_read_array; loop: @@ -2041,21 +2057,21 @@ loop: if (slot->reserved) { n_reserved++; - printf("Reserved slot, messages %lx %lx\n", + /* printf("Reserved slot, messages %lx %lx\n", (ulint)slot->message1, (ulint)slot->message2); - ut_a(slot->len > 0); + */ ut_a(slot->len > 0); } } ut_a(array->n_reserved == n_reserved); - printf("Total of %lu reserved aio slots\n", n_reserved); + printf("%lu\n", n_reserved); os_mutex_exit(array->mutex); if (array == os_aio_read_array) { - printf("Pending aio writes:\n"); + printf("Pending aio writes: "); array = os_aio_write_array; @@ -2063,21 +2079,21 @@ loop: } if (array == os_aio_write_array) { - printf("Pending insert buffer aio reads:\n"); + printf("Pending insert buffer aio reads: "); array = os_aio_ibuf_array; goto loop; } if (array == os_aio_ibuf_array) { - printf("Pending log writes or reads:\n"); + 
printf("Pending log writes or reads: "); array = os_aio_log_array; goto loop; } if (array == os_aio_log_array) { - printf("Pending synchronous reads or writes:\n"); + printf("Pending synchronous reads or writes: "); array = os_aio_sync_array; goto loop; diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 9bbc45a5c9a..373ee4ac4bd 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -462,6 +462,8 @@ row_insert_for_mysql( ut_ad(trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "inserting"; + if (node == NULL) { row_get_prebuilt_insert_row(prebuilt); node = prebuilt->ins_node; @@ -499,6 +501,8 @@ run_again: goto run_again; } + trx->op_info = ""; + return(err); } @@ -506,12 +510,15 @@ run_again: prebuilt->table->stat_n_rows++; + srv_n_rows_inserted++; + if (prebuilt->table->stat_n_rows == 0) { /* Avoid wrap-over */ prebuilt->table->stat_n_rows--; } row_update_statistics_if_needed(prebuilt); + trx->op_info = ""; return((int) err); } @@ -627,6 +634,8 @@ row_update_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); UT_NOT_USED(mysql_rec); + trx->op_info = "updating or deleting"; + node = prebuilt->upd_node; clust_index = dict_table_get_first_index(table); @@ -700,6 +709,7 @@ run_again: if (err == DB_RECORD_NOT_FOUND) { trx->error_state = DB_SUCCESS; + trx->op_info = ""; return((int) err); } @@ -710,6 +720,8 @@ run_again: goto run_again; } + trx->op_info = ""; + return(err); } @@ -719,10 +731,16 @@ run_again: if (prebuilt->table->stat_n_rows > 0) { prebuilt->table->stat_n_rows--; } - } + + srv_n_rows_deleted++; + } else { + srv_n_rows_updated++; + } row_update_statistics_if_needed(prebuilt); + trx->op_info = ""; + return((int) err); } @@ -798,6 +816,8 @@ row_create_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "creating table"; + /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ @@ 
-825,16 +845,22 @@ row_create_table_for_mysql( trx_general_rollback_for_mysql(trx, FALSE, NULL); if (err == DB_OUT_OF_FILE_SPACE) { - row_drop_table_for_mysql(table->name, trx, TRUE); + row_drop_table_for_mysql(table->name, trx, TRUE); } else { - assert(err == DB_DUPLICATE_KEY); - fprintf(stderr, + assert(err == DB_DUPLICATE_KEY); + fprintf(stderr, "InnoDB: Error: table %s already exists in InnoDB internal\n" "InnoDB: data dictionary. Have you deleted the .frm file\n" "InnoDB: and not used DROP TABLE? Have you used DROP DATABASE\n" - "InnoDB: for InnoDB tables in MySQL version <= 3.23.39?\n" + "InnoDB: for InnoDB tables in MySQL version <= 3.23.42?\n" "InnoDB: See the Restrictions section of the InnoDB manual.\n", table->name); + fprintf(stderr, + "InnoDB: You can drop the orphaned table inside InnoDB by\n" + "InnoDB: creating an InnoDB table with the same name in another\n" + "InnoDB: database and moving the .frm file to the current database.\n" + "InnoDB: Then MySQL thinks the table exists, and DROP TABLE will\n" + "InnoDB: succeed.\n"); } trx->error_state = DB_SUCCESS; @@ -852,11 +878,32 @@ row_create_table_for_mysql( srv_print_innodb_monitor = TRUE; } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = TRUE; + } } mutex_exit(&(dict_sys->mutex)); que_graph_free((que_t*) que_node_get_parent(thr)); - + + trx->op_info = ""; + return((int) err); } @@ -879,6 +926,8 @@ row_create_index_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + trx->op_info = "creating index"; + /* Serialize data dictionary operations with 
dictionary mutex: no deadlocks can occur then in these operations */ @@ -915,6 +964,8 @@ row_create_index_for_mysql( que_graph_free((que_t*) que_node_get_parent(thr)); + trx->op_info = ""; + return((int) err); } @@ -945,7 +996,9 @@ row_drop_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_a(name != NULL); - + + trx->op_info = "dropping table"; + namelen = ut_strlen(name); keywordlen = ut_strlen("innodb_monitor"); @@ -957,6 +1010,26 @@ row_drop_table_for_mysql( stop monitor prints */ srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = FALSE; } /* We use the private SQL parser of Innobase to generate the @@ -1071,6 +1144,8 @@ funct_exit: que_graph_free(graph); + trx->op_info = ""; + return((int) err); } @@ -1099,6 +1174,8 @@ row_rename_table_for_mysql( ut_a(old_name != NULL); ut_a(new_name != NULL); + trx->op_info = "renaming table"; + str1 = "PROCEDURE RENAME_TABLE_PROC () IS\n" "BEGIN\n" @@ -1168,6 +1245,8 @@ funct_exit: que_graph_free(graph); + trx->op_info = ""; + return((int) err); } @@ -1279,6 +1358,8 @@ row_check_table_for_mysql( ulint n_rows; ulint n_rows_in_table; ulint ret = DB_SUCCESS; + + prebuilt->trx->op_info = "checking table"; index = dict_table_get_first_index(table); @@ -1311,5 +1392,7 @@ row_check_table_for_mysql( index = dict_table_get_next_index(index); } + prebuilt->trx->op_info = ""; + return(ret); } diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index ec880d3fe04..43bc166347a 100644 --- 
a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -132,7 +132,7 @@ row_purge_remove_clust_if_poss_low( success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr); if (err == DB_SUCCESS) { success = TRUE; @@ -254,8 +254,8 @@ row_purge_remove_sec_if_poss_low( success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); - + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + FALSE, &mtr); if (err == DB_SUCCESS) { success = TRUE; } else if (err == DB_OUT_OF_FILE_SPACE) { @@ -437,7 +437,7 @@ skip_secondaries: data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, - data_field_len, &mtr); + data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 0ad6b7084e2..d041e34a558 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -2488,6 +2488,8 @@ row_search_for_mysql( printf("N tables locked %lu\n", trx->mysql_n_tables_locked); */ if (direction == 0) { + trx->op_info = "starting index read"; + prebuilt->n_rows_fetched = 0; prebuilt->n_fetch_cached = 0; prebuilt->fetch_cache_first = 0; @@ -2497,6 +2499,8 @@ row_search_for_mysql( row_prebuild_sel_graph(prebuilt); } } else { + trx->op_info = "fetching rows"; + if (prebuilt->n_rows_fetched == 0) { prebuilt->fetch_direction = direction; } @@ -2519,6 +2523,9 @@ row_search_for_mysql( prebuilt->n_rows_fetched++; + srv_n_rows_read++; + trx->op_info = ""; + return(DB_SUCCESS); } @@ -2529,6 +2536,7 @@ row_search_for_mysql( cache, but the cache was not full at the time of the popping: no more rows can exist in the result set */ + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2560,6 +2568,7 @@ row_search_for_mysql( /* printf("%s record not found 1\n", index->name); */ + 
trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2599,6 +2608,9 @@ row_search_for_mysql( /* printf("%s shortcut\n", index->name); */ + srv_n_rows_read++; + + trx->op_info = ""; return(DB_SUCCESS); } else if (shortcut == SEL_EXHAUSTED) { @@ -2607,6 +2619,7 @@ row_search_for_mysql( /* printf("%s record not found 2\n", index->name); */ + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2980,6 +2993,8 @@ lock_wait_or_error: /* printf("Using index %s cnt %lu ret value %lu err\n", index->name, cnt, err); */ + trx->op_info = ""; + return(err); normal_return: @@ -2995,5 +3010,11 @@ normal_return: /* printf("Using index %s cnt %lu ret value %lu\n", index->name, cnt, err); */ + if (ret == DB_SUCCESS) { + srv_n_rows_read++; + } + + trx->op_info = ""; + return(ret); } diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c index 47807877779..27d1fbcb9ba 100644 --- a/innobase/row/row0uins.c +++ b/innobase/row/row0uins.c @@ -89,7 +89,7 @@ retry: &(node->pcur), &mtr); ut_a(success); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -174,7 +174,7 @@ row_undo_ins_remove_sec_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); } btr_pcur_close(&pcur); diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index 0221c51b985..a7c8957d61a 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -179,7 +179,11 @@ row_undo_mod_remove_clust_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr); + /* Note that since this operation is analogous to purge, + we can free also inherited externally stored fields: + hence the last FALSE in the call below */ + + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr); 
/* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -356,7 +360,8 @@ row_undo_mod_del_mark_or_remove_sec_low( } else { ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, &mtr); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, + TRUE, &mtr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -423,22 +428,22 @@ row_undo_mod_del_unmark_sec( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); if (!found) { - err_buf = mem_alloc(1000); - dtuple_sprintf(err_buf, 900, entry); - - fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" - "InnoDB: index %s table %s\n", index->name, - index->table->name); - fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); - fprintf(stderr, "InnoDB: record %s\n", err_buf); + fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); - fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); - fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); + fprintf(stderr, "InnoDB: record %s\n", err_buf); - mem_free(err_buf); + fprintf(stderr, + "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + mem_free(err_buf); } else { btr_cur = btr_pcur_get_btr_cur(&pcur); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 67a5925a3f5..3fa98db3a02 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -840,32 +840,31 @@ row_upd_sec_index_entry( rec = btr_cur_get_rec(btr_cur); if (!found) { + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - err_buf = mem_alloc(1000); - 
dtuple_sprintf(err_buf, 900, entry); + fprintf(stderr, "InnoDB: error in sec index entry update in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); - fprintf(stderr, "InnoDB: error in sec index entry update in\n" - "InnoDB: index %s table %s\n", index->name, - index->table->name); - fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + rec_sprintf(err_buf, 900, rec); + fprintf(stderr, "InnoDB: record %s\n", err_buf); - rec_sprintf(err_buf, 900, rec); - fprintf(stderr, "InnoDB: record %s\n", err_buf); + fprintf(stderr, + "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); - fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); - fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); - - mem_free(err_buf); + mem_free(err_buf); } else { - - /* Delete mark the old index record; it can already be - delete marked if we return after a lock wait in - row_ins_index_entry below */ - - if (!rec_get_deleted_flag(rec)) { - err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, - &mtr); - } + /* Delete mark the old index record; it can already be + delete marked if we return after a lock wait in + row_ins_index_entry below */ + + if (!rec_get_deleted_flag(rec)) { + err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, + thr, &mtr); + } } btr_pcur_close(&pcur); @@ -907,7 +906,7 @@ row_upd_sec_step( || (node->state == UPD_NODE_UPDATE_SOME_SEC)); ut_ad(!(node->index->type & DICT_CLUSTERED)); - if ((node->state == UPD_NODE_UPDATE_ALL_SEC) + if (node->state == UPD_NODE_UPDATE_ALL_SEC || row_upd_changes_ord_field(node->row, node->index, node->update)) { err = row_upd_sec_index_entry(node, thr); @@ -933,15 +932,13 @@ row_upd_clust_rec_by_insert( dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr; gets committed here */ -{ +{ + mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* 
btr_cur; trx_t* trx; dict_table_t* table; - mem_heap_t* heap; dtuple_t* entry; - ulint* ext_vec; - ulint n_ext_vec; ulint err; ut_ad(node); @@ -961,17 +958,20 @@ row_upd_clust_rec_by_insert( return(err); } + /* Mark as not-owned the externally stored fields which the new + row inherits from the delete marked record: purge should not + free those externally stored fields even if the delete marked + record is removed from the index tree, or updated. */ + + btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), + node->update, mtr); } mtr_commit(mtr); node->state = UPD_NODE_INSERT_CLUSTERED; - heap = mem_heap_create(1024); - - ext_vec = mem_heap_alloc(heap, - sizeof(ulint) * dtuple_get_n_fields(node->row)); - n_ext_vec = 0; + heap = mem_heap_create(500); entry = row_build_index_entry(node->row, index, heap); @@ -979,10 +979,23 @@ row_upd_clust_rec_by_insert( row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); + /* If we return from a lock wait, for example, we may have + extern fields marked as not-owned in entry (marked in the + if-branch above). We must unmark them. 
*/ + + btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, + node->n_ext_vec); + /* We must mark non-updated extern fields in entry as inherited, + so that a possible rollback will not free them */ + + btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec, + node->n_ext_vec, + node->update); + err = row_ins_index_entry(index, entry, node->ext_vec, node->n_ext_vec, thr); - mem_heap_free(heap); - + mem_heap_free(heap); + return(err); } diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index 8dd9c9f3feb..ba556e1c050 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -111,7 +111,14 @@ ibool srv_print_buf_io = FALSE; ibool srv_print_log_io = FALSE; ibool srv_print_latch_waits = FALSE; +ulint srv_n_rows_inserted = 0; +ulint srv_n_rows_updated = 0; +ulint srv_n_rows_deleted = 0; +ulint srv_n_rows_read = 0; + ibool srv_print_innodb_monitor = FALSE; +ibool srv_print_innodb_lock_monitor = FALSE; +ibool srv_print_innodb_tablespace_monitor = FALSE; /* The parameters below are obsolete: */ @@ -137,6 +144,11 @@ ulint srv_test_n_reserved_rnds = ULINT_MAX; ulint srv_test_array_size = ULINT_MAX; ulint srv_test_n_mutexes = ULINT_MAX; +/* Array of English strings describing the current state of an +i/o handler thread */ + +char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; + /* IMPLEMENTATION OF THE SERVER MAIN PROGRAM ========================================= @@ -1926,23 +1938,25 @@ loop: } background_loop: - /* In this loop we run background operations while the server + /* In this loop we run background operations when the server is quiet */ current_time = time(NULL); - if (srv_print_innodb_monitor - && difftime(current_time, last_monitor_time) > 8) { + if (difftime(current_time, last_monitor_time) > 15) { + + last_monitor_time = time(NULL); + + if (srv_print_innodb_monitor) { - printf("================================\n"); - last_monitor_time = time(NULL); + printf("=====================================\n"); ut_print_timestamp(stdout); 
printf(" INNODB MONITOR OUTPUT\n" - "================================\n"); - printf("--------------------------\n" - "LOCKS HELD BY TRANSACTIONS\n" - "--------------------------\n"); + "=====================================\n"); + printf("------------\n" + "TRANSACTIONS\n" + "------------\n"); lock_print_info(); printf("-----------------------------------------------\n" "CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n" @@ -1955,11 +1969,40 @@ background_loop: "BUFFER POOL\n" "-----------\n"); buf_print_io(); + printf("--------------\n" + "ROW OPERATIONS\n" + "--------------\n"); + printf( + "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", + srv_n_rows_inserted, + srv_n_rows_updated, + srv_n_rows_deleted, + srv_n_rows_read); + printf("Server activity counter %lu\n", srv_activity_count); printf("----------------------------\n" "END OF INNODB MONITOR OUTPUT\n" "============================\n"); - } + } + + if (srv_print_innodb_tablespace_monitor) { + printf("================================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLESPACE MONITOR OUTPUT\n" + "================================================\n"); + + fsp_print(0); + fprintf(stderr, "Validating tablespace\n"); + fsp_validate(0); + fprintf(stderr, "Validation ok\n"); + printf("---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n"); + } + } + mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); @@ -2009,8 +2052,18 @@ background_loop: } mutex_exit(&kernel_mutex); + if (srv_print_innodb_monitor) { + ut_print_timestamp(stdout); + printf(" InnoDB (main thread) starts buffer pool flush\n"); + } + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + if (srv_print_innodb_monitor) { + ut_print_timestamp(stdout); + printf(" InnoDB flushed %lu pages\n", n_pages_flushed); + } + mutex_enter(&kernel_mutex); if 
(srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); @@ -2038,12 +2091,7 @@ background_loop: /* mem_print_new_info(); */ -/* - fsp_print(0); - fprintf(stderr, "Validating tablespace\n"); - fsp_validate(0); - fprintf(stderr, "Validation ok\n"); -*/ + #ifdef UNIV_SEARCH_PERF_STAT /* btr_search_print_info(); */ #endif diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index b3f5dbb28b5..c4002767226 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -67,8 +67,6 @@ os_file_t files[1000]; mutex_t ios_mutex; ulint ios; -#define SRV_MAX_N_IO_THREADS 1000 - ulint n[SRV_MAX_N_IO_THREADS + 5]; os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; @@ -591,6 +589,11 @@ innobase_start_or_create_for_mysql(void) return((int) err); } + /* Restrict the maximum number of file i/o threads */ + if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { + srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; + } + #if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO)) /* In simulated aio we currently have use only for 4 threads */ diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c index 48d043e1e04..4183f3f1c4c 100644 --- a/innobase/sync/sync0arr.c +++ b/innobase/sync/sync0arr.c @@ -438,22 +438,48 @@ sync_array_cell_print( /*==================*/ sync_cell_t* cell) /* in: sync cell */ { - char* str = NULL; - ulint type; + mutex_t* mutex; + rw_lock_t* rwlock; + char* str = NULL; + ulint type; type = cell->request_type; if (type == SYNC_MUTEX) { str = "MUTEX ENTER"; - } else if (type == RW_LOCK_EX) { - str = "X-LOCK"; - } else if (type == RW_LOCK_SHARED) { - str = "S-LOCK"; + mutex = (mutex_t*)cell->wait_object; + + printf("Mutex created in file %s line %lu", + mutex->cfile_name, mutex->cline); + } else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) { + + if (type == RW_LOCK_EX) { + str = "X-LOCK"; + } else { + str = "S_LOCK"; + } + + rwlock = (rw_lock_t*)cell->wait_object; + + printf("Rw-latch created in file %s line %lu", + 
rwlock->cfile_name, rwlock->cline); + if (rwlock->writer != RW_LOCK_NOT_LOCKED) { + printf(" writer reserved with %lu", rwlock->writer); + } + + if (rwlock->writer == RW_LOCK_EX) { + printf(" reserv. thread id %lu", + (ulint)rwlock->writer_thread); + } + + if (rwlock->reader_count > 0) { + printf(" readers %lu", rwlock->reader_count); + } } else { ut_error; } - printf("%lx waited for by thread %lu op. %s file %s line %lu ", + printf(" at addr %lx waited for by thread %lu op. %s file %s line %lu ", (ulint)cell->wait_object, (ulint)cell->thread, str, cell->file, cell->line); diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index 6b74c0d0d51..2adeb1cf57c 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -98,6 +98,8 @@ trx_rollback_for_mysql( return(DB_SUCCESS); } + + trx->op_info = "rollback"; /* Tell Innobase server that there might be work for utility threads: */ @@ -111,6 +113,8 @@ trx_rollback_for_mysql( srv_active_wake_master_thread(); + trx->op_info = ""; + return(err); } @@ -129,6 +133,8 @@ trx_rollback_last_sql_stat_for_mysql( return(DB_SUCCESS); } + + trx->op_info = "rollback of SQL statement"; /* Tell Innobase server that there might be work for utility threads: */ @@ -144,6 +150,8 @@ trx_rollback_last_sql_stat_for_mysql( srv_active_wake_master_thread(); + trx->op_info = ""; + return(err); } diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index b056975d28a..0b8664013d7 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -510,7 +510,8 @@ trx_sys_init_at_db_start(void) MLOG_8BYTES, &mtr), TRX_SYS_TRX_ID_WRITE_MARGIN), 2 * TRX_SYS_TRX_ID_WRITE_MARGIN); - + + UT_LIST_INIT(trx_sys->mysql_trx_list); trx_lists_init_at_db_start(); if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index 14108c677eb..5d8c57edf34 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -24,6 +24,12 @@ Created 3/26/1996 Heikki Tuuri #include "thr0loc.h" 
#include "btr0sea.h" + +/* Copy of the prototype for innobase_mysql_print_thd: this + copy must be equal to the one in mysql/sql/ha_innobase.cc ! */ +void innobase_mysql_print_thd(void* thd); + + /* Dummy session used currently in MySQL interface */ sess_t* trx_dummy_sess = NULL; @@ -58,11 +64,15 @@ trx_create( trx = mem_alloc(sizeof(trx_t)); + trx->op_info = ""; + trx->type = TRX_USER; trx->conc_state = TRX_NOT_STARTED; trx->dict_operation = FALSE; + trx->mysql_thd = NULL; + trx->n_mysql_tables_in_use = 0; trx->mysql_n_tables_locked = 0; @@ -129,6 +139,8 @@ trx_allocate_for_mysql(void) trx_n_mysql_transactions++; + UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx); + mutex_exit(&kernel_mutex); trx->mysql_thread_id = os_thread_get_curr_id(); @@ -144,11 +156,11 @@ trx_search_latch_release_if_reserved( /*=================================*/ trx_t* trx) /* in: transaction */ { - if (trx->has_search_latch) { - rw_lock_s_unlock(&btr_search_latch); + if (trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - } + trx->has_search_latch = FALSE; + } } /************************************************************************ @@ -209,6 +221,8 @@ trx_free_for_mysql( mutex_enter(&kernel_mutex); + UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); + trx_free(trx); ut_a(trx_n_mysql_transactions > 0); @@ -641,7 +655,7 @@ shortcut: ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); - UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); + UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); } /************************************************************************ @@ -1268,6 +1282,8 @@ trx_commit_for_mysql( sig to the transaction, we must here make sure that trx has been started. 
*/ + trx->op_info = "committing"; + trx_start_if_not_started(trx); mutex_enter(&kernel_mutex); @@ -1276,6 +1292,8 @@ trx_commit_for_mysql( mutex_exit(&kernel_mutex); + trx->op_info = ""; + return(0); } @@ -1295,3 +1313,78 @@ trx_mark_sql_stat_end( mutex_exit(&kernel_mutex); } + +/************************************************************************** +Marks the latest SQL statement ended but does not start a new transaction +if the trx is not started. */ + +void +trx_mark_sql_stat_end_do_not_start_new( +/*===================================*/ + trx_t* trx) /* in: trx handle */ +{ + mutex_enter(&kernel_mutex); + + trx->last_sql_stat_start.least_undo_no = trx->undo_no; + + mutex_exit(&kernel_mutex); +} + +/************************************************************************** +Prints info about a transaction to the standard output. The caller must +own the kernel mutex. */ + +void +trx_print( +/*======*/ + trx_t* trx) /* in: transaction */ +{ + printf("TRANSACTION %lu %lu, OS thread id %lu", + ut_dulint_get_high(trx->id), + ut_dulint_get_low(trx->id), + (ulint)trx->mysql_thread_id); + + if (ut_strlen(trx->op_info) > 0) { + printf(" %s", trx->op_info); + } + + if (trx->type != TRX_USER) { + printf(" purge trx"); + } + + switch (trx->conc_state) { + case TRX_NOT_STARTED: printf(", not started"); break; + case TRX_ACTIVE: printf(", active"); break; + case TRX_COMMITTED_IN_MEMORY: printf(", committed in memory"); + break; + default: printf(" state %lu", trx->conc_state); + } + + switch (trx->que_state) { + case TRX_QUE_RUNNING: printf(", runs or sleeps"); break; + case TRX_QUE_LOCK_WAIT: printf(", lock wait"); break; + case TRX_QUE_ROLLING_BACK: printf(", rolling back"); break; + case TRX_QUE_COMMITTING: printf(", committing"); break; + default: printf(" que state %lu", trx->que_state); + } + + if (0 < UT_LIST_GET_LEN(trx->trx_locks)) { + printf(", has %lu lock struct(s)", + UT_LIST_GET_LEN(trx->trx_locks)); + } + + if (trx->has_search_latch) { + printf(", holds 
adaptive hash latch"); + } + + if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) { + printf(", undo log entries %lu", + ut_dulint_get_low(trx->undo_no)); + } + + printf("\n"); + + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(trx->mysql_thd); + } +} diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc index 061371eb5d4..5f76ec39ce3 100644 --- a/sql/ha_innobase.cc +++ b/sql/ha_innobase.cc @@ -180,6 +180,47 @@ convert_error_code_to_mysql( } } +extern "C" { +/***************************************************************** +Prints info of a THD object (== user session thread) to the +standatd output. NOTE that mysql/innobase/trx/trx0trx.c must contain +the prototype for this function! */ + +void +innobase_mysql_print_thd( +/*=====================*/ + void* input_thd)/* in: pointer to a MySQL THD object */ +{ + THD* thd; + + thd = (THD*) input_thd; + + printf("MySQL thread id %lu, query id %lu", + thd->thread_id, thd->query_id); + if (thd->host) { + printf(" %s", thd->host); + } + + if (thd->ip) { + printf(" %s", thd->ip); + } + + if (thd->user) { + printf(" %s", thd->user); + } + + if (thd->proc_info) { + printf(" %s", thd->proc_info); + } + + if (thd->query) { + printf(" %0.100s", thd->query); + } + + printf("\n"); +} +} + /************************************************************************* Gets the InnoDB transaction handle for a MySQL handler object, creates an InnoDB transaction struct if the corresponding MySQL thread struct still @@ -199,6 +240,8 @@ check_trx_exists( dbug_assert(thd != NULL); trx = trx_allocate_for_mysql(); + trx->mysql_thd = thd; + thd->transaction.all.innobase_tid = trx; /* The execution of a single SQL statement is denoted by @@ -633,7 +676,7 @@ innobase_commit( if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { trx_commit_for_mysql(trx); - trx_mark_sql_stat_end(trx); + trx_mark_sql_stat_end_do_not_start_new(trx); } else { trx_mark_sql_stat_end(trx); } @@ -672,6 +715,7 @@ innobase_rollback( if (trx_handle != 
(void*)&innodb_dummy_stmt_trx_handle) { error = trx_rollback_for_mysql(trx); + trx_mark_sql_stat_end_do_not_start_new(trx); } else { error = trx_rollback_last_sql_stat_for_mysql(trx); trx_mark_sql_stat_end(trx); @@ -1334,8 +1378,15 @@ ha_innobase::write_row( autoincrement field */ auto_inc = table->next_number_field->val_int(); - if (auto_inc == 0) - auto_inc= user_thd->next_insert_id; + + /* In replication and also otherwise the auto-inc column + can be set with SET INSERT_ID. Then we must look at + user_thd->next_insert_id. If it is nonzero and the user + has not supplied a value, we must use it. */ + + if (auto_inc == 0 && user_thd->next_insert_id != 0) { + auto_inc = user_thd->next_insert_id; + } if (auto_inc != 0) { /* This call will calculate the max of the @@ -2221,29 +2272,29 @@ ha_innobase::external_lock( if (trx->n_mysql_tables_in_use == 0) { trx_mark_sql_stat_end(trx); } - thd->transaction.all.innodb_active_trans=1; + thd->transaction.all.innodb_active_trans = 1; trx->n_mysql_tables_in_use++; if (prebuilt->select_lock_type != LOCK_NONE) { - trx->mysql_n_tables_locked++; + trx->mysql_n_tables_locked++; } } else { trx->n_mysql_tables_in_use--; if (trx->n_mysql_tables_in_use == 0) { - trx->mysql_n_tables_locked = 0; + trx->mysql_n_tables_locked = 0; - if (trx->has_search_latch) { + if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } + trx_search_latch_release_if_reserved(trx); + } - if (!(thd->options - & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { - innobase_commit(thd, trx); - } + if (!(thd->options + & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { + innobase_commit(thd, trx); + } } } @@ -2639,6 +2690,10 @@ ha_innobase::records_in_range( DBUG_ENTER("records_in_range"); + if (prebuilt->trx) { + prebuilt->trx->op_info = "estimating range size"; + } + active_index = keynr; key = table->key_info + active_index; @@ -2671,6 +2726,10 @@ ha_innobase::records_in_range( my_free((char*) key_val_buff2, MYF(0)); + if (prebuilt->trx) { + 
prebuilt->trx->op_info = ""; + } + DBUG_RETURN((ha_rows) n_rows); } @@ -2690,10 +2749,15 @@ ha_innobase::estimate_number_of_rows(void) row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; dict_table_t* ib_table; + if (prebuilt->trx) { + prebuilt->trx->op_info = + "estimating upper bound of table size"; + } + DBUG_ENTER("info"); ib_table = prebuilt->table; - + dict_update_statistics(ib_table); data_file_length = ((ulonglong) @@ -2702,6 +2766,10 @@ ha_innobase::estimate_number_of_rows(void) /* The minimum clustered index record size is 20 bytes */ + if (prebuilt->trx) { + prebuilt->trx->op_info = ""; + } + return((ha_rows) (1000 + data_file_length / 20)); } @@ -2740,6 +2808,10 @@ ha_innobase::info( DBUG_ENTER("info"); + if (prebuilt->trx) { + prebuilt->trx->op_info = "calculating table stats"; + } + ib_table = prebuilt->table; if (flag & HA_STATUS_TIME) { @@ -2802,6 +2874,10 @@ ha_innobase::info( trx_get_error_info(prebuilt->trx)); } + if (prebuilt->trx) { + prebuilt->trx->op_info = ""; + } + DBUG_VOID_RETURN; } |